Skip to content

Commit 5fc3f26

Browse files
authored
Rollup merge of #141004 - matthewjasper:unicode-before-expansion, r=davidtwco
Report text_direction_codepoint_in_literal when parsing. The lint is now reported in code that gets removed/modified/duplicated by macro expansion, and spans are more accurate so we don't get ICEs from trying to split a span in the middle of a character. This removes support for lint level attributes for `text_direction_codepoint_in_literal` except at the crate level; I don't think that there's an easy way around this when the lint can be reported on code that's removed by `cfg` or that is only in the input of a macro. Fixes #140281
2 parents 7aba37d + f652067 commit 5fc3f26

File tree

15 files changed

+311
-180
lines changed

15 files changed

+311
-180
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2171,6 +2171,7 @@ dependencies = [
21712171
name = "lint-docs"
21722172
version = "0.1.0"
21732173
dependencies = [
2174+
"rustc-literal-escaper",
21742175
"serde_json",
21752176
"tempfile",
21762177
"walkdir",

compiler/rustc_lint/src/early/diagnostics.rs

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,27 @@ pub fn decorate_builtin_lint(
187187
lints::ReservedMultihash { suggestion }.decorate_lint(diag);
188188
}
189189
}
190+
BuiltinLintDiag::HiddenUnicodeCodepoints {
191+
label,
192+
count,
193+
span_label,
194+
labels,
195+
escape,
196+
spans,
197+
} => {
198+
lints::HiddenUnicodeCodepointsDiag {
199+
label: &label,
200+
count,
201+
span_label,
202+
labels: labels.map(|spans| lints::HiddenUnicodeCodepointsDiagLabels { spans }),
203+
sub: if escape {
204+
lints::HiddenUnicodeCodepointsDiagSub::Escape { spans }
205+
} else {
206+
lints::HiddenUnicodeCodepointsDiagSub::NoEscape { spans }
207+
},
208+
}
209+
.decorate_lint(diag);
210+
}
190211
BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
191212
lints::UnusedBuiltinAttribute { invoc_span, attr_name, macro_name }.decorate_lint(diag);
192213
}

compiler/rustc_lint/src/hidden_unicode_codepoints.rs

Lines changed: 0 additions & 136 deletions
This file was deleted.

compiler/rustc_lint/src/lib.rs

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@ mod errors;
4848
mod expect;
4949
mod for_loops_over_fallibles;
5050
mod foreign_modules;
51-
pub mod hidden_unicode_codepoints;
5251
mod if_let_rescope;
5352
mod impl_trait_overcaptures;
5453
mod internal;
@@ -92,7 +91,6 @@ use deref_into_dyn_supertrait::*;
9291
use drop_forget_useless::*;
9392
use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
9493
use for_loops_over_fallibles::*;
95-
use hidden_unicode_codepoints::*;
9694
use if_let_rescope::IfLetRescope;
9795
use impl_trait_overcaptures::ImplTraitOvercaptures;
9896
use internal::*;
@@ -177,7 +175,6 @@ early_lint_methods!(
177175
DeprecatedAttr: DeprecatedAttr::default(),
178176
WhileTrue: WhileTrue,
179177
NonAsciiIdents: NonAsciiIdents,
180-
HiddenUnicodeCodepoints: HiddenUnicodeCodepoints,
181178
IncompleteInternalFeatures: IncompleteInternalFeatures,
182179
RedundantSemicolons: RedundantSemicolons,
183180
UnusedDocComment: UnusedDocComment,

compiler/rustc_lint_defs/src/builtin.rs

Lines changed: 39 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ declare_lint_pass! {
103103
TAIL_EXPR_DROP_ORDER,
104104
TEST_UNSTABLE_LINT,
105105
TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
106+
TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
106107
TRIVIAL_CASTS,
107108
TRIVIAL_NUMERIC_CASTS,
108109
TYVAR_BEHIND_RAW_POINTER,
@@ -3782,7 +3783,6 @@ declare_lint! {
37823783
}
37833784

37843785
declare_lint! {
3785-
#[allow(text_direction_codepoint_in_literal)]
37863786
/// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that
37873787
/// change the visual representation of text on screen in a way that does not correspond to
37883788
/// their on memory representation.
@@ -3792,7 +3792,7 @@ declare_lint! {
37923792
/// ```rust,compile_fail
37933793
/// #![deny(text_direction_codepoint_in_comment)]
37943794
/// fn main() {
3795-
/// println!("{:?}"); // '');
3795+
#[doc = " println!(\"{:?}\"); // '\u{202E}');"]
37963796
/// }
37973797
/// ```
37983798
///
@@ -3807,7 +3807,43 @@ declare_lint! {
38073807
/// their use.
38083808
pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
38093809
Deny,
3810-
"invisible directionality-changing codepoints in comment"
3810+
"invisible directionality-changing codepoints in comment",
3811+
crate_level_only
3812+
}
3813+
3814+
declare_lint! {
3815+
/// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the
3816+
/// visual representation of text on screen in a way that does not correspond to their on
3817+
/// memory representation.
3818+
///
3819+
/// ### Explanation
3820+
///
3821+
/// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`,
3822+
/// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change
3823+
/// its direction on software that supports these codepoints. This makes the text "abc" display
3824+
/// as "cba" on screen. By leveraging software that supports these, people can write specially
3825+
/// crafted literals that make the surrounding code seem like it's performing one action, when
3826+
/// in reality it is performing another. Because of this, we proactively lint against their
3827+
/// presence to avoid surprises.
3828+
///
3829+
/// ### Example
3830+
///
3831+
/// ```rust,compile_fail
3832+
/// #![deny(text_direction_codepoint_in_literal)]
3833+
/// fn main() {
3834+
// ` - convince tidy that backticks match
3835+
#[doc = " println!(\"{:?}\", '\u{202E}');"]
3836+
// `
3837+
/// }
3838+
/// ```
3839+
///
3840+
/// {{produces}}
3841+
///
3842+
pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL,
3843+
Deny,
3844+
"detect special Unicode codepoints that affect the visual representation of text on screen, \
3845+
changing the direction in which text flows",
3846+
crate_level_only
38113847
}
38123848

38133849
declare_lint! {

compiler/rustc_lint_defs/src/lib.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -698,6 +698,14 @@ pub enum BuiltinLintDiag {
698698
is_string: bool,
699699
suggestion: Span,
700700
},
701+
HiddenUnicodeCodepoints {
702+
label: String,
703+
count: usize,
704+
span_label: Span,
705+
labels: Option<Vec<(char, Span)>>,
706+
escape: bool,
707+
spans: Vec<(char, Span)>,
708+
},
701709
TrailingMacro(bool, Ident),
702710
BreakWithLabelAndLoop(Span),
703711
UnicodeTextFlow(Span, String),

0 commit comments

Comments
 (0)