From 4925f364849d024788f90b94d3fd38c5f586451f Mon Sep 17 00:00:00 2001 From: Boris Verkhovskiy Date: Thu, 28 Nov 2024 11:55:36 -0700 Subject: [PATCH 1/3] Ruby single quote strings don't do interpolation --- CHANGES.md | 5 +- src/languages/ruby.js | 96 ++++++++++++++++++++++++----- test/markup/erb/default.expect.txt | 2 +- test/markup/erb/default.txt | 2 +- test/markup/ruby/strings.expect.txt | 8 ++- test/markup/ruby/strings.txt | 8 ++- 6 files changed, 99 insertions(+), 22 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index c0fc84a43e..f3af883c99 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -41,7 +41,8 @@ Core Grammars: - fix(swift) - Fixed syntax highlighting for class func/var declarations [guuido] - fix(yaml) - Fixed wrong escaping behavior in single quoted strings [guuido] - enh(nim) - Add `concept` and `defer` to list of Nim keywords [Jake Leahy] - +- fix(ruby) - Fix non-interpolable Ruby strings [Boris Verkhovskiy][] + New Grammars: - added 3rd party TTCN-3 grammar to SUPPORTED_LANGUAGES [Osmocom][] @@ -85,7 +86,7 @@ CONTRIBUTORS [guuido]: https://github.com/guuido [clsource]: https://github.com/clsource [Jake Leahy]: https://github.com/ire4ever1190 - +[Boris Verkhovskiy]: https://github.com/verhovsky ## Version 11.10.0 diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 91b2cb527d..b50426553d 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -15,8 +15,7 @@ export default function(hljs) { /\b([A-Z]+[a-z0-9]+)+/, // ends in caps /\b([A-Z]+[a-z0-9]+)+[A-Z]+/, - ) - ; + ); const CLASS_NAME_WITH_NAMESPACE_RE = regex.concat(CLASS_NAME_RE, /(::\w+)*/) // very popular ruby built-ins that one might even assume // are actual keywords (despite that not being the case) @@ -124,54 +123,119 @@ export default function(hljs) { }; const STRING = { className: 'string', - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ], + contains: [ hljs.BACKSLASH_ESCAPE ], variants: [ { begin: /'/, end: /'/ }, + { + begin: /%q\(/, + end: /\)/ + }, + { + 
begin: /%q\[/, + end: /\]/ + }, + { + begin: /%q\{/, + end: /\}/ + }, + { + begin: /%q</, + end: />/ + }, + { + begin: /%q\//, + end: /\// + }, + { + begin: /%q%/, + end: /%/ + }, + { + begin: /%q-/, + end: /-/ + }, { begin: /"/, - end: /"/ + end: /"/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /`/, - end: /`/ + end: /`/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?\(/, - end: /\)/ + end: /\)/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?\[/, - end: /\]/ + end: /\]/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?\{/, - end: /\}/ + end: /\}/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?</, - end: />/ + end: />/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?\//, - end: /\// + end: /\//, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?%/, - end: /%/ + end: /%/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?-/, - end: /-/ + end: /-/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { begin: /%[qQwWx]?\|/, - end: /\|/ + end: /\|/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, // in the following expressions, \B in the beginning suppresses recognition of ?-sequences // where ? is the last character of a preceding identifier, as in: `func?4` diff --git a/test/markup/erb/default.expect.txt b/test/markup/erb/default.expect.txt index 2473ac1b61..2de768b933 100644 --- a/test/markup/erb/default.expect.txt +++ b/test/markup/erb/default.expect.txt @@ -6,6 +6,6 @@ <%- available_things = things.select(&:available?) 
-%> <%%- x = 1 + 2 -%%> -<%% value = 'real string #{@value}' %%> +<%% value = "real string #{@value}" %%> <%%= available_things.inspect %%> \ No newline at end of file diff --git a/test/markup/erb/default.txt b/test/markup/erb/default.txt index f7ea6203f5..e173dc16ae 100644 --- a/test/markup/erb/default.txt +++ b/test/markup/erb/default.txt @@ -6,5 +6,5 @@ <%- available_things = things.select(&:available?) -%> <%%- x = 1 + 2 -%%> -<%% value = 'real string #{@value}' %%> +<%% value = "real string #{@value}" %%> <%%= available_things.inspect %%> diff --git a/test/markup/ruby/strings.expect.txt b/test/markup/ruby/strings.expect.txt index 07b8c51197..a608364fe0 100644 --- a/test/markup/ruby/strings.expect.txt +++ b/test/markup/ruby/strings.expect.txt @@ -27,4 +27,10 @@ c = ?\u{00AF09} c = ?\u{0AF09} c = ?\u{AF9} c = ?\u{F9} -c = ?\u{F} \ No newline at end of file +c = ?\u{F} + +# Interpolation +c = 'a#{1}b' #=> "a\#{1}b" +c = "a#{1}b" #=> "a1b" +c = %q(a#{1}b) #=> "a\#{1}b" +c = %Q{a#{1}b} #=> "a1b" diff --git a/test/markup/ruby/strings.txt b/test/markup/ruby/strings.txt index 43d35d656b..a8e9746b6c 100644 --- a/test/markup/ruby/strings.txt +++ b/test/markup/ruby/strings.txt @@ -27,4 +27,10 @@ c = ?\u{00AF09} c = ?\u{0AF09} c = ?\u{AF9} c = ?\u{F9} -c = ?\u{F} \ No newline at end of file +c = ?\u{F} + +# Interpolation +c = 'a#{1}b' #=> "a\#{1}b" +c = "a#{1}b" #=> "a1b" +c = %q(a#{1}b) #=> "a\#{1}b" +c = %Q{a#{1}b} #=> "a1b" From f36f1bb62cd70dd6cf1c1d8403bded83a5096c66 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 29 Nov 2024 10:51:20 -0500 Subject: [PATCH 2/3] better string separation --- src/highlight.js | 1 + src/languages/ruby.js | 103 ++++++++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 45 deletions(-) diff --git a/src/highlight.js b/src/highlight.js index ac44a1e115..e8aa0c60e2 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -1014,6 +1014,7 @@ const HLJS = function(hljs) { hljs.regex = { concat: regex.concat, lookahead: 
regex.lookahead, + escape: regex.escape, either: regex.either, optional: regex.optional, anyNumberOfTimes: regex.anyNumberOfTimes diff --git a/src/languages/ruby.js b/src/languages/ruby.js index b50426553d..9e5127c09e 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -121,7 +121,27 @@ export default function(hljs) { end: /\}/, keywords: RUBY_KEYWORDS }; - const STRING = { + + function string_variants(prefix, delimiters) { + return delimiters.map((d) => { + return { + begin: regex.concat(prefix, regex.escape(d.charAt(0))), + end: regex.escape(d.charAt(1)) + } + }) + } + + const STRING_DELIMITERS = [ + "()", + "[]", + "{}", + "<>", + "\\/\\/", + "%%", + "--" + ]; + + const SINGLE_QUOTED_STRING = { className: 'string', contains: [ hljs.BACKSLASH_ESCAPE ], variants: [ @@ -129,52 +149,38 @@ export default function(hljs) { begin: /'/, end: /'/ }, - { - begin: /%q\(/, - end: /\)/ - }, - { - begin: /%q\[/, - end: /\]/ - }, - { - begin: /%q\{/, - end: /\}/ - }, - { - begin: /%q/ - }, - { - begin: /%q\//, - end: /\// - }, - { - begin: /%q%/, - end: /%/ - }, - { - begin: /%q-/, - end: /-/ - }, + ...string_variants("%q", STRING_DELIMITERS) + ] + } + + const DOUBLE_QUOTED_STRING = { + className: 'string', + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ], + variants: [ { begin: /"/, - end: /"/, - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ] + end: /"/ }, + ...string_variants("%Q", STRING_DELIMITERS) + ] + } + + const OLD_STRINGS_TOO_MANY_VARIANTS = { + className: 'string', + contains: [ hljs.BACKSLASH_ESCAPE ], + variants: [ { begin: /`/, end: /`/, contains: [ - hljs.BACKSLASH_ESCAPE, SUBST ] }, { - begin: /%[qQwWx]?\(/, + begin: /%[wWx]?\(/, end: /\)/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -182,7 +188,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?\[/, + begin: /%[wWx]?\[/, end: /\]/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -190,7 +196,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?\{/, + begin: /%[wWx]?\{/, end: /\}/, 
contains: [ hljs.BACKSLASH_ESCAPE, @@ -198,7 +204,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?</, + begin: /%[wWx]?</, end: />/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -206,7 +212,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?\//, + begin: /%[wWx]?\//, end: /\//, contains: [ hljs.BACKSLASH_ESCAPE, @@ -214,7 +220,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?%/, + begin: /%[wWx]?%/, end: /%/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -222,7 +228,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?-/, + begin: /%[wWx]?-/, end: /-/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -230,7 +236,7 @@ export default function(hljs) { ] }, { - begin: /%[qQwWx]?\|/, + begin: /%[wWx]?\|/, end: /\|/, contains: [ hljs.BACKSLASH_ESCAPE, @@ -267,6 +273,12 @@ export default function(hljs) { ] }; + const STRINGS = [ + SINGLE_QUOTED_STRING, + DOUBLE_QUOTED_STRING, + OLD_STRINGS_TOO_MANY_VARIANTS + ] + // Ruby syntax is underdocumented, but this grammar seems to be accurate // as of version 2.7.2 (confirmed with (irb and `Ripper.sexp(...)`) // https://docs.ruby-lang.org/en/2.7.0/doc/syntax/literals_rdoc.html#label-Numbers @@ -381,7 +393,7 @@ export default function(hljs) { }; const RUBY_DEFAULT_CONTAINS = [ - STRING, + ...STRINGS, CLASS_DEFINITION, INCLUDE_EXTEND, OBJECT_CREATION, @@ -400,7 +412,8 @@ export default function(hljs) { className: 'symbol', begin: ':(?!\\s)', contains: [ - STRING, + // TODO: STRING, STRINGS, double quoted? 
+ OLD_STRINGS_TOO_MANY_VARIANTS, { begin: RUBY_METHOD_RE } ], relevance: 0 From 314bb555cef57e915ac40eb702c3d43e9f3fbc83 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 29 Nov 2024 11:07:11 -0500 Subject: [PATCH 3/3] fixup symbols, split out heredoc --- src/languages/ruby.js | 75 ++++++++++++++++++----------- test/markup/ruby/strings.expect.txt | 6 +++ test/markup/ruby/strings.txt | 6 +++ 3 files changed, 59 insertions(+), 28 deletions(-) diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 9e5127c09e..d4039d24d1 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -168,6 +168,7 @@ export default function(hljs) { ] } + // TODO: continue to break these out into smaller more discrete modes const OLD_STRINGS_TOO_MANY_VARIANTS = { className: 'string', contains: [ hljs.BACKSLASH_ESCAPE ], @@ -251,31 +252,33 @@ export default function(hljs) { { begin: /\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/ }, { begin: /\B\?\\(c|C-)[\x20-\x7e]/ }, { begin: /\B\?\\?\S/ }, - // heredocs - { - // this guard makes sure that we have an entire heredoc and not a false - // positive (auto-detect, etc.) - begin: regex.concat( - /<<[-~]?'?/, - regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/) - ), + ] + }; + + const HEREDOC = { + scope: "string", + // this guard makes sure that we have an entire heredoc and not a false + // positive (auto-detect, etc.) 
+ begin: regex.concat( + /<<[-~]?'?/, + regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/) + ), + contains: [ + hljs.END_SAME_AS_BEGIN({ + begin: /(\w+)/, + end: /(\w+)/, contains: [ - hljs.END_SAME_AS_BEGIN({ - begin: /(\w+)/, - end: /(\w+)/, - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ] - }) + hljs.BACKSLASH_ESCAPE, + SUBST ] - } + }) ] - }; + } const STRINGS = [ SINGLE_QUOTED_STRING, DOUBLE_QUOTED_STRING, + HEREDOC, OLD_STRINGS_TOO_MANY_VARIANTS ] @@ -392,7 +395,33 @@ export default function(hljs) { scope: "title.class" }; + const SYMBOL = { + className: 'symbol', + variants: [ + { + begin: regex.concat(/:/, RUBY_METHOD_RE) + }, + { + begin: /:"/, + end: /"/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] + }, + { + begin: /:'/, + end: /'/, + contains: [ + hljs.BACKSLASH_ESCAPE + ] + } + ], + relevance: 0 + }; + const RUBY_DEFAULT_CONTAINS = [ + SYMBOL, ...STRINGS, CLASS_DEFINITION, INCLUDE_EXTEND, @@ -408,16 +437,6 @@ export default function(hljs) { begin: hljs.UNDERSCORE_IDENT_RE + '(!|\\?)?:', relevance: 0 }, - { - className: 'symbol', - begin: ':(?!\\s)', - contains: [ - // TODO: STRING, STRINGS, double quoted? - OLD_STRINGS_TOO_MANY_VARIANTS, - { begin: RUBY_METHOD_RE } - ], - relevance: 0 - }, NUMBER, { // negative-look forward attempts to prevent false matches like: diff --git a/test/markup/ruby/strings.expect.txt b/test/markup/ruby/strings.expect.txt index a608364fe0..7915798cbd 100644 --- a/test/markup/ruby/strings.expect.txt +++ b/test/markup/ruby/strings.expect.txt @@ -22,6 +22,12 @@ c = ?\c\M-x # me c = ?\c? # delete, ASCII 7Fh (DEL) c = ?\C-? 
# delete, ASCII 7Fh (DEL) +# symbols +c = :booger #=> :booger +c = :"booger" #=> :booger +c = :'booger' #=> :booger +c = :"b#{yum}ger" #=> :burger + # Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F]) c = ?\u{00AF09} c = ?\u{0AF09} diff --git a/test/markup/ruby/strings.txt b/test/markup/ruby/strings.txt index a8e9746b6c..a2c89921fe 100644 --- a/test/markup/ruby/strings.txt +++ b/test/markup/ruby/strings.txt @@ -22,6 +22,12 @@ c = ?\c\M-x # meta control character, where x is an ASCII printable characte c = ?\c? # delete, ASCII 7Fh (DEL) c = ?\C-? # delete, ASCII 7Fh (DEL) +# symbols +c = :booger #=> :booger +c = :"booger" #=> :booger +c = :'booger' #=> :booger +c = :"b#{yum}ger" #=> :burger + # Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F]) c = ?\u{00AF09} c = ?\u{0AF09}