Commit 5d2c47e

Revert "Refactor RDoc::Markup::Parser#tokenize"
This reverts commit 41ceae9.
1 parent: 5e8443d

2 files changed: 117 additions and 118 deletions

lib/rdoc/markup/parser.rb

Lines changed: 106 additions & 108 deletions
@@ -272,11 +272,44 @@ def build_verbatim margin
       end

       case type
+      when :HEADER then
+        line << '=' * data
+        _, _, peek_column, = peek_token
+        peek_column ||= column + data
+        indent = peek_column - column - data
+        line << ' ' * indent
+      when :RULE then
+        width = 2 + data
+        line << '-' * width
+        _, _, peek_column, = peek_token
+        peek_column ||= column + width
+        indent = peek_column - column - width
+        line << ' ' * indent
       when :BREAK, :TEXT then
         line << data
-      else
-        raise TypeError, "unexpected token under verbatim: #{type}"
+      when :BLOCKQUOTE then
+        line << '>>>'
+        peek_type, _, peek_column = peek_token
+        if peek_type != :NEWLINE and peek_column
+          line << ' ' * (peek_column - column - 3)
+        end
+      else # *LIST_TOKENS
+        list_marker = case type
+                      when :BULLET then data
+                      when :LABEL then "[#{data}]"
+                      when :NOTE then "#{data}::"
+                      else # :LALPHA, :NUMBER, :UALPHA
+                        "#{data}."
+                      end
+        line << list_marker
+        peek_type, _, peek_column = peek_token
+        unless peek_type == :NEWLINE then
+          peek_column ||= column + list_marker.length
+          indent = peek_column - column - list_marker.length
+          line << ' ' * indent
+        end
       end
+
     end

     verbatim << line << "\n" unless line.empty?
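
The branches restored in the hunk above re-serialize non-text tokens back into literal text when they appear inside a verbatim block. As a rough standalone sketch of the :HEADER arithmetic (the helper name and its arguments below are invented for illustration; in the diff the values come from the current token and peek_token):

    # Hypothetical helper, not part of RDoc: rebuild the literal text for a
    # [:HEADER, level, column, line] token found inside a verbatim block,
    # using the same arithmetic as the restored :HEADER branch.
    def render_verbatim_header(level, column, peek_column)
      peek_column ||= column + level         # same fallback as in the diff
      indent = peek_column - column - level  # spacing up to the next token
      ('=' * level) << (' ' * indent)
    end

    render_verbatim_header(3, 3, 7)  # => "=== " (a level-3 header plus one space)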
@@ -448,37 +481,11 @@ def skip token_type, error = true
   ##
   # Turns text +input+ into a stream of tokens

-  def tokenize(input)
+  def tokenize input
     setup_scanner input
-    margin = @s.pos[0]
-    tokenize_indented(margin)
-    tokenize_input(margin)
-  end
-
-  def newline!(pos = nil)
-    if pos or (@s.scan(/ *(?=\r?\n)/) and pos = @s.pos and @s.scan(/\r?\n/))
-      @tokens << [:NEWLINE, @s.matched, *pos]
-      @s.newline!
-    end
-  end

-  def tokenize_indented(column)
-    indent = / {#{column+1},}(?=\S)| *(?=\r?\n)/
-    while @s.scan(indent)
+    until @s.eos? do
       pos = @s.pos
-      if @s.scan(/(.+)(?=\r?\n)?/)
-        @tokens << [:TEXT, @s.matched, *pos]
-      end
-      newline! or break
-    end
-  end
-
-  def tokenize_input(margin)
-    column = 0
-
-    until @s.eos?
-      pos = @s.pos
-      break if pos[0] < (margin ||= pos[0])

       # leading spaces will be reflected by the column of the next token
       # the only thing we loose are trailing spaces at the end of the file
@@ -487,84 +494,75 @@ def tokenize_input(margin)
       # note: after BULLET, LABEL, etc.,
       # indent will be the column of the next non-newline token

-      case
-      # [CR]LF => :NEWLINE
-      when @s.scan(/\r?\n/)
-        newline!(pos)
-        next
-
-      # === text => :HEADER then :TEXT
-      when @s.scan(/(=+)(\s*)/)
-        level = @s[1].length
-        header = [:HEADER, level, *pos]
-
-        if @s[2] =~ /^\r?\n/
-          @s.unscan(@s[2])
-          @tokens << header
-        else
-          pos = @s.pos
-          @s.scan(/.*/)
-          @tokens << header
-          @tokens << [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
-        end
-
-      # --- (at least 3) and nothing else on the line => :RULE
-      when @s.scan(/(-{3,}) *\r?$/)
-        @tokens << [:RULE, @s[1].length - 2, *pos]
-
-      # * or - followed by white space and text => :BULLET
-      when @s.scan(/([*-]) +(?=\S)/)
-        @tokens << [:BULLET, @s[1], *pos]
-        tokenize_input(nil)
-
-      # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
-      when @s.scan(/([a-z]|\d+)\. +(?=\S)/i)
-        # FIXME if tab(s), the column will be wrong
-        # either support tabs everywhere by first expanding them to
-        # spaces, or assume that they will have been replaced
-        # before (and provide a check for that at least in debug
-        # mode)
-        list_label = @s[1]
-        list_type =
-          case list_label
-          when /[a-z]/ then :LALPHA
-          when /[A-Z]/ then :UALPHA
-          when /\d/ then :NUMBER
-          else
-            raise ParseError, "BUG token #{list_label}"
-          end
-        @tokens << [list_type, list_label, *pos]
-        tokenize_input(nil)
-
-      # [text] followed by spaces or end of line => :LABEL
-      when @s.scan(/\[(.*?)\]( +|\r?$)/)
-        @tokens << [:LABEL, @s[1], *pos]
-        tokenize_input(nil)
-
-      # text:: followed by spaces or end of line => :NOTE
-      when @s.scan(/(.*?)::( +|\r?$)/)
-        @tokens << [:NOTE, @s[1], *pos]
-        tokenize_input(nil)
-
-      # >>> followed by end of line => :BLOCKQUOTE
-      when @s.scan(/>>> *(\w+)?\r?$/)
-        @tokens << [:BLOCKQUOTE, @s[1], *pos]
-        newline!
-        tokenize_input(nil)
-
-      # anything else: :TEXT
-      else
-        column = pos[0]
-        @s.scan(/(.*?)( )?\r?$/)
-        @tokens << [:TEXT, @s[1], *pos]
-
-        if @s[2]
-          @tokens << [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
-        end
-        if newline!
-          tokenize_indented(column)
-        end
-      end
+      @tokens << case
+                 # [CR]LF => :NEWLINE
+                 when @s.scan(/\r?\n/) then
+                   token = [:NEWLINE, @s.matched, *pos]
+                   @s.newline!
+                   token
+                 # === text => :HEADER then :TEXT
+                 when @s.scan(/(=+)(\s*)/) then
+                   level = @s[1].length
+                   header = [:HEADER, level, *pos]
+
+                   if @s[2] =~ /^\r?\n/ then
+                     @s.unscan(@s[2])
+                     header
+                   else
+                     pos = @s.pos
+                     @s.scan(/.*/)
+                     @tokens << header
+                     [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
+                   end
+                 # --- (at least 3) and nothing else on the line => :RULE
+                 when @s.scan(/(-{3,}) *\r?$/) then
+                   [:RULE, @s[1].length - 2, *pos]
+                 # * or - followed by white space and text => :BULLET
+                 when @s.scan(/([*-]) +(\S)/) then
+                   @s.unscan(@s[2])
+                   [:BULLET, @s[1], *pos]
+                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+                 when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
+                   # FIXME if tab(s), the column will be wrong
+                   # either support tabs everywhere by first expanding them to
+                   # spaces, or assume that they will have been replaced
+                   # before (and provide a check for that at least in debug
+                   # mode)
+                   list_label = @s[1]
+                   @s.unscan(@s[2])
+                   list_type =
+                     case list_label
+                     when /[a-z]/ then :LALPHA
+                     when /[A-Z]/ then :UALPHA
+                     when /\d/ then :NUMBER
+                     else
+                       raise ParseError, "BUG token #{list_label}"
+                     end
+                   [list_type, list_label, *pos]
+                 # [text] followed by spaces or end of line => :LABEL
+                 when @s.scan(/\[(.*?)\]( +|\r?$)/) then
+                   [:LABEL, @s[1], *pos]
+                 # text:: followed by spaces or end of line => :NOTE
+                 when @s.scan(/(.*?)::( +|\r?$)/) then
+                   [:NOTE, @s[1], *pos]
+                 # >>> followed by end of line => :BLOCKQUOTE
+                 when @s.scan(/>>> *(\w+)?$/) then
+                   if word = @s[1]
+                     @s.unscan(word)
+                   end
+                   [:BLOCKQUOTE, word, *pos]
+                 # anything else: :TEXT
+                 else
+                   @s.scan(/(.*?)( )?\r?$/)
+                   token = [:TEXT, @s[1], *pos]
+
+                   if @s[2] then
+                     @tokens << token
+                     [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
+                   else
+                     token
+                   end
+                 end
     end

     self
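
After the revert, tokenize is again a single scanner loop that pushes one [type, data, column, line] array per token onto @tokens. A small usage sketch via the class-level helper that the tests below call as @RMP.tokenize; the exact token values may vary by RDoc version:

    require 'rdoc'

    # Each token is [type, data, column, line]; the output shown is illustrative.
    p RDoc::Markup::Parser.tokenize "= Heading\n"
    # e.g. [[:HEADER, 1, 0, 0], [:TEXT, "Heading", 2, 0], [:NEWLINE, "\n", 9, 0]]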

test/rdoc/test_rdoc_markup_parser.rb

Lines changed: 11 additions & 10 deletions
@@ -1591,7 +1591,8 @@ def test_tokenize_verbatim_heading
       [:TEXT, 'Example heading:', 0, 0],
       [:NEWLINE, "\n", 16, 0],
       [:NEWLINE, "\n", 0, 1],
-      [:TEXT, '=== heading three', 3, 2],
+      [:HEADER, 3, 3, 2],
+      [:TEXT, 'heading three', 7, 2],
       [:NEWLINE, "\n", 20, 2],
     ]

@@ -1607,7 +1608,7 @@ def test_tokenize_verbatim_rule
     expected = [
       [:TEXT, 'Verbatim section here that is double-underlined', 2, 0],
       [:NEWLINE, "\n", 49, 0],
-      [:TEXT, '='*47, 2, 1],
+      [:HEADER, 47, 2, 1],
      [:NEWLINE, "\n", 49, 1],
     ]

@@ -1623,14 +1624,14 @@ def test_tokenize_verbatim_rule_fancy
     STR

     expected = [
-      [:TEXT, 'A', 2, 0],
-      [:NEWLINE, "\n", 3, 0],
-      [:TEXT, 'b', 4, 1],
-      [:NEWLINE, "\n", 5, 1],
-      [:TEXT, '='*47, 2, 2],
-      [:NEWLINE, "\n", 49, 2],
-      [:TEXT, 'c', 4, 3],
-      [:NEWLINE, "\n", 5, 3],
+      [:TEXT, 'A', 2, 0],
+      [:NEWLINE, "\n", 3, 0],
+      [:TEXT, 'b', 4, 1],
+      [:NEWLINE, "\n", 5, 1],
+      [:HEADER, 47, 2, 2],
+      [:NEWLINE, "\n", 49, 2],
+      [:TEXT, 'c', 4, 3],
+      [:NEWLINE, "\n", 5, 3],
     ]

     assert_equal expected, @RMP.tokenize(str)
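
The input string for test_tokenize_verbatim_rule sits outside the hunk shown above; reconstructing it from the expected columns (two leading spaces, a 47-character text line, then 47 '=' characters) gives a quick way to check the new expectation. A sketch under that assumption, with the reconstructed input clearly marked as a guess:

    require 'rdoc'

    # Input reconstructed from the expected token columns; the literal string
    # in the test file may differ.
    str = "  Verbatim section here that is double-underlined\n  #{'=' * 47}\n"

    tokens = RDoc::Markup::Parser.tokenize str
    p tokens.include?([:HEADER, 47, 2, 1])  # expected to print true after this commit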
