@@ -272,11 +272,44 @@ def build_verbatim margin
272
272
end
273
273
274
274
case type
275
+ when :HEADER then
276
+ line << '=' * data
277
+ _ , _ , peek_column , = peek_token
278
+ peek_column ||= column + data
279
+ indent = peek_column - column - data
280
+ line << ' ' * indent
281
+ when :RULE then
282
+ width = 2 + data
283
+ line << '-' * width
284
+ _ , _ , peek_column , = peek_token
285
+ peek_column ||= column + width
286
+ indent = peek_column - column - width
287
+ line << ' ' * indent
275
288
when :BREAK , :TEXT then
276
289
line << data
277
- else
278
- raise TypeError , "unexpected token under verbatim: #{ type } "
290
+ when :BLOCKQUOTE then
291
+ line << '>>>'
292
+ peek_type , _ , peek_column = peek_token
293
+ if peek_type != :NEWLINE and peek_column
294
+ line << ' ' * ( peek_column - column - 3 )
295
+ end
296
+ else # *LIST_TOKENS
297
+ list_marker = case type
298
+ when :BULLET then data
299
+ when :LABEL then "[#{ data } ]"
300
+ when :NOTE then "#{ data } ::"
301
+ else # :LALPHA, :NUMBER, :UALPHA
302
+ "#{ data } ."
303
+ end
304
+ line << list_marker
305
+ peek_type , _ , peek_column = peek_token
306
+ unless peek_type == :NEWLINE then
307
+ peek_column ||= column + list_marker . length
308
+ indent = peek_column - column - list_marker . length
309
+ line << ' ' * indent
310
+ end
279
311
end
312
+
280
313
end
281
314
282
315
verbatim << line << "\n " unless line . empty?
@@ -448,37 +481,11 @@ def skip token_type, error = true
448
481
##
449
482
# Turns text +input+ into a stream of tokens
450
483
451
- def tokenize ( input )
484
+ def tokenize input
452
485
setup_scanner input
453
- margin = @s . pos [ 0 ]
454
- tokenize_indented ( margin )
455
- tokenize_input ( margin )
456
- end
457
-
458
- def newline! ( pos = nil )
459
- if pos or ( @s . scan ( / *(?=\r ?\n )/ ) and pos = @s . pos and @s . scan ( /\r ?\n / ) )
460
- @tokens << [ :NEWLINE , @s . matched , *pos ]
461
- @s . newline!
462
- end
463
- end
464
486
465
- def tokenize_indented ( column )
466
- indent = / {#{ column +1 } ,}(?=\S )| *(?=\r ?\n )/
467
- while @s . scan ( indent )
487
+ until @s . eos? do
468
488
pos = @s . pos
469
- if @s . scan ( /(.+)(?=\r ?\n )?/ )
470
- @tokens << [ :TEXT , @s . matched , *pos ]
471
- end
472
- newline! or break
473
- end
474
- end
475
-
476
- def tokenize_input ( margin )
477
- column = 0
478
-
479
- until @s . eos?
480
- pos = @s . pos
481
- break if pos [ 0 ] < ( margin ||= pos [ 0 ] )
482
489
483
490
# leading spaces will be reflected by the column of the next token
484
491
# the only things we lose are trailing spaces at the end of the file
@@ -487,84 +494,75 @@ def tokenize_input(margin)
487
494
# note: after BULLET, LABEL, etc.,
488
495
# indent will be the column of the next non-newline token
489
496
490
- case
491
- # [CR]LF => :NEWLINE
492
- when @s . scan ( /\r ?\n / )
493
- newline! ( pos )
494
- next
495
-
496
- # === text => :HEADER then :TEXT
497
- when @s . scan ( /(=+)(\s *)/ )
498
- level = @s [ 1 ] . length
499
- header = [ :HEADER , level , *pos ]
500
-
501
- if @s [ 2 ] =~ /^\r ?\n /
502
- @s . unscan ( @s [ 2 ] )
503
- @tokens << header
504
- else
505
- pos = @s . pos
506
- @s . scan ( /.*/ )
507
- @tokens << header
508
- @tokens << [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *pos ]
509
- end
510
-
511
- # --- (at least 3) and nothing else on the line => :RULE
512
- when @s . scan ( /(-{3,}) *\r ?$/ )
513
- @tokens << [ :RULE , @s [ 1 ] . length - 2 , *pos ]
514
-
515
- # * or - followed by white space and text => :BULLET
516
- when @s . scan ( /([*-]) +(?=\S )/ )
517
- @tokens << [ :BULLET , @s [ 1 ] , *pos ]
518
- tokenize_input ( nil )
519
-
520
- # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
521
- when @s . scan ( /([a-z]|\d +)\. +(?=\S )/i )
522
- # FIXME if tab(s), the column will be wrong
523
- # either support tabs everywhere by first expanding them to
524
- # spaces, or assume that they will have been replaced
525
- # before (and provide a check for that at least in debug
526
- # mode)
527
- list_label = @s [ 1 ]
528
- list_type =
529
- case list_label
530
- when /[a-z]/ then :LALPHA
531
- when /[A-Z]/ then :UALPHA
532
- when /\d / then :NUMBER
533
- else
534
- raise ParseError , "BUG token #{ list_label } "
535
- end
536
- @tokens << [ list_type , list_label , *pos ]
537
- tokenize_input ( nil )
538
-
539
- # [text] followed by spaces or end of line => :LABEL
540
- when @s . scan ( /\[ (.*?)\] ( +|\r ?$)/ )
541
- @tokens << [ :LABEL , @s [ 1 ] , *pos ]
542
- tokenize_input ( nil )
543
-
544
- # text:: followed by spaces or end of line => :NOTE
545
- when @s . scan ( /(.*?)::( +|\r ?$)/ )
546
- @tokens << [ :NOTE , @s [ 1 ] , *pos ]
547
- tokenize_input ( nil )
548
-
549
- # >>> followed by end of line => :BLOCKQUOTE
550
- when @s . scan ( />>> *(\w +)?\r ?$/ )
551
- @tokens << [ :BLOCKQUOTE , @s [ 1 ] , *pos ]
552
- newline!
553
- tokenize_input ( nil )
554
-
555
- # anything else: :TEXT
556
- else
557
- column = pos [ 0 ]
558
- @s . scan ( /(.*?)( )?\r ?$/ )
559
- @tokens << [ :TEXT , @s [ 1 ] , *pos ]
560
-
561
- if @s [ 2 ]
562
- @tokens << [ :BREAK , @s [ 2 ] , pos [ 0 ] + @s [ 1 ] . length , pos [ 1 ] ]
563
- end
564
- if newline!
565
- tokenize_indented ( column )
566
- end
567
- end
497
+ @tokens << case
498
+ # [CR]LF => :NEWLINE
499
+ when @s . scan ( /\r ?\n / ) then
500
+ token = [ :NEWLINE , @s . matched , *pos ]
501
+ @s . newline!
502
+ token
503
+ # === text => :HEADER then :TEXT
504
+ when @s . scan ( /(=+)(\s *)/ ) then
505
+ level = @s [ 1 ] . length
506
+ header = [ :HEADER , level , *pos ]
507
+
508
+ if @s [ 2 ] =~ /^\r ?\n / then
509
+ @s . unscan ( @s [ 2 ] )
510
+ header
511
+ else
512
+ pos = @s . pos
513
+ @s . scan ( /.*/ )
514
+ @tokens << header
515
+ [ :TEXT , @s . matched . sub ( /\r $/ , '' ) , *pos ]
516
+ end
517
+ # --- (at least 3) and nothing else on the line => :RULE
518
+ when @s . scan ( /(-{3,}) *\r ?$/ ) then
519
+ [ :RULE , @s [ 1 ] . length - 2 , *pos ]
520
+ # * or - followed by white space and text => :BULLET
521
+ when @s . scan ( /([*-]) +(\S )/ ) then
522
+ @s . unscan ( @s [ 2 ] )
523
+ [ :BULLET , @s [ 1 ] , *pos ]
524
+ # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
525
+ when @s . scan ( /([a-z]|\d +)\. +(\S )/i ) then
526
+ # FIXME if tab(s), the column will be wrong
527
+ # either support tabs everywhere by first expanding them to
528
+ # spaces, or assume that they will have been replaced
529
+ # before (and provide a check for that at least in debug
530
+ # mode)
531
+ list_label = @s [ 1 ]
532
+ @s . unscan ( @s [ 2 ] )
533
+ list_type =
534
+ case list_label
535
+ when /[a-z]/ then :LALPHA
536
+ when /[A-Z]/ then :UALPHA
537
+ when /\d / then :NUMBER
538
+ else
539
+ raise ParseError , "BUG token #{ list_label } "
540
+ end
541
+ [ list_type , list_label , *pos ]
542
+ # [text] followed by spaces or end of line => :LABEL
543
+ when @s . scan ( /\[ (.*?)\] ( +|\r ?$)/ ) then
544
+ [ :LABEL , @s [ 1 ] , *pos ]
545
+ # text:: followed by spaces or end of line => :NOTE
546
+ when @s . scan ( /(.*?)::( +|\r ?$)/ ) then
547
+ [ :NOTE , @s [ 1 ] , *pos ]
548
+ # >>> followed by end of line => :BLOCKQUOTE
549
+ when @s . scan ( />>> *(\w +)?$/ ) then
550
+ if word = @s [ 1 ]
551
+ @s . unscan ( word )
552
+ end
553
+ [ :BLOCKQUOTE , word , *pos ]
554
+ # anything else: :TEXT
555
+ else
556
+ @s . scan ( /(.*?)( )?\r ?$/ )
557
+ token = [ :TEXT , @s [ 1 ] , *pos ]
558
+
559
+ if @s [ 2 ] then
560
+ @tokens << token
561
+ [ :BREAK , @s [ 2 ] , pos [ 0 ] + @s [ 1 ] . length , pos [ 1 ] ]
562
+ else
563
+ token
564
+ end
565
+ end
568
566
end
569
567
570
568
self
0 commit comments