@@ -183,41 +183,6 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
183
183
$ delimiter = null ;
184
184
static ::$ last_error = null ;
185
185
186
- $ close_html_comment = function ( $ comment_starting_at ) use ( $ text , &$ at , $ end ) {
187
- // Find span-of-dashes comments which look like `<!----->`.
188
- $ span_of_dashes = strspn ( $ text , '- ' , $ comment_starting_at + 2 );
189
- if (
190
- $ comment_starting_at + 2 + $ span_of_dashes < $ end &&
191
- '> ' === $ text [ $ comment_starting_at + 2 + $ span_of_dashes ]
192
- ) {
193
- $ at = $ comment_starting_at + $ span_of_dashes + 1 ;
194
- return ;
195
- }
196
-
197
- // Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
198
- $ now_at = $ comment_starting_at + 4 ;
199
- while ( $ now_at < $ end ) {
200
- $ dashes_at = strpos ( $ text , '-- ' , $ now_at );
201
- if ( false === $ dashes_at ) {
202
- static ::$ last_error = self ::INCOMPLETE_INPUT ;
203
- $ at = $ end ;
204
- return ;
205
- }
206
-
207
- $ closer_must_be_at = $ dashes_at + 2 + strspn ( $ text , '- ' , $ dashes_at + 2 );
208
- if ( $ closer_must_be_at < $ end && '! ' === $ text [ $ closer_must_be_at ] ) {
209
- $ closer_must_be_at ++;
210
- }
211
-
212
- if ( $ closer_must_be_at < $ end && '> ' === $ text [ $ closer_must_be_at ] ) {
213
- $ at = $ closer_must_be_at + 1 ;
214
- return ;
215
- }
216
-
217
- $ now_at ++;
218
- }
219
- };
220
-
221
186
while ( $ at < $ end ) {
222
187
/*
223
188
* Find the next possible opening.
@@ -238,7 +203,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
238
203
$ opening_whitespace_at = $ comment_opening_at + 4 ;
239
204
$ opening_whitespace_length = strspn ( $ text , " \t\f\r\n" , $ opening_whitespace_at );
240
205
if ( 0 === $ opening_whitespace_length ) {
241
- $ close_html_comment ( $ comment_opening_at );
206
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
242
207
continue ;
243
208
}
244
209
@@ -255,7 +220,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
255
220
}
256
221
257
222
if ( 0 !== substr_compare ( $ text , 'wp: ' , $ wp_prefix_at , 3 ) ) {
258
- $ close_html_comment ( $ comment_opening_at );
223
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
259
224
continue ;
260
225
}
261
226
@@ -269,7 +234,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
269
234
270
235
// The namespace must start with a-z.
271
236
if ( 'a ' > $ start_of_namespace || 'z ' < $ start_of_namespace ) {
272
- $ close_html_comment ( $ comment_opening_at );
237
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
273
238
continue ;
274
239
}
275
240
@@ -285,7 +250,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
285
250
$ name_at = $ separator_at + 1 ;
286
251
$ start_of_name = $ text [ $ name_at ];
287
252
if ( 'a ' > $ start_of_name || 'z ' < $ start_of_name ) {
288
- $ close_html_comment ( $ comment_opening_at );
253
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
289
254
continue ;
290
255
}
291
256
@@ -299,7 +264,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
299
264
$ after_name_whitespace_at = $ name_at + $ name_length ;
300
265
$ after_name_whitespace_length = strspn ( $ text , " \t\f\r\n" , $ after_name_whitespace_at );
301
266
if ( 0 === $ after_name_whitespace_length ) {
302
- $ close_html_comment ( $ comment_opening_at );
267
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
303
268
continue ;
304
269
}
305
270
@@ -349,13 +314,13 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
349
314
350
315
// This shouldn't be possible, but it can't be allowed regardless.
351
316
if ( $ max_whitespace_length < 0 ) {
352
- $ close_html_comment ( $ comment_opening_at );
317
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
353
318
continue ;
354
319
}
355
320
356
321
$ closing_whitespace_length = strspn ( $ text , " \t\f\r\n" , $ json_at , $ comment_closing_at - $ json_at - $ void_flag_length );
357
322
if ( 0 === $ after_name_whitespace_length + $ closing_whitespace_length ) {
358
- $ close_html_comment ( $ comment_opening_at );
323
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
359
324
continue ;
360
325
}
361
326
@@ -389,7 +354,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
389
354
}
390
355
391
356
if ( 0 === $ json_length || 0 === $ after_json_whitespace_length ) {
392
- $ close_html_comment ( $ comment_opening_at );
357
+ $ at = self :: find_html_comment_end ( $ text , $ comment_opening_at, $ end );
393
358
continue ;
394
359
}
395
360
@@ -508,6 +473,51 @@ private function __construct() {
508
473
// This is not to be called from the outside.
509
474
}
510
475
476
/**
 * Returns the byte offset immediately after the closing `>` of an HTML comment,
 * given the byte offset at which the comment opens.
 *
 * The caller is expected to pass the offset of the `<` in a `<!--` opener;
 * this is not verified here.
 *
 * The following closers are recognized:
 *
 *  - A span of dashes directly followed by `>`, e.g. `<!-->`, `<!--->`, `<!----->`.
 *  - The first `-->` appearing after the opener.
 *  - The first `--!>` appearing after the opener.
 *
 * If the scan runs out of `--` sequences in `$text`, `static::$last_error`
 * is set to `self::INCOMPLETE_INPUT`. Dashes located at or beyond `$search_end`
 * are found but rejected by the bounds checks, and in that case the function
 * returns `$search_end` without setting the error.
 *
 * @since $$next-version$$
 *
 * @param string $text                Document in which to search for HTML comment end.
 * @param int    $comment_starting_at Where the HTML comment started, the leading `<`.
 * @param int    $search_end          Byte offset at which to stop searching (exclusive):
 *                                    the closing `>` must appear before this offset.
 * @return int Offset after the current HTML comment ends, or `$search_end` if no end was found.
 */
private static function find_html_comment_end( string $text, int $comment_starting_at, int $search_end ): int {
	// Find span-of-dashes comments which look like `<!----->`.
	$dashes_start_at = $comment_starting_at + 2;
	$span_of_dashes  = strspn( $text, '-', $dashes_start_at );
	$span_closer_at  = $dashes_start_at + $span_of_dashes;
	if ( $span_closer_at < $search_end && '>' === $text[ $span_closer_at ] ) {
		// Step past the `>` itself so the result is the offset after the comment.
		return $span_closer_at + 1;
	}

	// Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
	$now_at = $comment_starting_at + 4;
	while ( $now_at < $search_end ) {
		$dashes_at = strpos( $text, '--', $now_at );
		if ( false === $dashes_at ) {
			static::$last_error = self::INCOMPLETE_INPUT;
			return $search_end;
		}

		// A closer may be preceded by any number of dashes: skip the entire run.
		$dash_run_ends_at  = $dashes_at + 2 + strspn( $text, '-', $dashes_at + 2 );
		$closer_must_be_at = $dash_run_ends_at;

		// Allow for the `--!>` form of the closer.
		if ( $closer_must_be_at < $search_end && '!' === $text[ $closer_must_be_at ] ) {
			$closer_must_be_at++;
		}

		if ( $closer_must_be_at < $search_end && '>' === $text[ $closer_must_be_at ] ) {
			return $closer_must_be_at + 1;
		}

		/*
		 * This run of dashes does not close the comment. Every search starting
		 * anywhere from the current position up to the second-to-last dash of
		 * the run would find this same run again and reject it for the same
		 * reason, so resume at the run's last dash. That is the first position
		 * from which `strpos()` can report a different `--`, and it avoids
		 * re-scanning the same bytes once for each byte skipped.
		 */
		$now_at = $dash_run_ends_at - 1;
	}

	return $search_end;
}
520
+
511
521
/**
512
522
* Creates a pair of delimiters for freeform text content
513
523
* since there are no delimiters in a document for them.
0 commit comments