Skip to content

Commit 39c83c4

Browse files
jehervedmsnellkraftbj
authored
Blocks: In scan_delimiters extract static method (#43984)
Ports 185482-ghe-Automattic/wpcom. When `scan_delimiters()` is attempting to find the end of an HTML comment, it’s calling a convenience method which is also a closure in the code (or an anonymous function). This carries a marginal performance overhead and limits reuse. In this change that method is extracted onto the class as its own static method; this should simplify reasoning about the method itself and provide a performance lift to the calling code. Co-authored-by: Dennis Snell <[email protected]> Co-authored-by: Brandon Kraft <[email protected]>
1 parent 976007b commit 39c83c4

File tree

2 files changed

+57
-43
lines changed

2 files changed

+57
-43
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Significance: patch
2+
Type: changed
3+
4+
Performance improvement: extract anonymous function into static method

projects/packages/block-delimiter/src/class-block-delimiter.php

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -183,41 +183,6 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
183183
$delimiter = null;
184184
static::$last_error = null;
185185

186-
$close_html_comment = function ( $comment_starting_at ) use ( $text, &$at, $end ) {
187-
// Find span-of-dashes comments which look like `<!----->`.
188-
$span_of_dashes = strspn( $text, '-', $comment_starting_at + 2 );
189-
if (
190-
$comment_starting_at + 2 + $span_of_dashes < $end &&
191-
'>' === $text[ $comment_starting_at + 2 + $span_of_dashes ]
192-
) {
193-
$at = $comment_starting_at + $span_of_dashes + 1;
194-
return;
195-
}
196-
197-
// Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
198-
$now_at = $comment_starting_at + 4;
199-
while ( $now_at < $end ) {
200-
$dashes_at = strpos( $text, '--', $now_at );
201-
if ( false === $dashes_at ) {
202-
static::$last_error = self::INCOMPLETE_INPUT;
203-
$at = $end;
204-
return;
205-
}
206-
207-
$closer_must_be_at = $dashes_at + 2 + strspn( $text, '-', $dashes_at + 2 );
208-
if ( $closer_must_be_at < $end && '!' === $text[ $closer_must_be_at ] ) {
209-
$closer_must_be_at++;
210-
}
211-
212-
if ( $closer_must_be_at < $end && '>' === $text[ $closer_must_be_at ] ) {
213-
$at = $closer_must_be_at + 1;
214-
return;
215-
}
216-
217-
$now_at++;
218-
}
219-
};
220-
221186
while ( $at < $end ) {
222187
/*
223188
* Find the next possible opening.
@@ -238,7 +203,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
238203
$opening_whitespace_at = $comment_opening_at + 4;
239204
$opening_whitespace_length = strspn( $text, " \t\f\r\n", $opening_whitespace_at );
240205
if ( 0 === $opening_whitespace_length ) {
241-
$close_html_comment( $comment_opening_at );
206+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
242207
continue;
243208
}
244209

@@ -255,7 +220,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
255220
}
256221

257222
if ( 0 !== substr_compare( $text, 'wp:', $wp_prefix_at, 3 ) ) {
258-
$close_html_comment( $comment_opening_at );
223+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
259224
continue;
260225
}
261226

@@ -269,7 +234,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
269234

270235
// The namespace must start with a-z.
271236
if ( 'a' > $start_of_namespace || 'z' < $start_of_namespace ) {
272-
$close_html_comment( $comment_opening_at );
237+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
273238
continue;
274239
}
275240

@@ -285,7 +250,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
285250
$name_at = $separator_at + 1;
286251
$start_of_name = $text[ $name_at ];
287252
if ( 'a' > $start_of_name || 'z' < $start_of_name ) {
288-
$close_html_comment( $comment_opening_at );
253+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
289254
continue;
290255
}
291256

@@ -299,7 +264,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
299264
$after_name_whitespace_at = $name_at + $name_length;
300265
$after_name_whitespace_length = strspn( $text, " \t\f\r\n", $after_name_whitespace_at );
301266
if ( 0 === $after_name_whitespace_length ) {
302-
$close_html_comment( $comment_opening_at );
267+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
303268
continue;
304269
}
305270

@@ -349,13 +314,13 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
349314

350315
// This shouldn't be possible, but it can't be allowed regardless.
351316
if ( $max_whitespace_length < 0 ) {
352-
$close_html_comment( $comment_opening_at );
317+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
353318
continue;
354319
}
355320

356321
$closing_whitespace_length = strspn( $text, " \t\f\r\n", $json_at, $comment_closing_at - $json_at - $void_flag_length );
357322
if ( 0 === $after_name_whitespace_length + $closing_whitespace_length ) {
358-
$close_html_comment( $comment_opening_at );
323+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
359324
continue;
360325
}
361326

@@ -389,7 +354,7 @@ public static function next_delimiter( string $text, int $starting_byte_offset,
389354
}
390355

391356
if ( 0 === $json_length || 0 === $after_json_whitespace_length ) {
392-
$close_html_comment( $comment_opening_at );
357+
$at = self::find_html_comment_end( $text, $comment_opening_at, $end );
393358
continue;
394359
}
395360

@@ -508,6 +473,51 @@ private function __construct() {
508473
// This is not to be called from the outside.
509474
}
510475

476+
/**
 * Returns the byte-offset after the ending character of an HTML comment,
 * assuming the proper starting byte offset.
 *
 * Two shapes of comment are recognized:
 *
 *  - A comment made only of dashes, such as `<!-->` or `<!----->`, which
 *    ends at the `>` directly following the run of dashes.
 *  - Any other comment, which ends at the first `-->` or `--!>`.
 *
 * When no further `--` can be found in the text, the class's last error is
 * set to `self::INCOMPLETE_INPUT` and `$search_end` is returned.
 *
 * @since $$next-version$$
 *
 * @param string $text                Document in which to search for HTML comment end.
 * @param int    $comment_starting_at Where the HTML comment started, the leading `<`.
 * @param int    $search_end          Byte offset bounding the search; a closing `>` is
 *                                    only accepted at an offset before this one.
 * @return int Offset after the current HTML comment ends, or `$search_end` if no end was found.
 */
private static function find_html_comment_end( string $text, int $comment_starting_at, int $search_end ): int {
	// Find span-of-dashes comments which look like `<!----->`.
	$span_of_dashes = strspn( $text, '-', $comment_starting_at + 2 );
	$closer_at      = $comment_starting_at + 2 + $span_of_dashes;
	if ( $closer_at < $search_end && '>' === $text[ $closer_at ] ) {
		// Step past the `>` so the result points after the comment, as documented.
		return $closer_at + 1;
	}

	// Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
	$now_at = $comment_starting_at + 4;
	while ( $now_at < $search_end ) {
		$dashes_at = strpos( $text, '--', $now_at );
		if ( false === $dashes_at ) {
			// Without another `--` the comment cannot be closed in this text.
			static::$last_error = self::INCOMPLETE_INPUT;
			return $search_end;
		}

		// Skip every dash in the run; the closer can only follow the last one.
		$closer_must_be_at = $dashes_at + 2 + strspn( $text, '-', $dashes_at + 2 );

		// Allow the `--!>` form by stepping over a single `!`.
		if ( $closer_must_be_at < $search_end && '!' === $text[ $closer_must_be_at ] ) {
			++$closer_must_be_at;
		}

		if ( $closer_must_be_at < $search_end && '>' === $text[ $closer_must_be_at ] ) {
			return $closer_must_be_at + 1;
		}

		// These dashes did not close the comment; advance and look again.
		++$now_at;
	}

	// The search span was exhausted without finding a closer.
	return $search_end;
}
520+
511521
/**
512522
* Creates a pair of delimiters for freeform text content
513523
* since there are no delimiters in a document for them.

0 commit comments

Comments
 (0)