Skip to content

Commit c5c3378

Browse files
authored
Cleanup edit groups after coalescing (#259)
Even with an optimal diffing algoritm, coalescing adjacent edit groups may cause the corresponding pair of strings for an edit group to have leading or trailing spans of equal elements. While this is technically a correct representation of a diff, it is a suboptimal outcome. As such, scan through all unequal groups and move leading/trailing equal spans to the preceding/succeeding equal group. Before this change: strings.Join({ "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa", - ",#=_value _value=2 ", + " _value=2 ", `11 org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb`, - ",#=_value _value=2 2", + " _value=2 2", `1 org-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc`, - ",#=_value", ` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2", - "=dd,#=_value", + "=dd", ` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`, - "=c,#=_value", + "=c", ` _value=4 41 `, }, "") After this change: strings.Join({ "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa", - ",#=_value", ` _value=2 11 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2=bb", - ",#=_value", ` _value=2 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=b,tag2=cc", - ",#=_value", ` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=a,tag2=dd", - ",#=_value", ` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`, "=c", - ",#=_value", ` _value=4 41 `, }, "")
1 parent 1ee4af8 commit c5c3378

File tree

3 files changed

+166
-10
lines changed

3 files changed

+166
-10
lines changed

cmp/compare_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,11 @@ using the AllowUnexported option.`, "\n"),
13021302
x: struct{ X interface{} }{[1]string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam sit amet pretium ligula, at gravida quam. Integer iaculis, velit at sagittis ultricies, lacus metus scelerisque turpis, ornare feugiat nulla nisl ac erat. Maecenas elementum ultricies libero, sed efficitur lacus molestie non. Nulla ac pretium dolor. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque mi lorem, consectetur id porttitor id, sollicitudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis."}},
13031303
y: struct{ X interface{} }{[1]string{"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nam sit amet pretium ligula, at gravida quam. Integer iaculis, velit at sagittis ultricies, lacus metus scelerisque turpis, ornare feugiat nulla nisl ac erat. Maecenas elementum ultricies libero, sed efficitur lacus molestie non. Nulla ac pretium dolor. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque mi lorem, consectetur id porttitor id, sollicitudin sit amet enim. Duis eu dolor magna. Nunc ut augue turpis,"}},
13041304
reason: "printing a large standalone string that is different should print enough context to see the difference",
1305+
}, {
1306+
label: label + "/SurroundingEqualElements",
1307+
x: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa,#=_value _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb,#=_value _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc,#=_value _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd,#=_value _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c,#=_value _value=4 41\t",
1308+
y: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c _value=4 41\t",
1309+
reason: "leading/trailing equal spans should not appear in diff lines",
13051310
}}
13061311
}
13071312

cmp/report_slices.go

Lines changed: 142 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,11 @@ func (opts formatOptions) formatDiffSlice(
338338
vx, vy reflect.Value, chunkSize int, name string,
339339
makeRec func(reflect.Value, diffMode) textRecord,
340340
) (list textList) {
341-
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
342-
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
341+
eq := func(ix, iy int) bool {
342+
return vx.Index(ix).Interface() == vy.Index(iy).Interface()
343+
}
344+
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
345+
return diff.BoolResult(eq(ix, iy))
343346
})
344347

345348
appendChunks := func(v reflect.Value, d diffMode) int {
@@ -364,6 +367,7 @@ func (opts formatOptions) formatDiffSlice(
364367

365368
groups := coalesceAdjacentEdits(name, es)
366369
groups = coalesceInterveningIdentical(groups, chunkSize/4)
370+
groups = cleanupSurroundingIdentical(groups, eq)
367371
maxGroup := diffStats{Name: name}
368372
for i, ds := range groups {
369373
if maxLen >= 0 && numDiffs >= maxLen {
@@ -416,25 +420,36 @@ func (opts formatOptions) formatDiffSlice(
416420

417421
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
418422
// equal or unequal counts.
423+
//
424+
// Example:
425+
//
426+
// Input: "..XXY...Y"
427+
// Output: [
428+
// {NumIdentical: 2},
429+
// {NumRemoved: 2, NumInserted 1},
430+
// {NumIdentical: 3},
431+
// {NumInserted: 1},
432+
// ]
433+
//
419434
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
420-
var prevCase int // Arbitrary index into which case last occurred
421-
lastStats := func(i int) *diffStats {
422-
if prevCase != i {
435+
var prevMode byte
436+
lastStats := func(mode byte) *diffStats {
437+
if prevMode != mode {
423438
groups = append(groups, diffStats{Name: name})
424-
prevCase = i
439+
prevMode = mode
425440
}
426441
return &groups[len(groups)-1]
427442
}
428443
for _, e := range es {
429444
switch e {
430445
case diff.Identity:
431-
lastStats(1).NumIdentical++
446+
lastStats('=').NumIdentical++
432447
case diff.UniqueX:
433-
lastStats(2).NumRemoved++
448+
lastStats('!').NumRemoved++
434449
case diff.UniqueY:
435-
lastStats(2).NumInserted++
450+
lastStats('!').NumInserted++
436451
case diff.Modified:
437-
lastStats(2).NumModified++
452+
lastStats('!').NumModified++
438453
}
439454
}
440455
return groups
@@ -444,6 +459,35 @@ func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats)
444459
// equal groups into adjacent unequal groups that currently result in a
445460
// dual inserted/removed printout. This acts as a high-pass filter to smooth
446461
// out high-frequency changes within the windowSize.
462+
//
463+
// Example:
464+
//
465+
// WindowSize: 16,
466+
// Input: [
467+
// {NumIdentical: 61}, // group 0
468+
// {NumRemoved: 3, NumInserted: 1}, // group 1
469+
// {NumIdentical: 6}, // ├── coalesce
470+
// {NumInserted: 2}, // ├── coalesce
471+
// {NumIdentical: 1}, // ├── coalesce
472+
// {NumRemoved: 9}, // └── coalesce
473+
// {NumIdentical: 64}, // group 2
474+
// {NumRemoved: 3, NumInserted: 1}, // group 3
475+
// {NumIdentical: 6}, // ├── coalesce
476+
// {NumInserted: 2}, // ├── coalesce
477+
// {NumIdentical: 1}, // ├── coalesce
478+
// {NumRemoved: 7}, // ├── coalesce
479+
// {NumIdentical: 1}, // ├── coalesce
480+
// {NumRemoved: 2}, // └── coalesce
481+
// {NumIdentical: 63}, // group 4
482+
// ]
483+
// Output: [
484+
// {NumIdentical: 61},
485+
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
486+
// {NumIdentical: 64},
487+
// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
488+
// {NumIdentical: 63},
489+
// ]
490+
//
447491
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
448492
groups, groupsOrig := groups[:0], groups
449493
for i, ds := range groupsOrig {
@@ -463,3 +507,91 @@ func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStat
463507
}
464508
return groups
465509
}
510+
511+
// cleanupSurroundingIdentical scans through all unequal groups, and
512+
// moves any leading sequence of equal elements to the preceding equal group and
513+
// moves and trailing sequence of equal elements to the succeeding equal group.
514+
//
515+
// This is necessary since coalesceInterveningIdentical may coalesce edit groups
516+
// together such that leading/trailing spans of equal elements becomes possible.
517+
// Note that this can occur even with an optimal diffing algorithm.
518+
//
519+
// Example:
520+
//
521+
// Input: [
522+
// {NumIdentical: 61},
523+
// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
524+
// {NumIdentical: 67},
525+
// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements
526+
// {NumIdentical: 54},
527+
// ]
528+
// Output: [
529+
// {NumIdentical: 64}, // incremented by 3
530+
// {NumRemoved: 9},
531+
// {NumIdentical: 67},
532+
// {NumRemoved: 9},
533+
// {NumIdentical: 64}, // incremented by 10
534+
// ]
535+
//
536+
func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
537+
var ix, iy int // indexes into sequence x and y
538+
for i, ds := range groups {
539+
// Handle equal group.
540+
if ds.NumDiff() == 0 {
541+
ix += ds.NumIdentical
542+
iy += ds.NumIdentical
543+
continue
544+
}
545+
546+
// Handle unequal group.
547+
nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
548+
ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
549+
var numLeadingIdentical, numTrailingIdentical int
550+
for i := 0; i < nx && i < ny && eq(ix+i, iy+i); i++ {
551+
numLeadingIdentical++
552+
}
553+
for i := 0; i < nx && i < ny && eq(ix+nx-1-i, iy+ny-1-i); i++ {
554+
numTrailingIdentical++
555+
}
556+
if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
557+
if numLeadingIdentical > 0 {
558+
// Remove leading identical span from this group and
559+
// insert it into the preceding group.
560+
if i-1 >= 0 {
561+
groups[i-1].NumIdentical += numLeadingIdentical
562+
} else {
563+
// No preceding group exists, so prepend a new group,
564+
// but do so after we finish iterating over all groups.
565+
defer func() {
566+
groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
567+
}()
568+
}
569+
// Increment indexes since the preceding group would have handled this.
570+
ix += numLeadingIdentical
571+
iy += numLeadingIdentical
572+
}
573+
if numTrailingIdentical > 0 {
574+
// Remove trailing identical span from this group and
575+
// insert it into the succeeding group.
576+
if i+1 < len(groups) {
577+
groups[i+1].NumIdentical += numTrailingIdentical
578+
} else {
579+
// No succeeding group exists, so append a new group,
580+
// but do so after we finish iterating over all groups.
581+
defer func() {
582+
groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
583+
}()
584+
}
585+
// Do not increment indexes since the succeeding group will handle this.
586+
}
587+
588+
// Update this group since some identical elements were removed.
589+
nx -= numIdentical
590+
ny -= numIdentical
591+
groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
592+
}
593+
ix += nx
594+
iy += ny
595+
}
596+
return groups
597+
}

cmp/testdata/diffs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,25 @@
10461046
+ },
10471047
}
10481048
>>> TestDiff/Reporter/LargeStandaloneString
1049+
<<< TestDiff/Reporter/SurroundingEqualElements
1050+
strings.Join({
1051+
"org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa",
1052+
- ",#=_value",
1053+
` _value=2 11 org-4747474747474747,bucket-4242424242424242:m,tag1`,
1054+
"=a,tag2=bb",
1055+
- ",#=_value",
1056+
` _value=2 21 org-4747474747474747,bucket-4242424242424242:m,tag1`,
1057+
"=b,tag2=cc",
1058+
- ",#=_value",
1059+
` _value=1 21 org-4747474747474747,bucket-4242424242424242:m,tag1`,
1060+
"=a,tag2=dd",
1061+
- ",#=_value",
1062+
` _value=3 31 org-4747474747474747,bucket-4242424242424242:m,tag1`,
1063+
"=c",
1064+
- ",#=_value",
1065+
` _value=4 41 `,
1066+
}, "")
1067+
>>> TestDiff/Reporter/SurroundingEqualElements
10491068
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
10501069
teststructs.ParentStructA{
10511070
privateStruct: teststructs.privateStruct{

0 commit comments

Comments
 (0)