Skip to content

Commit 9181d1e

Browse files
authored
Avoid diffing by lines if inefficient (#260)
Avoid diffing by lines if it turns out to be significantly less efficient than diffing by bytes. Before this change: ( """ - d5c14bdf6bac81c27afc5429500ed750 - 25483503b557c606dad4f144d27ae10b - 90bdbcdbb6ea7156068e3dcfb7459244 - 978f480a6e3cced51e297fbff9a506b7 + Xd5c14bdf6bac81c27afc5429500ed750 + X25483503b557c606dad4f144d27ae10b + X90bdbcdbb6ea7156068e3dcfb7459244 + X978f480a6e3cced51e297fbff9a506b7 """ ) After this change: strings.Join({ + "X", "d5c14bdf6bac81c27afc5429500ed750\n", + "X", "25483503b557c606dad4f144d27ae10b\n", + "X", "90bdbcdbb6ea7156068e3dcfb7459244\n", + "X", "978f480a6e3cced51e297fbff9a506b7\n", }, "")
1 parent c5c3378 commit 9181d1e

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

cmp/compare_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,6 +1307,11 @@ using the AllowUnexported option.`, "\n"),
13071307
x: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa,#=_value _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb,#=_value _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc,#=_value _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd,#=_value _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c,#=_value _value=4 41\t",
13081308
y: "org-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=aa _value=2 11\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=bb _value=2 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=b,tag2=cc _value=1 21\torg-4747474747474747,bucket-4242424242424242:m,tag1=a,tag2=dd _value=3 31\torg-4747474747474747,bucket-4242424242424242:m,tag1=c _value=4 41\t",
13091309
reason: "leading/trailing equal spans should not appear in diff lines",
1310+
}, {
1311+
label: label + "/AllLinesDiffer",
1312+
x: "d5c14bdf6bac81c27afc5429500ed750\n25483503b557c606dad4f144d27ae10b\n90bdbcdbb6ea7156068e3dcfb7459244\n978f480a6e3cced51e297fbff9a506b7\n",
1313+
y: "Xd5c14bdf6bac81c27afc5429500ed750\nX25483503b557c606dad4f144d27ae10b\nX90bdbcdbb6ea7156068e3dcfb7459244\nX978f480a6e3cced51e297fbff9a506b7\n",
1314+
reason: "all lines are different, so diffing based on lines is pointless",
13101315
}}
13111316
}
13121317

cmp/report_slices.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
9898
// Auto-detect the type of the data.
9999
var isLinedText, isText, isBinary bool
100100
var sx, sy string
101+
var ssx, ssy []string
101102
switch {
102103
case t.Kind() == reflect.String:
103104
sx, sy = vx.String(), vy.String()
@@ -130,6 +131,22 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
130131
}
131132
isText = !isBinary
132133
isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
134+
135+
// Avoid diffing by lines if it produces a significantly more complex
136+
// edit script than diffing by bytes.
137+
if isLinedText {
138+
ssx = strings.Split(sx, "\n")
139+
ssy = strings.Split(sy, "\n")
140+
esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
141+
return diff.BoolResult(ssx[ix] == ssy[iy])
142+
})
143+
esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
144+
return diff.BoolResult(sx[ix] == sy[iy])
145+
})
146+
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
147+
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
148+
isLinedText = efficiencyLines < 4*efficiencyBytes
149+
}
133150
}
134151

135152
// Format the string into printable records.
@@ -139,8 +156,6 @@ func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
139156
// If the text appears to be multi-lined text,
140157
// then perform differencing across individual lines.
141158
case isLinedText:
142-
ssx := strings.Split(sx, "\n")
143-
ssy := strings.Split(sy, "\n")
144159
list = opts.formatDiffSlice(
145160
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
146161
func(v reflect.Value, d diffMode) textRecord {

cmp/testdata/diffs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,18 @@
10651065
` _value=4 41 `,
10661066
}, "")
10671067
>>> TestDiff/Reporter/SurroundingEqualElements
1068+
<<< TestDiff/Reporter/AllLinesDiffer
1069+
strings.Join({
1070+
+ "X",
1071+
"d5c14bdf6bac81c27afc5429500ed750\n",
1072+
+ "X",
1073+
"25483503b557c606dad4f144d27ae10b\n",
1074+
+ "X",
1075+
"90bdbcdbb6ea7156068e3dcfb7459244\n",
1076+
+ "X",
1077+
"978f480a6e3cced51e297fbff9a506b7\n",
1078+
}, "")
1079+
>>> TestDiff/Reporter/AllLinesDiffer
10681080
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
10691081
teststructs.ParentStructA{
10701082
privateStruct: teststructs.privateStruct{

0 commit comments

Comments
 (0)