From 171bc483054b9f2802e856cc3485399ee3984d96 Mon Sep 17 00:00:00 2001 From: Boming Zhang Date: Wed, 26 Mar 2025 05:12:46 -0400 Subject: [PATCH] refactor(parser/diff): linear space diff --- internal/parser/diff/diff.go | 162 ++++------------------------- internal/parser/diff/diff_test.go | 69 ------------ internal/parser/diff/myers.go | 143 +++++++++++++++++++++++++ internal/parser/diff/myers_test.go | 48 +++++++++ internal/parser/diff/parser.go | 16 +-- 5 files changed, 217 insertions(+), 221 deletions(-) delete mode 100644 internal/parser/diff/diff_test.go create mode 100644 internal/parser/diff/myers.go create mode 100644 internal/parser/diff/myers_test.go diff --git a/internal/parser/diff/diff.go b/internal/parser/diff/diff.go index 6179082..8d5853e 100644 --- a/internal/parser/diff/diff.go +++ b/internal/parser/diff/diff.go @@ -50,148 +50,32 @@ func isWhitespace(b byte) bool { b == 0xA0 } -// myersDiff computes the Myers' diff between two slices of strings. -// src: https://github.com/cj1128/myers-diff/blob/master/main.go -// TODO: it has O(n^2) time complexity -func myersDiff(src, dst []string, compareSpace bool) []operation { - n := len(src) - m := len(dst) - maxLength := n + m - var trace []map[int]int - var x, y int - -loop: - for d := 0; d <= maxLength; d += 1 { - v := make(map[int]int, d+2) - trace = append(trace, v) - - if d == 0 { - t := 0 - for len(src) > t && - len(dst) > t && - compareStrings(src[t], dst[t], compareSpace) { - t += 1 - } - v[0] = t - if t == len(src) && len(src) == len(dst) { - break loop - } - continue - } - - lastV := trace[d-1] - - for k := -d; k <= d; k += 2 { - if k == -d || (k != d && lastV[k-1] < lastV[k+1]) { - x = lastV[k+1] - } else { - x = lastV[k-1] + 1 - } - - y = x - k - - for x < n && y < m && compareStrings(src[x], dst[y], compareSpace) { - x, y = x+1, y+1 - } - - v[k] = x - - if x == n && y == m { - break loop - } - } - } - - var script []operation - x = n - y = m - var k, prevK, prevX, prevY int - - for d := 
len(trace) - 1; d > 0; d -= 1 { - k = x - y - lastV := trace[d-1] - - if k == -d || (k != d && lastV[k-1] < lastV[k+1]) { - prevK = k + 1 - } else { - prevK = k - 1 - } - - prevX = lastV[prevK] - prevY = prevX - prevK - - for x > prevX && y > prevY { - script = append(script, MOVE) - x -= 1 - y -= 1 - } - - if x == prevX { - script = append(script, INSERT) - } else { - script = append(script, DELETE) - } - - x, y = prevX, prevY - } - - if trace[0][0] != 0 { - for i := 0; i < trace[0][0]; i += 1 { - script = append(script, MOVE) - } - } - - return reverse(script) -} - -// reverse reverses a slice of operations. -func reverse(s []operation) []operation { - result := make([]operation, len(s)) - for i, v := range s { - result[len(s)-1-i] = v - } - return result -} - -// generateDiffWithContext creates a diff block with surrounding context from stdout and result. -func generateDiffWithContext( - stdoutLines, resultLines []string, ops []operation, maxLength int, -) string { - var diffBuilder strings.Builder - - srcIndex, dstIndex, lineCount := 0, 0, 0 - +func formatDiff(oldList []string, newList []string, ops []Op) string { + var result []string + i, j := 0, 0 for _, op := range ops { - s := "" - switch op { - case INSERT: - if dstIndex < len(resultLines) { - s = fmt.Sprintf("+ %s\n", resultLines[dstIndex]) - dstIndex += 1 + if op.OpType == OpDelete { + for i < op.OldPos { + result = append(result, " "+oldList[i]) + i++ + j++ } - case MOVE: - if srcIndex < len(stdoutLines) { - s = fmt.Sprintf(" %s\n", stdoutLines[srcIndex]) - srcIndex += 1 - dstIndex += 1 - } - case DELETE: - if srcIndex < len(stdoutLines) { - s = fmt.Sprintf("- %s\n", stdoutLines[srcIndex]) - srcIndex += 1 - lineCount += 1 + result = append(result, "- "+fmt.Sprint(op.Elem)) + i++ + } else if op.OpType == OpInsert { + for j < op.NewPos { + result = append(result, " "+newList[j]) + i++ + j++ } + result = append(result, "+ "+fmt.Sprint(op.Elem)) + j++ } - if maxLength > 0 && diffBuilder.Len()+len(s) > 
maxLength { - remaining := maxLength - diffBuilder.Len() - if remaining > 0 { - diffBuilder.WriteString(s[:remaining]) - } - diffBuilder.WriteString("\n\n(truncated)") - break - } - diffBuilder.WriteString(s) } - - return diffBuilder.String() + for i < len(oldList) && j < len(newList) { + result = append(result, " "+oldList[i]) + i++ + j++ + } + return strings.Join(result, "\n") } diff --git a/internal/parser/diff/diff_test.go b/internal/parser/diff/diff_test.go deleted file mode 100644 index 09ba6fe..0000000 --- a/internal/parser/diff/diff_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package diff - -import ( - "reflect" - "testing" -) - -func TestMyersDiff(t *testing.T) { - tests := []struct { - name string - src []string - dst []string - compareSpace bool - expected []operation - }{ - { - name: "Insert operation", - src: []string{"a", "b"}, - dst: []string{"a", "b", "c"}, - compareSpace: true, - expected: []operation{MOVE, MOVE, INSERT}, - }, - { - name: "Delete operation", - src: []string{"a", "b", "c"}, - dst: []string{"a", "b"}, - compareSpace: true, - expected: []operation{MOVE, MOVE, DELETE}, - }, - { - name: "No changes", - src: []string{"a", "b", "c"}, - dst: []string{"a", "b", "c"}, - compareSpace: true, - expected: []operation{MOVE, MOVE, MOVE}, - }, - { - name: "Move operation", - src: []string{"a", "b", "c"}, - dst: []string{"c", "a", "b"}, - compareSpace: true, - expected: []operation{INSERT, MOVE, MOVE, DELETE}, - }, - { - name: "Ignore whitespace differences", - src: []string{"a ", "b"}, - dst: []string{"a", "b"}, - compareSpace: false, - expected: []operation{MOVE, MOVE}, - }, - { - name: "Consider whitespace differences", - src: []string{"a ", "b"}, - dst: []string{"a", "b"}, - compareSpace: true, - expected: []operation{DELETE, INSERT, MOVE}, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - result := myersDiff(test.src, test.dst, test.compareSpace) - if !reflect.DeepEqual(result, test.expected) { - 
t.Errorf("myersDiff(%v, %v, %v) = %v; want %v", - test.src, test.dst, test.compareSpace, result, test.expected) - } - }) - } -} diff --git a/internal/parser/diff/myers.go b/internal/parser/diff/myers.go new file mode 100644 index 0000000..7ded268 --- /dev/null +++ b/internal/parser/diff/myers.go @@ -0,0 +1,143 @@ +package diff + +// source: https://github.com/MFAshby/myers +// Myer's diff algorithm in golang +// Ported from https://blog.robertelder.org/diff-algorithm/ + +type OpType int + +const ( + OpInsert OpType = iota + OpDelete +) + +type Op struct { + OpType OpType // Insert or delete, as above + OldPos int // Position in the old list of item to be inserted or deleted + NewPos int // Position in the _new_ list of item to be inserted + Elem any // Actual value to be inserted or deleted +} + +// Returns a minimal list of differences between 2 lists e and f +// requiring O(min(len(e),len(f))) space and O(min(len(e),len(f)) * D) +// worst-case execution time where D is the number of differences. 
func myersDiff(e, f []any, equals func(any, any) bool) []Op {
	// Both lists start at offset 0 of themselves.
	return diffInternal(e, f, equals, 0, 0)
}

// diffInternal computes the edit operations that turn e into f.
//
// e and f are the (sub)lists being compared and equals decides whether two
// elements match. i and j are added to the positions stored in the returned
// Ops; the top-level call passes 0, 0 and the recursive calls pass the start
// offsets of the sub-slices they hand down, so positions end up relative to
// the lists given to myersDiff.
//
// When both lists are non-empty the function searches from the front and from
// the back in alternation, splits the problem at the point where the two
// searches overlap, and recurses on the two halves. When only one list is
// non-empty it emits one delete (or insert) per remaining element.
func diffInternal(e, f []any, equals func(any, any) bool, i, j int) []Op {
	N := len(e)
	M := len(f)
	L := N + M
	// Z is the length of the two work arrays below; every index into them is
	// reduced with pyMod(..., Z).
	Z := 2*min(N, M) + 2
	switch {
	case N > 0 && M > 0:
		w := N - M
		g := make([]int, Z)
		p := make([]int, Z)

		hMax := L/2 + L%2 + 1
		for h := range hMax {
			// r == 0 is the forward pass, r == 1 the backward pass. c is the
			// array the current pass writes, d the one written by the other pass.
			for r := range 2 {
				var c, d []int
				var o, m int
				if r == 0 {
					c = g
					d = p
					o = 1
					m = 1
				} else {
					c = p
					d = g
					o = 0
					m = -1
				}
				// Diagonals visited in this round, stepping by 2 and clipped by
				// the list lengths.
				kMin := -(h - 2*max(0, h-M))
				kMax := h - 2*max(0, h-N) + 1
				for k := kMin; k < kMax; k += 2 {
					// Starting a: either carried over from diagonal k+1 or one
					// past the value on diagonal k-1.
					//
					// NOTE(review): k can be as low as -h, and h can exceed Z when
					// one list is much shorter than the other; confirm pyMod
					// returns a value in [0, Z) for such k.
					var a int
					if k == -h || k != h && c[pyMod((k-1), Z)] < c[pyMod((k+1), Z)] {
						a = c[pyMod((k+1), Z)]
					} else {
						a = c[pyMod((k-1), Z)] + 1
					}
					b := a - k
					// Remember where this run started before extending it.
					s, t := a, b

					// Advance while the elements match. With o=1, m=1 the indices
					// reduce to e[a], f[b]; with o=0, m=-1 they reduce to
					// e[N-a-1], f[M-b-1], i.e. the lists are read from the end.
					for a < N && b < M && equals(e[(1-o)*N+m*a+(o-1)], f[(1-o)*M+m*b+(o-1)]) {
						a, b = a+1, b+1
					}
					c[pyMod(k, Z)] = a
					// z is the diagonal to look up in the other pass's array.
					z := -(k - w)
					// Only the pass whose o equals the parity of L performs the
					// check. When the progress of both passes adds up to at least
					// N, the searches are treated as overlapping and the problem
					// is split here.
					if pyMod(L, 2) == o && z >= -(h-o) && z <= h-o && c[pyMod(k, Z)]+d[pyMod(z, Z)] >= N {
						// (x, y) and (u, v) are the ends of the run found above,
						// expressed in forward coordinates for both passes.
						var D, x, y, u, v int
						if o == 1 {
							D = 2*h - 1
							x = s
							y = t
							u = a
							v = b
						} else {
							D = 2 * h
							x = N - a
							y = M - b
							u = N - s
							v = M - t
						}
						switch {
						case D > 1 || (x != u && y != v):
							// Solve the part before the run and the part after
							// it independently, then concatenate the results.
							return append(diffInternal(e[0:x], f[0:y], equals, i, j), diffInternal(e[u:N], f[v:M], equals, i+u, j+v)...)
						case M > N:
							// Recurse with an empty old list so the tail f[N:M]
							// is emitted as inserts.
							return diffInternal(make([]any, 0), f[N:M], equals, i+N, j+N)
						case M < N:
							// Recurse with an empty new list so the tail e[M:N]
							// is emitted as deletes.
							return diffInternal(e[M:N], make([]any, 0), equals, i+M, j+M)
						default:
							// Nothing left to report for this pair of lists.
							return make([]Op, 0)
						}
					}
				}
			}
		}
	case N > 0:
		// f is empty: every element of e is a delete.
		res := make([]Op, N)
		for n := range N {
			res[n] = Op{OpDelete, i + n, -1, e[n]}
		}
		return res
	default:
		// e is empty: every element of f (possibly none) is an insert at old
		// position i.
		res := make([]Op, M)
		for n := range M {
			res[n] = Op{OpInsert, i, j + n, f[n]}
		}
		return res
	}
	// Reached only if the search loops finish without ever returning.
	panic("Should never hit this!")
}
// pyMod returns x modulo y using the sign convention of Python's % operator:
// for a positive y the result is always in the range [0, y), e.g.
// pyMod(-1, 3) == 2.
//
// Go's % truncates toward zero, so its result carries the sign of the
// dividend (-5 % 4 == -1). Adding y once before reducing, as in (x+y)%y, is
// only sufficient while x >= -y; for smaller x it still yields a negative
// value, which is fatal when the result is used as a slice index. Reducing
// first and shifting afterwards is correct for every x.
//
// y must be positive; y == 0 panics with a division by zero, as any use of
// the % operator would.
// See https://en.wikipedia.org/wiki/Modulo_operation#Variants_of_the_definition
func pyMod(x, y int) int {
	// x % y lies in (-y, y), so adding y makes it non-negative without any
	// risk of overflow, and the final reduction folds [y, 2y) back into [0, y).
	return ((x % y) + y) % y
}
{ + []string{B}, + []string{A, B, C, B, A}, + []Op{{OpInsert, 0, 0, A}, {OpInsert, 0, 1, B}, {OpInsert, 0, 2, C}, {OpInsert, 1, 4, A}}, + }, + } + for _, c := range testCases { + act := myersDiffStr(c.l1, c.l2, true) + if !reflect.DeepEqual(c.exp, act) { + t.Errorf("Failed diff, expected %v actual %v\n", c.exp, act) + } + } +} diff --git a/internal/parser/diff/parser.go b/internal/parser/diff/parser.go index 71e174a..8d7d6d1 100644 --- a/internal/parser/diff/parser.go +++ b/internal/parser/diff/parser.go @@ -9,15 +9,6 @@ import ( "github.com/joint-online-judge/JOJ3/internal/stage" ) -// operation represents the type of edit operation. -type operation uint - -const ( - INSERT operation = iota + 1 - DELETE - MOVE -) - func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( []stage.ParserResult, bool, error, ) { @@ -89,16 +80,15 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( answerLines := strings.Split(answerStr, "\n") resultLines := strings.Split(resultStr, "\n") // Generate Myers diff - diffOps := myersDiff(answerLines, resultLines, + diffOps := myersDiffStr(answerLines, resultLines, output.CompareSpace) // Generate diff block with surrounding context - diffOutput := generateDiffWithContext( + diffOutput := formatDiff( answerLines, resultLines, diffOps, - output.MaxDiffLength, ) - diffOutput = strings.TrimSuffix(diffOutput, "\n \n") + diffOutput = strings.TrimSuffix(diffOutput, "\n ") comment += fmt.Sprintf( "```diff\n%s\n```\n", diffOutput,