refactor(parser/diff): linear space diff
Some checks failed
build / trigger-build-image (push) Blocked by required conditions
build / build (push) Has been cancelled
submodules sync / sync (push) Has been cancelled

This commit is contained in:
张泊明518370910136 2025-03-26 05:12:46 -04:00
parent fd583c5b7a
commit 171bc48305
GPG Key ID: D47306D7062CDA9D
5 changed files with 217 additions and 221 deletions

View File

@ -50,148 +50,32 @@ func isWhitespace(b byte) bool {
b == 0xA0 b == 0xA0
} }
// myersDiff computes the Myers' diff between two slices of strings. func formatDiff(oldList []string, newList []string, ops []Op) string {
// src: https://github.com/cj1128/myers-diff/blob/master/main.go var result []string
// TODO: it has O(n^2) time complexity i, j := 0, 0
func myersDiff(src, dst []string, compareSpace bool) []operation {
n := len(src)
m := len(dst)
maxLength := n + m
var trace []map[int]int
var x, y int
loop:
for d := 0; d <= maxLength; d += 1 {
v := make(map[int]int, d+2)
trace = append(trace, v)
if d == 0 {
t := 0
for len(src) > t &&
len(dst) > t &&
compareStrings(src[t], dst[t], compareSpace) {
t += 1
}
v[0] = t
if t == len(src) && len(src) == len(dst) {
break loop
}
continue
}
lastV := trace[d-1]
for k := -d; k <= d; k += 2 {
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
x = lastV[k+1]
} else {
x = lastV[k-1] + 1
}
y = x - k
for x < n && y < m && compareStrings(src[x], dst[y], compareSpace) {
x, y = x+1, y+1
}
v[k] = x
if x == n && y == m {
break loop
}
}
}
var script []operation
x = n
y = m
var k, prevK, prevX, prevY int
for d := len(trace) - 1; d > 0; d -= 1 {
k = x - y
lastV := trace[d-1]
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
prevK = k + 1
} else {
prevK = k - 1
}
prevX = lastV[prevK]
prevY = prevX - prevK
for x > prevX && y > prevY {
script = append(script, MOVE)
x -= 1
y -= 1
}
if x == prevX {
script = append(script, INSERT)
} else {
script = append(script, DELETE)
}
x, y = prevX, prevY
}
if trace[0][0] != 0 {
for i := 0; i < trace[0][0]; i += 1 {
script = append(script, MOVE)
}
}
return reverse(script)
}
// reverse reverses a slice of operations.
func reverse(s []operation) []operation {
result := make([]operation, len(s))
for i, v := range s {
result[len(s)-1-i] = v
}
return result
}
// generateDiffWithContext creates a diff block with surrounding context from stdout and result.
func generateDiffWithContext(
stdoutLines, resultLines []string, ops []operation, maxLength int,
) string {
var diffBuilder strings.Builder
srcIndex, dstIndex, lineCount := 0, 0, 0
for _, op := range ops { for _, op := range ops {
s := "" if op.OpType == OpDelete {
switch op { for i < op.OldPos {
case INSERT: result = append(result, " "+oldList[i])
if dstIndex < len(resultLines) { i++
s = fmt.Sprintf("+ %s\n", resultLines[dstIndex]) j++
dstIndex += 1
} }
case MOVE: result = append(result, "- "+fmt.Sprint(op.Elem))
if srcIndex < len(stdoutLines) { i++
s = fmt.Sprintf(" %s\n", stdoutLines[srcIndex]) } else if op.OpType == OpInsert {
srcIndex += 1 for j < op.NewPos {
dstIndex += 1 result = append(result, " "+newList[j])
} i++
case DELETE: j++
if srcIndex < len(stdoutLines) {
s = fmt.Sprintf("- %s\n", stdoutLines[srcIndex])
srcIndex += 1
lineCount += 1
} }
result = append(result, "+ "+fmt.Sprint(op.Elem))
j++
} }
if maxLength > 0 && diffBuilder.Len()+len(s) > maxLength {
remaining := maxLength - diffBuilder.Len()
if remaining > 0 {
diffBuilder.WriteString(s[:remaining])
}
diffBuilder.WriteString("\n\n(truncated)")
break
}
diffBuilder.WriteString(s)
} }
for i < len(oldList) && j < len(newList) {
return diffBuilder.String() result = append(result, " "+oldList[i])
i++
j++
}
return strings.Join(result, "\n")
} }

View File

@ -1,69 +0,0 @@
package diff
import (
"reflect"
"testing"
)
// TestMyersDiff runs myersDiff over a table of source/destination line
// slices and compares the returned operation sequence with the expected
// one, once with whitespace-sensitive comparison and once without.
func TestMyersDiff(t *testing.T) {
	type diffCase struct {
		name         string
		src          []string
		dst          []string
		compareSpace bool
		expected     []operation
	}

	cases := []diffCase{
		{
			name:         "Insert operation",
			src:          []string{"a", "b"},
			dst:          []string{"a", "b", "c"},
			compareSpace: true,
			expected:     []operation{MOVE, MOVE, INSERT},
		},
		{
			name:         "Delete operation",
			src:          []string{"a", "b", "c"},
			dst:          []string{"a", "b"},
			compareSpace: true,
			expected:     []operation{MOVE, MOVE, DELETE},
		},
		{
			name:         "No changes",
			src:          []string{"a", "b", "c"},
			dst:          []string{"a", "b", "c"},
			compareSpace: true,
			expected:     []operation{MOVE, MOVE, MOVE},
		},
		{
			name:         "Move operation",
			src:          []string{"a", "b", "c"},
			dst:          []string{"c", "a", "b"},
			compareSpace: true,
			expected:     []operation{INSERT, MOVE, MOVE, DELETE},
		},
		{
			name:         "Ignore whitespace differences",
			src:          []string{"a ", "b"},
			dst:          []string{"a", "b"},
			compareSpace: false,
			expected:     []operation{MOVE, MOVE},
		},
		{
			name:         "Consider whitespace differences",
			src:          []string{"a ", "b"},
			dst:          []string{"a", "b"},
			compareSpace: true,
			expected:     []operation{DELETE, INSERT, MOVE},
		},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.name, func(t *testing.T) {
			got := myersDiff(tc.src, tc.dst, tc.compareSpace)
			if reflect.DeepEqual(got, tc.expected) {
				return
			}
			t.Errorf("myersDiff(%v, %v, %v) = %v; want %v",
				tc.src, tc.dst, tc.compareSpace, got, tc.expected)
		})
	}
}

View File

@ -0,0 +1,143 @@
package diff
// source: https://github.com/MFAshby/myers
// Myers' diff algorithm in Go
// Ported from https://blog.robertelder.org/diff-algorithm/
// OpType identifies the kind of edit that an Op describes.
type OpType int

const (
	// OpInsert marks an element taken from the new list.
	OpInsert OpType = iota
	// OpDelete marks an element removed from the old list.
	OpDelete
)

// Op is one entry of the edit script returned by the diff functions in
// this file.
type Op struct {
	OpType OpType // Insert or delete, as above
	OldPos int    // Position in the old list: the deleted item's index, or the old-list offset at which an insert occurs
	NewPos int    // Position in the _new_ list of item to be inserted; diffInternal sets it to -1 for deletes
	Elem   any    // Actual value to be inserted or deleted
}
// myersDiff returns the insert/delete operations that describe how list e
// differs from list f. Elements are compared only through equals.
//
// Positions stored in the returned Ops are relative to the start of e and
// f, because the recursion in diffInternal is seeded with zero offsets.
//
// The port this code comes from documents O(min(len(e), len(f))) working
// space and O(min(len(e), len(f)) * D) worst-case time, where D is the
// number of differences.
func myersDiff(e, f []any, equals func(any, any) bool) []Op {
	const oldStart, newStart = 0, 0
	return diffInternal(e, f, equals, oldStart, newStart)
}
// diffInternal computes the edit script between the sub-lists e and f and
// calls itself on smaller sub-lists until one side is empty.
//
// i and j are the offsets of e and f inside the lists originally passed to
// myersDiff; they are added to the positions stored in the returned Ops so
// that OldPos/NewPos refer to the original lists rather than the sub-lists.
//
// When both lists are non-empty the function looks for a point at which a
// forward pass and a reverse pass meet, splits both lists there, and
// concatenates the results of the two recursive calls. When only e has
// elements every element becomes an OpDelete; otherwise every element of f
// (possibly none) becomes an OpInsert.
func diffInternal(e, f []any, equals func(any, any) bool, i, j int) []Op {
	N := len(e)
	M := len(f)
	L := N + M
	// Z is the length of the two work arrays below; every index into them
	// is reduced modulo Z via pyMod, so pyMod must yield a value in [0, Z)
	// for every k and z used here, including negative ones.
	Z := 2*min(N, M) + 2
	switch {
	case N > 0 && M > 0:
		w := N - M
		g := make([]int, Z)
		p := make([]int, Z)
		// hMax is ceil(L/2) + 1.
		hMax := L/2 + L%2 + 1
		for h := range hMax {
			// r == 0 is the forward pass, r == 1 the reverse pass.
			for r := range 2 {
				// c is the array written by this pass, d the one written by
				// the opposite pass. o and m select the indexing direction
				// used in the element comparison further down.
				var c, d []int
				var o, m int
				if r == 0 {
					c = g
					d = p
					o = 1
					m = 1
				} else {
					c = p
					d = g
					o = 0
					m = -1
				}
				// Range of k visited in this round; it is narrowed once h
				// exceeds M (lower end) or N (upper end).
				kMin := -(h - 2*max(0, h-M))
				kMax := h - 2*max(0, h-N) + 1
				for k := kMin; k < kMax; k += 2 {
					// Start from the neighbouring entry k+1 unchanged, or
					// from entry k-1 advanced by one.
					var a int
					if k == -h || k != h && c[pyMod((k-1), Z)] < c[pyMod((k+1), Z)] {
						a = c[pyMod((k+1), Z)]
					} else {
						a = c[pyMod((k-1), Z)] + 1
					}
					b := a - k
					// s and t remember where this run of equal elements began.
					s, t := a, b
					// With o == 1, m == 1 the indices reduce to e[a] and f[b];
					// with o == 0, m == -1 they reduce to e[N-1-a] and
					// f[M-1-b], i.e. the lists are read from their ends.
					for a < N && b < M && equals(e[(1-o)*N+m*a+(o-1)], f[(1-o)*M+m*b+(o-1)]) {
						a, b = a+1, b+1
					}
					c[pyMod(k, Z)] = a
					// z (= w - k) is the entry of the opposite pass that is
					// compared with entry k of this pass.
					z := -(k - w)
					// Only the pass whose o matches the parity of L performs
					// the check; the passes have met when the two entries
					// together cover at least N.
					if pyMod(L, 2) == o && z >= -(h-o) && z <= h-o && c[pyMod(k, Z)]+d[pyMod(z, Z)] >= N {
						// (x, y) and (u, v) are the two split points in e/f
						// coordinates; the reverse pass converts its
						// from-the-end values back with N- and M-.
						var D, x, y, u, v int
						if o == 1 {
							D = 2*h - 1
							x = s
							y = t
							u = a
							v = b
						} else {
							D = 2 * h
							x = N - a
							y = M - b
							u = N - s
							v = M - t
						}
						switch {
						case D > 1 || (x != u && y != v):
							// Recurse on the part before (x, y) with unchanged
							// offsets and on the part from (u, v) with the
							// offsets advanced by u and v, then concatenate.
							return append(diffInternal(e[0:x], f[0:y], equals, i, j), diffInternal(e[u:N], f[v:M], equals, i+u, j+v)...)
						case M > N:
							// Only the tail f[N:M] remains; the recursive call
							// gets an empty e and therefore emits inserts.
							return diffInternal(make([]any, 0), f[N:M], equals, i+N, j+N)
						case M < N:
							// Only the tail e[M:N] remains; the recursive call
							// gets an empty f and therefore emits deletes.
							return diffInternal(e[M:N], make([]any, 0), equals, i+M, j+M)
						default:
							// No operations: an empty, non-nil slice.
							return make([]Op, 0)
						}
					}
				}
			}
		}
	case N > 0:
		// f is empty: delete every element of e. NewPos is set to -1.
		res := make([]Op, N)
		for n := range N {
			res[n] = Op{OpDelete, i + n, -1, e[n]}
		}
		return res
	default:
		// e is empty: insert every element of f (none when M == 0). OldPos
		// stays at the old-list offset i for all of them.
		res := make([]Op, M)
		for n := range M {
			res[n] = Op{OpInsert, i, j + n, f[n]}
		}
		return res
	}
	// Reached only if the h loop above finishes without returning.
	panic("Should never hit this!")
}
// pyMod returns x modulo y using Python's convention, in which a non-zero
// result takes the sign of the divisor y (e.g. -1 mod 3 is 2). Go's %
// operator instead takes the sign of the dividend x.
// See https://en.wikipedia.org/wiki/Modulo_operation#Variants_of_the_definition
//
// For a positive y the result is always in [0, y), whatever the magnitude
// of a negative x. The earlier shortcut (x+y)%y only held for x >= -y and
// returned a negative value below that, which is not usable as a slice
// index.
//
// y must not be zero; like the % operator itself, that case panics.
func pyMod(x, y int) int {
	r := x % y
	// Go leaves r with the sign of x. Shift it by one period when it is
	// non-zero and its sign disagrees with the sign of y.
	if r != 0 && (r < 0) != (y < 0) {
		r += y
	}
	return r
}
// myersDiffStr is a convenience wrapper around myersDiff for string
// lists. Each string is stored in an any-typed slice, and two elements are
// considered equal when compareStrings reports them equal under the given
// compareSpace setting. The Elem field of every returned Op therefore
// holds a string.
func myersDiffStr(e, f []string, compareSpace bool) []Op {
	// box copies a string slice into the []any form myersDiff expects.
	box := func(lines []string) []any {
		boxed := make([]any, len(lines))
		for idx := range lines {
			boxed[idx] = lines[idx]
		}
		return boxed
	}

	sameLine := func(lhs, rhs any) bool {
		return compareStrings(lhs.(string), rhs.(string), compareSpace)
	}

	return myersDiff(box(e), box(f), sameLine)
}

View File

@ -0,0 +1,48 @@
package diff
import (
"reflect"
t "testing"
)
// TestCase is one row of the TestDiff table: two input lists and the edit
// script expected when diffing the first against the second.
type TestCase struct {
	l1  []string // old list, passed as the first argument to myersDiffStr
	l2  []string // new list, passed as the second argument to myersDiffStr
	exp []Op     // expected result, compared with reflect.DeepEqual
}
// TestDiff checks myersDiffStr (with whitespace-sensitive comparison)
// against hand-written edit scripts. Results are compared with
// reflect.DeepEqual, so an expected empty script must be a non-nil empty
// slice.
//
// Every failing row is reported together with its index and inputs so it
// can be located in the table. Note that the parameter t shadows the
// aliased testing import inside the function body.
func TestDiff(t *t.T) {
	const (
		A = "A"
		B = "B"
		C = "C"
	)
	testCases := []TestCase{
		{[]string{}, []string{}, []Op{}},
		{[]string{}, []string{"foo"}, []Op{{OpInsert, 0, 0, "foo"}}},
		{[]string{"foo", "bar", "baz"}, []string{"foo", "bar", "baz"}, []Op{}},
		{[]string{"foo", "bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 1, -1, "bar"}}},
		{[]string{"baz"}, []string{"foo", "baz"}, []Op{{OpInsert, 0, 0, "foo"}}},
		{[]string{"bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 0, -1, "bar"}, {OpInsert, 1, 0, "foo"}}},
		{[]string{"foo", "bar", "baz"}, []string{"foo", "bar"}, []Op{{OpDelete, 2, -1, "baz"}}},
		{
			[]string{A, B, C, A, B, B, A},
			[]string{C, B, A, B, A, C},
			[]Op{{OpDelete, 0, -1, A}, {OpInsert, 1, 0, C}, {OpDelete, 2, -1, C}, {OpDelete, 5, -1, B}, {OpInsert, 7, 5, C}},
		},
		{
			[]string{C, A, B, A, B, A, B, A, B, A, B, A, B, C},
			[]string{B, A, B, A, B, A, B, A, B, A, B, A, B, A},
			[]Op{{OpDelete, 0, -1, C}, {OpInsert, 1, 0, B}, {OpDelete, 13, -1, C}, {OpInsert, 14, 13, A}},
		},
		{
			[]string{B},
			[]string{A, B, C, B, A},
			[]Op{{OpInsert, 0, 0, A}, {OpInsert, 0, 1, B}, {OpInsert, 0, 2, C}, {OpInsert, 1, 4, A}},
		},
	}
	for idx, c := range testCases {
		act := myersDiffStr(c.l1, c.l2, true)
		if !reflect.DeepEqual(c.exp, act) {
			t.Errorf("case %d: myersDiffStr(%q, %q, true) = %+v; want %+v",
				idx, c.l1, c.l2, act, c.exp)
		}
	}
}

View File

@ -9,15 +9,6 @@ import (
"github.com/joint-online-judge/JOJ3/internal/stage" "github.com/joint-online-judge/JOJ3/internal/stage"
) )
// operation represents the type of edit operation.
type operation uint
const (
INSERT operation = iota + 1
DELETE
MOVE
)
func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
[]stage.ParserResult, bool, error, []stage.ParserResult, bool, error,
) { ) {
@ -89,16 +80,15 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
answerLines := strings.Split(answerStr, "\n") answerLines := strings.Split(answerStr, "\n")
resultLines := strings.Split(resultStr, "\n") resultLines := strings.Split(resultStr, "\n")
// Generate Myers diff // Generate Myers diff
diffOps := myersDiff(answerLines, resultLines, diffOps := myersDiffStr(answerLines, resultLines,
output.CompareSpace) output.CompareSpace)
// Generate diff block with surrounding context // Generate diff block with surrounding context
diffOutput := generateDiffWithContext( diffOutput := formatDiff(
answerLines, answerLines,
resultLines, resultLines,
diffOps, diffOps,
output.MaxDiffLength,
) )
diffOutput = strings.TrimSuffix(diffOutput, "\n \n") diffOutput = strings.TrimSuffix(diffOutput, "\n ")
comment += fmt.Sprintf( comment += fmt.Sprintf(
"```diff\n%s\n```\n", "```diff\n%s\n```\n",
diffOutput, diffOutput,