refactor(parser/diff): linear space diff
This commit is contained in:
parent
fd583c5b7a
commit
171bc48305
|
@ -50,148 +50,32 @@ func isWhitespace(b byte) bool {
|
|||
b == 0xA0
|
||||
}
|
||||
|
||||
// myersDiff computes the Myers' diff between two slices of strings.
|
||||
// src: https://github.com/cj1128/myers-diff/blob/master/main.go
|
||||
// TODO: it has O(n^2) time complexity
|
||||
func myersDiff(src, dst []string, compareSpace bool) []operation {
|
||||
n := len(src)
|
||||
m := len(dst)
|
||||
maxLength := n + m
|
||||
var trace []map[int]int
|
||||
var x, y int
|
||||
|
||||
loop:
|
||||
for d := 0; d <= maxLength; d += 1 {
|
||||
v := make(map[int]int, d+2)
|
||||
trace = append(trace, v)
|
||||
|
||||
if d == 0 {
|
||||
t := 0
|
||||
for len(src) > t &&
|
||||
len(dst) > t &&
|
||||
compareStrings(src[t], dst[t], compareSpace) {
|
||||
t += 1
|
||||
}
|
||||
v[0] = t
|
||||
if t == len(src) && len(src) == len(dst) {
|
||||
break loop
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
lastV := trace[d-1]
|
||||
|
||||
for k := -d; k <= d; k += 2 {
|
||||
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
|
||||
x = lastV[k+1]
|
||||
} else {
|
||||
x = lastV[k-1] + 1
|
||||
}
|
||||
|
||||
y = x - k
|
||||
|
||||
for x < n && y < m && compareStrings(src[x], dst[y], compareSpace) {
|
||||
x, y = x+1, y+1
|
||||
}
|
||||
|
||||
v[k] = x
|
||||
|
||||
if x == n && y == m {
|
||||
break loop
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var script []operation
|
||||
x = n
|
||||
y = m
|
||||
var k, prevK, prevX, prevY int
|
||||
|
||||
for d := len(trace) - 1; d > 0; d -= 1 {
|
||||
k = x - y
|
||||
lastV := trace[d-1]
|
||||
|
||||
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
|
||||
prevK = k + 1
|
||||
} else {
|
||||
prevK = k - 1
|
||||
}
|
||||
|
||||
prevX = lastV[prevK]
|
||||
prevY = prevX - prevK
|
||||
|
||||
for x > prevX && y > prevY {
|
||||
script = append(script, MOVE)
|
||||
x -= 1
|
||||
y -= 1
|
||||
}
|
||||
|
||||
if x == prevX {
|
||||
script = append(script, INSERT)
|
||||
} else {
|
||||
script = append(script, DELETE)
|
||||
}
|
||||
|
||||
x, y = prevX, prevY
|
||||
}
|
||||
|
||||
if trace[0][0] != 0 {
|
||||
for i := 0; i < trace[0][0]; i += 1 {
|
||||
script = append(script, MOVE)
|
||||
}
|
||||
}
|
||||
|
||||
return reverse(script)
|
||||
}
|
||||
|
||||
// reverse reverses a slice of operations.
|
||||
func reverse(s []operation) []operation {
|
||||
result := make([]operation, len(s))
|
||||
for i, v := range s {
|
||||
result[len(s)-1-i] = v
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// generateDiffWithContext creates a diff block with surrounding context from stdout and result.
|
||||
func generateDiffWithContext(
|
||||
stdoutLines, resultLines []string, ops []operation, maxLength int,
|
||||
) string {
|
||||
var diffBuilder strings.Builder
|
||||
|
||||
srcIndex, dstIndex, lineCount := 0, 0, 0
|
||||
|
||||
func formatDiff(oldList []string, newList []string, ops []Op) string {
|
||||
var result []string
|
||||
i, j := 0, 0
|
||||
for _, op := range ops {
|
||||
s := ""
|
||||
switch op {
|
||||
case INSERT:
|
||||
if dstIndex < len(resultLines) {
|
||||
s = fmt.Sprintf("+ %s\n", resultLines[dstIndex])
|
||||
dstIndex += 1
|
||||
if op.OpType == OpDelete {
|
||||
for i < op.OldPos {
|
||||
result = append(result, " "+oldList[i])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
case MOVE:
|
||||
if srcIndex < len(stdoutLines) {
|
||||
s = fmt.Sprintf(" %s\n", stdoutLines[srcIndex])
|
||||
srcIndex += 1
|
||||
dstIndex += 1
|
||||
result = append(result, "- "+fmt.Sprint(op.Elem))
|
||||
i++
|
||||
} else if op.OpType == OpInsert {
|
||||
for j < op.NewPos {
|
||||
result = append(result, " "+newList[j])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
case DELETE:
|
||||
if srcIndex < len(stdoutLines) {
|
||||
s = fmt.Sprintf("- %s\n", stdoutLines[srcIndex])
|
||||
srcIndex += 1
|
||||
lineCount += 1
|
||||
result = append(result, "+ "+fmt.Sprint(op.Elem))
|
||||
j++
|
||||
}
|
||||
}
|
||||
if maxLength > 0 && diffBuilder.Len()+len(s) > maxLength {
|
||||
remaining := maxLength - diffBuilder.Len()
|
||||
if remaining > 0 {
|
||||
diffBuilder.WriteString(s[:remaining])
|
||||
for i < len(oldList) && j < len(newList) {
|
||||
result = append(result, " "+oldList[i])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
diffBuilder.WriteString("\n\n(truncated)")
|
||||
break
|
||||
}
|
||||
diffBuilder.WriteString(s)
|
||||
}
|
||||
|
||||
return diffBuilder.String()
|
||||
return strings.Join(result, "\n")
|
||||
}
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
package diff
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMyersDiff(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
src []string
|
||||
dst []string
|
||||
compareSpace bool
|
||||
expected []operation
|
||||
}{
|
||||
{
|
||||
name: "Insert operation",
|
||||
src: []string{"a", "b"},
|
||||
dst: []string{"a", "b", "c"},
|
||||
compareSpace: true,
|
||||
expected: []operation{MOVE, MOVE, INSERT},
|
||||
},
|
||||
{
|
||||
name: "Delete operation",
|
||||
src: []string{"a", "b", "c"},
|
||||
dst: []string{"a", "b"},
|
||||
compareSpace: true,
|
||||
expected: []operation{MOVE, MOVE, DELETE},
|
||||
},
|
||||
{
|
||||
name: "No changes",
|
||||
src: []string{"a", "b", "c"},
|
||||
dst: []string{"a", "b", "c"},
|
||||
compareSpace: true,
|
||||
expected: []operation{MOVE, MOVE, MOVE},
|
||||
},
|
||||
{
|
||||
name: "Move operation",
|
||||
src: []string{"a", "b", "c"},
|
||||
dst: []string{"c", "a", "b"},
|
||||
compareSpace: true,
|
||||
expected: []operation{INSERT, MOVE, MOVE, DELETE},
|
||||
},
|
||||
{
|
||||
name: "Ignore whitespace differences",
|
||||
src: []string{"a ", "b"},
|
||||
dst: []string{"a", "b"},
|
||||
compareSpace: false,
|
||||
expected: []operation{MOVE, MOVE},
|
||||
},
|
||||
{
|
||||
name: "Consider whitespace differences",
|
||||
src: []string{"a ", "b"},
|
||||
dst: []string{"a", "b"},
|
||||
compareSpace: true,
|
||||
expected: []operation{DELETE, INSERT, MOVE},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := myersDiff(test.src, test.dst, test.compareSpace)
|
||||
if !reflect.DeepEqual(result, test.expected) {
|
||||
t.Errorf("myersDiff(%v, %v, %v) = %v; want %v",
|
||||
test.src, test.dst, test.compareSpace, result, test.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
143
internal/parser/diff/myers.go
Normal file
143
internal/parser/diff/myers.go
Normal file
|
@ -0,0 +1,143 @@
|
|||
package diff
|
||||
|
||||
// source: https://github.com/MFAshby/myers
|
||||
// Myer's diff algorithm in golang
|
||||
// Ported from https://blog.robertelder.org/diff-algorithm/
|
||||
|
||||
// OpType identifies the kind of edit an Op describes.
type OpType int

// The two edit kinds a diff can contain.
const (
	// OpInsert marks an element that has to be inserted.
	OpInsert OpType = iota
	// OpDelete marks an element that has to be deleted.
	OpDelete
)
|
||||
|
||||
type Op struct {
|
||||
OpType OpType // Insert or delete, as above
|
||||
OldPos int // Position in the old list of item to be inserted or deleted
|
||||
NewPos int // Position in the _new_ list of item to be inserted
|
||||
Elem any // Actual value to be inserted or deleted
|
||||
}
|
||||
|
||||
// Returns a minimal list of differences between 2 lists e and f
|
||||
// requiring O(min(len(e),len(f))) space and O(min(len(e),len(f)) * D)
|
||||
// worst-case execution time where D is the number of differences.
|
||||
func myersDiff(e, f []any, equals func(any, any) bool) []Op {
|
||||
return diffInternal(e, f, equals, 0, 0)
|
||||
}
|
||||
|
||||
func diffInternal(e, f []any, equals func(any, any) bool, i, j int) []Op {
|
||||
N := len(e)
|
||||
M := len(f)
|
||||
L := N + M
|
||||
Z := 2*min(N, M) + 2
|
||||
switch {
|
||||
case N > 0 && M > 0:
|
||||
w := N - M
|
||||
g := make([]int, Z)
|
||||
p := make([]int, Z)
|
||||
|
||||
hMax := L/2 + L%2 + 1
|
||||
for h := range hMax {
|
||||
for r := range 2 {
|
||||
var c, d []int
|
||||
var o, m int
|
||||
if r == 0 {
|
||||
c = g
|
||||
d = p
|
||||
o = 1
|
||||
m = 1
|
||||
} else {
|
||||
c = p
|
||||
d = g
|
||||
o = 0
|
||||
m = -1
|
||||
}
|
||||
kMin := -(h - 2*max(0, h-M))
|
||||
kMax := h - 2*max(0, h-N) + 1
|
||||
for k := kMin; k < kMax; k += 2 {
|
||||
var a int
|
||||
if k == -h || k != h && c[pyMod((k-1), Z)] < c[pyMod((k+1), Z)] {
|
||||
a = c[pyMod((k+1), Z)]
|
||||
} else {
|
||||
a = c[pyMod((k-1), Z)] + 1
|
||||
}
|
||||
b := a - k
|
||||
s, t := a, b
|
||||
|
||||
for a < N && b < M && equals(e[(1-o)*N+m*a+(o-1)], f[(1-o)*M+m*b+(o-1)]) {
|
||||
a, b = a+1, b+1
|
||||
}
|
||||
c[pyMod(k, Z)] = a
|
||||
z := -(k - w)
|
||||
if pyMod(L, 2) == o && z >= -(h-o) && z <= h-o && c[pyMod(k, Z)]+d[pyMod(z, Z)] >= N {
|
||||
var D, x, y, u, v int
|
||||
if o == 1 {
|
||||
D = 2*h - 1
|
||||
x = s
|
||||
y = t
|
||||
u = a
|
||||
v = b
|
||||
} else {
|
||||
D = 2 * h
|
||||
x = N - a
|
||||
y = M - b
|
||||
u = N - s
|
||||
v = M - t
|
||||
}
|
||||
switch {
|
||||
case D > 1 || (x != u && y != v):
|
||||
return append(diffInternal(e[0:x], f[0:y], equals, i, j), diffInternal(e[u:N], f[v:M], equals, i+u, j+v)...)
|
||||
case M > N:
|
||||
return diffInternal(make([]any, 0), f[N:M], equals, i+N, j+N)
|
||||
case M < N:
|
||||
return diffInternal(e[M:N], make([]any, 0), equals, i+M, j+M)
|
||||
default:
|
||||
return make([]Op, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
case N > 0:
|
||||
res := make([]Op, N)
|
||||
for n := range N {
|
||||
res[n] = Op{OpDelete, i + n, -1, e[n]}
|
||||
}
|
||||
return res
|
||||
default:
|
||||
res := make([]Op, M)
|
||||
for n := range M {
|
||||
res[n] = Op{OpInsert, i, j + n, f[n]}
|
||||
}
|
||||
return res
|
||||
}
|
||||
panic("Should never hit this!")
|
||||
}
|
||||
|
||||
// pyMod returns x modulo y the way Python's % operator does: the result
// takes the sign of the divisor y (e.g. pyMod(-1, 3) == 2), whereas Go's
// built-in % takes the sign of the dividend.
// See https://en.wikipedia.org/wiki/Modulo_operation#Variants_of_the_definition
//
// The result is computed from Go's remainder and then shifted by y when its
// sign disagrees with y. Unlike the shortcut (x+y) % y, this stays correct
// for x < -y, so the result is always a valid index into a slice of length y
// when y is positive. Like Go's %, it panics when y is zero.
func pyMod(x, y int) int {
	rem := x % y
	if rem != 0 && (rem < 0) != (y < 0) {
		rem += y
	}
	return rem
}
|
||||
|
||||
// Let us map element in same way as in
|
||||
|
||||
// Convenient wrapper for string lists
|
||||
func myersDiffStr(e, f []string, compareSpace bool) []Op {
|
||||
e1, f1 := make([]any, len(e)), make([]any, len(f))
|
||||
for i, ee := range e {
|
||||
e1[i] = ee
|
||||
}
|
||||
for i, fe := range f {
|
||||
f1[i] = fe
|
||||
}
|
||||
return myersDiff(e1, f1, func(s1, s2 any) bool {
|
||||
return compareStrings(s1.(string), s2.(string), compareSpace)
|
||||
})
|
||||
}
|
48
internal/parser/diff/myers_test.go
Normal file
48
internal/parser/diff/myers_test.go
Normal file
|
@ -0,0 +1,48 @@
|
|||
package diff
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
t "testing"
|
||||
)
|
||||
|
||||
type TestCase struct {
|
||||
l1 []string
|
||||
l2 []string
|
||||
exp []Op
|
||||
}
|
||||
|
||||
func TestDiff(t *t.T) {
|
||||
A := "A"
|
||||
B := "B"
|
||||
C := "C"
|
||||
testCases := []TestCase{
|
||||
{[]string{}, []string{}, []Op{}},
|
||||
{[]string{}, []string{"foo"}, []Op{{OpInsert, 0, 0, "foo"}}},
|
||||
{[]string{"foo", "bar", "baz"}, []string{"foo", "bar", "baz"}, []Op{}},
|
||||
{[]string{"foo", "bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 1, -1, "bar"}}},
|
||||
{[]string{"baz"}, []string{"foo", "baz"}, []Op{{OpInsert, 0, 0, "foo"}}},
|
||||
{[]string{"bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 0, -1, "bar"}, {OpInsert, 1, 0, "foo"}}},
|
||||
{[]string{"foo", "bar", "baz"}, []string{"foo", "bar"}, []Op{{OpDelete, 2, -1, "baz"}}},
|
||||
{
|
||||
[]string{A, B, C, A, B, B, A},
|
||||
[]string{C, B, A, B, A, C},
|
||||
[]Op{{OpDelete, 0, -1, A}, {OpInsert, 1, 0, C}, {OpDelete, 2, -1, C}, {OpDelete, 5, -1, B}, {OpInsert, 7, 5, C}},
|
||||
},
|
||||
{
|
||||
[]string{C, A, B, A, B, A, B, A, B, A, B, A, B, C},
|
||||
[]string{B, A, B, A, B, A, B, A, B, A, B, A, B, A},
|
||||
[]Op{{OpDelete, 0, -1, C}, {OpInsert, 1, 0, B}, {OpDelete, 13, -1, C}, {OpInsert, 14, 13, A}},
|
||||
},
|
||||
{
|
||||
[]string{B},
|
||||
[]string{A, B, C, B, A},
|
||||
[]Op{{OpInsert, 0, 0, A}, {OpInsert, 0, 1, B}, {OpInsert, 0, 2, C}, {OpInsert, 1, 4, A}},
|
||||
},
|
||||
}
|
||||
for _, c := range testCases {
|
||||
act := myersDiffStr(c.l1, c.l2, true)
|
||||
if !reflect.DeepEqual(c.exp, act) {
|
||||
t.Errorf("Failed diff, expected %v actual %v\n", c.exp, act)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -9,15 +9,6 @@ import (
|
|||
"github.com/joint-online-judge/JOJ3/internal/stage"
|
||||
)
|
||||
|
||||
// operation is the kind of a single step in an edit script.
type operation uint

// Edit-script steps. Numbering starts at 1, so the zero value of operation
// is none of them.
const (
	INSERT operation = iota + 1 // 1
	DELETE                      // 2
	MOVE                        // 3
)
|
||||
|
||||
func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
|
||||
[]stage.ParserResult, bool, error,
|
||||
) {
|
||||
|
@ -89,16 +80,15 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
|
|||
answerLines := strings.Split(answerStr, "\n")
|
||||
resultLines := strings.Split(resultStr, "\n")
|
||||
// Generate Myers diff
|
||||
diffOps := myersDiff(answerLines, resultLines,
|
||||
diffOps := myersDiffStr(answerLines, resultLines,
|
||||
output.CompareSpace)
|
||||
// Generate diff block with surrounding context
|
||||
diffOutput := generateDiffWithContext(
|
||||
diffOutput := formatDiff(
|
||||
answerLines,
|
||||
resultLines,
|
||||
diffOps,
|
||||
output.MaxDiffLength,
|
||||
)
|
||||
diffOutput = strings.TrimSuffix(diffOutput, "\n \n")
|
||||
diffOutput = strings.TrimSuffix(diffOutput, "\n ")
|
||||
comment += fmt.Sprintf(
|
||||
"```diff\n%s\n```\n",
|
||||
diffOutput,
|
||||
|
|
Loading…
Reference in New Issue
Block a user