refactor(parser/diff): linear space diff
This commit is contained in:
parent
fd583c5b7a
commit
171bc48305
|
@ -50,148 +50,32 @@ func isWhitespace(b byte) bool {
|
||||||
b == 0xA0
|
b == 0xA0
|
||||||
}
|
}
|
||||||
|
|
||||||
// myersDiff computes the Myers' diff between two slices of strings.
|
func formatDiff(oldList []string, newList []string, ops []Op) string {
|
||||||
// src: https://github.com/cj1128/myers-diff/blob/master/main.go
|
var result []string
|
||||||
// TODO: it has O(n^2) time complexity
|
i, j := 0, 0
|
||||||
func myersDiff(src, dst []string, compareSpace bool) []operation {
|
|
||||||
n := len(src)
|
|
||||||
m := len(dst)
|
|
||||||
maxLength := n + m
|
|
||||||
var trace []map[int]int
|
|
||||||
var x, y int
|
|
||||||
|
|
||||||
loop:
|
|
||||||
for d := 0; d <= maxLength; d += 1 {
|
|
||||||
v := make(map[int]int, d+2)
|
|
||||||
trace = append(trace, v)
|
|
||||||
|
|
||||||
if d == 0 {
|
|
||||||
t := 0
|
|
||||||
for len(src) > t &&
|
|
||||||
len(dst) > t &&
|
|
||||||
compareStrings(src[t], dst[t], compareSpace) {
|
|
||||||
t += 1
|
|
||||||
}
|
|
||||||
v[0] = t
|
|
||||||
if t == len(src) && len(src) == len(dst) {
|
|
||||||
break loop
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
lastV := trace[d-1]
|
|
||||||
|
|
||||||
for k := -d; k <= d; k += 2 {
|
|
||||||
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
|
|
||||||
x = lastV[k+1]
|
|
||||||
} else {
|
|
||||||
x = lastV[k-1] + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
y = x - k
|
|
||||||
|
|
||||||
for x < n && y < m && compareStrings(src[x], dst[y], compareSpace) {
|
|
||||||
x, y = x+1, y+1
|
|
||||||
}
|
|
||||||
|
|
||||||
v[k] = x
|
|
||||||
|
|
||||||
if x == n && y == m {
|
|
||||||
break loop
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var script []operation
|
|
||||||
x = n
|
|
||||||
y = m
|
|
||||||
var k, prevK, prevX, prevY int
|
|
||||||
|
|
||||||
for d := len(trace) - 1; d > 0; d -= 1 {
|
|
||||||
k = x - y
|
|
||||||
lastV := trace[d-1]
|
|
||||||
|
|
||||||
if k == -d || (k != d && lastV[k-1] < lastV[k+1]) {
|
|
||||||
prevK = k + 1
|
|
||||||
} else {
|
|
||||||
prevK = k - 1
|
|
||||||
}
|
|
||||||
|
|
||||||
prevX = lastV[prevK]
|
|
||||||
prevY = prevX - prevK
|
|
||||||
|
|
||||||
for x > prevX && y > prevY {
|
|
||||||
script = append(script, MOVE)
|
|
||||||
x -= 1
|
|
||||||
y -= 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if x == prevX {
|
|
||||||
script = append(script, INSERT)
|
|
||||||
} else {
|
|
||||||
script = append(script, DELETE)
|
|
||||||
}
|
|
||||||
|
|
||||||
x, y = prevX, prevY
|
|
||||||
}
|
|
||||||
|
|
||||||
if trace[0][0] != 0 {
|
|
||||||
for i := 0; i < trace[0][0]; i += 1 {
|
|
||||||
script = append(script, MOVE)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return reverse(script)
|
|
||||||
}
|
|
||||||
|
|
||||||
// reverse reverses a slice of operations.
|
|
||||||
func reverse(s []operation) []operation {
|
|
||||||
result := make([]operation, len(s))
|
|
||||||
for i, v := range s {
|
|
||||||
result[len(s)-1-i] = v
|
|
||||||
}
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
// generateDiffWithContext creates a diff block with surrounding context from stdout and result.
|
|
||||||
func generateDiffWithContext(
|
|
||||||
stdoutLines, resultLines []string, ops []operation, maxLength int,
|
|
||||||
) string {
|
|
||||||
var diffBuilder strings.Builder
|
|
||||||
|
|
||||||
srcIndex, dstIndex, lineCount := 0, 0, 0
|
|
||||||
|
|
||||||
for _, op := range ops {
|
for _, op := range ops {
|
||||||
s := ""
|
if op.OpType == OpDelete {
|
||||||
switch op {
|
for i < op.OldPos {
|
||||||
case INSERT:
|
result = append(result, " "+oldList[i])
|
||||||
if dstIndex < len(resultLines) {
|
i++
|
||||||
s = fmt.Sprintf("+ %s\n", resultLines[dstIndex])
|
j++
|
||||||
dstIndex += 1
|
|
||||||
}
|
}
|
||||||
case MOVE:
|
result = append(result, "- "+fmt.Sprint(op.Elem))
|
||||||
if srcIndex < len(stdoutLines) {
|
i++
|
||||||
s = fmt.Sprintf(" %s\n", stdoutLines[srcIndex])
|
} else if op.OpType == OpInsert {
|
||||||
srcIndex += 1
|
for j < op.NewPos {
|
||||||
dstIndex += 1
|
result = append(result, " "+newList[j])
|
||||||
}
|
i++
|
||||||
case DELETE:
|
j++
|
||||||
if srcIndex < len(stdoutLines) {
|
|
||||||
s = fmt.Sprintf("- %s\n", stdoutLines[srcIndex])
|
|
||||||
srcIndex += 1
|
|
||||||
lineCount += 1
|
|
||||||
}
|
}
|
||||||
|
result = append(result, "+ "+fmt.Sprint(op.Elem))
|
||||||
|
j++
|
||||||
}
|
}
|
||||||
if maxLength > 0 && diffBuilder.Len()+len(s) > maxLength {
|
|
||||||
remaining := maxLength - diffBuilder.Len()
|
|
||||||
if remaining > 0 {
|
|
||||||
diffBuilder.WriteString(s[:remaining])
|
|
||||||
}
|
|
||||||
diffBuilder.WriteString("\n\n(truncated)")
|
|
||||||
break
|
|
||||||
}
|
|
||||||
diffBuilder.WriteString(s)
|
|
||||||
}
|
}
|
||||||
|
for i < len(oldList) && j < len(newList) {
|
||||||
return diffBuilder.String()
|
result = append(result, " "+oldList[i])
|
||||||
|
i++
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
return strings.Join(result, "\n")
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,69 +0,0 @@
|
||||||
package diff
|
|
||||||
|
|
||||||
import (
|
|
||||||
"reflect"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestMyersDiff(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
src []string
|
|
||||||
dst []string
|
|
||||||
compareSpace bool
|
|
||||||
expected []operation
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "Insert operation",
|
|
||||||
src: []string{"a", "b"},
|
|
||||||
dst: []string{"a", "b", "c"},
|
|
||||||
compareSpace: true,
|
|
||||||
expected: []operation{MOVE, MOVE, INSERT},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Delete operation",
|
|
||||||
src: []string{"a", "b", "c"},
|
|
||||||
dst: []string{"a", "b"},
|
|
||||||
compareSpace: true,
|
|
||||||
expected: []operation{MOVE, MOVE, DELETE},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "No changes",
|
|
||||||
src: []string{"a", "b", "c"},
|
|
||||||
dst: []string{"a", "b", "c"},
|
|
||||||
compareSpace: true,
|
|
||||||
expected: []operation{MOVE, MOVE, MOVE},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Move operation",
|
|
||||||
src: []string{"a", "b", "c"},
|
|
||||||
dst: []string{"c", "a", "b"},
|
|
||||||
compareSpace: true,
|
|
||||||
expected: []operation{INSERT, MOVE, MOVE, DELETE},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Ignore whitespace differences",
|
|
||||||
src: []string{"a ", "b"},
|
|
||||||
dst: []string{"a", "b"},
|
|
||||||
compareSpace: false,
|
|
||||||
expected: []operation{MOVE, MOVE},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Consider whitespace differences",
|
|
||||||
src: []string{"a ", "b"},
|
|
||||||
dst: []string{"a", "b"},
|
|
||||||
compareSpace: true,
|
|
||||||
expected: []operation{DELETE, INSERT, MOVE},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, test := range tests {
|
|
||||||
t.Run(test.name, func(t *testing.T) {
|
|
||||||
result := myersDiff(test.src, test.dst, test.compareSpace)
|
|
||||||
if !reflect.DeepEqual(result, test.expected) {
|
|
||||||
t.Errorf("myersDiff(%v, %v, %v) = %v; want %v",
|
|
||||||
test.src, test.dst, test.compareSpace, result, test.expected)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
143
internal/parser/diff/myers.go
Normal file
143
internal/parser/diff/myers.go
Normal file
|
@ -0,0 +1,143 @@
|
||||||
|
package diff
|
||||||
|
|
||||||
|
// source: https://github.com/MFAshby/myers
|
||||||
|
// Myers' diff algorithm in Go
|
||||||
|
// Ported from https://blog.robertelder.org/diff-algorithm/
|
||||||
|
|
||||||
|
// OpType identifies the kind of edit that an Op describes.
type OpType int
|
||||||
|
|
||||||
|
// The kinds of edit a diff can contain.
const (
|
||||||
|
// OpInsert marks an element that is inserted (value 0).
OpInsert OpType = iota
|
||||||
|
// OpDelete marks an element that is deleted (value 1).
OpDelete
|
||||||
|
)
|
||||||
|
|
||||||
|
// Op is a single edit: the insertion or the deletion of one element.
type Op struct {
|
||||||
|
OpType OpType // Kind of edit: OpInsert or OpDelete
|
||||||
|
OldPos int // Position in the old list of the item to be inserted or deleted
|
||||||
|
NewPos int // Position in the new list of the item to be inserted; diffInternal stores -1 here for deletions
|
||||||
|
Elem any // Actual value to be inserted or deleted
|
||||||
|
}
|
||||||
|
|
||||||
|
// myersDiff returns a minimal list of insertions and deletions that turn the
|
||||||
|
// list e into the list f. Elements are compared with equals. The upstream port documents
// O(min(len(e),len(f))) space and O(min(len(e),len(f)) * D)
|
||||||
|
// worst-case execution time where D is the number of differences.
// Positions in the returned Ops are relative to the start of e and f
// (diffInternal is called with both offsets set to 0).
|
||||||
|
func myersDiff(e, f []any, equals func(any, any) bool) []Op {
|
||||||
|
return diffInternal(e, f, equals, 0, 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
// diffInternal computes the edit script between e and f. i and j are the
// offsets of e and f inside the lists the caller started with; they are added
// to the positions stored in the returned Ops.
//
// When both slices are non-empty it advances a forward search (from the start
// of e and f) and a reverse search (from their ends) in lock-step until the
// two overlap, then recurses on the parts before and after the overlap and
// concatenates the results. When only one slice is non-empty it emits one
// delete per element of e, or one insert per element of f.
func diffInternal(e, f []any, equals func(any, any) bool, i, j int) []Op {
|
||||||
|
N := len(e)
|
||||||
|
M := len(f)
|
||||||
|
L := N + M
|
||||||
|
// Z is the length of the g and p buffers below; they are indexed modulo Z
// through pyMod, so pyMod must return a value in [0, Z) for every index used.
Z := 2*min(N, M) + 2
|
||||||
|
switch {
|
||||||
|
case N > 0 && M > 0:
|
||||||
|
// w converts a value of k from one search direction into the matching
// value z for the other direction (z = w - k, see below).
w := N - M
|
||||||
|
// g is the buffer written by the forward pass (r == 0),
// p the buffer written by the reverse pass (r == 1).
g := make([]int, Z)
|
||||||
|
p := make([]int, Z)
|
||||||
|
|
||||||
|
// h takes the values 0 .. ceil(L/2).
hMax := L/2 + L%2 + 1
|
||||||
|
for h := range hMax {
|
||||||
|
// r == 0 is the forward pass, r == 1 the reverse pass.
for r := range 2 {
|
||||||
|
// c is the buffer of the current pass, d the buffer of the other pass.
var c, d []int
|
||||||
|
// o and m select how e and f are indexed in the matching loop below:
// o = 1, m = 1 reads e[a], f[b]; o = 0, m = -1 reads e[N-1-a], f[M-1-b].
var o, m int
|
||||||
|
if r == 0 {
|
||||||
|
c = g
|
||||||
|
d = p
|
||||||
|
o = 1
|
||||||
|
m = 1
|
||||||
|
} else {
|
||||||
|
c = p
|
||||||
|
d = g
|
||||||
|
o = 0
|
||||||
|
m = -1
|
||||||
|
}
|
||||||
|
// Range of k visited for this h (k advances in steps of 2). The max(...)
// terms narrow the range once h exceeds M or N respectively.
kMin := -(h - 2*max(0, h-M))
|
||||||
|
kMax := h - 2*max(0, h-N) + 1
|
||||||
|
for k := kMin; k < kMax; k += 2 {
|
||||||
|
// a and b count elements consumed from e and f in the current direction,
// with b = a - k.
var a int
|
||||||
|
// Start from the neighbour k+1 (a unchanged) or from k-1 (a advanced by
// one), whichever the stored values select.
if k == -h || k != h && c[pyMod((k-1), Z)] < c[pyMod((k+1), Z)] {
|
||||||
|
a = c[pyMod((k+1), Z)]
|
||||||
|
} else {
|
||||||
|
a = c[pyMod((k-1), Z)] + 1
|
||||||
|
}
|
||||||
|
b := a - k
|
||||||
|
// Remember where the run of equal elements begins.
s, t := a, b
|
||||||
|
|
||||||
|
// Extend the run while the elements compare equal.
for a < N && b < M && equals(e[(1-o)*N+m*a+(o-1)], f[(1-o)*M+m*b+(o-1)]) {
|
||||||
|
a, b = a+1, b+1
|
||||||
|
}
|
||||||
|
c[pyMod(k, Z)] = a
|
||||||
|
z := -(k - w)
|
||||||
|
// Overlap test: evaluated only by the pass whose o equals the parity of L
// (forward when L is odd, reverse when L is even), only when |z| <= h-o,
// and satisfied when the two passes together cover at least N elements of e.
if pyMod(L, 2) == o && z >= -(h-o) && z <= h-o && c[pyMod(k, Z)]+d[pyMod(z, Z)] >= N {
|
||||||
|
// D is 2*h-1 in the forward pass and 2*h in the reverse pass.
// (x, y) and (u, v) are the start and end of the run, expressed as
// forward indexes into e and f.
var D, x, y, u, v int
|
||||||
|
if o == 1 {
|
||||||
|
D = 2*h - 1
|
||||||
|
x = s
|
||||||
|
y = t
|
||||||
|
u = a
|
||||||
|
v = b
|
||||||
|
} else {
|
||||||
|
D = 2 * h
|
||||||
|
x = N - a
|
||||||
|
y = M - b
|
||||||
|
u = N - s
|
||||||
|
v = M - t
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
// Recurse on what lies before the run and what lies after it.
case D > 1 || (x != u && y != v):
|
||||||
|
return append(diffInternal(e[0:x], f[0:y], equals, i, j), diffInternal(e[u:N], f[v:M], equals, i+u, j+v)...)
|
||||||
|
// f is longer: f[N:M] is passed on with an empty e, which yields insertions.
case M > N:
|
||||||
|
return diffInternal(make([]any, 0), f[N:M], equals, i+N, j+N)
|
||||||
|
// e is longer: e[M:N] is passed on with an empty f, which yields deletions.
case M < N:
|
||||||
|
return diffInternal(e[M:N], make([]any, 0), equals, i+M, j+M)
|
||||||
|
// Same length: no edits are reported.
default:
|
||||||
|
return make([]Op, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// f is empty: every element of e is deleted. NewPos is set to -1.
case N > 0:
|
||||||
|
res := make([]Op, N)
|
||||||
|
for n := range N {
|
||||||
|
res[n] = Op{OpDelete, i + n, -1, e[n]}
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
// e is empty: every element of f is inserted at old position i. If f is
// empty as well the result is an empty, non-nil slice.
default:
|
||||||
|
res := make([]Op, M)
|
||||||
|
for n := range M {
|
||||||
|
res[n] = Op{OpInsert, i, j + n, f[n]}
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
// Reached only if the search loop above ends without finding an overlap.
panic("Should never hit this!")
|
||||||
|
}
|
||||||
|
|
||||||
|
// pyMod returns x modulo y following Python's convention, where the result
// takes the sign of the divisor (e.g. -1 mod 3 is 2). Go's % operator instead
// truncates toward zero, so its result takes the sign of the dividend
// (-1 % 3 is -1).
// See https://en.wikipedia.org/wiki/Modulo_operation#Variants_of_the_definition
//
// y must be positive; the result is then always in [0, y). The remainder is
// taken before y is added so that values of x below -y are handled too:
// the shortcut (x + y) % y stays negative for such x, and diffInternal uses
// the result as a slice index while passing values of x that can drop below
// -y when its two inputs differ a lot in length.
func pyMod(x, y int) int {
	return ((x % y) + y) % y
}
|
||||||
|
|
||||||
|
// Let us map element in same way as in
|
||||||
|
|
||||||
|
// Convenient wrapper for string lists
|
||||||
|
func myersDiffStr(e, f []string, compareSpace bool) []Op {
|
||||||
|
e1, f1 := make([]any, len(e)), make([]any, len(f))
|
||||||
|
for i, ee := range e {
|
||||||
|
e1[i] = ee
|
||||||
|
}
|
||||||
|
for i, fe := range f {
|
||||||
|
f1[i] = fe
|
||||||
|
}
|
||||||
|
return myersDiff(e1, f1, func(s1, s2 any) bool {
|
||||||
|
return compareStrings(s1.(string), s2.(string), compareSpace)
|
||||||
|
})
|
||||||
|
}
|
48
internal/parser/diff/myers_test.go
Normal file
48
internal/parser/diff/myers_test.go
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
package diff
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
t "testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestCase is one row of the TestDiff table.
type TestCase struct {
|
||||||
|
l1 []string // first list passed to myersDiffStr (the old side)
|
||||||
|
l2 []string // second list passed to myersDiffStr (the new side)
|
||||||
|
exp []Op // edit script the diff is expected to return
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestDiff runs myersDiffStr, with compareSpace set to true, over a table of
// list pairs and compares each returned edit script against the expected one
// with reflect.DeepEqual.
// Note: the parameter t shadows the import alias t of package testing inside
// the function body.
func TestDiff(t *t.T) {
|
||||||
|
// Single-letter elements used by the longer cases below.
A := "A"
|
||||||
|
B := "B"
|
||||||
|
C := "C"
|
||||||
|
testCases := []TestCase{
|
||||||
|
// Both lists empty: empty (non-nil) script expected.
{[]string{}, []string{}, []Op{}},
|
||||||
|
{[]string{}, []string{"foo"}, []Op{{OpInsert, 0, 0, "foo"}}},
|
||||||
|
// Identical lists: no edits.
{[]string{"foo", "bar", "baz"}, []string{"foo", "bar", "baz"}, []Op{}},
|
||||||
|
{[]string{"foo", "bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 1, -1, "bar"}}},
|
||||||
|
{[]string{"baz"}, []string{"foo", "baz"}, []Op{{OpInsert, 0, 0, "foo"}}},
|
||||||
|
{[]string{"bar", "baz"}, []string{"foo", "baz"}, []Op{{OpDelete, 0, -1, "bar"}, {OpInsert, 1, 0, "foo"}}},
|
||||||
|
{[]string{"foo", "bar", "baz"}, []string{"foo", "bar"}, []Op{{OpDelete, 2, -1, "baz"}}},
|
||||||
|
{
|
||||||
|
[]string{A, B, C, A, B, B, A},
|
||||||
|
[]string{C, B, A, B, A, C},
|
||||||
|
[]Op{{OpDelete, 0, -1, A}, {OpInsert, 1, 0, C}, {OpDelete, 2, -1, C}, {OpDelete, 5, -1, B}, {OpInsert, 7, 5, C}},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
[]string{C, A, B, A, B, A, B, A, B, A, B, A, B, C},
|
||||||
|
[]string{B, A, B, A, B, A, B, A, B, A, B, A, B, A},
|
||||||
|
[]Op{{OpDelete, 0, -1, C}, {OpInsert, 1, 0, B}, {OpDelete, 13, -1, C}, {OpInsert, 14, 13, A}},
|
||||||
|
},
|
||||||
|
// Lists of different lengths (1 element against 5).
{
|
||||||
|
[]string{B},
|
||||||
|
[]string{A, B, C, B, A},
|
||||||
|
[]Op{{OpInsert, 0, 0, A}, {OpInsert, 0, 1, B}, {OpInsert, 0, 2, C}, {OpInsert, 1, 4, A}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, c := range testCases {
|
||||||
|
act := myersDiffStr(c.l1, c.l2, true)
|
||||||
|
if !reflect.DeepEqual(c.exp, act) {
|
||||||
|
t.Errorf("Failed diff, expected %v actual %v\n", c.exp, act)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -9,15 +9,6 @@ import (
|
||||||
"github.com/joint-online-judge/JOJ3/internal/stage"
|
"github.com/joint-online-judge/JOJ3/internal/stage"
|
||||||
)
|
)
|
||||||
|
|
||||||
// operation represents the type of edit operation.
|
|
||||||
type operation uint
|
|
||||||
|
|
||||||
const (
|
|
||||||
INSERT operation = iota + 1
|
|
||||||
DELETE
|
|
||||||
MOVE
|
|
||||||
)
|
|
||||||
|
|
||||||
func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
|
func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
|
||||||
[]stage.ParserResult, bool, error,
|
[]stage.ParserResult, bool, error,
|
||||||
) {
|
) {
|
||||||
|
@ -89,16 +80,15 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) (
|
||||||
answerLines := strings.Split(answerStr, "\n")
|
answerLines := strings.Split(answerStr, "\n")
|
||||||
resultLines := strings.Split(resultStr, "\n")
|
resultLines := strings.Split(resultStr, "\n")
|
||||||
// Generate Myers diff
|
// Generate Myers diff
|
||||||
diffOps := myersDiff(answerLines, resultLines,
|
diffOps := myersDiffStr(answerLines, resultLines,
|
||||||
output.CompareSpace)
|
output.CompareSpace)
|
||||||
// Generate diff block with surrounding context
|
// Generate diff block with surrounding context
|
||||||
diffOutput := generateDiffWithContext(
|
diffOutput := formatDiff(
|
||||||
answerLines,
|
answerLines,
|
||||||
resultLines,
|
resultLines,
|
||||||
diffOps,
|
diffOps,
|
||||||
output.MaxDiffLength,
|
|
||||||
)
|
)
|
||||||
diffOutput = strings.TrimSuffix(diffOutput, "\n \n")
|
diffOutput = strings.TrimSuffix(diffOutput, "\n ")
|
||||||
comment += fmt.Sprintf(
|
comment += fmt.Sprintf(
|
||||||
"```diff\n%s\n```\n",
|
"```diff\n%s\n```\n",
|
||||||
diffOutput,
|
diffOutput,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user