From 37c0d76bf311988ed09b1d51ca8f67a24a274a8a Mon Sep 17 00:00:00 2001 From: Boming Zhang Date: Fri, 28 Mar 2025 08:08:44 -0400 Subject: [PATCH] refactor(parser/diff): modify code from peter-evans/patience --- go.mod | 1 - go.sum | 2 - internal/parser/diff/diff.go | 30 ----- internal/parser/diff/parser.go | 8 +- internal/parser/diff/patience.go | 188 +++++++++++++++++++++++++++++++ 5 files changed, 194 insertions(+), 35 deletions(-) create mode 100644 internal/parser/diff/patience.go diff --git a/go.mod b/go.mod index fdd7b2f..13661a8 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/koding/multiconfig v0.0.0-20171124222453-69c27309b2d7 github.com/mcuadros/go-defaults v1.2.0 github.com/mitchellh/mapstructure v1.5.0 - github.com/peter-evans/patience v0.3.0 google.golang.org/grpc v1.71.0 google.golang.org/protobuf v1.36.5 ) diff --git a/go.sum b/go.sum index 75ce526..b254936 100644 --- a/go.sum +++ b/go.sum @@ -76,8 +76,6 @@ github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RR github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= -github.com/peter-evans/patience v0.3.0 h1:rX0JdJeepqdQl1Sk9c9uvorjYYzL2TfgLX1adqYm9cA= -github.com/peter-evans/patience v0.3.0/go.mod h1:Kmxu5sY1NmBLFSStvXjX1wS9mIv7wMcP/ubucyMOAu0= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= diff --git a/internal/parser/diff/diff.go b/internal/parser/diff/diff.go index 8c94432..25f0695 100644 --- a/internal/parser/diff/diff.go +++ b/internal/parser/diff/diff.go @@ -1,12 +1,5 @@ package diff -import ( - "fmt" - "strings" - - "github.com/peter-evans/patience" -) - // compareStrings compares two strings character by character, optionally ignoring whitespace. func compareStrings(str1, str2 string, compareSpace bool) bool { if compareSpace { @@ -51,26 +44,3 @@ func isWhitespace(b byte) bool { b == 0x85 || b == 0xA0 } - -// typeSymbol returns the associated symbol of a DiffType. -func typeSymbol(t patience.DiffType) string { - switch t { - case patience.Equal: - return " " - case patience.Insert: - return "+ " - case patience.Delete: - return "- " - default: - panic("unknown DiffType") - } -} - -// DiffText returns the source and destination texts (all equalities, insertions and deletions). -func DiffText(diffs []patience.DiffLine) string { - s := make([]string, len(diffs)) - for i, l := range diffs { - s[i] = fmt.Sprintf("%s%s", typeSymbol(l.Type), l.Text) - } - return strings.Join(s, "\n") -} diff --git a/internal/parser/diff/parser.go b/internal/parser/diff/parser.go index 8a89e65..cd9cee9 100644 --- a/internal/parser/diff/parser.go +++ b/internal/parser/diff/parser.go @@ -7,7 +7,6 @@ import ( "strings" "github.com/joint-online-judge/JOJ3/internal/stage" - "github.com/peter-evans/patience" ) func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( @@ -83,7 +82,12 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( } answerLines := strings.Split(answerStr, "\n") resultLines := strings.Split(resultStr, "\n") - diffs := patience.Diff(answerLines, resultLines) + diffs := PatienceDiff( + answerLines, + resultLines, + func(a, b string) bool { + return compareStrings(a, b, output.CompareSpace) + }) diffOutput := DiffText(diffs) diffOutput = strings.TrimSuffix(diffOutput, "\n ") if truncated { diff --git a/internal/parser/diff/patience.go b/internal/parser/diff/patience.go new file mode 100644 index 0000000..99f3df1 --- /dev/null +++ b/internal/parser/diff/patience.go @@ -0,0 +1,188 @@ +package diff + +// modified from https://github.com/peter-evans/patience + +import ( + "fmt" + "strings" +) + +// DiffType defines the type of a diff element. +type DiffType int8 + +const ( + // Delete represents a diff delete operation. + Delete DiffType = -1 + // Insert represents a diff insert operation. + Insert DiffType = 1 + // Equal represents no diff. + Equal DiffType = 0 +) + +// DiffLine represents a single line and its diff type. +type DiffLine struct { + Text string + Type DiffType +} + +// typeSymbol returns the associated symbol of a DiffType. +func typeSymbol(t DiffType) string { + switch t { + case Equal: + return " " + case Insert: + return "+ " + case Delete: + return "- " + default: + panic("unknown DiffType") + } +} + +// DiffText returns the source and destination texts (all equalities, insertions and deletions). +func DiffText(diffs []DiffLine) string { + s := make([]string, len(diffs)) + for i, l := range diffs { + s[i] = fmt.Sprintf("%s%s", typeSymbol(l.Type), l.Text) + } + return strings.Join(s, "\n") +} + +// LCS computes the longest common subsequence of two string +// slices and returns the index pairs of the LCS. +func LCS(a, b []string, equal func(a, b string) bool) [][2]int { + // Initialize the LCS table. + lcs := make([][]int, len(a)+1) + for i := 0; i <= len(a); i++ { + lcs[i] = make([]int, len(b)+1) + } + + // Populate the LCS table. + for i := 1; i < len(lcs); i++ { + for j := 1; j < len(lcs[i]); j++ { + if equal(a[i-1], b[j-1]) { + lcs[i][j] = lcs[i-1][j-1] + 1 + } else { + lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1]) + } + } + } + + // Backtrack to find the LCS. + i, j := len(a), len(b) + s := make([][2]int, 0, lcs[i][j]) + for i > 0 && j > 0 { + switch { + case equal(a[i-1], b[j-1]): + s = append(s, [2]int{i - 1, j - 1}) + i-- + j-- + case lcs[i-1][j] > lcs[i][j-1]: + i-- + default: + j-- + } + } + + // Reverse the backtracked LCS. + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } + + return s +} + +// toDiffLines is a convenience function to convert a slice of strings +// to a slice of DiffLines with the specified diff type. +func toDiffLines(a []string, t DiffType) []DiffLine { + diffs := make([]DiffLine, len(a)) + for i, l := range a { + diffs[i] = DiffLine{l, t} + } + return diffs +} + +// uniqueElements returns a slice of unique elements from a slice of +// strings, and a slice of the original indices of each element. +func uniqueElements(a []string) ([]string, []int) { + m := make(map[string]int) + for _, e := range a { + m[e]++ + } + elements := []string{} + indices := []int{} + for i, e := range a { + if m[e] == 1 { + elements = append(elements, e) + indices = append(indices, i) + } + } + return elements, indices +} + +// PatienceDiff returns the patience diff of two slices of strings. +func PatienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { + switch { + case len(a) == 0 && len(b) == 0: + return nil + case len(a) == 0: + return toDiffLines(b, Insert) + case len(b) == 0: + return toDiffLines(a, Delete) + } + + // Find equal elements at the head of slices a and b. + i := 0 + for i < len(a) && i < len(b) && equal(a[i], b[i]) { + i++ + } + if i > 0 { + return append( + toDiffLines(a[:i], Equal), + PatienceDiff(a[i:], b[i:], equal)..., + ) + } + + // Find equal elements at the tail of slices a and b. + j := 0 + for j < len(a) && j < len(b) && equal(a[len(a)-1-j], b[len(b)-1-j]) { + j++ + } + if j > 0 { + return append( + PatienceDiff(a[:len(a)-j], b[:len(b)-j], equal), + toDiffLines(a[len(a)-j:], Equal)..., + ) + } + + // Find the longest common subsequence of unique elements in a and b. + ua, idxa := uniqueElements(a) + ub, idxb := uniqueElements(b) + lcs := LCS(ua, ub, equal) + + // If the LCS is empty, the diff is all deletions and insertions. + if len(lcs) == 0 { + return append(toDiffLines(a, Delete), toDiffLines(b, Insert)...) + } + + // Lookup the original indices of slices a and b. + for i, x := range lcs { + lcs[i][0] = idxa[x[0]] + lcs[i][1] = idxb[x[1]] + } + + diffs := []DiffLine{} + ga, gb := 0, 0 + for _, ip := range lcs { + // PatienceDiff the gaps between the lcs elements. + diffs = append(diffs, PatienceDiff(a[ga:ip[0]], b[gb:ip[1]], equal)...) + // Append the LCS elements to the diff. + diffs = append(diffs, DiffLine{Type: Equal, Text: a[ip[0]]}) + ga = ip[0] + 1 + gb = ip[1] + 1 + } + // PatienceDiff the remaining elements of a and b after the final LCS element. + diffs = append(diffs, PatienceDiff(a[ga:], b[gb:], equal)...) + + return diffs +}