From 29c05f4b36f92cf38d66aeb8e4fb06280d65badc Mon Sep 17 00:00:00 2001 From: Boming Zhang Date: Fri, 28 Mar 2025 08:41:08 -0400 Subject: [PATCH] refactor(parser/diff): rune based strings compare --- internal/parser/diff/diff.go | 46 ---------------------------- internal/parser/diff/parser.go | 8 ++--- internal/parser/diff/patience.go | 52 +++++++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 58 deletions(-) delete mode 100644 internal/parser/diff/diff.go diff --git a/internal/parser/diff/diff.go b/internal/parser/diff/diff.go deleted file mode 100644 index 25f0695..0000000 --- a/internal/parser/diff/diff.go +++ /dev/null @@ -1,46 +0,0 @@ -package diff - -// compareStrings compares two strings character by character, optionally ignoring whitespace. -func compareStrings(str1, str2 string, compareSpace bool) bool { - if compareSpace { - return str1 == str2 - } - var i, j int - l1 := len(str1) - l2 := len(str2) - for i < l1 && j < l2 { - for i < l1 && isWhitespace(str1[i]) { - i++ - } - for j < l2 && isWhitespace(str2[j]) { - j++ - } - if i < l1 && j < l2 && str1[i] != str2[j] { - return false - } - if i < l1 { - i++ - } - if j < l2 { - j++ - } - } - for i < l1 && isWhitespace(str1[i]) { - i++ - } - for j < l2 && isWhitespace(str2[j]) { - j++ - } - return i == l1 && j == l2 -} - -func isWhitespace(b byte) bool { - return b == ' ' || - b == '\t' || - b == '\n' || - b == '\r' || - b == '\v' || - b == '\f' || - b == 0x85 || - b == 0xA0 -} diff --git a/internal/parser/diff/parser.go b/internal/parser/diff/parser.go index cd9cee9..b1a98d1 100644 --- a/internal/parser/diff/parser.go +++ b/internal/parser/diff/parser.go @@ -39,7 +39,7 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( if err != nil { return nil, true, err } - isSame := compareStrings( + isSame := stringsEqual( string(answer), result.Files[output.FileName], output.CompareSpace, @@ -82,13 +82,13 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( } answerLines := strings.Split(answerStr, "\n") resultLines := strings.Split(resultStr, "\n") - diffs := PatienceDiff( + diffs := patienceDiff( answerLines, resultLines, func(a, b string) bool { - return compareStrings(a, b, output.CompareSpace) + return stringsEqual(a, b, output.CompareSpace) }) - diffOutput := DiffText(diffs) + diffOutput := diffText(diffs) diffOutput = strings.TrimSuffix(diffOutput, "\n ") if truncated { diffOutput += "\n\n(truncated)" diff --git a/internal/parser/diff/patience.go b/internal/parser/diff/patience.go index 99f3df1..4d507a1 100644 --- a/internal/parser/diff/patience.go +++ b/internal/parser/diff/patience.go @@ -5,8 +5,44 @@ package diff import ( "fmt" "strings" + "unicode" ) +// stringsEqual compares two strings character by character, optionally ignoring whitespace. +func stringsEqual(str1, str2 string, compareSpace bool) bool { + if compareSpace { + return str1 == str2 + } + runes1 := []rune(str1) + runes2 := []rune(str2) + var i, j, l1, l2 int + l1 = len(runes1) + l2 = len(runes2) + for i < l1 && j < l2 { + for i < l1 && unicode.IsSpace(runes1[i]) { + i++ + } + for j < l2 && unicode.IsSpace(runes2[j]) { + j++ + } + if i >= l1 || j >= l2 { + break + } + if runes1[i] != runes2[j] { + return false + } + i++ + j++ + } + for i < l1 && unicode.IsSpace(runes1[i]) { + i++ + } + for j < l2 && unicode.IsSpace(runes2[j]) { + j++ + } + return i == l1 && j == l2 +} + // DiffType defines the type of a diff element. type DiffType int8 @@ -39,8 +75,8 @@ func typeSymbol(t DiffType) string { } } -// DiffText returns the source and destination texts (all equalities, insertions and deletions). -func DiffText(diffs []DiffLine) string { +// diffText returns the source and destination texts (all equalities, insertions and deletions). +func diffText(diffs []DiffLine) string { s := make([]string, len(diffs)) for i, l := range diffs { s[i] = fmt.Sprintf("%s%s", typeSymbol(l.Type), l.Text) @@ -120,8 +156,8 @@ func uniqueElements(a []string) ([]string, []int) { return elements, indices } -// PatienceDiff returns the patience diff of two slices of strings. -func PatienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { +// patienceDiff returns the patience diff of two slices of strings. +func patienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { switch { case len(a) == 0 && len(b) == 0: return nil @@ -139,7 +175,7 @@ func PatienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { if i > 0 { return append( toDiffLines(a[:i], Equal), - PatienceDiff(a[i:], b[i:], equal)..., + patienceDiff(a[i:], b[i:], equal)..., ) } @@ -150,7 +186,7 @@ func PatienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { } if j > 0 { return append( - PatienceDiff(a[:len(a)-j], b[:len(b)-j], equal), + patienceDiff(a[:len(a)-j], b[:len(b)-j], equal), toDiffLines(a[len(a)-j:], Equal)..., ) } @@ -175,14 +211,14 @@ func PatienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine { ga, gb := 0, 0 for _, ip := range lcs { // PatienceDiff the gaps between the lcs elements. - diffs = append(diffs, PatienceDiff(a[ga:ip[0]], b[gb:ip[1]], equal)...) + diffs = append(diffs, patienceDiff(a[ga:ip[0]], b[gb:ip[1]], equal)...) // Append the LCS elements to the diff. diffs = append(diffs, DiffLine{Type: Equal, Text: a[ip[0]]}) ga = ip[0] + 1 gb = ip[1] + 1 } // PatienceDiff the remaining elements of a and b after the final LCS element. - diffs = append(diffs, PatienceDiff(a[ga:], b[gb:], equal)...) + diffs = append(diffs, patienceDiff(a[ga:], b[gb:], equal)...) return diffs }