JOJ3/internal/parser/diff/patience.go
张泊明518370910136 29c05f4b36
All checks were successful
submodules sync / sync (push) Successful in 1m0s
build / build (push) Successful in 2m18s
build / trigger-build-image (push) Successful in 9s
refactor(parser/diff): rune based strings compare
2025-03-28 08:42:30 -04:00

225 lines
5.0 KiB
Go

package diff
// modified from https://github.com/peter-evans/patience
import (
"fmt"
"strings"
"unicode"
)
// stringsEqual reports whether str1 and str2 are equal.
//
// When compareSpace is true the strings must match exactly. Otherwise both
// strings are decoded into runes and compared with every rune for which
// unicode.IsSpace is true ignored, wherever it appears.
func stringsEqual(str1, str2 string, compareSpace bool) bool {
	if compareSpace {
		return str1 == str2
	}

	// skip returns the first position at or after pos that does not hold a
	// whitespace rune, or len(rs) when only whitespace remains.
	skip := func(rs []rune, pos int) int {
		for pos < len(rs) && unicode.IsSpace(rs[pos]) {
			pos++
		}
		return pos
	}

	left, right := []rune(str1), []rune(str2)
	p, q := skip(left, 0), skip(right, 0)
	for p < len(left) && q < len(right) {
		if left[p] != right[q] {
			return false
		}
		p = skip(left, p+1)
		q = skip(right, q+1)
	}
	// Both cursors sit on a non-space rune or at the end, so the strings
	// match only if both were fully consumed.
	return p == len(left) && q == len(right)
}
// DiffType classifies a diff element as a deletion, an unchanged line or
// an insertion.
type DiffType int8

// The DiffType values, listed in ascending numeric order.
const (
	// Delete marks a line that is removed by the diff.
	Delete DiffType = -1
	// Equal marks a line that is unchanged by the diff.
	Equal DiffType = 0
	// Insert marks a line that is added by the diff.
	Insert DiffType = 1
)
// DiffLine represents a single line and its diff type.
//
// The field order matters: toDiffLines constructs values with the
// positional literal DiffLine{text, type}.
type DiffLine struct {
	// Text is the content of the line.
	Text string
	// Type says whether the line is deleted, inserted or unchanged.
	Type DiffType
}
// typeSymbol maps a DiffType to the prefix that diffText puts in front of
// a line's text. It panics when t is not Equal, Insert or Delete.
func typeSymbol(t DiffType) string {
	if t == Equal {
		return " "
	}
	if t == Insert {
		return "+ "
	}
	if t == Delete {
		return "- "
	}
	// Any other value indicates a programming error in the caller.
	panic("unknown DiffType")
}
// diffText renders diffs as text: one line per element, each consisting of
// the element's type symbol followed by its text, with lines separated by
// "\n" and no trailing newline. An empty input yields the empty string.
func diffText(diffs []DiffLine) string {
	var out strings.Builder
	for idx := range diffs {
		if idx > 0 {
			// Separator goes between lines only, never after the last one.
			out.WriteString("\n")
		}
		fmt.Fprintf(&out, "%s%s", typeSymbol(diffs[idx].Type), diffs[idx].Text)
	}
	return out.String()
}
// LCS finds a longest common subsequence of a and b, using equal to decide
// whether two elements match.
//
// The result lists the matched positions as [index in a, index in b] pairs
// in increasing order. It is a non-nil, empty slice when nothing matches.
func LCS(a, b []string, equal func(a, b string) bool) [][2]int {
	rows, cols := len(a)+1, len(b)+1

	// table[r][c] holds the LCS length of a[:r] and b[:c]; row 0 and
	// column 0 stay zero.
	table := make([][]int, rows)
	for r := range table {
		table[r] = make([]int, cols)
	}
	for r := 1; r < rows; r++ {
		above, current := table[r-1], table[r]
		for c := 1; c < cols; c++ {
			if !equal(a[r-1], b[c-1]) {
				current[c] = max(above[c], current[c-1])
				continue
			}
			current[c] = above[c-1] + 1
		}
	}

	// Walk back from the bottom-right corner, collecting matches from last
	// to first.
	r, c := len(a), len(b)
	pairs := make([][2]int, 0, table[r][c])
	for r > 0 && c > 0 {
		if equal(a[r-1], b[c-1]) {
			r--
			c--
			pairs = append(pairs, [2]int{r, c})
		} else if table[r-1][c] > table[r][c-1] {
			r--
		} else {
			c--
		}
	}

	// The walk produced the pairs back to front; flip them in place.
	for k, n := 0, len(pairs); k < n/2; k++ {
		pairs[k], pairs[n-1-k] = pairs[n-1-k], pairs[k]
	}
	return pairs
}
// toDiffLines wraps every string of a in a DiffLine carrying the diff type
// t, preserving order. The result is non-nil and has the same length as a.
func toDiffLines(a []string, t DiffType) []DiffLine {
	lines := make([]DiffLine, 0, len(a))
	for _, text := range a {
		lines = append(lines, DiffLine{Text: text, Type: t})
	}
	return lines
}
// uniqueElements picks out the strings that occur exactly once in a.
//
// It returns those strings in their original order together with a
// parallel slice holding the index in a of each of them. Both slices are
// non-nil, and empty when a has no unique element.
func uniqueElements(a []string) ([]string, []int) {
	// First pass: count how often each string occurs.
	counts := make(map[string]int)
	for idx := range a {
		counts[a[idx]]++
	}

	// Second pass: keep the strings seen exactly once.
	uniq := make([]string, 0)
	positions := make([]int, 0)
	for idx, val := range a {
		if counts[val] != 1 {
			continue
		}
		uniq = append(uniq, val)
		positions = append(positions, idx)
	}
	return uniq, positions
}
// patienceDiff computes a line diff of a against b, using equal to decide
// whether two lines match.
//
// It works recursively:
//  1. If either side is empty, the other side is all insertions or all
//     deletions (nil when both are empty).
//  2. A run of matching lines at the head, and then at the tail, is emitted
//     as Equal and the remainder is diffed recursively.
//  3. Otherwise the lines that occur exactly once in a and exactly once in
//     b are aligned with LCS. Each aligned pair becomes an Equal line and
//     the gaps between pairs are diffed recursively. With no aligned pair,
//     all of a is deleted and all of b inserted.
//
// Equal lines carry the text taken from a.
func patienceDiff(a, b []string, equal func(a, b string) bool) []DiffLine {
	if len(a) == 0 {
		if len(b) == 0 {
			return nil
		}
		return toDiffLines(b, Insert)
	}
	if len(b) == 0 {
		return toDiffLines(a, Delete)
	}

	// Strip a common prefix, if there is one.
	head := 0
	for head < len(a) && head < len(b) && equal(a[head], b[head]) {
		head++
	}
	if head > 0 {
		prefix := toDiffLines(a[:head], Equal)
		return append(prefix, patienceDiff(a[head:], b[head:], equal)...)
	}

	// Strip a common suffix, if there is one.
	tail := 0
	for tail < len(a) && tail < len(b) && equal(a[len(a)-1-tail], b[len(b)-1-tail]) {
		tail++
	}
	if tail > 0 {
		cutA, cutB := len(a)-tail, len(b)-tail
		body := patienceDiff(a[:cutA], b[:cutB], equal)
		return append(body, toDiffLines(a[cutA:], Equal)...)
	}

	// Align the lines that are unique within their own slice.
	uniqA, posA := uniqueElements(a)
	uniqB, posB := uniqueElements(b)
	anchors := LCS(uniqA, uniqB, equal)
	if len(anchors) == 0 {
		// Nothing to anchor on: report everything as changed.
		return append(toDiffLines(a, Delete), toDiffLines(b, Insert)...)
	}

	result := []DiffLine{}
	nextA, nextB := 0, 0
	for _, pair := range anchors {
		// LCS indexes into the unique-element slices; translate back to
		// positions in a and b.
		ia, ib := posA[pair[0]], posB[pair[1]]
		// Diff the gap in front of this anchor, then emit the anchor.
		result = append(result, patienceDiff(a[nextA:ia], b[nextB:ib], equal)...)
		result = append(result, DiffLine{Type: Equal, Text: a[ia]})
		nextA, nextB = ia+1, ib+1
	}
	// Diff whatever follows the last anchor.
	return append(result, patienceDiff(a[nextA:], b[nextB:], equal)...)
}