feat(parser/diff): patience diff from rogpeppe/go-internal
This commit is contained in:
		
							parent
							
								
									e644b180a9
								
							
						
					
					
						commit
						a3a0d99be6
					
				|  | @ -1,10 +1,5 @@ | ||||||
| package diff | package diff | ||||||
| 
 | 
 | ||||||
| import ( |  | ||||||
| 	"fmt" |  | ||||||
| 	"strings" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| // compareStrings compares two strings character by character, optionally ignoring whitespace.
 | // compareStrings compares two strings character by character, optionally ignoring whitespace.
 | ||||||
| func compareStrings(str1, str2 string, compareSpace bool) bool { | func compareStrings(str1, str2 string, compareSpace bool) bool { | ||||||
| 	if compareSpace { | 	if compareSpace { | ||||||
|  | @ -49,33 +44,3 @@ func isWhitespace(b byte) bool { | ||||||
| 		b == 0x85 || | 		b == 0x85 || | ||||||
| 		b == 0xA0 | 		b == 0xA0 | ||||||
| } | } | ||||||
| 
 |  | ||||||
| func formatDiff(oldList []string, newList []string, ops []Op[string]) string { |  | ||||||
| 	var result []string |  | ||||||
| 	i, j := 0, 0 |  | ||||||
| 	for _, op := range ops { |  | ||||||
| 		if op.OpType == OpDelete { |  | ||||||
| 			for i < op.OldPos { |  | ||||||
| 				result = append(result, "  "+oldList[i]) |  | ||||||
| 				i++ |  | ||||||
| 				j++ |  | ||||||
| 			} |  | ||||||
| 			result = append(result, "- "+fmt.Sprint(op.Elem)) |  | ||||||
| 			i++ |  | ||||||
| 		} else if op.OpType == OpInsert { |  | ||||||
| 			for j < op.NewPos { |  | ||||||
| 				result = append(result, "  "+newList[j]) |  | ||||||
| 				i++ |  | ||||||
| 				j++ |  | ||||||
| 			} |  | ||||||
| 			result = append(result, "+ "+fmt.Sprint(op.Elem)) |  | ||||||
| 			j++ |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	for i < len(oldList) && j < len(newList) { |  | ||||||
| 		result = append(result, "  "+oldList[i]) |  | ||||||
| 		i++ |  | ||||||
| 		j++ |  | ||||||
| 	} |  | ||||||
| 	return strings.Join(result, "\n") |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  | @ -1,134 +0,0 @@ | ||||||
| package diff |  | ||||||
| 
 |  | ||||||
| // source: https://github.com/MFAshby/myers
 |  | ||||||
| // Myer's diff algorithm in golang
 |  | ||||||
| // Ported from https://blog.robertelder.org/diff-algorithm/
 |  | ||||||
| 
 |  | ||||||
// OpType identifies the kind of edit an Op describes.
type OpType int

// The two supported edit kinds.
const (
	OpInsert OpType = 0
	OpDelete OpType = 1
)

// Op is a single edit step: the insertion or deletion of one element.
type Op[T any] struct {
	// OpType tells whether Elem is inserted or deleted.
	OpType OpType
	// OldPos is the position in the old list of the item to be inserted or deleted.
	OldPos int
	// NewPos is the position in the new list of the item to be inserted.
	NewPos int
	// Elem is the actual value to be inserted or deleted.
	Elem T
}
| 
 |  | ||||||
| // Returns a minimal list of differences between 2 lists e and f
 |  | ||||||
| // requiring O(min(len(e),len(f))) space and O(min(len(e),len(f)) * D)
 |  | ||||||
| // worst-case execution time where D is the number of differences.
 |  | ||||||
| func myersDiff[T any](e, f []T, equals func(T, T) bool) []Op[T] { |  | ||||||
| 	return diffInternal(e, f, equals, 0, 0) |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
// diffInternal computes the operations that turn e into f, comparing elements
// with equals. i and j are added to the positions stored in the returned Ops;
// the recursive calls below pass shifted values for sub-slices, so reported
// positions stay relative to the lists handed to the outermost call.
//
// When both lists are non-empty, the function searches for a point at which
// to split the problem and recurses on the parts before and after it. When
// only e has elements, every element becomes an OpDelete. Otherwise every
// element of f becomes an OpInsert, which yields an empty, non-nil slice when
// f is empty too.
func diffInternal[T any](e, f []T, equals func(T, T) bool, i, j int) []Op[T] {
	N := len(e)
	M := len(f)
	L := N + M
	// Z is the length of the two scratch slices g and p. They are only ever
	// indexed through pyMod(..., Z), so negative indexes wrap around.
	Z := 2*min(N, M) + 2
	switch {
	case N > 0 && M > 0:
		w := N - M
		g := make([]int, Z)
		p := make([]int, Z)

		hMax := L/2 + L%2 + 1
		for h := range hMax {
			// Every round runs two passes, r == 0 and r == 1, that swap the
			// roles of g and p.
			for r := range 2 {
				var c, d []int
				var o, m int
				if r == 0 {
					// With o = 1 and m = 1 the index expressions in the
					// comparison loop below reduce to e[a] and f[b].
					c = g
					d = p
					o = 1
					m = 1
				} else {
					// With o = 0 and m = -1 they reduce to e[N-a-1] and
					// f[M-b-1], so this pass walks both lists from the end.
					c = p
					d = g
					o = 0
					m = -1
				}
				kMin := -(h - 2*max(0, h-M))
				kMax := h - 2*max(0, h-N) + 1
				for k := kMin; k < kMax; k += 2 {
					// Start from the better of the two neighbouring entries
					// k-1 and k+1 recorded in c.
					var a int
					if k == -h || k != h && c[pyMod((k-1), Z)] < c[pyMod((k+1), Z)] {
						a = c[pyMod((k+1), Z)]
					} else {
						a = c[pyMod((k-1), Z)] + 1
					}
					b := a - k
					// Remember where the run of equal elements starts.
					s, t := a, b

					// Advance over elements that compare equal.
					for a < N && b < M && equals(e[(1-o)*N+m*a+(o-1)], f[(1-o)*M+m*b+(o-1)]) {
						a, b = a+1, b+1
					}
					c[pyMod(k, Z)] = a
					z := -(k - w)
					// The progress recorded in c and d together covers all of
					// e: a split point has been found.
					if pyMod(L, 2) == o && z >= -(h-o) && z <= h-o && c[pyMod(k, Z)]+d[pyMod(z, Z)] >= N {
						var D, x, y, u, v int
						if o == 1 {
							D = 2*h - 1
							x = s
							y = t
							u = a
							v = b
						} else {
							// The second pass counts from the end, so its
							// coordinates are mirrored back.
							D = 2 * h
							x = N - a
							y = M - b
							u = N - s
							v = M - t
						}
						switch {
						case D > 1 || (x != u && y != v):
							// Solve the part before the split and the part
							// after it separately; the second call shifts the
							// offsets by u and v.
							return append(diffInternal(e[0:x], f[0:y], equals, i, j), diffInternal(e[u:N], f[v:M], equals, i+u, j+v)...)
						case M > N:
							// Only f[N:M] is left to report; with an empty old
							// list the call produces insertions.
							return diffInternal(make([]T, 0), f[N:M], equals, i+N, j+N)
						case M < N:
							// Only e[M:N] is left to report; with an empty new
							// list the call produces deletions.
							return diffInternal(e[M:N], make([]T, 0), equals, i+M, j+M)
						default:
							// Nothing differs: return an empty, non-nil slice.
							return make([]Op[T], 0)
						}
					}
				}
			}
		}
	case N > 0:
		// f is empty here: every element of e is deleted. NewPos is set to -1.
		res := make([]Op[T], N)
		for n := range N {
			res[n] = Op[T]{OpDelete, i + n, -1, e[n]}
		}
		return res
	default:
		// e is empty here: every element of f is inserted at old position i.
		res := make([]Op[T], M)
		for n := range M {
			res[n] = Op[T]{OpInsert, i, j + n, f[n]}
		}
		return res
	}
	// Reached only if the search loop above ends without returning.
	panic("Should never hit this!")
}
| 
 |  | ||||||
// pyMod returns the remainder of x divided by y the way Python's % operator
// does: the result takes the sign of the denominator, e.g. pyMod(-1, 3) == 2.
// Go's own % operator takes the sign of the numerator instead.
// See https://en.wikipedia.org/wiki/Modulo_operation#Variants_of_the_definition
func pyMod(x, y int) int {
	rem := x % y
	// Shift by one full period, then reduce again.
	rem += y
	return rem % y
}
| 
 |  | ||||||
| // Let us map element in same way as in
 |  | ||||||
| 
 |  | ||||||
| // Convenient wrapper for string lists
 |  | ||||||
| func myersDiffStr(e, f []string, compareSpace bool) []Op[string] { |  | ||||||
| 	return myersDiff[string](e, f, func(s1, s2 string) bool { |  | ||||||
| 		return compareStrings(s1, s2, compareSpace) |  | ||||||
| 	}) |  | ||||||
| } |  | ||||||
|  | @ -1,49 +0,0 @@ | ||||||
| package diff |  | ||||||
| 
 |  | ||||||
| import ( |  | ||||||
| 	"reflect" |  | ||||||
| 	t "testing" |  | ||||||
| ) |  | ||||||
| 
 |  | ||||||
| type TestCase struct { |  | ||||||
| 	l1  []string |  | ||||||
| 	l2  []string |  | ||||||
| 	exp []Op[string] |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| func TestDiff(t *t.T) { |  | ||||||
| 	A := "A" |  | ||||||
| 	B := "B" |  | ||||||
| 	C := "C" |  | ||||||
| 	testCases := []TestCase{ |  | ||||||
| 		{[]string{}, []string{}, []Op[string]{}}, |  | ||||||
| 		{[]string{}, []string{"foo"}, []Op[string]{{OpInsert, 0, 0, "foo"}}}, |  | ||||||
| 		{[]string{"foo"}, []string{}, []Op[string]{{OpDelete, 0, -1, "foo"}}}, |  | ||||||
| 		{[]string{"foo", "bar", "baz"}, []string{"foo", "bar", "baz"}, []Op[string]{}}, |  | ||||||
| 		{[]string{"foo", "bar", "baz"}, []string{"foo", "baz"}, []Op[string]{{OpDelete, 1, -1, "bar"}}}, |  | ||||||
| 		{[]string{"baz"}, []string{"foo", "baz"}, []Op[string]{{OpInsert, 0, 0, "foo"}}}, |  | ||||||
| 		{[]string{"bar", "baz"}, []string{"foo", "baz"}, []Op[string]{{OpDelete, 0, -1, "bar"}, {OpInsert, 1, 0, "foo"}}}, |  | ||||||
| 		{[]string{"foo", "bar", "baz"}, []string{"foo", "bar"}, []Op[string]{{OpDelete, 2, -1, "baz"}}}, |  | ||||||
| 		{ |  | ||||||
| 			[]string{A, B, C, A, B, B, A}, |  | ||||||
| 			[]string{C, B, A, B, A, C}, |  | ||||||
| 			[]Op[string]{{OpDelete, 0, -1, A}, {OpInsert, 1, 0, C}, {OpDelete, 2, -1, C}, {OpDelete, 5, -1, B}, {OpInsert, 7, 5, C}}, |  | ||||||
| 		}, |  | ||||||
| 		{ |  | ||||||
| 			[]string{C, A, B, A, B, A, B, A, B, A, B, A, B, C}, |  | ||||||
| 			[]string{B, A, B, A, B, A, B, A, B, A, B, A, B, A}, |  | ||||||
| 			[]Op[string]{{OpDelete, 0, -1, C}, {OpInsert, 1, 0, B}, {OpDelete, 13, -1, C}, {OpInsert, 14, 13, A}}, |  | ||||||
| 		}, |  | ||||||
| 		{ |  | ||||||
| 			[]string{B}, |  | ||||||
| 			[]string{A, B, C, B, A}, |  | ||||||
| 			[]Op[string]{{OpInsert, 0, 0, A}, {OpInsert, 0, 1, B}, {OpInsert, 0, 2, C}, {OpInsert, 1, 4, A}}, |  | ||||||
| 		}, |  | ||||||
| 	} |  | ||||||
| 	for _, c := range testCases { |  | ||||||
| 		act := myersDiffStr(c.l1, c.l2, true) |  | ||||||
| 		if !reflect.DeepEqual(c.exp, act) { |  | ||||||
| 			t.Errorf("Failed diff, expected %v actual %v\n", c.exp, act) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  | @ -80,16 +80,8 @@ func (*Diff) Run(results []stage.ExecutorResult, confAny any) ( | ||||||
| 							resultStr = resultStr[:output.MaxDiffLength] | 							resultStr = resultStr[:output.MaxDiffLength] | ||||||
| 							truncated = true | 							truncated = true | ||||||
| 						} | 						} | ||||||
| 						answerLines := strings.Split(answerStr, "\n") | 						diffOutput := patienceDiff( | ||||||
| 						resultLines := strings.Split(resultStr, "\n") | 							answerStr, resultStr, output.CompareSpace, | ||||||
| 						// Generate Myers diff
 |  | ||||||
| 						diffOps := myersDiffStr(answerLines, resultLines, |  | ||||||
| 							output.CompareSpace) |  | ||||||
| 						// Generate diff block with surrounding context
 |  | ||||||
| 						diffOutput := formatDiff( |  | ||||||
| 							answerLines, |  | ||||||
| 							resultLines, |  | ||||||
| 							diffOps, |  | ||||||
| 						) | 						) | ||||||
| 						diffOutput = strings.TrimSuffix(diffOutput, "\n  ") | 						diffOutput = strings.TrimSuffix(diffOutput, "\n  ") | ||||||
| 						if truncated { | 						if truncated { | ||||||
|  |  | ||||||
							
								
								
									
										239
									
								
								internal/parser/diff/patience.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										239
									
								
								internal/parser/diff/patience.go
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,239 @@ | ||||||
|  | // Copyright 2022 The Go Authors. All rights reserved.
 | ||||||
|  | // Use of this source code is governed by a BSD-style
 | ||||||
|  | // license that can be found in the LICENSE file.
 | ||||||
|  | 
 | ||||||
|  | package diff | ||||||
|  | 
 | ||||||
|  | // modified from https://github.com/rogpeppe/go-internal/blob/master/diff/diff.go
 | ||||||
|  | 
 | ||||||
|  | import ( | ||||||
|  | 	"bytes" | ||||||
|  | 	"sort" | ||||||
|  | 	"strings" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
// pair couples a value for the x side of a diff with the corresponding value
// for the y side. In this file the two values are typically line indexes.
type pair struct {
	x int // value on the x side
	y int // value on the y side
}
|  | 
 | ||||||
|  | // Diff returns an anchored diff of the two texts old and new
 | ||||||
|  | // in the “unified diff” format. If old and new are identical,
 | ||||||
|  | // Diff returns a nil slice (no output).
 | ||||||
|  | //
 | ||||||
|  | // Unix diff implementations typically look for a diff with
 | ||||||
|  | // the smallest number of lines inserted and removed,
 | ||||||
|  | // which can in the worst case take time quadratic in the
 | ||||||
|  | // number of lines in the texts. As a result, many implementations
 | ||||||
|  | // either can be made to run for a long time or cut off the search
 | ||||||
|  | // after a predetermined amount of work.
 | ||||||
|  | //
 | ||||||
|  | // In contrast, this implementation looks for a diff with the
 | ||||||
|  | // smallest number of “unique” lines inserted and removed,
 | ||||||
|  | // where unique means a line that appears just once in both old and new.
 | ||||||
|  | // We call this an “anchored diff” because the unique lines anchor
 | ||||||
|  | // the chosen matching regions. An anchored diff is usually clearer
 | ||||||
|  | // than a standard diff, because the algorithm does not try to
 | ||||||
|  | // reuse unrelated blank lines or closing braces.
 | ||||||
|  | // The algorithm also guarantees to run in O(n log n) time
 | ||||||
|  | // instead of the standard O(n²) time.
 | ||||||
|  | //
 | ||||||
|  | // Some systems call this approach a “patience diff,” named for
 | ||||||
|  | // the “patience sorting” algorithm, itself named for a solitaire card game.
 | ||||||
|  | // We avoid that name for two reasons. First, the name has been used
 | ||||||
|  | // for a few different variants of the algorithm, so it is imprecise.
 | ||||||
|  | // Second, the name is frequently interpreted as meaning that you have
 | ||||||
|  | // to wait longer (to be patient) for the diff, meaning that it is a slower algorithm,
 | ||||||
|  | // when in fact the algorithm is faster than the standard one.
 | ||||||
|  | func patienceDiff(old, new string, compareSpace bool) string { | ||||||
|  | 	x := strings.SplitAfter(old, "\n") | ||||||
|  | 	y := strings.SplitAfter(new, "\n") | ||||||
|  | 
 | ||||||
|  | 	// Print diff header.
 | ||||||
|  | 	var out bytes.Buffer | ||||||
|  | 
 | ||||||
|  | 	// Loop over matches to consider,
 | ||||||
|  | 	// expanding each match to include surrounding lines,
 | ||||||
|  | 	// and then printing diff chunks.
 | ||||||
|  | 	// To avoid setup/teardown cases outside the loop,
 | ||||||
|  | 	// tgs returns a leading {0,0} and trailing {len(x), len(y)} pair
 | ||||||
|  | 	// in the sequence of matches.
 | ||||||
|  | 	var ( | ||||||
|  | 		done  pair     // printed up to x[:done.x] and y[:done.y]
 | ||||||
|  | 		chunk pair     // start lines of current chunk
 | ||||||
|  | 		count pair     // number of lines from each side in current chunk
 | ||||||
|  | 		ctext []string // lines for current chunk
 | ||||||
|  | 	) | ||||||
|  | 	for _, m := range tgs(x, y) { | ||||||
|  | 		if m.x < done.x { | ||||||
|  | 			// Already handled scanning forward from earlier match.
 | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// Expand matching lines as far possible,
 | ||||||
|  | 		// establishing that x[start.x:end.x] == y[start.y:end.y].
 | ||||||
|  | 		// Note that on the first (or last) iteration we may (or definitely do)
 | ||||||
|  | 		// have an empty match: start.x==end.x and start.y==end.y.
 | ||||||
|  | 		start := m | ||||||
|  | 		for start.x > done.x && start.y > done.y && compareStrings(x[start.x-1], y[start.y-1], compareSpace) { | ||||||
|  | 			start.x-- | ||||||
|  | 			start.y-- | ||||||
|  | 		} | ||||||
|  | 		end := m | ||||||
|  | 		for end.x < len(x) && end.y < len(y) && compareStrings(x[end.x], y[end.y], compareSpace) { | ||||||
|  | 			end.x++ | ||||||
|  | 			end.y++ | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// Emit the mismatched lines before start into this chunk.
 | ||||||
|  | 		// (No effect on first sentinel iteration, when start = {0,0}.)
 | ||||||
|  | 		for _, s := range x[done.x:start.x] { | ||||||
|  | 			ctext = append(ctext, "- "+s) | ||||||
|  | 			count.x++ | ||||||
|  | 		} | ||||||
|  | 		for _, s := range y[done.y:start.y] { | ||||||
|  | 			ctext = append(ctext, "+ "+s) | ||||||
|  | 			count.y++ | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// If we're not at EOF and have too few common lines,
 | ||||||
|  | 		// the chunk includes all the common lines and continues.
 | ||||||
|  | 		const C = 3 // number of context lines
 | ||||||
|  | 		if (end.x < len(x) || end.y < len(y)) && | ||||||
|  | 			(end.x-start.x < C || (len(ctext) > 0 && end.x-start.x < 2*C)) { | ||||||
|  | 			for _, s := range x[start.x:end.x] { | ||||||
|  | 				ctext = append(ctext, "  "+s) | ||||||
|  | 				count.x++ | ||||||
|  | 				count.y++ | ||||||
|  | 			} | ||||||
|  | 			done = end | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// End chunk with common lines for context.
 | ||||||
|  | 		if len(ctext) > 0 { | ||||||
|  | 			n := min(end.x-start.x, C) | ||||||
|  | 			for _, s := range x[start.x : start.x+n] { | ||||||
|  | 				ctext = append(ctext, "  "+s) | ||||||
|  | 				count.x++ | ||||||
|  | 				count.y++ | ||||||
|  | 			} | ||||||
|  | 			done = pair{start.x + n, start.y + n} | ||||||
|  | 
 | ||||||
|  | 			// Format and emit chunk.
 | ||||||
|  | 			// Convert line numbers to 1-indexed.
 | ||||||
|  | 			// Special case: empty file shows up as 0,0 not 1,0.
 | ||||||
|  | 			if count.x > 0 { | ||||||
|  | 				chunk.x++ | ||||||
|  | 			} | ||||||
|  | 			if count.y > 0 { | ||||||
|  | 				chunk.y++ | ||||||
|  | 			} | ||||||
|  | 			// We do not need this line
 | ||||||
|  | 			// fmt.Fprintf(&out, "@@ -%d,%d +%d,%d @@\n", chunk.x, count.x, chunk.y, count.y)
 | ||||||
|  | 			for _, s := range ctext { | ||||||
|  | 				out.WriteString(s) | ||||||
|  | 			} | ||||||
|  | 			count.x = 0 | ||||||
|  | 			count.y = 0 | ||||||
|  | 			ctext = ctext[:0] | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// If we reached EOF, we're done.
 | ||||||
|  | 		if end.x >= len(x) && end.y >= len(y) { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		// Otherwise start a new chunk.
 | ||||||
|  | 		chunk = pair{end.x - C, end.y - C} | ||||||
|  | 		for _, s := range x[chunk.x:end.x] { | ||||||
|  | 			ctext = append(ctext, "  "+s) | ||||||
|  | 			count.x++ | ||||||
|  | 			count.y++ | ||||||
|  | 		} | ||||||
|  | 		done = end | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return out.String() | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | // tgs returns the pairs of indexes of the longest common subsequence
 | ||||||
|  | // of unique lines in x and y, where a unique line is one that appears
 | ||||||
|  | // once in x and once in y.
 | ||||||
|  | //
 | ||||||
|  | // The longest common subsequence algorithm is as described in
 | ||||||
|  | // Thomas G. Szymanski, “A Special Case of the Maximal Common
 | ||||||
|  | // Subsequence Problem,” Princeton TR #170 (January 1975),
 | ||||||
|  | // available at https://research.swtch.com/tgs170.pdf.
 | ||||||
|  | func tgs(x, y []string) []pair { | ||||||
|  | 	// Count the number of times each string appears in a and b.
 | ||||||
|  | 	// We only care about 0, 1, many, counted as 0, -1, -2
 | ||||||
|  | 	// for the x side and 0, -4, -8 for the y side.
 | ||||||
|  | 	// Using negative numbers now lets us distinguish positive line numbers later.
 | ||||||
|  | 	m := make(map[string]int) | ||||||
|  | 	for _, s := range x { | ||||||
|  | 		if c := m[s]; c > -2 { | ||||||
|  | 			m[s] = c - 1 | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	for _, s := range y { | ||||||
|  | 		if c := m[s]; c > -8 { | ||||||
|  | 			m[s] = c - 4 | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Now unique strings can be identified by m[s] = -1+-4.
 | ||||||
|  | 	//
 | ||||||
|  | 	// Gather the indexes of those strings in x and y, building:
 | ||||||
|  | 	//	xi[i] = increasing indexes of unique strings in x.
 | ||||||
|  | 	//	yi[i] = increasing indexes of unique strings in y.
 | ||||||
|  | 	//	inv[i] = index j such that x[xi[i]] = y[yi[j]].
 | ||||||
|  | 	var xi, yi, inv []int | ||||||
|  | 	for i, s := range y { | ||||||
|  | 		if m[s] == -1+-4 { | ||||||
|  | 			m[s] = len(yi) | ||||||
|  | 			yi = append(yi, i) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	for i, s := range x { | ||||||
|  | 		if j, ok := m[s]; ok && j >= 0 { | ||||||
|  | 			xi = append(xi, i) | ||||||
|  | 			inv = append(inv, j) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	// Apply Algorithm A from Szymanski's paper.
 | ||||||
|  | 	// In those terms, A = J = inv and B = [0, n).
 | ||||||
|  | 	// We add sentinel pairs {0,0}, and {len(x),len(y)}
 | ||||||
|  | 	// to the returned sequence, to help the processing loop.
 | ||||||
|  | 	J := inv | ||||||
|  | 	n := len(xi) | ||||||
|  | 	T := make([]int, n) | ||||||
|  | 	L := make([]int, n) | ||||||
|  | 	for i := range T { | ||||||
|  | 		T[i] = n + 1 | ||||||
|  | 	} | ||||||
|  | 	for i := range n { | ||||||
|  | 		k := sort.Search(n, func(k int) bool { | ||||||
|  | 			return T[k] >= J[i] | ||||||
|  | 		}) | ||||||
|  | 		T[k] = J[i] | ||||||
|  | 		L[i] = k + 1 | ||||||
|  | 	} | ||||||
|  | 	k := 0 | ||||||
|  | 	for _, v := range L { | ||||||
|  | 		if k < v { | ||||||
|  | 			k = v | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	seq := make([]pair, 2+k) | ||||||
|  | 	seq[1+k] = pair{len(x), len(y)} // sentinel at end
 | ||||||
|  | 	lastj := n | ||||||
|  | 	for i := n - 1; i >= 0; i-- { | ||||||
|  | 		if L[i] == k && J[i] < lastj { | ||||||
|  | 			seq[k] = pair{xi[i], yi[J[i]]} | ||||||
|  | 			k-- | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	seq[0] = pair{0, 0} // sentinel at start
 | ||||||
|  | 	return seq | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user