aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/util/util.go
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2025-06-27 13:41:07 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2025-06-27 14:16:35 -0400
commitf795d47d4c0cef03a410fb1001bae6819f9ba41f (patch)
treeb311168aa52bda183affde023b9fc4c45ed6d880 /pkg/util/util.go
parent916430e5d3f33a24b13e188428d5335862472411 (diff)
Add Levenshtein distance util
Diffstat (limited to 'pkg/util/util.go')
-rw-r--r--pkg/util/util.go62
1 files changed, 62 insertions, 0 deletions
diff --git a/pkg/util/util.go b/pkg/util/util.go
index 577401e..b26b46a 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -2,6 +2,7 @@ package util
import (
"iter"
+ "math"
"time"
)
@@ -100,3 +101,64 @@ func BackwardsFilterIter[E any](s []E, cond func(e E) bool) iter.Seq2[int, E] {
}
}
}
+
+// A Levenshtein distance implementation based off of
+//
+// https://en.wikipedia.org/wiki/Levenshtein_distance#Iterative_with_full_matrix
+// PERF: more performant implementations exist
+func LevensteinDistance(s, t string) int {
+ m, n := len(s), len(t)
+ d := make([][]int, m+1)
+ for i := range m + 1 {
+ d[i] = make([]int, n+1)
+ }
+
+ for i := range m {
+ d[i+1][0] = i
+ }
+ for j := range n {
+ d[0][j+1] = j
+ }
+
+ var subCost int
+ for j := range n {
+ for i := range m {
+ if s[i] == t[j] {
+ subCost = 0
+ } else {
+ subCost = 1
+ }
+
+ del := d[i][j+1] + 1
+ insert := d[i+1][j] + 1
+ sub := d[i][j] + subCost
+ d[i+1][j+1] = min(del, insert, sub)
+ }
+ }
+
+ return d[m][n]
+}
+
+// Find nearest element of a slice using cmp, returns the found element and
+// if the distance is below ceil
+func Nearest[E any](candidate E, valid []E, cmp func(E, E) int, ceil int) (E, bool) {
+ minDistance := math.MaxInt
+ minIdx := -1
+ var d int
+ for i, e := range valid {
+ if sd := cmp(candidate, e); sd < 0 {
+ d = -sd
+ } else {
+ d = sd
+ }
+ if d < minDistance {
+ minDistance = d
+ minIdx = i
+ }
+ }
+
+ if minIdx < 0 {
+ return candidate, false
+ }
+ return valid[minIdx], minDistance < ceil
+}