Go 實現字符串類似度計算函數 Levenshtein 和 SimilarText

【轉】http://www.syyong.com/Go/Go-implements-the-string-similarity-calculation-function-Levenshtein-and-SimilarText.html

levenshtein() 和 similar_text() 是 PHP 內置的兩個字符串類似度計算函數。Levenshtein 計算兩個字符串之間的編輯距離,SimilarText 計算兩個字符串的類似度。下面使用 Go 分別實現兩者。

Levenshtein

// Levenshtein returns the weighted edit distance between str1 and str2.
// It is a Go port of PHP's levenshtein().
//
// The strings are compared byte by byte, so a multi-byte UTF-8 character
// occupies several positions.
//
//   - costIns is the cost of inserting one byte.
//   - costRep is the cost of replacing one byte.
//   - costDel is the cost of deleting one byte.
//
// If str1 is empty the result is len(str2)*costIns, and if str2 is empty it
// is len(str1)*costDel. Otherwise, when either string is longer than 255
// bytes, -1 is returned.
func Levenshtein(str1, str2 string, costIns, costRep, costDel int) int {
	const maxLen = 255

	l1, l2 := len(str1), len(str2)
	if l1 == 0 {
		return l2 * costIns
	}
	if l2 == 0 {
		return l1 * costDel
	}
	if l1 > maxLen || l2 > maxLen {
		return -1
	}

	// Only two rows of the dynamic-programming table are kept: prev is the
	// row for the already-processed prefix of str1, cur is the row being
	// filled in. prev starts as the cost of building each prefix of str2
	// from an empty string using insertions only.
	prev := make([]int, l2+1)
	cur := make([]int, l2+1)
	for j := range prev {
		prev[j] = j * costIns
	}

	for i := 0; i < l1; i++ {
		cur[0] = prev[0] + costDel
		for j := 0; j < l2; j++ {
			// Start from the diagonal: free when the bytes match,
			// otherwise a replacement.
			cost := prev[j]
			if str1[i] != str2[j] {
				cost += costRep
			}
			if del := prev[j+1] + costDel; del < cost {
				cost = del
			}
			if ins := cur[j] + costIns; ins < cost {
				cost = ins
			}
			cur[j+1] = cost
		}
		// The finished row becomes the previous row for the next byte;
		// the old previous row is reused as scratch space.
		prev, cur = cur, prev
	}

	return prev[l2]
}

 

SimilarText

// SimilarText returns the number of matching bytes between first and second.
// It is a Go port of PHP's similar_text().
//
// The count is obtained by finding the longest common substring, then
// recursively doing the same for the parts to the left and to the right of
// that substring and summing the lengths. Comparison is byte-wise, and the
// result can depend on the order of the two arguments.
//
// If percent is non-nil it receives the similarity as a percentage,
// sim*200/(len(first)+len(second)); it is set to 0 when both strings are
// empty.
func SimilarText(first, second string, percent *float64) int {
	total := len(first) + len(second)
	if total == 0 {
		// Nothing to compare: report zero similarity instead of leaving
		// whatever value the caller had stored in *percent.
		if percent != nil {
			*percent = 0
		}
		return 0
	}

	sim := similarChars(first, second)
	if percent != nil {
		*percent = float64(sim*200) / float64(total)
	}
	return sim
}

// similarChars returns the summed length of the longest common substring of
// a and b plus, recursively, those of the pieces on either side of it.
func similarChars(a, b string) int {
	// Locate the first longest run of equal bytes.
	best, posA, posB := 0, 0, 0
	for i := 0; i < len(a); i++ {
		for j := 0; j < len(b); j++ {
			n := 0
			for i+n < len(a) && j+n < len(b) && a[i+n] == b[j+n] {
				n++
			}
			// Strictly greater keeps the earliest match on ties.
			if n > best {
				best, posA, posB = n, i, j
			}
		}
	}
	if best == 0 {
		return 0
	}

	sum := best
	// Recurse only when both sides have something left to compare.
	if posA > 0 && posB > 0 {
		sum += similarChars(a[:posA], b[:posB])
	}
	if posA+best < len(a) && posB+best < len(b) {
		sum += similarChars(a[posA+best:], b[posB+best:])
	}
	return sum
}

 

 Github地址

https://github.com/syyongx/php2go

相關文章
相關標籤/搜索