字符串相似度算法(編輯距離算法,Levenshtein Distance)

/**
 * Computes a normalized similarity score between two character sequences
 * based on their edit distance.
 *
 * <p>The score is {@code 1 - editDistance(source, target) / max(length)},
 * so identical sequences score {@code 1.0} and sequences that share nothing
 * score {@code 0.0}.
 *
 * @param source the first sequence; must not be {@code null}
 * @param target the second sequence; must not be {@code null}
 * @return the similarity score in the range {@code [0.0, 1.0]}
 * @throws IllegalArgumentException if either argument is {@code null}
 */
public static float similarity(CharSequence source, CharSequence target) {
        if (source == null || target == null) {
            throw new IllegalArgumentException("source and target must not be null");
        }
        int len = Math.max(source.length(), target.length());
        if (len == 0) {
            // Both sequences are empty, hence identical. Without this guard the
            // division below would be 0f / 0, which evaluates to NaN.
            return 1.0f;
        }
        return 1 - (float) editDistance(source, target) / len;
    }


    /**
     * Computes the edit distance between two character sequences using a
     * full dynamic-programming table.
     *
     * <p>Cell {@code [r][c]} holds the number of single-character insertions,
     * deletions and replacements needed to turn the first {@code r} characters
     * of {@code source} into the first {@code c} characters of {@code target}.
     *
     * @param source the sequence to transform
     * @param target the sequence to transform into
     * @return the number of single-character edits separating the two sequences
     */
    private static int editDistance(CharSequence source, CharSequence target) {
        final int rows = source.length() + 1;
        final int cols = target.length() + 1;
        final int[][] dist = new int[rows][cols];

        // Turning a prefix into the empty sequence costs one deletion per character.
        for (int r = 0; r < rows; r++) {
            dist[r][0] = r;
        }
        // Building a prefix from the empty sequence costs one insertion per character.
        for (int c = 0; c < cols; c++) {
            dist[0][c] = c;
        }

        for (int r = 1; r < rows; r++) {
            for (int c = 1; c < cols; c++) {
                final int diagonal = dist[r - 1][c - 1];
                if (source.charAt(r - 1) == target.charAt(c - 1)) {
                    // Matching characters add no cost on top of the shorter prefixes.
                    dist[r][c] = diagonal;
                    continue;
                }
                final int left = dist[r][c - 1];  // insertion
                final int above = dist[r - 1][c]; // deletion
                dist[r][c] = Math.min(diagonal, Math.min(left, above)) + 1;
            }
        }
        return dist[rows - 1][cols - 1];
    }


    /**
     * Demo entry point: prints the similarity score of two identical strings.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final float score = similarity("Hello", "Hello");
        System.out.println(score);
    }
相關文章
相關標籤/搜索