A library implementing different string similarity and distance measures.
More...
|
static double | Levenshtein (String s1, String s2) |
|
static double | NormalizedLevenshtein (String s1, String s2) |
|
static double | WeightedLevenshtein (String s1, String s2) |
|
static double | Damerau (String s1, String s2) |
|
static double | JaroWinkler (String s1, String s2) |
|
static double | LongestCommonSubsequence (String s1, String s2) |
|
static double | MetricLCS (String s1, String s2) |
|
static double | twoGram (String s1, String s2) |
|
static double | NGram (String s1, String s2) |
|
static double | QGram (String s1, String s2) |
|
A library implementing different string similarity and distance measures.
A dozen algorithms (including Levenshtein edit distance and siblings, Jaro-Winkler, Longest Common Subsequence, cosine similarity, etc.) are currently implemented. Check the summary table below for the complete list... https://github.com/tdebatty/java-string-similarity
- Author
- Copyright 2015 Thibault Debatty https://github.com/tdebatty/java-string-similarity/blob/master/LICENSE.md
◆ Damerau()
static double Damerau |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
52 return d.distance(s1, s2);
◆ JaroWinkler()
static double JaroWinkler |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
58 return jw.similarity(s1, s2);
◆ Levenshtein()
static double Levenshtein |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
21 return l.distance(s1, s2);
◆ LongestCommonSubsequence()
static double LongestCommonSubsequence |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
64 return lcs.distance(s1, s2);
◆ MetricLCS()
static double MetricLCS |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
70 return lcs.distance(s1, s2);
◆ NGram()
static double NGram |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
◆ NormalizedLevenshtein()
static double NormalizedLevenshtein |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
◆ QGram()
static double QGram |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
88 return dig.distance(s1, s2);
◆ twoGram()
static double twoGram |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
◆ WeightedLevenshtein()
static double WeightedLevenshtein |
( |
String |
s1, |
|
|
String |
s2 |
|
) |
| |
|
static |
33 new CharacterSubstitutionInterface() {
34 public double cost(
char c1,
char c2) {
38 if (c1 ==
't' && c2 ==
'r') {
46 return wl.distance(s1, s2);