labelingsystem-server  Version 0.1.0.0
Similarity Class Reference

A library implementing different string similarity and distance measures. More...

Static Public Member Functions

static double Levenshtein (String s1, String s2)
 
static double NormalizedLevenshtein (String s1, String s2)
 
static double WeightedLevenshtein (String s1, String s2)
 
static double Damerau (String s1, String s2)
 
static double JaroWinkler (String s1, String s2)
 
static double LongestCommonSubsequence (String s1, String s2)
 
static double MetricLCS (String s1, String s2)
 
static double twoGram (String s1, String s2)
 
static double NGram (String s1, String s2)
 
static double QGram (String s1, String s2)
 

Detailed Description

A library implementing different string similarity and distance measures.

A dozen algorithms (including Levenshtein edit distance and its siblings, Jaro-Winkler, Longest Common Subsequence, cosine similarity, etc.) are currently implemented. See the summary table in the upstream project for the complete list: https://github.com/tdebatty/java-string-similarity

Author
Copyright 2015 Thibault Debatty https://github.com/tdebatty/java-string-similarity/blob/master/LICENSE.md

Member Function Documentation

◆ Damerau()

static double Damerau ( String  s1,
String  s2 
)
static
50  {
51  Damerau d = new Damerau();
52  return d.distance(s1, s2);
53  }

◆ JaroWinkler()

static double JaroWinkler ( String  s1,
String  s2 
)
static
56  {
57  JaroWinkler jw = new JaroWinkler();
58  return jw.similarity(s1, s2);
59  }

◆ Levenshtein()

static double Levenshtein ( String  s1,
String  s2 
)
static
19  {
20  Levenshtein l = new Levenshtein();
21  return l.distance(s1, s2);
22  }

◆ LongestCommonSubsequence()

static double LongestCommonSubsequence ( String  s1,
String  s2 
)
static
62  {
63  LongestCommonSubsequence lcs = new LongestCommonSubsequence();
64  return lcs.distance(s1, s2);
65  }

◆ MetricLCS()

static double MetricLCS ( String  s1,
String  s2 
)
static
68  {
69  MetricLCS lcs = new MetricLCS();
70  return lcs.distance(s1, s2);
71  }

◆ NGram()

static double NGram ( String  s1,
String  s2 
)
static
80  {
81  NGram ngram = new NGram(4);
82  return ngram.distance(s1, s2);
83  }

Referenced by Similarity.twoGram().

◆ NormalizedLevenshtein()

static double NormalizedLevenshtein ( String  s1,
String  s2 
)
static
25  {
26  NormalizedLevenshtein nl = new NormalizedLevenshtein();
27  return nl.distance(s1, s2);
28  }

Referenced by SearchResource.getResultsLabelingSystem().

◆ QGram()

static double QGram ( String  s1,
String  s2 
)
static
86  {
87  QGram dig = new QGram(2);
88  return dig.distance(s1, s2);
89  }

◆ twoGram()

static double twoGram ( String  s1,
String  s2 
)
static
74  {
75  NGram twogram = new NGram(2);
76  return twogram.distance(s1, s2);
77  }

References Similarity.NGram().

◆ WeightedLevenshtein()

static double WeightedLevenshtein ( String  s1,
String  s2 
)
static
31  {
32  WeightedLevenshtein wl = new WeightedLevenshtein(
33  new CharacterSubstitutionInterface() {
34  public double cost(char c1, char c2) {
35  // The cost for substituting 't' and 'r' is considered
36  // smaller as these 2 are located next to each other
37  // on a keyboard
38  if (c1 == 't' && c2 == 'r') {
39  return 0.5;
40  }
41  // For most cases, the cost of substituting 2 characters
42  // is 1.0
43  return 1.0;
44  }
45  });
46  return wl.distance(s1, s2);
47  }
v1.utils.similarity.Similarity.LongestCommonSubsequence
static double LongestCommonSubsequence(String s1, String s2)
Definition: Similarity.java:62
v1.utils.similarity.Similarity.JaroWinkler
static double JaroWinkler(String s1, String s2)
Definition: Similarity.java:56
v1.utils.similarity.Similarity.NormalizedLevenshtein
static double NormalizedLevenshtein(String s1, String s2)
Definition: Similarity.java:25
v1.utils.similarity.Similarity.Levenshtein
static double Levenshtein(String s1, String s2)
Definition: Similarity.java:19
v1.utils.similarity.Similarity.NGram
static double NGram(String s1, String s2)
Definition: Similarity.java:80
v1.utils.similarity.Similarity.Damerau
static double Damerau(String s1, String s2)
Definition: Similarity.java:50
v1.utils.similarity.Similarity.MetricLCS
static double MetricLCS(String s1, String s2)
Definition: Similarity.java:68
v1.utils.similarity.Similarity.WeightedLevenshtein
static double WeightedLevenshtein(String s1, String s2)
Definition: Similarity.java:31
v1.utils.similarity.Similarity.QGram
static double QGram(String s1, String s2)
Definition: Similarity.java:86