-
-
Notifications
You must be signed in to change notification settings - Fork 610
/
RabinKarpSubsequenceSearch.java
62 lines (52 loc) · 2.29 KB
/
RabinKarpSubsequenceSearch.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package algo.string;
/**
* Created by sherxon on 1/12/17.
*/
/**
* This is Rabin Karp substring match algorithm. Time complexity is O(MxN), where m is length of string
* to be matched and N is substring length. Worst case happens when all hash codes are the same and string
* equal method checks all combinations. This is very rare if good hash function is used.
* The idea is simple, calculating hash code of substring and text at length of substring
* and check if these hashes are equal and substrings are also the same. To optimize hashing each sub-sequence
* we can use rolling hash function which is constant time hash generation algorithm
*/
public class RabinKarpSubsequenceSearch {
int prime = 31;
public static void main(String[] args) {
RabinKarpSubsequenceSearch rks = new RabinKarpSubsequenceSearch();
System.out.println(rks.searchPattern("Sherxon".toCharArray(), "n".toCharArray()));
System.out.println(rks.searchPattern("Sherxon".toCharArray(), "xon".toCharArray()));
System.out.println(rks.searchPattern("Sherxon".toCharArray(), "hs".toCharArray()));
System.out.println(rks.searchPattern("Sherxon".toCharArray(), "erx".toCharArray()));
System.out.println(rks.searchPattern("Sherxon".toCharArray(), "S".toCharArray()));
}
public int searchPattern(char[] text, char[] pattern) {
int m = text.length;
int n = pattern.length;
int textHash = hash(text, n);
int patternHash = hash(pattern, n);
for (int i = 0; i <= m - n; i++) {
if (patternHash == textHash && equal(text, pattern, i)) return i;
if (i < m - n) textHash = reHash(textHash, text[i], text[i + n], n);
}
return -1;
}
private int reHash(int textHash, char first, char last, int length) {
int h = (textHash - first) / prime;
h += Math.pow(prime, length - 1) * last;
return h;
}
private boolean equal(char[] text, char[] pattern, int from) {
for (int i = 0; i < pattern.length; i++)
if (text[i + from] != pattern[i])
return false;
return true;
}
private int hash(char[] s, int to) {
int h = 0;
for (int i = 0; i < to; i++) {
h += Math.pow(prime, i) * s[i];
}
return h;
}
}