-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNGram.java
More file actions
68 lines (59 loc) · 2.3 KB
/
NGram.java
File metadata and controls
68 lines (59 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Random;
import java.util.Scanner;
import java.util.ArrayList;
import java.util.List;
import javax.swing.JFileChooser;
import javax.swing.JOptionPane;
/**
 * Utility methods for building word-level n-grams from a piece of text and
 * for expanding a list of n-grams with all of the shorter grams they contain.
 *
 * <p>Words inside a gram are always separated by exactly one space.
 */
public class NGram {

    /**
     * Builds every run of {@code max} consecutive words found in {@code val}.
     *
     * <p>The text is tokenized on runs of whitespace, so leading, trailing or
     * repeated spaces never produce empty "words".
     *
     * @param max number of words per gram; must be at least 1
     * @param val the text to tokenize; {@code null} or blank text yields an
     *            empty list
     * @return a new mutable list of grams in order of appearance; empty when
     *         the text contains fewer than {@code max} words
     * @throws IllegalArgumentException if {@code max} is less than 1
     */
    public static List<String> ngrams(int max, String val) {
        if (max < 1) {
            throw new IllegalArgumentException("max must be at least 1 but was " + max);
        }
        String trimmed = (val == null) ? "" : val.trim();
        if (trimmed.length() == 0) {
            // "".split(...) would return a single empty token, not zero tokens.
            return new ArrayList<String>();
        }
        String[] words = trimmed.split("\\s+");
        int count = Math.max(0, words.length - max + 1);
        List<String> out = new ArrayList<String>(count);
        for (int i = 0; i < count; i++) {
            out.add(makeString(words, i, max));
        }
        return out;
    }

    /**
     * Joins {@code length} consecutive entries of {@code words}, starting at
     * index {@code start}, with single spaces.
     *
     * @param words  the source words
     * @param start  index of the first word to include
     * @param length number of words to include; zero or a negative value
     *               yields the empty string
     * @return the joined words without a trailing space
     * @throws ArrayIndexOutOfBoundsException if the requested range reaches
     *         outside {@code words}
     */
    public static String makeString(String[] words, int start, int length) {
        if (length <= 0) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for (int i = start; i < start + length; i++) {
            // Separator goes before every word except the first, so there is
            // no trailing space to strip afterwards.
            if (i > start) {
                joined.append(' ');
            }
            joined.append(words[i]);
        }
        return joined.toString();
    }

    /**
     * Appends to {@code in} every shorter gram (sizes {@code size - 1} down
     * to 1) derived from the grams it already holds.
     *
     * <p>The list is modified in place and the same instance is returned.
     * {@code size} is expected to equal the number of words in each gram of
     * {@code in}; a larger value makes {@link #reduceByOne(List)} run on
     * single-word grams, which only re-adds those words.
     *
     * @param in   grams of {@code size} words each, in order of appearance;
     *             must be a modifiable list
     * @param size number of words per gram in {@code in}; values of 1 or less
     *             leave the list untouched
     * @return {@code in}, with the shorter grams appended
     */
    public static List<String> reduceNgrams(List<String> in, int size) {
        List<String> working = in;
        // One reduction pass per size step: size-1, size-2, ..., 1.
        for (int remaining = size - 1; remaining > 0; remaining--) {
            working = reduceByOne(working);
            in.addAll(working);
        }
        return in;
    }

    /**
     * Produces the grams that are one word shorter than those in {@code in}.
     *
     * <p>Each gram contributes itself minus its last word. The final gram
     * additionally contributes itself minus its first word, because that tail
     * is the only shorter gram not covered by dropping a last word.
     *
     * @param in grams of equal word count, in order of appearance
     * @return a new list holding the shortened grams; empty when {@code in}
     *         is empty
     */
    public static List<String> reduceByOne(List<String> in) {
        if (in.isEmpty()) {
            // Nothing to shorten; also avoids in.get(-1) below.
            return new ArrayList<String>();
        }
        List<String> out = new ArrayList<String>(in.size() + 1);
        for (String gram : in) {
            int lastSpace = gram.lastIndexOf(' ');
            out.add(lastSpace == -1 ? gram : gram.substring(0, lastSpace));
        }
        // The loop yields words [0, n-1) of every gram; the last gram must also
        // yield words [1, n) so the trailing shorter gram is not lost.
        String last = in.get(in.size() - 1);
        out.add(last.substring(last.indexOf(' ') + 1));
        return out;
    }

    /**
     * Demo entry point: builds and reduces trigrams for an English sample,
     * then prints the trigrams of an Arabic sample, each preceded by the
     * milliseconds elapsed since start-up.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        List<String> ngrams = ngrams(3, "Your text goes here, actual mileage may vary");
        reduceNgrams(ngrams, 3);
        List<String> grams = ngrams(3,"الدوري الإيطالي لاتسيو يتغلب ضيفه انتر ميلان بهدفين نظيفين الأسبوع المسابقة");
        for (String s : grams) {
            System.out.println(System.currentTimeMillis() - start);
            System.out.println(s);
        }
    }
}