Download the lucene-gosen jar from the following site: https://code.google.com/p/lucene-gosen/
/**
 * Tokenizes a text file and writes each token's surface form on its own line.
 *
 * <p>The input is decoded as UTF-8 and fed through a {@code StreamTagger}; for every
 * token the tagger yields, {@code token.getSurface()} is written to the output file
 * followed by a line separator.
 *
 * <p>NOTE(review): the output goes through {@code FileWriter}, which encodes with the
 * JVM's default charset, while the input is explicitly UTF-8. That is kept as-is so
 * existing consumers of the output file are unaffected — confirm whether UTF-8 output
 * is wanted.
 *
 * @param readfile  the file to tokenize; read as UTF-8
 * @param writefile the file that receives one token surface per line
 * @throws FileNotFoundException if {@code readfile} cannot be opened for reading
 * @throws IOException           if reading, tokenizing, or writing fails
 */
public static void writing(File readfile, File writefile) throws IOException, FileNotFoundException {
    // null is passed through unchanged; presumably it selects the default
    // dictionary/configuration — confirm against the lucene-gosen documentation.
    StringTagger stringTagger = SenFactory.getStringTagger(null);

    // try-with-resources closes the reader (and the underlying file stream) even when
    // tokenizing or writing throws; previously the reader was never closed at all.
    try (Reader reader = new InputStreamReader(new FileInputStream(readfile), "UTF-8")) {
        StreamTagger tagger = new StreamTagger(stringTagger, reader);

        // The writer is opened only after the tagger has been built, matching the
        // original order, so the output file is not touched if setup fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(writefile))) {
            while (tagger.hasNext()) {
                Token token = tagger.next();
                bw.write(token.getSurface());
                bw.newLine();
            }
        }
    }
}
/**
 * Counts the whitespace-separated words of a text file and writes the distinct words
 * to another file.
 */
public class Wordseparated {

    /**
     * Reads {@code readfile} line by line, adds every non-empty whitespace-separated
     * word to a fresh {@code CountTable}, then writes each key returned by
     * {@code table.getKeysByCount()} to {@code writefile}, one per line.
     *
     * <p>Only the words are written; the counts themselves are available to the caller
     * through the returned table. Both files are opened with the JVM's default charset
     * ({@code FileReader}/{@code FileWriter}).
     *
     * @param readfile  path of the file whose words are counted
     * @param writefile path of the file that receives one word per line, in the order
     *                  given by {@code getKeysByCount()} (presumably ordered by count —
     *                  confirm against {@code CountTable})
     * @return the table populated with every word that was read
     * @throws FileNotFoundException if {@code readfile} cannot be opened
     * @throws IOException           if reading or writing fails
     */
    public CountTable count(String readfile, String writefile) throws IOException, FileNotFoundException {
        CountTable table = new CountTable();

        // Both streams are opened up front, in the original order, and are closed by
        // try-with-resources on every path — including when the writer fails to open
        // or an I/O error occurs midway, which previously leaked the open handles.
        try (BufferedReader brfile = new BufferedReader(new FileReader(readfile));
             BufferedWriter bwfile = new BufferedWriter(new FileWriter(writefile))) {

            String linefile;
            while ((linefile = brfile.readLine()) != null) {
                for (String s : linefile.split("\\s+")) {
                    // A line with leading whitespace yields an empty first element.
                    if (!s.isEmpty()) {
                        // NOTE(review): the result was never used. The call is retained
                        // because CountTable is not visible here — drop it if get() has
                        // no side effects.
                        table.get(s);
                        table.add(s);
                    }
                }
            }

            for (String s : table.getKeysByCount()) {
                // NOTE(review): result unused here as well; retained for the same reason.
                table.get(s);
                bwfile.write(s);
                bwfile.newLine();
            }
        }
        return table;
    }
}
Recommended Posts