Standardmäßig führt Solr für japanischen Text eine morphologische Analyse durch. Der folgende Code zeigt, wie Sie diese morphologische Analyse aus Java heraus verwenden können. In der Administrationskonsole erhalten Sie dasselbe Ergebnis, wenn Sie auf der Analyseseite die ausführliche Ausgabe aktivieren.
package hello.solr;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
/**
 * Sends a single document to a Solr core through a
 * {@link DocumentAnalysisRequest} and prints the index-time analysis result
 * for one field to standard error.
 *
 * <p>The program prints every entry of the first index-time analysis result
 * of the field, then, if an entry keyed by the JapaneseTokenizer class name
 * is present, one line per token with its text, type, part of speech and
 * reading.
 */
public class HelloAnalysisJapaneseSimple {

    /** Key under which the tokenizer's token list is looked up in the analysis result. */
    private static final String TOKENIZER_KEY = "org.apache.lucene.analysis.ja.JapaneseTokenizer";

    /** Token attribute key used to read the part-of-speech value. */
    private static final String NAME_POS = "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech";

    /** Token attribute key used to read the reading value. */
    private static final String NAME_READING = "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading";

    /**
     * Entry point: builds the document, runs the analysis request and prints
     * the result.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the request fails or the response does not have
     *                   the structure this code navigates
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    static public void main(String[] args) throws Exception {
        String fieldName = "field_text_ja";
        String coreName = "core_nlp";
        // NOTE(review): this sample text is not Japanese although it is sent to a
        // field named "field_text_ja" — confirm the intended input.
        String text = "Hallo. Das Wetter ist heute gut, nicht wahr? Ich bin Mitarbeiter von Nissan Motor Co., Ltd.";

        // Document with its two fields
        HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();
        SolrInputDocument doc = new SolrInputDocument(fields);
        doc.setField("id", "0");
        doc.setField(fieldName, text);

        // Request
        DocumentAnalysisRequest request = new DocumentAnalysisRequest();
        request.addDocument(doc);

        String solrLocation = "http://localhost:8983/solr/" + coreName;

        // The client is only needed for the request itself; try-with-resources
        // closes it even when the request throws.
        final NamedList<Object> response;
        try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
            response = client.request(request);
        }

        // Navigate: analysis -> first entry -> field -> "index" -> first entry
        NamedList<Object> analysis = (NamedList<Object>) response.get("analysis");
        SimpleOrderedMap firstDocument = (SimpleOrderedMap) analysis.getVal(0);
        SimpleOrderedMap fieldAnalysis = (SimpleOrderedMap) firstDocument.get(fieldName);
        SimpleOrderedMap index = (SimpleOrderedMap) fieldAnalysis.get("index");
        NamedList nlpResult = (NamedList) index.getVal(0);

        // Dump every name/value pair of the analysis result
        System.err.println("Tokenizer,Filter ---");
        for (int n = 0; n < nlpResult.size(); n++) {
            System.err.println(nlpResult.getName(n) + "=" + nlpResult.getVal(n));
        }

        ArrayList wordListPOS = (ArrayList) nlpResult.get(TOKENIZER_KEY);
        if (wordListPOS != null) {
            printTokens(wordListPOS);
        }
    }

    /**
     * Prints one summary line per token; before the first token's summary it
     * also prints all of that token's attribute names and values between
     * {@code <names>} and {@code </names>}.
     *
     * @param wordListPOS list whose elements are cast to {@link SimpleOrderedMap}
     */
    @SuppressWarnings({ "rawtypes" })
    private static void printTokens(ArrayList wordListPOS) {
        for (int n = 0; n < wordListPOS.size(); n++) {
            SimpleOrderedMap wordPOS = (SimpleOrderedMap) wordListPOS.get(n);
            if (n == 0) {
                // Show the available attribute names once, using the first token
                System.err.println("<names>");
                for (int m = 0; m < wordPOS.size(); m++) {
                    System.err.println(wordPOS.getName(m) + "=" + wordPOS.getVal(m));
                }
                System.err.println("</names>");
            }
            System.err.println(
                    "text='" + wordPOS.get("text") + "'"
                    + ",type='" + wordPOS.get("type") + "'"
                    + ",partOfSpeech='" + wordPOS.get(NAME_POS) + "'"
                    + ",reading='" + wordPOS.get(NAME_READING) + "'");
        }
    }
}
<names>
text=Hallo
raw_bytes=[e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af]
start=0
end=5
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1
type=word
org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute#termFrequency=1
org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute#baseForm=null
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech=Beeindruckende Worte
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech (en)=interjection
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading=Hallo
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading (en)=konnichiha
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation=Hallo
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation (en)=konnichiwa
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType (en)=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm (en)=null
position=1
positionHistory=[1]
</names>
text='Hallo',type='word',partOfSpeech='Beeindruckende Worte',reading='Hallo'
text='heute',type='word',partOfSpeech='Substantiv-Anwalt möglich',reading='heute'
text='Ist',type='word',partOfSpeech='Partikel-係Partikel',reading='C.'
text='Gut',type='word',partOfSpeech='Adjektiv-Unabhängigkeit',reading='gut'
text='Wetter',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Wetter'
text='ist',type='word',partOfSpeech='Hilfsverb',reading='Tod'
text='Hallo',type='word',partOfSpeech='Partikel-終Partikel',reading='Ne'
text='ich',type='word',partOfSpeech='Substantiv-代Substantiv-Allgemeines',reading='ich'
text='Ist',type='word',partOfSpeech='Partikel-係Partikel',reading='C.'
text='Nissan',type='word',partOfSpeech='Substantiv-固有Substantiv-Organisation',reading='Nissan'
text='Nissan Motor',type='word',partOfSpeech='Substantiv-固有Substantiv-Organisation',reading='Nissan Jidosha'
text='Wagen',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Jidosha'
text='von',type='word',partOfSpeech='Partikel-Union',reading='Nein'
text='Mitarbeiter',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Scheinen'
text='ist',type='word',partOfSpeech='Hilfsverb',reading='Tod'
Recommended Posts