Par défaut, Solr traite le texte japonais par analyse morphologique. Voici le code à utiliser pour lancer cette analyse morphologique depuis Java. Dans la console d'administration, on obtient le même résultat en activant la sortie détaillée sur la page Analyse.
package hello.solr;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
/**
 * Minimal SolrJ example: submits a single document in a
 * {@link DocumentAnalysisRequest} to a Solr core and prints the analysis of
 * one field to standard error.
 *
 * <p>The program prints every analysis stage returned for the field, then
 * looks up the stage named {@code org.apache.lucene.analysis.ja.JapaneseTokenizer}
 * and, for each token it reports, prints the token text, type, part of speech
 * and reading.
 */
public class HelloAnalysisJapaneseSimple {

    /**
     * Runs the analysis request against {@code http://localhost:8983/solr/core_nlp}
     * and dumps the result to {@code System.err}.
     *
     * @param args ignored
     * @throws IllegalStateException if the response lacks the expected
     *         {@code analysis} section, the analysed field, or its
     *         {@code index} phase
     * @throws Exception if the request to Solr fails
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public static void main(String[] args) throws Exception {
        String fieldName = "field_text_ja";
        String coreName = "core_nlp";
        // NOTE(review): the sample output published with this program shows
        // raw_bytes that are UTF-8 Japanese, so this literal looks like it was
        // machine-translated from a Japanese sentence - confirm and restore
        // the Japanese input before relying on the tokenizer output.
        String text = "Bonjour. Il fait beau aujourd'hui, n'est-ce pas. Je suis un employé de Nissan Motor Co., Ltd.";

        // Document with an id and the field whose analysis we want to inspect.
        HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();
        SolrInputDocument doc = new SolrInputDocument(fields);
        doc.setField("id", "0");
        doc.setField(fieldName, text);

        // Request
        DocumentAnalysisRequest request = new DocumentAnalysisRequest();
        request.addDocument(doc);

        String solrLocation = "http://localhost:8983/solr/" + coreName;

        // try-with-resources: the client is closed even when the request
        // throws, so its HTTP resources are always released.
        NamedList<Object> response;
        try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
            response = client.request(request);
        }

        // Navigate: response -> "analysis" -> first document -> field -> "index" -> first value.
        NamedList<Object> analysis = (NamedList<Object>) response.get("analysis");
        if (analysis == null || analysis.size() == 0) {
            throw new IllegalStateException(
                    "No 'analysis' section in the response from " + solrLocation);
        }
        SimpleOrderedMap docAnalysis = (SimpleOrderedMap) analysis.getVal(0);
        SimpleOrderedMap fieldAnalysis = (SimpleOrderedMap) docAnalysis.get(fieldName);
        if (fieldAnalysis == null) {
            throw new IllegalStateException(
                    "Field '" + fieldName + "' is missing from the analysis response");
        }
        SimpleOrderedMap index = (SimpleOrderedMap) fieldAnalysis.get("index");
        if (index == null || index.size() == 0) {
            throw new IllegalStateException(
                    "No 'index' analysis returned for field '" + fieldName + "'");
        }
        NamedList nlpResult = (NamedList) index.getVal(0);

        // Dump every stage (name = tokens) exactly as returned.
        System.err.println("Tokenizer,Filter ---");
        for (int n = 0; n < nlpResult.size(); n++) {
            System.err.println(nlpResult.getName(n) + "=" + nlpResult.getVal(n));
        }

        ArrayList wordListPOS = (ArrayList) nlpResult
                .get("org.apache.lucene.analysis.ja.JapaneseTokenizer");
        if (wordListPOS == null) {
            // The field's analysis has no stage under that tokenizer name; nothing more to print.
            return;
        }

        // Show all attribute names/values of the first token once, as a key reference.
        if (!wordListPOS.isEmpty()) {
            SimpleOrderedMap firstWord = (SimpleOrderedMap) wordListPOS.get(0);
            System.err.println("<names>");
            for (int m = 0; m < firstWord.size(); m++) {
                System.err.println(firstWord.getName(m) + "=" + firstWord.getVal(m));
            }
            System.err.println("</names>");
        }

        // Attribute keys are loop-invariant, so they are defined once here.
        String namePOS = "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech";
        String nameREADING = "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading";
        for (int n = 0; n < wordListPOS.size(); n++) {
            SimpleOrderedMap wordPOS = (SimpleOrderedMap) wordListPOS.get(n);
            System.err.println( //
                    "text='" + wordPOS.get("text") + "'" //
                            + ",type='" + wordPOS.get("type") + "'" //
                            + ",partOfSpeech='" + wordPOS.get(namePOS) + "'" //
                            + ",reading='" + wordPOS.get(nameREADING) + "'" //
            );
        }
    }
}
<names>
text=Bonjour
raw_bytes=[e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af]
start=0
end=5
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1
type=word
org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute#termFrequency=1
org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute#baseForm=null
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech=Des mots impressionnants
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech (en)=interjection
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading=Bonjour
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading (en)=konnichiha
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation=Bonjour
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation (en)=konnichiwa
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType (en)=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm (en)=null
position=1
positionHistory=[1]
</names>
text='Bonjour',type='word',partOfSpeech='Des mots impressionnants',reading='Bonjour'
text='aujourd'hui',type='word',partOfSpeech='nom-Avocat possible',reading='aujourd'hui'
text='Est',type='word',partOfSpeech='Particule-係Particule',reading='C'
text='Bien',type='word',partOfSpeech='adjectif-Indépendance',reading='bien'
text='Météo',type='word',partOfSpeech='nom-Général',reading='Météo'
text='est',type='word',partOfSpeech='Verbe auxiliaire',reading='mort'
text='Hey',type='word',partOfSpeech='Particule-終Particule',reading='Ne'
text='je',type='word',partOfSpeech='nom-代nom-Général',reading='je'
text='Est',type='word',partOfSpeech='Particule-係Particule',reading='C'
text='Nissan',type='word',partOfSpeech='nom-固有nom-Organisation',reading='Nissan'
text='Moteur Nissan',type='word',partOfSpeech='nom-固有nom-Organisation',reading='Nissan Jidosha'
text='Voiture',type='word',partOfSpeech='nom-Général',reading='Jidosha'
text='de',type='word',partOfSpeech='Particule-syndicat',reading='Non'
text='Employé',type='word',partOfSpeech='nom-Général',reading='Éclat'
text='est',type='word',partOfSpeech='Verbe auxiliaire',reading='mort'
Recommended Posts