Überblick

Standardmäßig führt Solr für japanischen Text eine morphologische Analyse durch. Der folgende Code zeigt, wie Sie die Ergebnisse dieser morphologischen Analyse aus Java (SolrJ) abrufen. Dasselbe Ergebnis erhalten Sie in der Administrationskonsole, wenn Sie auf der Analyseseite die ausführliche Ausgabe (Verbose Output) aktivieren.

Quellcode


package hello.solr;

import java.util.ArrayList;
import java.util.HashMap;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

/**
 * Sends one document to Solr's document analysis handler and prints the
 * index-time analysis of a single field to standard error.
 *
 * <p>
 * The program first dumps every analysis stage returned for the field, then
 * looks up the stage produced by
 * {@code org.apache.lucene.analysis.ja.JapaneseTokenizer} and prints text,
 * type, part of speech and reading for each token of that stage.
 */
public class HelloAnalysisJapaneseSimple {

	/** Key under which a token carries its part-of-speech attribute. */
	private static final String ATTR_PART_OF_SPEECH = "org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech";

	/** Key under which a token carries its reading attribute. */
	private static final String ATTR_READING = "org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading";

	/**
	 * Runs the analysis request against the core at
	 * {@code http://localhost:8983/solr/core_nlp} and prints the result.
	 *
	 * @param args
	 *            ignored
	 * @throws Exception
	 *             if the request fails, or an {@link IllegalStateException}
	 *             if the response lacks the expected analysis entries
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	static public void main(String[] args) throws Exception {

		String fieldName = "field_text_ja";
		String coreName = "core_nlp";
		// NOTE(review): the field name suggests a Japanese-analyzed field, but
		// this sample sentence is not Japanese — confirm the intended input.
		String text = "Hallo. Das Wetter ist heute gut, nicht wahr? Ich bin Mitarbeiter von Nissan Motor Co., Ltd.";

		HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();

		// Document to analyze: an id plus the field under test.
		SolrInputDocument doc = new SolrInputDocument(fields);
		doc.setField("id", "0");
		doc.setField(fieldName, text);

		// Request
		DocumentAnalysisRequest request = new DocumentAnalysisRequest();
		request.addDocument(doc);

		String solrLocation = "http://localhost:8983/solr/" + coreName;

		// The client is closed as soon as the response has been received, so
		// its HTTP resources are released even when the request throws.
		NamedList<Object> response;
		try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
			response = client.request(request);
		}

		// Walk down: analysis -> first document -> field -> "index" -> first entry.
		NamedList<Object> analysis = (NamedList<Object>) response
				.get("analysis");
		if (analysis == null || analysis.size() == 0) {
			throw new IllegalStateException(
					"Solr response contains no 'analysis' entry");
		}

		SimpleOrderedMap docAnalysis = (SimpleOrderedMap) analysis.getVal(0);
		SimpleOrderedMap fieldAnalysis = docAnalysis == null ? null
				: (SimpleOrderedMap) docAnalysis.get(fieldName);
		if (fieldAnalysis == null) {
			throw new IllegalStateException(
					"Solr response contains no analysis for field '"
							+ fieldName + "'");
		}

		SimpleOrderedMap index = (SimpleOrderedMap) fieldAnalysis.get("index");
		if (index == null || index.size() == 0) {
			throw new IllegalStateException(
					"Solr response contains no index-time analysis for field '"
							+ fieldName + "'");
		}

		NamedList nlpResult = (NamedList) index.getVal(0);

		// Dump every analysis stage (tokenizer and filters) as name=value.
		System.err.println("Tokenizer,Filter ---");
		for (int n = 0; n < nlpResult.size(); n++) {
			System.err.println(nlpResult.getName(n) + "="
					+ nlpResult.getVal(n));
		}

		ArrayList wordListPOS = (ArrayList) nlpResult
				.get("org.apache.lucene.analysis.ja.JapaneseTokenizer");

		// The tokenizer stage is absent when the field uses another analyzer.
		if (wordListPOS != null) {
			for (int n = 0; n < wordListPOS.size(); n++) {
				SimpleOrderedMap wordPOS = (SimpleOrderedMap) wordListPOS
						.get(n);

				// For the first token only, list every attribute it carries.
				if (n == 0) {
					System.err.println("<names>");
					for (int m = 0; m < wordPOS.size(); m++) {
						System.err.println(wordPOS.getName(m) + "="
								+ wordPOS.getVal(m));
					}
					System.err.println("</names>");
				}

				System.err.println( //
						"text='" + wordPOS.get("text") + "'" //
						+ ",type='" + wordPOS.get("type") + "'" //
						+ ",partOfSpeech='" + wordPOS.get(ATTR_PART_OF_SPEECH) + "'" //
						+ ",reading='" + wordPOS.get(ATTR_READING) + "'" //
						);
			}
		}

	}
}

Ergebnis


<names>
text=Hallo
raw_bytes=[e3 81 93 e3 82 93 e3 81 ab e3 81 a1 e3 81 af]
start=0
end=5
org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#positionLength=1
type=word
org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute#termFrequency=1
org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute#baseForm=null
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech=Beeindruckende Worte
org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute#partOfSpeech (en)=interjection
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading=Hallo
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#reading (en)=konnichiha
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation=Hallo
org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute#pronunciation (en)=konnichiwa
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionType (en)=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm=null
org.apache.lucene.analysis.ja.tokenattributes.InflectionAttribute#inflectionForm (en)=null
position=1
positionHistory=[1]
</names>
text='Hallo',type='word',partOfSpeech='Beeindruckende Worte',reading='Hallo'
text='heute',type='word',partOfSpeech='Substantiv-Anwalt möglich',reading='heute'
text='Ist',type='word',partOfSpeech='Partikel-係Partikel',reading='C.'
text='Gut',type='word',partOfSpeech='Adjektiv-Unabhängigkeit',reading='gut'
text='Wetter',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Wetter'
text='ist',type='word',partOfSpeech='Hilfsverb',reading='Tod'
text='Hallo',type='word',partOfSpeech='Partikel-終Partikel',reading='Ne'
text='ich',type='word',partOfSpeech='Substantiv-代Substantiv-Allgemeines',reading='ich'
text='Ist',type='word',partOfSpeech='Partikel-係Partikel',reading='C.'
text='Nissan',type='word',partOfSpeech='Substantiv-固有Substantiv-Organisation',reading='Nissan'
text='Nissan Motor',type='word',partOfSpeech='Substantiv-固有Substantiv-Organisation',reading='Nissan Jidosha'
text='Wagen',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Jidosha'
text='von',type='word',partOfSpeech='Partikel-Union',reading='Nein'
text='Mitarbeiter',type='word',partOfSpeech='Substantiv-Allgemeines',reading='Scheinen'
text='ist',type='word',partOfSpeech='Hilfsverb',reading='Tod'

[JAVA] Detaillierte Ergebnisse der morphologischen Analyse mit Apache Solr 7.6 + SolrJ abrufen (Japanisch)

Überblick

Quellcode

Ergebnis