Overview

There are several ways to view the results of natural language processing in the Solr admin console, but I couldn't find out how to obtain them from Java, so I looked into it. In the admin console, you can see the same result by enabling Verbose Output on the Analysis page.

code

package hello.solr;

import java.util.ArrayList;
import java.util.HashMap;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

/**
 * Sends a single document to Solr with a {@link DocumentAnalysisRequest} and
 * prints, for every token reported under the OpenNLP part-of-speech filter
 * stage, the token's "text" and "type" attributes.
 */
public class HelloAnalysisEnglishSimple {

	/**
	 * Runs the analysis against the core at
	 * {@code http://localhost:8983/solr/core_nlp} and writes one line per token
	 * to standard error.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if the request fails, or if the response contains no
	 *                   output for the OpenNLP POS filter stage
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	static public void main(String[] args) throws Exception {

		String fieldName = "field_text_opennlp";
		String coreName = "core_nlp";
		String text = "Hello, I'm a data engineer of Nissan Motor.";
		// Name of the analysis stage whose token list is printed below.
		String posFilterName = "org.apache.lucene.analysis.opennlp.OpenNLPPOSFilter";

		HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();

		// Document
		SolrInputDocument doc = new SolrInputDocument(fields);
		{
			// Document Field
			doc.setField("id", "0");
			doc.setField(fieldName, text);
		}

		// Request
		DocumentAnalysisRequest request = new DocumentAnalysisRequest();
		request.addDocument(doc);

		String solrLocation = "http://localhost:8983/solr/" + coreName;

		// NLP Response
		NamedList<Object> response;

		// NLP Client: SolrClient is Closeable, so close it even when the
		// request throws instead of leaving it open until the JVM exits.
		try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
			response = client.request(request);
		}

		// Get analysis response
		NamedList<Object> analysis = (NamedList<Object>) response
				.get("analysis");

		// First entry of "analysis" (presumably the one document submitted),
		// narrowed to the analyzed field.
		SimpleOrderedMap f = ((SimpleOrderedMap) ((SimpleOrderedMap) analysis
				.getVal(0)).get(fieldName));

		SimpleOrderedMap index = (SimpleOrderedMap) f.get("index");

		// First entry under "index" maps each analysis stage name to its tokens.
		NamedList nlpResult = (NamedList) index.getVal(0);

		ArrayList wordListPOS = (ArrayList) nlpResult.get(posFilterName);
		if (wordListPOS == null) {
			// Fail with a descriptive message rather than a bare
			// NullPointerException when the stage is absent from the response.
			throw new IllegalStateException("No '" + posFilterName
					+ "' stage in the analysis response for field '"
					+ fieldName + "'");
		}

		for (Object entry : wordListPOS) {
			SimpleOrderedMap wordPOS = (SimpleOrderedMap) entry;
			System.err.println("text='" + wordPOS.get("text") + "',type='"
					+ wordPOS.get("type") + "'");
		}

	}
}

result

text='Hello',type='UH'
text=',',type=','
text='I',type='PRP'
text=''m',type='VBP'
text='a',type='DT'
text='data',type='NN'
text='engineer',type='NN'
text='of',type='IN'
text='Nissan',type='NNP'
text='Motor',type='NNP'
text='.',type='.'

Supplement

See the following page for the meaning of the part-of-speech tags shown in the type values: https://www.ibm.com/support/knowledgecenter/ja/SS5RWK_3.5.0/com.ibm.discovery.es.ta.doc/iiysspostagset.htm

[JAVA] Get detailed results of morphological analysis with Apache Solr 7.6 + SolrJ

Overview

code

result

Supplement