The Solr admin console offers several ways to view the results of natural language processing, but I couldn't find a description of how to obtain the same results from Java, so I looked into it. In the admin console, you get the same result by enabling Verbose Output on the Analysis page.
package hello.solr;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
/**
 * Minimal example that sends one document to Solr's document analysis
 * handler and prints the tokens reported by the OpenNLP part-of-speech
 * filter for a single field.
 *
 * <p>The core name, field name, sample text and Solr URL are hard-coded in
 * {@link #main(String[])}.
 */
public class HelloAnalysisEnglishSimple {
    /**
     * Builds a one-field document, submits it as a
     * {@link DocumentAnalysisRequest}, and writes the {@code text} and
     * {@code type} of every token produced by
     * {@code org.apache.lucene.analysis.opennlp.OpenNLPPOSFilter} to
     * standard error.
     *
     * @param args ignored
     * @throws Exception if the request to Solr fails, or if the response
     *         does not contain the expected OpenNLPPOSFilter stage
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    static public void main(String[] args) throws Exception {
        String fieldName = "field_text_opennlp";
        String coreName = "core_nlp";
        String text = "Hello, I'm a data engineer of Nissan Motor.";
        String filterName = "org.apache.lucene.analysis.opennlp.OpenNLPPOSFilter";

        HashMap<String, SolrInputField> fields = new HashMap<String, SolrInputField>();
        // Document
        SolrInputDocument doc = new SolrInputDocument(fields);
        // Document fields: an id plus the text to analyze
        doc.setField("id", "0");
        doc.setField(fieldName, text);

        // Request
        DocumentAnalysisRequest request = new DocumentAnalysisRequest();
        request.addDocument(doc);

        String solrLocation = "http://localhost:8983/solr/" + coreName;

        // Send the request; try-with-resources closes the client (and the
        // HTTP resources it holds) even when the request throws.
        NamedList<Object> response;
        try (SolrClient client = new HttpSolrClient.Builder(solrLocation).build()) {
            response = client.request(request);
        }

        // Walk the nested response: analysis -> first entry -> field ->
        // "index" -> first entry -> per-stage token lists.
        NamedList<Object> analysis = (NamedList<Object>) response.get("analysis");
        // Only one document was submitted, so the first entry is used.
        SimpleOrderedMap docAnalysis = (SimpleOrderedMap) analysis.getVal(0);
        SimpleOrderedMap f = (SimpleOrderedMap) docAnalysis.get(fieldName);
        SimpleOrderedMap index = (SimpleOrderedMap) f.get("index");
        // NOTE(review): presumably the first entry corresponds to the single
        // field value that was analyzed - confirm against the handler output.
        NamedList nlpResult = (NamedList) index.getVal(0);

        ArrayList wordListPOS = (ArrayList) nlpResult.get(filterName);
        if (wordListPOS == null) {
            // Fail with a descriptive message instead of a bare NullPointerException.
            throw new IllegalStateException("Analysis response for field '"
                    + fieldName + "' has no stage '" + filterName + "'");
        }

        for (int n = 0; n < wordListPOS.size(); n++) {
            SimpleOrderedMap wordPOS = (SimpleOrderedMap) wordListPOS.get(n);
            System.err.println("text='" + wordPOS.get("text") + "',type='"
                    + wordPOS.get("type") + "'");
        }
    }
}
text='Hello',type='UH'
text=',',type=','
text='I',type='PRP'
text=''m',type='VBP'
text='a',type='DT'
text='data',type='NN'
text='engineer',type='NN'
text='of',type='IN'
text='Nissan',type='NNP'
text='Motor',type='NNP'
text='.',type='.'
For the meaning of the type values (part-of-speech tags), see the following page: https://www.ibm.com/support/knowledgecenter/ja/SS5RWK_3.5.0/com.ibm.discovery.es.ta.doc/iiysspostagset.htm
Recommended Posts