Analysieren Sie englischen Text mit Stanford CoreNLP (Japanisch wird nicht unterstützt): https://stanfordnlp.github.io/CoreNLP/
Der Code lautet wie folgt.
package pkg;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.AfterAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.BeforeAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.IndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.NamedEntityTagAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.OriginalTextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentenceIndexAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.SentenceAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;
/**
 * Small demo that runs a Stanford CoreNLP pipeline (tokenize, ssplit, pos,
 * depparse) over a fixed English sentence and dumps the basic dependency
 * tree of every sentence to standard error.
 */
public class Main3 {

    /**
     * Builds the pipeline, annotates the sample text and prints the
     * dependency tree of each sentence, starting at the graph's first root.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        String text = "I have a red Nissan car.";

        Properties properties = new Properties();
        properties.setProperty("annotators", "tokenize, ssplit, pos, depparse");
        StanfordCoreNLP coreNLP = new StanfordCoreNLP(properties);

        Annotation annotation = new Annotation(text);
        coreNLP.annotate(annotation);

        List<CoreMap> sentences = annotation.get(SentencesAnnotation.class);
        if (sentences == null) {
            // Nothing was split into sentences, so there is nothing to print.
            return;
        }

        for (CoreMap sentence : sentences) {
            SemanticGraph graph = sentence.get(BasicDependenciesAnnotation.class);
            // getFirstRoot() throws on a graph without roots; skip such
            // sentences (and missing graphs) instead of aborting the run.
            if (graph == null || graph.getRoots().isEmpty()) {
                continue;
            }
            printWord(graph.getFirstRoot(), graph, 0);
        }
    }

    /**
     * Prints a selection of annotations of {@code word} to standard error and
     * then does the same, depth-first, for every child of {@code word} in
     * {@code graph}.
     *
     * @param word  the node whose annotations are printed
     * @param graph the dependency graph used to look up the children of {@code word}
     * @param tab   the depth of {@code word} below the starting node; printed as
     *              {@code depth:} and incremented by one for each level of children
     */
    public static void printWord(IndexedWord word, SemanticGraph graph, int tab) {
        System.err.println("---");
        System.err.println("depth:" + tab);
        System.err.println("TextAnnotation:" + word.get(TextAnnotation.class));
        System.err.println("OriginalTextAnnotation:" + word.get(OriginalTextAnnotation.class));
        System.err.println("CharacterOffsetBeginAnnotation:" + word.get(CharacterOffsetBeginAnnotation.class));
        System.err.println("CharacterOffsetEndAnnotation:" + word.get(CharacterOffsetEndAnnotation.class));
        System.err.println("IndexAnnotation:" + word.get(IndexAnnotation.class));
        System.err.println("SentenceIndexAnnotation:" + word.get(SentenceIndexAnnotation.class));
        System.err.println("PartOfSpeechAnnotation:" + word.get(PartOfSpeechAnnotation.class));

        // Recurse into the dependents of this word, one level deeper.
        for (IndexedWord child : graph.getChildList(word)) {
            printWord(child, graph, tab + 1);
        }
    }
}
Ausführungsergebnis
Adding annotator tokenize
TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.
Adding annotator ssplit
edu.stanford.nlp.pipeline.AnnotatorImplementations:
Adding annotator pos
Reading POS tagger model from edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger ... done [0.7 sec].
Adding annotator depparse
Loading depparse model file: edu/stanford/nlp/models/parser/nndep/PTB_Stanford_params.txt.gz ...
PreComputed 100000, Elapsed Time: 2.151 (s)
Initializing dependency parser done [3.2 sec].
---
depth:0
TextAnnotation:have
OriginalTextAnnotation:have
CharacterOffsetBeginAnnotation:2
CharacterOffsetEndAnnotation:6
IndexAnnotation:2
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:VBP
---
depth:1
TextAnnotation:I
OriginalTextAnnotation:I
CharacterOffsetBeginAnnotation:0
CharacterOffsetEndAnnotation:1
IndexAnnotation:1
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:PRP
---
depth:1
TextAnnotation:car
OriginalTextAnnotation:car
CharacterOffsetBeginAnnotation:20
CharacterOffsetEndAnnotation:23
IndexAnnotation:6
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NN
---
depth:2
TextAnnotation:a
OriginalTextAnnotation:a
CharacterOffsetBeginAnnotation:7
CharacterOffsetEndAnnotation:8
IndexAnnotation:3
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:DT
---
depth:2
TextAnnotation:red
OriginalTextAnnotation:red
CharacterOffsetBeginAnnotation:9
CharacterOffsetEndAnnotation:12
IndexAnnotation:4
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:JJ
---
depth:2
TextAnnotation:Nissan
OriginalTextAnnotation:Nissan
CharacterOffsetBeginAnnotation:13
CharacterOffsetEndAnnotation:19
IndexAnnotation:5
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:NNP
---
depth:1
TextAnnotation:.
OriginalTextAnnotation:.
CharacterOffsetBeginAnnotation:23
CharacterOffsetEndAnnotation:24
IndexAnnotation:7
SentenceIndexAnnotation:0
PartOfSpeechAnnotation:.
Recommended Posts