Use Watson Conversation as NLP (Java) (Natural Language Processing)

Watson Conversation can extract dates, times, numbers, and currency amounts from text as System Entities, so this article uses it purely for that purpose.

The library used is the Watson Java SDK: https://github.com/watson-developer-cloud/java-sdk

Overview

Input: "It is April 1, 2018." Output: EntityKeyword [facet=sys-date, begin=0, end=9, lex=2018-04-01, str=April 1, 2018] EntityKeyword [facet=sys-number, begin=2, end=4, lex=30, str=30] EntityKeyword [facet=sys-number, begin=5, end=6, lex=4, str=4] EntityKeyword [facet=sys-number, begin=7, end=8, lex=1, str=1]

Wrap the SDK library as follows and use it.

Service body

ConversationNLPService.java



package com.ibm.watson.developer_cloud.conversation;
import java.util.*;
import com.ibm.watson.developer_cloud.conversation.v1.ConversationService;
import com.ibm.watson.developer_cloud.conversation.v1.model.Entity;
import com.ibm.watson.developer_cloud.conversation.v1.model.MessageRequest;
import com.ibm.watson.developer_cloud.conversation.v1.model.MessageResponse;
/**
 * Wraps the Watson Conversation SDK so that a workspace can be used as a
 * simple entity extractor: a text goes in, the entities reported by the
 * service come back as {@link EntityKeyword} objects.
 */
public class ConversationNLPService {
	String username;
	String password;
	String workspaceid;

	/**
	 * Stores the credentials used for every call to {@link #nlp(String)}.
	 *
	 * @param username
	 *            service user name
	 * @param password
	 *            service password
	 * @param workspaceid
	 *            ID of the Conversation workspace to send messages to
	 */
	public ConversationNLPService(String username, String password, String workspaceid) {
		this.username = username;
		this.password = password;
		this.workspaceid = workspaceid;
	}

	/**
	 * Sends {@code input} to the Conversation workspace and collects the
	 * entities contained in the response.
	 *
	 * @param input
	 *            text to analyze; tab, line-feed and carriage-return
	 *            characters are replaced by spaces before sending
	 * @return the entities of the response; empty when the response carries no
	 *         entity list
	 * @throws NullPointerException
	 *             if {@code input} is {@code null}
	 * @throws Exception
	 *             if the service call fails
	 */
	public ConversationNLPServiceResponse nlp(String input) throws Exception {
		if (input == null) {
			throw new NullPointerException("input must not be null");
		}

		ConversationService service = new ConversationService(ConversationService.VERSION_DATE_2016_09_20);
		service.setUsernameAndPassword(username, password);

		// Pass the JVM default time zone to the service in the message context.
		Map<String, Object> context = new HashMap<String, Object>();
		context.put("timezone", TimeZone.getDefault().getID());

		// Replace control characters with spaces (one-for-one, so character
		// offsets are not shifted).
		String text = input.replace("\t", " ").replace("\n", " ").replace("\r", " ");

		MessageRequest newMessage = new MessageRequest.Builder() //
				.inputText(text) //
				.context(context) //
				.build();
		MessageResponse response = service.message(workspaceid, newMessage).execute();

		ConversationNLPServiceResponse rsp = new ConversationNLPServiceResponse();
		List<Entity> entities = response.getEntities();
		if (entities == null) {
			// No entity list in the response: report "nothing found" instead of failing.
			return rsp;
		}

		// Entity locations are sliced out of the input text echoed by the
		// service; fall back to the text that was sent if it is not echoed.
		String echoed = response.getInputText();
		if (echoed == null) {
			echoed = text;
		}

		for (Entity e : entities) {
			// An entity without a [begin, end] pair cannot be mapped onto the text.
			if (e == null || e.getLocation() == null || e.getLocation().length < 2) {
				continue;
			}
			int begin = e.getLocation()[0];
			int end = e.getLocation()[1];
			rsp.addKeyword(new EntityKeyword(e.getEntity(), begin, end, e.getValue(),
					surfaceText(echoed, begin, end)));
		}
		return rsp;
	}

	/**
	 * Returns {@code text.substring(begin, end)}, or {@code null} when the
	 * offsets do not describe a valid range of {@code text}.
	 */
	private static String surfaceText(String text, int begin, int end) {
		if (text == null || begin < 0 || begin > end || end > text.length()) {
			return null;
		}
		return text.substring(begin, end);
	}
}

ConversationNLPServiceResponse.java



package com.ibm.watson.developer_cloud.conversation;
import java.util.ArrayList;
/**
 * Result of one NLP call: the list of extracted keywords, filtered so that
 * among keywords of the same facet a keyword strictly contained in a longer
 * one is not kept.
 */
public class ConversationNLPServiceResponse {
	ArrayList<EntityKeyword> kwds = new ArrayList<>();

	/**
	 * Adds a keyword while keeping only the longest match per facet.
	 * <p>
	 * The new keyword is rejected when an already stored keyword of the same
	 * facet strictly contains it. Otherwise every stored keyword of the same
	 * facet that the new keyword strictly contains is removed, so the result
	 * does not depend on the order in which keywords are added. Every
	 * discarded keyword is reported on {@code System.err} with a
	 * {@code "!! "} prefix.
	 *
	 * @param kwd
	 *            keyword to add
	 * @throws NullPointerException
	 *             if {@code kwd} is {@code null}
	 */
	protected void addKeyword(EntityKeyword kwd) {
		if (kwd == null) {
			throw new NullPointerException("kwd must not be null");
		}
		for (EntityKeyword kw : kwds) {
			if (sameFacet(kw, kwd) && kw.isLongerMach(kwd)) {
				System.err.println("!! " + kwd);
				return;
			}
		}
		// Walk backwards so that removing by index does not skip elements.
		for (int i = kwds.size() - 1; i >= 0; i--) {
			EntityKeyword kw = kwds.get(i);
			if (sameFacet(kw, kwd) && kwd.isLongerMach(kw)) {
				System.err.println("!! " + kw);
				kwds.remove(i);
			}
		}
		this.kwds.add(kwd);
	}

	/**
	 * Returns the stored keywords.
	 *
	 * @return the internal list itself (not a copy)
	 */
	public ArrayList<EntityKeyword> asList() {
		return kwds;
	}

	/** Null-safe facet comparison; two {@code null} facets count as equal. */
	private static boolean sameFacet(EntityKeyword a, EntityKeyword b) {
		String fa = a.getFacet();
		String fb = b.getFacet();
		return fa == null ? fb == null : fa.equals(fb);
	}
}

EntityKeyword.java



package com.ibm.watson.developer_cloud.conversation;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * One entity found in a text: its facet (entity type), the character range
 * {@code [begin, end)} it was found at, its normalized value ({@code lex})
 * and the original surface string ({@code str}).
 */
public class EntityKeyword {
	/** Facet name compared by {@link #isDate()}. */
	public static final String SYS_DATE = "sys-date";
	/** Facet name compared by {@link #isNumber()}. */
	public static final String SYS_NUMBER = "sys-number";
	/** Facet name compared by {@link #isCurrency()}. */
	public static final String SYS_CURRENCY = "sys-currency";

	String facet;
	int begin;
	int end;
	String lex;
	String str;

	// SimpleDateFormat is not thread-safe; every use below synchronizes on it.
	static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

	/**
	 * Creates a keyword.
	 *
	 * @param facet
	 *            entity type, e.g. {@link #SYS_DATE}; may be {@code null}
	 * @param begin
	 *            start offset of the match
	 * @param end
	 *            end offset of the match
	 * @param lex
	 *            normalized value
	 * @param str
	 *            surface string
	 */
	public EntityKeyword(String facet, int begin, int end, String lex, String str) {
		super();
		this.facet = facet;
		this.begin = begin;
		this.end = end;
		this.lex = lex;
		this.str = str;
	}

	/**
	 * Tells whether this keyword is a strictly longer match that fully
	 * contains {@code kwd}.
	 *
	 * @param kwd
	 *            keyword to compare with; {@code null} yields {@code false}
	 * @return {@code true} if the range of {@code kwd} lies inside this
	 *         keyword's range and is shorter than it
	 */
	public boolean isLongerMach(EntityKeyword kwd) {
		if (kwd == null) {
			return false;
		}
		boolean contains = (this.begin <= kwd.begin) && (kwd.end <= this.end);
		boolean longer = (kwd.end - kwd.begin) < (this.end - this.begin);
		return contains && longer;
	}

	/**
	 * Parses the normalized value as a {@code yyyy-MM-dd} date.
	 *
	 * @return the parsed date, or {@code null} if this keyword is not a date,
	 *         has no normalized value, or the value cannot be parsed
	 */
	public Date asDate() {
		if (!isDate() || lex == null) {
			return null;
		}
		try {
			synchronized (sdf) {
				return sdf.parse(this.lex);
			}
		} catch (ParseException e) {
			// Best effort: report the problem and signal "no date" to the caller.
			System.err.println("EntityKeyword: cannot parse date value '" + lex + "': " + e.getMessage());
			return null;
		}
	}

	/** @return {@code true} if the facet equals {@link #SYS_DATE} */
	public boolean isDate() {
		return SYS_DATE.equals(facet);
	}

	/** @return {@code true} if the facet equals {@link #SYS_CURRENCY} */
	public boolean isCurrency() {
		return SYS_CURRENCY.equals(facet);
	}

	/** @return {@code true} if the facet equals {@link #SYS_NUMBER} */
	public boolean isNumber() {
		return SYS_NUMBER.equals(facet);
	}

	/** @return start offset of the match */
	public int getBegin() {
		return begin;
	}

	/** @return end offset of the match */
	public int getEnd() {
		return end;
	}

	/** @return normalized value */
	public String getLex() {
		return lex;
	}

	/** @return facet (entity type) */
	public String getFacet() {
		return facet;
	}

	/** @return surface string */
	public String getStr() {
		return str;
	}

	@Override
	public String toString() {
		return "EntityKeyword [facet=" + facet + ", begin=" + begin + ", end=" + end + ", lex=" + lex + ", str=" + str
				+ "]";
	}
}


How to use

ConversationNLPServiceMain.java


package com.ibm.watson.developer_cloud.conversation;
import java.util.ArrayList;

/**
 * Command-line demo: sends one sample sentence through
 * {@link ConversationNLPService} and prints every returned keyword to
 * {@code System.err}.
 */
public class ConversationNLPServiceMain {
	public static void main(String[] args) throws Exception {
		// Replace the placeholders with credentials from https://console.bluemix.net/home/

		// Enablement_Conversation
		final String username = "xxx";
		final String password = "xxx";
		// CONV_NLP
		final String workspaceid = "xxx";

		final String text = "It is April 1, 2018.";

		ConversationNLPService nlpService = new ConversationNLPService(username, password, workspaceid);
		ArrayList<EntityKeyword> found = nlpService.nlp(text).asList();

		// Print one keyword per line.
		for (int i = 0; i < found.size(); i++) {
			System.err.println(found.get(i).toString());
		}
	}
}


System.err



EntityKeyword [facet=sys-date, begin=0, end=9, lex=2018-04-01, str=April 1, 2018]
EntityKeyword [facet=sys-number, begin=2, end=4, lex=30, str=30]
EntityKeyword [facet=sys-number, begin=5, end=6, lex=4, str=4]
EntityKeyword [facet=sys-number, begin=7, end=8, lex=1, str=1]

The output looks roughly like this.

"April 1, 2018" is normalized to "2018-04-01". Relative expressions such as "Tomorrow" or "Next Sunday" can be normalized as well. However, the extraction sometimes makes mistakes, so it should not be relied on too heavily. It would be worth extending this class with special handling for cases such as "the result contains exactly one date" and "a date and a time appear consecutively".

Impressions


I think the Watson APIs ought to include a basic NLP API, but for some reason none is provided, so I made something like this.

Recommended Posts

Use Watson Conversation as NLP (Java) (Natural Language Processing)
Introducing NLP4J-[000] Natural Language Processing Index in Java
[Processing × Java] How to use variables
[Processing × Java] How to use arrays
[Processing × Java] How to use the class
[Processing × Java] How to use the function
NLP4J [004] Try text analysis using natural language processing and parsing statistical processing in Java
NLP4J [003] Try text analysis using natural language processing and part-speech statistical processing in Java
Use Java7 try-with-resources statement for Cursor close processing