Watson Conversation can extract dates, times, numbers, and currency amounts from text as System Entities, so we will use it purely for that purpose.
The library used is the Watson Java SDK: https://github.com/watson-developer-cloud/java-sdk
Overview
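Input: "It is April 1, 2018." Output: EntityKeyword [facet=sys-date, begin=0, end=9, lex=2018-04-01, str=April 1, 2018] EntityKeyword [facet=sys-number, begin=2, end=4, lex=30, str=30] EntityKeyword [facet=sys-number, begin=5, end=6, lex=4, str=4] EntityKeyword [facet=sys-number, begin=7, end=8, lex=1, str=1] (Note: the offsets and the sys-number values shown here do not correspond to character positions in the English sentence above; they appear to belong to a differently worded input.)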
Wrap the SDK library as follows and use it.
Service body
ConversationNLPService.java
package com.ibm.watson.developer_cloud.conversation;
import java.util.*;
import com.ibm.watson.developer_cloud.conversation.v1.ConversationService;
import com.ibm.watson.developer_cloud.conversation.v1.model.Entity;
import com.ibm.watson.developer_cloud.conversation.v1.model.MessageRequest;
import com.ibm.watson.developer_cloud.conversation.v1.model.MessageResponse;
/**
 * Thin wrapper around the Watson Conversation service that sends one piece of
 * text to a workspace and returns the entities the service recognised in it.
 */
public class ConversationNLPService {
    String username;
    String password;
    String workspaceid;

    /**
     * @param username    service credential user name
     * @param password    service credential password
     * @param workspaceid id of the Conversation workspace that receives the message
     */
    public ConversationNLPService(String username, String password, String workspaceid) {
        this.username = username;
        this.password = password;
        this.workspaceid = workspaceid;
    }

    /**
     * Sends {@code input} to the workspace and converts every returned entity
     * into an {@link EntityKeyword}.
     *
     * @param input text to analyse; must not be {@code null}
     * @return the collected keywords (possibly none)
     * @throws NullPointerException if {@code input} is {@code null}
     * @throws Exception            whatever the underlying SDK call raises
     */
    public ConversationNLPServiceResponse nlp(String input) throws Exception {
        // Fail with a named argument instead of an anonymous NPE further down.
        Objects.requireNonNull(input, "input");

        ConversationService service = new ConversationService(ConversationService.VERSION_DATE_2016_09_20);
        service.setUsernameAndPassword(username, password);

        // The JVM default time zone id is passed along in the request context
        // (presumably so relative dates are resolved in local time - confirm
        // against the service documentation).
        Map<String, Object> context = new HashMap<String, Object>();
        context.put("timezone", TimeZone.getDefault().getID());

        // Tabs and line breaks are replaced by plain spaces before sending.
        String cleaned = input.replace("\t", " ").replace("\n", " ").replace("\r", " ");

        MessageRequest newMessage = new MessageRequest.Builder() //
                .inputText(cleaned) //
                .context(context) //
                .build();
        MessageResponse response = service.message(workspaceid, newMessage).execute();

        ConversationNLPServiceResponse rsp = new ConversationNLPServiceResponse();
        List<Entity> entities = response.getEntities();
        if (entities == null) {
            // No entity list in the response: nothing to convert.
            return rsp;
        }

        // The surface string of each entity is cut out of the text echoed back
        // by the service, using the entity's [begin, end) location.
        String echoed = response.getInputText();
        for (Entity e : entities) {
            int begin = e.getLocation()[0];
            int end = e.getLocation()[1];
            rsp.addKeyword(new EntityKeyword(e.getEntity(), begin, end, e.getValue(), echoed.substring(begin, end)));
        }
        return rsp;
    }
}
ConversationNLPServiceResponse.java
package com.ibm.watson.developer_cloud.conversation;
import java.util.ArrayList;
/**
 * Accumulates the {@link EntityKeyword}s produced for one request.
 */
public class ConversationNLPServiceResponse {
    ArrayList<EntityKeyword> kwds = new ArrayList<>();

    /**
     * Stores {@code kwd} unless an already stored keyword with the same facet
     * reports (via {@code isLongerMach}) that it is a longer match covering it.
     * A rejected keyword is written to {@code System.err} prefixed with "!! ".
     *
     * @param kwd keyword to add
     */
    protected void addKeyword(EntityKeyword kwd) {
        if (isCoveredByStored(kwd)) {
            System.err.println("!! " + kwd);
            return;
        }
        kwds.add(kwd);
    }

    /** Returns true when some stored keyword of the same facet is a longer match for {@code candidate}. */
    private boolean isCoveredByStored(EntityKeyword candidate) {
        for (int i = 0; i < kwds.size(); i++) {
            EntityKeyword stored = kwds.get(i);
            if (!stored.getFacet().equals(candidate.getFacet())) {
                continue;
            }
            if (stored.isLongerMach(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the internal keyword list itself (not a copy)
     */
    public ArrayList<EntityKeyword> asList() {
        return kwds;
    }
}
EntityKeyword.java
package com.ibm.watson.developer_cloud.conversation;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * One entity found in a text: its facet (entity type name), the
 * {@code begin}/{@code end} offsets of the match, the normalised value
 * ({@code lex}) and the matched surface string ({@code str}).
 */
public class EntityKeyword {
    /** Facet name tested by {@link #isDate()}. */
    public static final String SYS_DATE = "sys-date";
    /** Facet name tested by {@link #isNumber()}. */
    public static final String SYS_NUMBER = "sys-number";
    /** Facet name tested by {@link #isCurrency()}. */
    public static final String SYS_CURRENCY = "sys-currency";

    String facet;
    int begin;
    int end;
    String lex;
    String str;

    /**
     * Shared parser for {@code yyyy-MM-dd} values. {@link SimpleDateFormat} is
     * not thread-safe, so every use must hold the monitor of this object.
     */
    static final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    /**
     * Creates a keyword.
     *
     * @param facet entity type name, may be {@code null}
     * @param begin start offset of the match
     * @param end   end offset of the match
     * @param lex   normalised value, may be {@code null}
     * @param str   matched surface string
     */
    public EntityKeyword(String facet, int begin, int end, String lex, String str) {
        this.facet = facet;
        this.begin = begin;
        this.end = end;
        this.lex = lex;
        this.str = str;
    }

    /**
     * Tells whether this keyword is a strictly longer match that covers
     * {@code kwd}: this span contains {@code kwd}'s span and is longer than it.
     * (The method name keeps its historical spelling for compatibility.)
     *
     * @param kwd the keyword to compare against
     * @return {@code true} if this keyword covers {@code kwd} and is longer
     */
    public boolean isLongerMach(EntityKeyword kwd) {
        boolean covers = this.begin <= kwd.begin && kwd.end <= this.end;
        return covers && (kwd.end - kwd.begin) < (this.end - this.begin);
    }

    /**
     * Parses {@code lex} as a {@code yyyy-MM-dd} date.
     *
     * @return the parsed date, or {@code null} when this keyword is not a date,
     *         has no {@code lex} value, or the value cannot be parsed
     */
    public Date asDate() {
        // A missing value would otherwise make SimpleDateFormat.parse throw an NPE.
        if (!isDate() || lex == null) {
            return null;
        }
        try {
            // Serialise access to the shared, non-thread-safe formatter.
            synchronized (sdf) {
                return sdf.parse(lex);
            }
        } catch (ParseException e) {
            e.printStackTrace();
            return null;
        }
    }

    /** @return {@code true} if the facet equals {@link #SYS_DATE} */
    public boolean isDate() {
        return SYS_DATE.equals(facet);
    }

    /** @return {@code true} if the facet equals {@link #SYS_CURRENCY} */
    public boolean isCurrency() {
        return SYS_CURRENCY.equals(facet);
    }

    /** @return {@code true} if the facet equals {@link #SYS_NUMBER} */
    public boolean isNumber() {
        return SYS_NUMBER.equals(facet);
    }

    /** @return start offset of the match */
    public int getBegin() {
        return begin;
    }

    /** @return end offset of the match */
    public int getEnd() {
        return end;
    }

    /** @return the normalised value */
    public String getLex() {
        return lex;
    }

    /** @return the facet (entity type name) */
    public String getFacet() {
        return facet;
    }

    /** @return the matched surface string */
    public String getStr() {
        return str;
    }

    @Override
    public String toString() {
        return "EntityKeyword [facet=" + facet + ", begin=" + begin + ", end=" + end + ", lex=" + lex + ", str=" + str
                + "]";
    }
}
How to use
ConversationNLPServiceMain.java
package com.ibm.watson.developer_cloud.conversation;
import java.util.ArrayList;
/**
 * Small command-line demo: analyses one fixed sentence and prints every
 * returned keyword to {@code System.err}.
 */
public class ConversationNLPServiceMain {
    public static void main(String[] args) throws Exception {
        // Credentials have to be obtained from https://console.bluemix.net/home/
        // (Enablement_Conversation)
        final String username = "xxx";
        final String password = "xxx";
        // Workspace: CONV_NLP
        final String workspaceid = "xxx";

        final String input = "It is April 1, 2018.";

        ConversationNLPService nlpService = new ConversationNLPService(username, password, workspaceid);
        ConversationNLPServiceResponse result = nlpService.nlp(input);

        // Dump each keyword, one per line.
        for (EntityKeyword keyword : result.asList()) {
            System.err.println(keyword.toString());
        }
    }
}
System.err
EntityKeyword [facet=sys-date, begin=0, end=9, lex=2018-04-01, str=April 1, 2018]
EntityKeyword [facet=sys-number, begin=2, end=4, lex=30, str=30]
EntityKeyword [facet=sys-number, begin=5, end=6, lex=4, str=4]
EntityKeyword [facet=sys-number, begin=7, end=8, lex=1, str=1]
That is roughly how it works.
"April 1, 2018" is normalized to "2018-04-01". Expressions such as "Tomorrow" or "Next Sunday" can be normalized as well. However, the extraction does make mistakes, so you should not rely on it too heavily. I think it would be good to extend this class with "processing for when the result contains only one date" and "handling for when a date and a time appear consecutively".
Impressions
I think the Watson APIs ought to include a basic NLP API like this, but for some reason none is provided, so I made something like this myself.
Recommended Posts