[JAVA] I want to morphologically analyze the log in the DB and put it in the DB to classify messages 1

JavaPG I made a PG that GROPUP the log message stored in db and further morphologically analyze it with Mecab and normalize it.

Tiggercmd.java


import java.io.File;
import java.io.FileInputStream;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Properties;

import oracle.jdbc.*;
import net.moraleboost.mecab.Lattice; 
import net.moraleboost.mecab.impl.StandardTagger; 
import net.moraleboost.mecab.Node;
class Tiggercmd {
	static String[] hinsis = {
		"type0","type1","type2","type3","style0","style1","calls0","calls1","calls2"	
	};
	public static void main(String[] args) throws Exception { //Build Tagger.
		if (args.length<1) {
			System.err.println(
					"USAGE:Tiggercmd readwrite-propfileName(non extents)"
					);
			return;
		}
		StandardTagger tagger = new StandardTagger("");
		//Get version string
		System.err.println("MeCab version " + tagger.version());
		//Build Lattice (an object that stores runtime information required for morphological analysis)
		Lattice lattice = tagger.createLattice();
		//Open INFO property
		Properties Tiggercmd_props = new Properties();
		Tiggercmd_props.load(new FileInputStream(new File(args[0])));
		OracleConnection con1 = null;
		OracleConnection con2 = null;
		PreparedStatement in_sql_prepare = null;
		ResultSet in_rls = null;
		OraclePreparedStatement con2prpares = null;
		int msgid = 0;
		int seq =0;
		String features[] = null;
		try {
			//Connect to IN DB
			con1 = connect(Tiggercmd_props.getProperty("conids1.user"), Tiggercmd_props.getProperty("conids1.password"),Tiggercmd_props.getProperty("conids1.TNSPINGString"));
			//Connect to WRITE DB
			con2 = connect(Tiggercmd_props.getProperty("conids2.user"), Tiggercmd_props.getProperty("conids2.password"),Tiggercmd_props.getProperty("conids2.TNSPINGString"));		
			//Execute readQuery
			in_sql_prepare = con1.prepareStatement(Tiggercmd_props.getProperty("in.sqls"));
			in_rls = in_sql_prepare.executeQuery();
			System.err.println("READSQL:"+in_sql_prepare.toString());
			while (in_rls.next() ){
				//Set the analysis target character string
				String in_surface = in_rls.getString(Tiggercmd_props.getProperty("in.surface_colname"));//String text = "Its a sunny day.";
				System.out.println(msgid+":\t"+in_surface);
				lattice.setSentence(in_surface);
				tagger.parse(lattice);
				//Output morphological analysis results
				System.out.println(lattice.toString());
				//Output surface shape and features while tracing morphemes one by one
				Node node = lattice.bosNode();
				while (node != null) {
					String addssql = Tiggercmd_props.getProperty("wr.sqls");
					//Set to parse variables with OraclePreparedStatement
					con2prpares = (OraclePreparedStatement)con2.prepareStatement(addssql);
					con2prpares.setIntAtName("msgid",msgid);
					con2prpares.setStringAtName("msg",in_surface);
					con2prpares.setIntAtName("seq",seq);
					con2prpares.setStringAtName("surface",node.surface());
					con2prpares.setStringAtName("feature",node.feature());
					features = node.feature().split(",");
					for (int i=0;i<hinsis.length;i++){
						con2prpares.setStringAtName(hinsis[i],(i<features.length)?features[i]:null);
					}
					//Update and update count output
					System.err.println("inserts:\t"+con2prpares.executeUpdate());
					con2prpares.close();con2prpares=null;
					node = node.next();
					seq++;
				}
				con2.commit();
				seq = 0;
				lattice.clear();
				msgid ++;
			}
			in_rls.close();in_rls=null;
			in_sql_prepare.close();in_sql_prepare=null;
			con1.close();con1=null;
			con2.close();con2=null;
		}catch (Exception oe){
			oe.printStackTrace(System.err);
		}finally {
			try {
				if (in_rls!=null) in_rls.close();
				if (in_sql_prepare!=null) in_sql_prepare.close();
				if (con1 != null) con1.close();
				if (con2prpares!=null) con2prpares.close();
				if (con2 != null) con2.close();
			}catch (Exception fe){}
		}
		// lattice,Destroy tagger
		lattice.destroy();
		tagger.destroy();
	}
	/**
	 * Utility method: creates a new JDBC connection to the database.
	 */
	static OracleConnection connect(String user,String password,String tnspingString) throws Exception {
		OracleConnection conn = null;
		OracleDriver dr = new OracleDriver();
		Properties prop = new Properties();
		prop.setProperty("user",user);
		prop.setProperty("password",password);
		conn = (OracleConnection)dr.connect("jdbc:oracle:thin:@"+tnspingString,prop);
		conn.setAutoCommit(false);
		return conn;
	}
}
	 

properties

domainlogconrwinfo.properties


#INFO
#Query target connection information
conids1.user=hoge
conids1.password=hoge
conids1.TNSPINGString=(DESCRIPTION = (ADDRESS_LIST = (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.0.XXX)(PORT = 1521))) (CONNECT_DATA = (SERVER = DEDICATED) (SERVICE_NAME = servicename1)))
#↑ The full connection information output with the tnspings connection name is described.

#INSERT target connection information
conids2.user=hoge
conids2.password=hoge
conids2.TNSPINGString=(DESCRIPTION = (ADDRESS_LIST = (ADDRESS = (PROTOCOL = TCP)(HOST = 192.168.0.***)(PORT = 1521))) (CONNECT_DATA = (SERVER = DEDICATED) (SERVICE_NAME = servicenamea)))
#↑ The full connection information output with the tnspings connection name is described.

in.sqls= \
select a.MESSAGETEXT,count(a.TIMEPARTS) from domainlog a \
where 1=1 \
group by a.MESSAGETEXT \
order by a.MESSAGETEXT 

#↑ SQL to get the list of messages you want to analyze
in.surface_colname=MESSAGETEXT
#↑ Column name assigned to the message in that SQL

#↓ INSERT SQL
wr.sqls= \
insert into domainlog_rexs (msgid,MESSAGETEXT,seq,surface,manals \
,type0,type1,type2,type3,style0,style1,calls0,calls1,calls2 ) \
values(:msgid,:msg,:seq,:surface,:feature, \
:type0,:type1,:type2,:type3,:style0,:style1,:calls0,:calls1,:calls2 \
) 

#↑ If the following morphological analysis data variables are described in VALUES according to the column, the following variables will be set there.
#:msgid parse message serial number
#:msg Analysis message string
#:seq Analysis morpheme serial number
#:surface Analysis morpheme surface string
#:feature Feature (part of speech?) System analysis result character string
#:type0         IPA Type1
#:type1         IPA Type2
#:type2         IPA Type3
#:type3         IPA Type4
#:style0 type 1
#:style1 type 2
#:calls0 reading 1
#:calls1 reading 2
#:calls2 reading 3
#Reference link https://qiita.com/priancho/items/f633a9e99616b3fe451f

Then, for normalization, use reading 1 to display the same phrase in a list and display nouns as parts.

Use LISTAGG (character SUM catalog function) for character summary

python


--MSGID,List the combined CALLS1 in the analysis results for each MSGTEXT
select a.MESSAGETEXT,a.MSGID,a.CONCATHINSIWORD,a.ITEMS from (
select a.MSGID,a.MESSAGETEXT,b.ITEMS
,listagg(a.CALLS1,';') within group (order by a.MSGID,a.SEQ)  as concatHINSIWORD
from domainlog_rexs  a
,(select a.MSGID,listagg(a.SURFACE,';') within group (order by a.SEQ) as items 
from domainlog_rexs a 
 where 1=1
 and a.SURFACE is not null
 and a.TYPE0='noun'	
 and a.TYPE1 in (
 'General' 
,'number'
,'Change connection'
)
 group by a.MSGID) b
where 1=1
and ( a.CALLS0 <> '*' and a.TYPE0 is not null)
and a.MSGID=b.MSGID
group by a.MSGID,a.MESSAGETEXT,b.ITEMS
) a
order by a.MSGID

Display result example

mecab.png

Results of normalization and concerns about Japanese software engineers

result: The 10,000-stroke log message has been reduced to thousands by grouping simple messages. Furthermore, the grouping of the morphological analysis results in terms of wording reduced the number to less than 100 putters. As a result, what is the number of subjects that people pay attention to?

Concern: Useful wisdom created by these researchers (in this example, the morphological analysis engine Mecab) Take in and make it practical without being swept away by fashion Are there Japanese vendors or famous software / hardware development / manufacturing / sales companies? Is it because changing the existing interest structure is the most dangerous behavior for us? What will happen by the Tokyo Olympics if this cannot be broken? I am worried as a Japanese engineer.

that's all

Recommended Posts

I want to morphologically analyze the log in the DB and put it in the DB to classify messages 1
I want to embed any TraceId in the log
Is it possible to put the library (aar) in the Android library (aar) and use it?
I want to get the value in Ruby
I want to simplify the log output on Android
[Ruby] When you want to assign the result obtained by conditional branching to a variable and put it in the argument
I want to set the conditions to be displayed in collection_check_boxes
I want to bring Tomcat to the server and start the application
I want to change the log output settings of UtilLoggingJdbcLogger
I want to call a method and count the number
I want to put the JDK on my Mac PC
I want to transition to the same screen in the saved state
I want to simplify the conditional if-else statement in Java
In Java, I want to trim multiple specified characters from only the beginning and end.
After posting an article with Rails Simple Calendar, I want to reflect it in the calendar.
I want to give edit and delete permissions only to the poster
I want to return to the previous screen with kotlin and java!
[Ruby] I want to put an array in a variable. I want to convert to an array
I want to display the images under assets/images in the production environment
I want to remove the top margin in Grouped UITableView (swift)
[Java] I want to perform distinct with the key in the object
I want to change the value of Attribute in Selenium of Ruby
[Android] I want to get the listener from the button in ListView
I want to import the pull-down menu items when submitting a form in Rails into CSV and display them from the DB data.
I want to download a file on the Internet using Ruby and save it locally (with caution)
I want to write JSP in Emacs more easily than the default.
I summarized the points to note when using resources and resources in combination
Add the pre-built jar library to Android and call it in the framework
[Active Admin] I want to customize the default create and update processing
[Ruby] I want to extract only the value of the hash and only the key
I want to pass the argument of Annotation and the argument of the calling method to aspect
I want to get the IP address when connecting to Wi-Fi in Java
I want to display an error message when registering in the database
I actually expressed the older sister problem in code and calculated it
I want to eliminate duplicate error messages
I want to use @Autowired in Servlet
I received the data of the journey (diary application) in Java and visualized it # 001
I want to control the start / stop of servers and databases with Alexa
I want to recursively get the superclass and interface of a certain class
I want to output the day of the week
I want to send an email in Java.
I tried to organize the session in Rails
I want to use arrow notation in Ruby
I want to var_dump the contents of the intent
I want to pass APP_HOME to logback in Gradle
rsync4j --I want to touch rsync in Java.
[Xcode] I want to manage images in folders
I want to be eventually even in kotlin
I want you to put the story that the error was solved when you stabbed the charger in the corner of your head
I want to truncate after the decimal point
I want to find the MD5 checksum of a file in Java and get the result as a string in hexadecimal notation.
I want to see only the latest because the Docker log has become too large
I called YouTube video from DB with haml and tried to embed and display it
[Active Admin] I want to specify the scope of the collection to be displayed in select_box
[Android Studio] I want to set restrictions on the values registered in EditText [Java]
I want to place RadioButtons in the same RadioGroup at any position on the screen.
I want to ESLint in Maven project. I want to run Node.js without putting it in my environment.
I want to use PowerMock in a class that combines parameterized tests and ordinary tests
[Rails] I want to display the link destination of link_to in a separate tab
# 1_JAVA I want to get the index number by specifying one character in the character string.
I didn't understand the topological sort, so I looked it up and implemented it in BFS, and then tried to solve the AtCoder problem.