I dedicate this article to my comrades who, for various reasons, are forced to measure performance with the Linux top command.
It takes a file produced by running the Linux top command in batch mode and formats it into a CSV file using Python 3.
top.csv (output example)
timestamp,PID,USER,PR,NI,VIRT,RES,SHR,S,%CPU,%MEM,TIME+,COMMAND
10:00:00,1000,root,20,0,160000,2000,1640,R,10.0,0.2,0:00.02,top
10:00:00,3400,httpd,20,0,150000,2000,1700,S,0.0,0.3,0:07.98,nginx:
Prepare the file output in TOP batch mode
TOP command file
top -b -d 20 -c > top_org.log
top_org.log
top - 10:00:00 up 1 days, 44 min, 2 users, load average: 0.00, 0.01, 0.01
Tasks: 100 total, 1 running, 99 sleeping, 0 stopped, 1 zombie
%Cpu(s): 2.0 us, 4.0 sy, 0.0 ni, 80.0 id, 5.0 wa, 0.0 hi, 2.0 si, 0.0 st
KiB Mem : 1000000 total, 60000 free, 700000 used, 200000 buff/cache
KiB Swap: 2000000 total, 90000 free, 2000000 used. 70000 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1000 root 20 0 160000 2000 1640 R 10.0 0.2 0:00.02 top -b -d 20 -c
4500 apache 20 0 440000 1000 8 S 0.0 0.1 0:00.01 /usr/sbin/httpd
17000 mysql 20 0 1130000 7000 0 S 0.0 0.7 20:00.00 /usr/sbin/mysqld
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 [kthreadd]
4 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 [kworker/0:0H]
6 root 20 0 0 0 0 S 0.0 0.0 0:00.00 [ksoftirqd/0]
・ ・ ・ (Omitted below)
The complete, unsplit source code is at the end of this article; please refer to it. When you use it, change only the following parts.
・ The USER to be extracted ・ The location of the "TOP command file" ・ The location of the "CSV file with the formatted TOP command file". These are set in the following part. Please change them according to your desired conditions and environment.
.py
'''
Setting information
'''
# Users to extract. Leave the list empty to extract every user; list several
# names to extract several users, e.g. ['apache', 'httpd'].
user_array = ['root']
# Current working directory; the input and output paths are built from it.
current_dir = os.getcwd()
# Input file name (full path): the saved output of top in batch mode.
# os.path.join picks the right separator, so the path works on both
# Windows and Linux instead of only with hard-coded backslashes.
input_file_name = os.path.join(current_dir, "before", "top_org.log")
# Output file name (full path): the formatted CSV file.
output_file_name = os.path.join(current_dir, "after", "top_csv.csv")
I will proceed in this order.
First, specify the USER to be extracted and the location of the TOP command file.
.py
# -*- coding: utf-8
import re
import os
import csv
'''
Setting information
'''
# Users to extract. Leave the list empty to extract every user; list several
# names to extract several users, e.g. ['apache', 'httpd'].
user_array = ['root']
# Current working directory; the input and output paths are built from it.
current_dir = os.getcwd()
# Input file name (full path): the saved output of top in batch mode.
# os.path.join picks the right separator, so the path works on both
# Windows and Linux instead of only with hard-coded backslashes.
input_file_name = os.path.join(current_dir, "before", "top.log")
# Output file name (full path).
# NOTE(review): the file written here is CSV although it is named top.log;
# confirm the intended name (the earlier settings example uses top_csv.csv).
output_file_name = os.path.join(current_dir, "after", "top.log")
# 1-based line number, counted from the timestamp line of one top result,
# at which the process lines begin.
process_row_start = 8
# 1-based position of the USER column in a process line.
user_column = 2
# 1-based position of the COMMAND column in a process line.
command_column = 12
.py
# Read the whole input file (the saved top output) into a list of lines.
# The with-statement closes the file even if reading raises an exception.
with open(input_file_name, "r") as f:
    toplog_lines = f.readlines()
Read the input file (TOP command file) line by line and store the lines in the variable toplog_lines.
.py
# Regular expression for the first line of each top result
# (e.g. "top - 10:00:00 up 1 days, ..."); group 1 captures the time.
# The outer "+" of the old "(...)+" was redundant: the inner "+" already
# consumes the whole run of digits and colons, so the match is unchanged.
r_top_timestamp = re.compile(r"top - ([0-9:]+)")
# 0-based line numbers, within the TOP command file, of the lines that
# contain a timestamp.
timestamp_list = [
    line_number
    for line_number, toplog_line in enumerate(toplog_lines)
    if r_top_timestamp.search(toplog_line) is not None
]
Here, the lines that contain a timestamp (for example `top - 10:00:00 up 1 days, 44 min, 2 users, load average: 0.00, 0.01, 0.01`) are located in the TOP command file, and their line numbers are stored in timestamp_list.
First, define the required variables in the process extraction loop.
.py
# Layout of one TOP command result: the line on which the process lines
# start, then the positions of the USER and COMMAND columns in such a line.
process_row_start, user_column, command_column = 8, 2, 12
# Running count of the lines seen in the current TOP command result.
rows_count = 0
# Timestamp of the TOP command result currently being processed.
tmp_timestamp = str()
# Scratch list for the one row that is being assembled for the CSV.
tmp_output_csv_list = list()
# Rows accepted for the CSV (this is the list that is actually written).
output_csv_list = list()
Next, loop over the TOP command file line by line. The loop does the following. ・ Extract the TOP command execution time (timestamp) ・ Check each process line column by column, and if its USER is an extraction target, add the line to the CSV output
.py
for toplog_line in toplog_lines:
    # Count every line of the current TOP result, blank lines included,
    # because process_row_start is a position counted over all lines.
    rows_count += 1
    # Remove the line break (and any other trailing whitespace).
    toplog_line = toplog_line.rstrip()
    # Nothing to parse on an empty line.
    if not toplog_line:
        continue
    # Timestamp line: remember the time and restart the line count.
    # The regular expression is evaluated once and the match reused.
    timestamp_match = r_top_timestamp.search(toplog_line)
    if timestamp_match is not None:
        print(toplog_line)
        tmp_timestamp = timestamp_match.group(1)
        rows_count = 1
        continue
    # Summary and column-header lines come before the process lines.
    if rows_count < process_row_start:
        continue
    # Split on runs of blanks and keep the columns up to COMMAND; the
    # words after the command name (its arguments) are dropped.
    columns = toplog_line.split()[:command_column]
    # Skip the line when a USER filter is set and this USER is not in it.
    # An empty user_array means "extract every USER". Lines too short to
    # have a USER column are kept, as before.
    if (
        user_array
        and 1 <= user_column <= len(columns)
        and columns[user_column - 1] not in user_array
    ):
        continue
    # Row for the CSV: the timestamp followed by the kept columns.
    tmp_output_csv_list = [tmp_timestamp, *columns]
    output_csv_list.append(tmp_output_csv_list)
Output the contents of the CSV extraction list `output_csv_list` to the CSV file.
.py
# CSV export
# One name per field of a data row: the timestamp followed by the twelve
# top columns PID..COMMAND. The previous header left out PID and S, which
# shifted every column name after them against the data.
csv_header = ['timestamp', 'PID', 'USER', 'PR', 'NI', 'VIRT', 'RES', 'SHR',
              'S', '%CPU', '%MEM', 'TIME+', 'COMMAND']
# newline='' lets the csv module control line endings itself, so the
# lineterminator chosen below is written unchanged on every platform.
with open(output_file_name, 'w', newline='') as f:
    csv_writer = csv.writer(f, delimiter=',', lineterminator='\n')
    csv_writer.writerow(csv_header)
    csv_writer.writerows(output_csv_list)
In `csv_writer = csv.writer(f, delimiter=',', lineterminator='\n')`, if you change `delimiter=','` to `delimiter='\t'`, the output becomes a TSV file (tab-delimited file) instead.
Choose whichever you like.
.py
'''
Extract the process lines of the top command for the configured USERs and
output them to a CSV file.
Each data row holds the timestamp followed by the top columns:
timestamp,PID,USER,PR,NI,VIRT,RES,SHR,S,%CPU,%MEM,TIME+,COMMAND
'''
# -*- coding: utf-8
import re
import os
import csv
'''
Setting information
'''
# Users to extract. Leave the list empty to extract every user; list several
# names, as here, to extract several users.
user_array = ['apache', 'httpd']
# Current working directory; the input and output paths are built from it.
current_dir = os.getcwd()
# Input file name (full path): the saved output of top in batch mode.
# os.path.join picks the right separator, so the path works on both
# Windows and Linux instead of only with hard-coded backslashes.
input_file_name = os.path.join(current_dir, "before", "top.log")
# Output file name (full path).
# NOTE(review): the file written here is CSV although it is named top.log;
# confirm the intended name (the settings example in the article uses
# top_csv.csv).
output_file_name = os.path.join(current_dir, "after", "top.log")
# 1-based line number, counted from the timestamp line of one top result,
# at which the process lines begin.
process_row_start = 8
# 1-based position of the USER column in a process line.
user_column = 2
# 1-based position of the COMMAND column in a process line.
command_column = 12
# Regular expression for the first line of each top result
# (e.g. "top - 10:00:00 up 1 days, ..."); group 1 captures the time.
# The outer "+" of the old "(...)+" was redundant: the inner "+" already
# consumes the whole run of digits and colons, so the match is unchanged.
r_top_timestamp = re.compile(r"top - ([0-9:]+)")

# One name per field of a data row: the timestamp followed by the twelve
# top columns PID..COMMAND. The previous header left out PID and S, which
# shifted every column name after them against the data.
csv_header = ['timestamp', 'PID', 'USER', 'PR', 'NI', 'VIRT', 'RES', 'SHR',
              'S', '%CPU', '%MEM', 'TIME+', 'COMMAND']


def extract_process_rows(toplog_lines, users, row_start, user_col,
                         command_col):
    """Collect the process lines of the wanted users from a top batch log.

    Each timestamp line found is printed as it is encountered.

    Args:
        toplog_lines: Lines of the top log, with or without line breaks.
        users: USER names to keep; an empty list keeps every user.
        row_start: 1-based line number, counted from each timestamp line
            (blank lines included), at which the process lines begin.
        user_col: 1-based position of the USER column in a process line.
        command_col: 1-based position of the COMMAND column; the words
            after it (the command's arguments) are dropped.

    Returns:
        A list of rows, each a list holding the timestamp of the top
        result followed by the kept columns of one process line.
    """
    rows = []
    rows_count = 0
    timestamp = ''
    for toplog_line in toplog_lines:
        # Count every line of the current top result, blank ones included.
        rows_count += 1
        toplog_line = toplog_line.rstrip()
        if not toplog_line:
            continue
        # Timestamp line: remember the time and restart the line count.
        timestamp_match = r_top_timestamp.search(toplog_line)
        if timestamp_match is not None:
            print(toplog_line)
            timestamp = timestamp_match.group(1)
            rows_count = 1
            continue
        # Summary and column-header lines come before the process lines.
        if rows_count < row_start:
            continue
        columns = toplog_line.split()[:command_col]
        # Lines too short to have a USER column are kept, as before.
        if (
            users
            and 1 <= user_col <= len(columns)
            and columns[user_col - 1] not in users
        ):
            continue
        rows.append([timestamp, *columns])
    return rows


########main processing#############
if __name__ == '__main__':
    # ----------------------
    # read toplog file
    # ----------------------
    # The with-statement closes the file even if reading raises.
    with open(input_file_name, "r") as f:
        toplog_lines = f.readlines()

    # ----------------------
    # Extract timestamp row
    # ----------------------
    # 0-based line numbers of the timestamp lines (not used by the steps
    # below).
    timestamp_list = [
        line_number
        for line_number, toplog_line in enumerate(toplog_lines)
        if r_top_timestamp.search(toplog_line) is not None
    ]

    # --------------------------
    # Extract the process of the target User
    # --------------------------
    output_csv_list = extract_process_rows(
        toplog_lines,
        user_array,
        process_row_start,
        user_column,
        command_column,
    )

    # --------------------------
    # File writing
    # --------------------------
    # newline='' lets the csv module control line endings itself, so the
    # lineterminator chosen below is written unchanged on every platform.
    with open(output_file_name, 'w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', lineterminator='\n')
        csv_writer.writerow(csv_header)
        csv_writer.writerows(output_csv_list)
That's all.
I hope this reduces the burden on those who are handed nothing but raw TOP command results, with no tools, and are told to "collect" the data.
Recommended Posts