--Merge a large number of CSV files into a single CSV file.
--Prepare the data as CSV files without header rows. --Collect the CSV files you want to merge in one folder. --Specify the name of the output file that will hold the merged result.
import csv, os
import pandas as pd
# Merge every CSV file found in one folder into a single CSV file.
# The input files are expected to have no header row.

# (1) Input folder: the directory that holds the CSV files to merge.
csv_folder_path = os.path.join(".", "csv_folder", "headerRemoved")
# Get the file names as a list. os.listdir() returns entries in arbitrary
# order, so sort them to make the order of the merged rows reproducible.
csv_files_list = sorted(os.listdir(csv_folder_path))
# Collects every row from every CSV file, in file order.
csv_rows = []
for csv_filename in csv_files_list:
    csv_file_path = os.path.join(csv_folder_path, csv_filename)
    # Skip sub-directories and other non-file entries; open() would fail on them.
    if not os.path.isfile(csv_file_path):
        continue
    # The with-block closes the file even if reading raises. newline="" is
    # what the csv module asks for so that newlines embedded in quoted
    # fields are parsed correctly.
    with open(csv_file_path, newline="") as csv_file_obj:
        reader_obj = csv.reader(csv_file_obj)
        csv_rows.extend(reader_obj)
# Convert the list of rows to a DataFrame.
df = pd.DataFrame(csv_rows)
# (3) Export range: keep the first 44 columns (positions 0 through 43).
# A slice is used instead of an explicit list of positions so that data with
# fewer than 44 columns is kept as-is rather than raising IndexError.
df = df.iloc[:, :44]
# (2) Output file name: write the merged data without the row index.
# NOTE: to_csv still writes a header row by default (here the integer column
# labels); pass header=False if the output should be header-less as well.
df.to_csv(os.path.join(".","merged_file.csv"), index=False)
--Get a list of file names from the folder that contains the CSV files. --For each name in the list, open the file, create a Reader object from it, and read the file line by line. --Repeat this for all files so that every line from every file ends up in one list. --Convert the list to a DataFrame before exporting. --At this point, restrict the DataFrame to the range of columns you want to export. --Finally, write the result to a CSV file with the specified file name using the DataFrame's to_csv method. Setting index = False prevents the row index from being written.
--By the way, since the data has been converted to a DataFrame, you can display any subset of columns. For example, the following selects the columns at positions 38 and 12 (counting from 0), in that order.
# Select all rows of the columns at integer positions 38 and 12, in that order.
df.iloc[:,[38,12]]
--Next, I want to plot various graphs from df.
Recommended Posts