--I want to delete the header lines from a large number of CSV files. --I'm using Jupyter Notebook.
--Place this script in the current directory. --Put the CSV files whose headers you want to remove in the input folder in the current directory (named InputFolder in the code below; it was Machine11_trd in my case).
import csv, os
def remove_header_lines(src_folder, dst_folder, skip_lines=(1, 2), encoding=None):
    """Copy every CSV file in *src_folder* to *dst_folder* without its header lines.

    Only regular files whose name ends with ".csv" are processed; everything
    else in *src_folder* (including sub-folders such as the output folder
    itself) is ignored. Rows may have differing numbers of columns.

    Args:
        src_folder: Folder containing the CSV files to process.
        dst_folder: Folder that receives the header-stripped copies. It is
            created if it does not exist.
        skip_lines: 1-based line numbers to drop, compared against
            ``csv.reader.line_num`` (the number of physical source lines
            read so far, so a record spanning several lines is identified
            by its last line).
        encoding: Text encoding used for both reading and writing; ``None``
            keeps the platform default.

    Returns:
        The names of the files that were processed, in sorted order.
    """
    os.makedirs(dst_folder, exist_ok=True)
    lines_to_skip = frozenset(skip_lines)
    processed = []

    for csv_filename in sorted(os.listdir(src_folder)):
        src_path = os.path.join(src_folder, csv_filename)
        # Skip anything that is not a CSV file (e.g. the output sub-folder).
        if not csv_filename.endswith(".csv") or not os.path.isfile(src_path):
            continue

        # newline="" lets the csv module handle line endings itself, so
        # line breaks embedded in quoted fields survive unchanged.
        with open(src_path, newline="", encoding=encoding) as src_file:
            reader = csv.reader(src_file)
            # Read everything before writing so that the source is fully
            # consumed even if source and destination are the same folder.
            kept_rows = [row for row in reader
                         if reader.line_num not in lines_to_skip]

        dst_path = os.path.join(dst_folder, csv_filename)
        with open(dst_path, "w", newline="", encoding=encoding) as dst_file:
            csv.writer(dst_file).writerows(kept_rows)

        processed.append(csv_filename)

    return processed


# (1) Reference: folder holding the CSV files to process.
csv_folder_path = os.path.join(".", "InputFolder")
# (2) Destination: folder for the header-stripped files.
headerRemoved_path = os.path.join(csv_folder_path, "headerRemoved")
# (3) Header condition: drop the first and second lines (change as needed).
processed_files = remove_header_lines(
    csv_folder_path, headerRemoved_path, skip_lines=(1, 2)
)
--Skip the first and second lines and collect all remaining rows in a list (change the skip condition as needed). --Create a new CSV file in the headerRemoved folder, which is created inside the input folder, and write the rows out. --Do this for every CSV file in the target folder.
--At first I tried to do this with a DataFrame, but files whose rows have different numbers of columns could not be read properly, so I read each file line by line into a list and then converted the list to a DataFrame.
Recommended Posts