I could not find material in one place on automating CSV input/output and converting many files at once, so I wrote this memo.
Importing the libraries
import csv
import pandas as pd
import numpy as np
import os
import glob
from pathlib import Path
All Shift-JIS (sjis) CSV files in a folder are converted to UTF-8 in one pass and written to another folder.
###################################################
# Convert every Shift JIS CSV file in a specific folder to UTF-8.
# The files in the ./in folder are only read; the originals are kept as-is.
# The converted copies are saved to the out folder under the same file name.
print("Shift JIS to UTF-8 Start!")
p = Path("./in")
out_dir = Path("out")
# DataFrame.to_csv does not create missing directories, so make sure the
# destination folder exists before the first write.
out_dir.mkdir(parents=True, exist_ok=True)
# Sorted so the files are always processed in the same order.
for file in sorted(p.glob("*.csv")):
    shift_jis_file = pd.read_csv(file, encoding='Shift_JISx0213')
    file_path = out_dir / file.name
    # to_csv writes UTF-8 by default. The row index is also written as an
    # extra leading column (pandas default), which is left unchanged here.
    shift_jis_file.to_csv(file_path)
print("Shift JIS to UTF-8 End!")
###################################################
Read the names of the files to process (in this example, the file names to process are listed in filelist.csv)
# Read the list of files you want to process.
# (Only the file name is listed, in one column.)
# NOTE: pd.read_csv treats the first line of filelist.csv as the column
# header, so that line is not processed as a file name.
filelist = pd.read_csv('./filelist.csv')

# Locations that do not depend on the file being processed. They are defined
# before the loop so they exist even when filelist.csv lists no files.
outputfolder = './converted/'
mergedpath = './merged/tsestockdata.csv'
# DataFrame.to_csv does not create missing directories.
os.makedirs(outputfolder, exist_ok=True)

# Loop processing for each file: take the file name straight from the first
# column instead of stringifying the row and stripping quotes/brackets.
for file_name in filelist.iloc[:, 0].astype(str):
    # Input / output path definition: folder name joined with the file name.
    inputpath = './out/' + file_name
    outputpath = outputfolder + file_name

    # Actual raw data file (the file already converted to UTF-8): delete the
    # unnecessary rows and add a column holding a specific string, all at once.
    # Read file (no header row, so every line arrives as data).
    df = pd.read_csv(inputpath, header=None)

    # Extract the 10 characters that precede the last 4 characters of the
    # path and add them as the last column.
    # (presumably a date embedded in the file name before ".csv" - confirm)
    tradingdate = inputpath[-14:-4]
    df['TradingDate'] = tradingdate

    ########## Delete unnecessary rows, rename columns
    # Drop the first two rows and renumber the remaining rows from 0.
    df = df.iloc[2:].reset_index(drop=True)
    df.columns = ['Rank', 'Code', 'Market', 'Company', 'EndingPrice',
                  'PriceChange', 'PercentChange', 'Volume', 'TradingDate']

    # Individual export of csv
    ########## Csv export after deleting unnecessary rows and renaming columns
    # `lineterminator` is the current spelling of this keyword (pandas >= 1.5);
    # the older `line_terminator` is rejected by pandas 2.x. Every other
    # option is left at its pandas default (UTF-8, comma separated, with the
    # index column and the header row).
    df.to_csv(outputpath, index=True, lineterminator='\n')
    print("Convert File Processed:" + file_name)
Merge the CSV files (a single aggregated file is easier to load from a BI tool)
########## Merge all csv files in a specific folder
DATA_PATH = outputfolder
# Sorted so the rows of the merged file always appear in the same order;
# glob.glob itself returns the matches in arbitrary order.
All_Files = sorted(glob.glob('{}*.csv'.format(DATA_PATH)))
list2 = [pd.read_csv(file) for file in All_Files]
if list2:
    # DataFrame.to_csv does not create missing directories.
    merged_dir = os.path.dirname(mergedpath)
    if merged_dir:
        os.makedirs(merged_dir, exist_ok=True)
    df = pd.concat(list2, sort=False)
    df.to_csv(mergedpath, encoding='utf_8')
else:
    # pd.concat raises ValueError on an empty list, so skip the merge
    # instead of crashing when there is nothing to merge.
    print("Merge skipped: no csv files found in " + DATA_PATH)
print("Merge Process End!!")
##########
Recommended Posts