My complete personal notes for data analysis.
Short one-liners such as `df.head()` are left out,
because these notes are meant to be snippets I can copy and paste.
# Core numerical / tabular libraries.
import numpy as np
import pandas as pd
# Show up to 100 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)
# Plotting libraries.
import seaborn as sns
import matplotlib.pyplot as plt
# NOTE(review): presumably imported only for its side effect of enabling
# Japanese fonts in matplotlib — confirm.
import japanize_matplotlib
import os
# Progress-bar wrapper for iterables.
from tqdm import tqdm
import warnings
# Suppress all warnings from this point on.
warnings.filterwarnings('ignore')
# Read a UTF-8 CSV file given by a relative file name.
filename = "hoge.csv"
df = pd.read_csv(filename, encoding='utf-8')
# Read a UTF-8 CSV file from another directory by joining directory and file name.
dirname = "/foo/bar/.../"
filename = "hoge.csv"
filepath = os.path.join(dirname, filename)
df = pd.read_csv(filepath, encoding='utf-8')
# Write the DataFrame to CSV with a header row and without the index column.
filename = "huga.csv"
df.to_csv(filename, header=True, index=False)
# Rename columns: keys are the current names, values are the new names.
df = df.rename(columns={"before01":"after01", "before02":"after02"})
# Convert column "col" to the "category" dtype.
df = df.astype({"col": "category"})
# Concatenate vertically: rows of `upper` followed by rows of `lower`.
df = pd.concat([upper,lower])
# Concatenate horizontally (axis=1): columns of `left` followed by columns of `right`.
df = pd.concat([left,right], axis=1)
LEFT JOIN
# Left join when the key column has the same name in both frames.
df = pd.merge(left, right, on="key", how='left')
# Left join when the key column is named differently in each frame.
df = pd.merge(left, right, left_on="lkey", right_on="rkey", how='left')
# Left join on a composite key; the lists pair up position by position.
df = pd.merge(left, right, left_on=["lkey01", "lkey02"], right_on=["rkey01", "rkey02"], how='left')
GROUP BY
# Sum per value of col01; as_index=False keeps col01 as a regular column.
df = df.groupby(by="col01", as_index=False).sum()
# Group by two columns and apply several aggregations per column,
# given as a {column: [aggregation names]} mapping.
df = df.groupby(by=["col01", "col02"], as_index=False).agg({"col01": ['mean', 'count'], "col02":['std', 'var']})
# Reset the index and drop the old one (usually used together with the groupby above).
df.reset_index(drop=True, inplace=True)
# Write the DataFrame to CSV with a header row and without the index column.
filename = "hoge.csv"
df.to_csv(filename, header=True, index=False)
!pip install pandas-profiling
import pandas_profiling as pdp
# Build a profiling report for the DataFrame.
profile = pdp.ProfileReport(df)
# Pass the output path positionally: the keyword was `outputfile` in
# pandas-profiling 1.x and was renamed to `output_file` in 2.x, so the
# positional form works with both.
profile.to_file("myoutputfile.html")
After reading the data, do this first.
import collections

# Sample data: a list with repeated names.
lis = [
    "Alice", "Alice",
    "Bob", "Bob", "Bob",
    "Carol",
]
# Tally how many times each name occurs.
c = collections.Counter(lis)
# The three most frequent names as (name, count) pairs, most frequent first.
c.most_common(3)
# Loop with a progress bar: wrap the iterable in tqdm().
for i in tqdm(range(n)):
    pass  # foo bar: replace with the actual loop body
# List comprehension version
[foo for i in tqdm(range(n))]
%%timeit
foo bar
import gc
# Run a full garbage collection right now.
gc.collect()
# Build a DataFrame column by column: collect one value per iteration into
# a list per column, then assemble the lists into a DataFrame.
list01 = []
list02 = []
for i in tqdm(range(n)):
    v01 = ...  # placeholder (was ???): compute the col01 value for iteration i
    list01.append(v01)
    v02 = ...  # placeholder (was ???): compute the col02 value for iteration i
    list02.append(v02)
# Both lists have one entry per iteration, so the columns have equal length.
df = pd.DataFrame({"col01":list01, "col02":list02})
Recommended Posts