--History
version.cmd
:: Print the installed Python version; the "::" line below is presumably the output recorded by the author
python --version
:: Python 3.7.6
:: Print the versions of the installed Jupyter components; the "::" lines below are presumably the recorded output
jupyter --version
:: jupyter core : 4.6.1
:: jupyter-notebook : 6.0.3
:: qtconsole : 4.6.0
:: ipython : 7.12.0
:: ipykernel : 5.1.4
:: jupyter client : 5.3.4
:: jupyter lab : 1.2.6
:: nbconvert : 5.6.1
:: ipywidgets : 7.5.1
:: nbformat : 5.0.4
:: traitlets : 4.3.3
import.py
import pandas as pd #Library of data structures and functions for data analysis
import numpy as np #Numerical computing library
import matplotlib #Data visualization package
import matplotlib.pyplot as plt #matplotlib's pyplot plotting interface, under the conventional alias plt
from datetime import datetime as dt #datetime class for working with dates and times, aliased as dt
from sklearn.preprocessing import StandardScaler #Scaler class used for data standardization
Reference: using Japanese text with matplotlib
matplot_japanese.py
from matplotlib import rcParams
#Render text with the sans-serif family and register Japanese-capable fonts as its candidates.
#matplotlib walks the candidate list in order and uses the first name it can find,
#so names that match no installed font are simply skipped.
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = [
    'Hiragino Maru Gothic Pro',
    'Yu Gothic',
    'Meiryo',  #Spelling fixed: the font family is named "Meiryo"; "Meirio" could never match
    'Takao',  #NOTE(review): may not match the installed Takao family names (e.g. TakaoPGothic) — confirm
    'IPAexGothic',
    'IPAPGothic',
    'VL PGothic',
    'Noto Sans CJK JP',
]
figsize.py
#Create a new figure that is 20 wide and 2 tall (figsize is (width, height) in inches)
plt.figure(figsize=(20,2))
#SQL
#Read a MySQL table into a pandas data frame through SQLAlchemy with the PyMySQL driver
import pymysql
import sqlalchemy
from sqlalchemy import create_engine
#Connection information
#URL format: mysql+pymysql://<user>:<password>@<host>:<port>/<database>?charset=utf8
#The "<password>@<host>" part of the original URL had been destroyed by e-mail obfuscation
#("[email protected]"); "password" and "localhost" below are placeholders — replace them with
#your own values and avoid committing real credentials.
url = 'mysql+pymysql://root:password@localhost:3306/databasename?charset=utf8'
#echo=False keeps SQLAlchemy from logging every emitted SQL statement
engine = sqlalchemy.create_engine(url, echo=False)
#Run
#NOTE(review): "Table" looks like a placeholder table name — substitute the real one
query = "SELECT * FROM Table"
#Execute the query and load the full result set into a data frame
dataset = pd.read_sql(query,con = engine)
csv.py
#Load a UTF-8 encoded CSV file into a data frame
#NOTE(review): "pass of csv" is presumably a placeholder for the CSV file path (likely a mistranslation of "path") — replace it before running
dataset = pd.read_csv("pass of csv", encoding = "utf-8")
#Write the data frame back out as a Shift-JIS encoded CSV file
#NOTE(review): with the same placeholder as the input, this would overwrite the file that was just read — confirm the intended output path
dataset.to_csv("pass of csv", encoding="shift-jis")
clipboard.py
#Jupyter/IPython shell escape: installs pyperclip from inside a notebook cell (not valid syntax in a plain .py script)
!pip install pyperclip
import pyperclip
#Copy a string to the clipboard; STR_XXX is presumably a placeholder for the string to copy
pyperclip.copy(STR_XXX)
info.py
#Print a summary of the data frame: columns, non-null counts and dtypes
dataset.info()
#Descriptive statistics for the data frame's columns
dataset.describe() #count, mean, std, min, quartiles, max, etc.
index.py
#Indexing data frames
#Use the 'StoreCD' column as the index; the result is stored in dataset2, dataset itself is not reassigned
dataset2 = dataset.set_index('StoreCD')
#Type change
#Convert the column's values to int and write the converted column back
#NOTE(review): 'colum' looks like a placeholder (possibly a misspelling of "column") — substitute the real column name
dataset['colum']=dataset['colum'].astype(int)
--Decompose the crosstabbed data into simple ROW data
melt_concat.py
#Unpivot the cross-tabulated frame: 'index_column' is kept as the identifier,
#the former column headers go to 'horizon_axis_column_name' and the cell values
#go to 'value_column_name'
meltDF1 = dataset.melt(
    id_vars='index_column',
    var_name='horizon_axis_column_name',
    value_name='value_column_name',
)
#Stack the melted frames on top of each other
#(meltDF2 is not created in this snippet; it must already exist)
concatDF = pd.concat(objs=[meltDF1, meltDF2], axis=0)
null.py
#Replace every missing value with 0, modifying dataset in place
dataset.fillna(value=0, inplace=True)
#Boolean frame marking the missing cells (whole data frame)
dataset.isna()
#Per column: True if the column contains at least one missing value
dataset.isna().any()
dummy.py
#Column roles
target_col = 'a'
#Non-numeric columns
str_colmns = ['b', 'c', 'd']
#Non-numeric columns that should be turned into dummy variables
dummie_cols = ['b']
#Non-numeric columns that are NOT dummy-encoded; these are left out of the features
exclude_cols = [
    name for name in str_colmns
    if name not in dummie_cols
]
#Replace each column in dummie_cols with its dummy (indicator) columns
df = pd.get_dummies(df, columns=dummie_cols)
#Feature columns: every column of the encoded frame except the excluded ones
feature_cols = [
    name for name in df.columns
    if name not in exclude_cols
]
corr.py
#Pairwise correlation matrix of the feature columns
corrDF = df.loc[:, feature_cols].corr()
heatmap.py
import seaborn as sns
#Draw the correlation matrix as a heatmap
sns.heatmap(corrDF,annot=False) #Set annot=True to display the numbers in each cell
distplot.py
import seaborn as sns
#Build a grid whose plot elements are colored by the values of "target_column"
g=sns.FacetGrid(df,hue="target_column",height=3)
#Draw the distribution of "feature_column" for each hue level; kde=False turns off the density curve
#NOTE(review): sns.distplot is deprecated as of seaborn 0.11 and sns.histplot is the documented replacement — confirm the installed seaborn version before switching
g.map(sns.distplot,"feature_column",kde=False)
#Add a legend for the hue levels
g.add_legend()
--You can set linked colors
color.py
#Two-color palette given as hex codes (a gray and an orange-red)
flatui = ['#969696', '#DA5019']
#Register the palette as the default colors for subsequent plots
sns.set_palette(flatui)
Comment: I collected these snippets here because it is tedious to dig them out of past code and copy and paste them every time.
Recommended Posts