import numpy as np
import pandas as pd
import sys
# Show the interpreter's default string encoding.
print(sys.getdefaultencoding())

# Build a 2x3 float64 array from nested lists.
x = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ],
    dtype=np.float64,
)
# Load comma-separated values from the file 'text1'. Text following '#' is
# treated as a comment and no leading rows are skipped.
y = np.loadtxt(
    "text1",
    comments="#",
    delimiter=",",
    skiprows=0,
)
# pandas.DataFrame
# Read the CSV as Shift_JIS text, assigning the three column names explicitly.
# NOTE(review): because `names` is passed without `header=0`, a header row in
# the file (if there is one) is read as data -- confirm against stock.csv.
df = pd.read_csv(
    'stock.csv',
    encoding='Shift_jis',
    names=('index', 'dekidaka', 'owarine'),
)
df.head()  # first 5 rows
pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})  # build a frame from a dict of columns
df.loc[:, ['index', 'owarine']]  # all rows, two columns selected by label
df.loc[100:115, ['index', 'dekidaka']]  # label slice: both endpoints are included
df.iloc[1:22, 1:3]  # row positions 1-21, column positions 1-2 (dekidaka, owarine)
df.iloc[:, [0, 2]]  # column positions 0 and 2 (index, owarine)
df.iloc[::2]  # rows at even positions
df.iloc[1::2]  # rows at odd positions
# Boolean Series, True/False per row.
# NOTE(review): compares against the string '1900', so this assumes the
# 'index' column holds strings -- TODO confirm.
df['index'] < '1900'
# Rows whose 'index' equals the string '1900'
# (author's note, presumably sample output: 171.0 1883, 49800 261).
df[df['index'] == '1900']
# Merge
# Load the three Shift_JIS-encoded sample files.
samp1 = pd.read_csv("sample1.csv", encoding="Shift_jis")
samp2 = pd.read_csv("sample2.csv", encoding="Shift_jis")
samp3 = pd.read_csv("sample3.csv", encoding="Shift_jis")

# Vertical concatenation: stack samp2 below samp1 and renumber the row index.
conc = pd.concat((samp1, samp2), ignore_index=True)

# Horizontal join: left-join samp3's label1/label2 columns onto conc by label1.
merg = conc.merge(
    samp3[["label1", "label2"]],
    how="left",
    on="label1",
)
# Data extraction
# A single column as a Series (author's note: only label2_y, 1000 ~ 1003).
merg.loc[:, "label2_y"]
# Two columns by label, then the first two columns by position.
merg.loc[:, ["label2_x", "label2_y"]].iloc[:, :2]
####################
# Summary statistics for the rows at positions 1-4 of label1
merg["label1"].iloc[1:5].describe()
# Element-wise addition of two columns
merg["label1"] + merg["label2_y"]
# Column total
merg["label1"].sum()
# Missing-value mask (True where a cell is missing)
merg.isnull()
# Number of missing values per column
merg.isnull().sum()
# Column-wise maximum plus column-wise minimum
# NOTE(review): every column is included, not only numeric ones; pass
# numeric_only=True to max()/min() if that is the intent -- TODO confirm.
print(merg.max() + merg.min())
# Data type of each column. A DataFrame exposes `dtypes`; the singular
# `dtype` attribute exists only on a Series.
merg.dtypes
# Type conversion: coerce label1 to a numeric dtype
merg["label1"] = pd.to_numeric(merg["label1"])
# Numeric => datetime, formatted as year-month strings. The `.dt` accessor
# only exists on datetime-like Series, so convert first; the numeric label1
# column itself is left untouched.
# NOTE(review): pd.to_datetime interprets plain numbers as nanoseconds since
# the epoch unless `unit=` or `format=` is given -- confirm what label1 encodes.
pd.to_datetime(merg["label1"]).dt.strftime("%Y%m")
# Grouping: total of label2_y for each label1 value. Selecting the column
# before aggregating avoids summing every other column of the frame
# (including non-numeric ones) only to discard the results.
merg.groupby(["label1"])["label2_y"].sum()
# Number of distinct values in label3
# (author's run: 18 -- a value with a leading space counts as a different value)
print(len(merg["label3"].unique()))
# Normalise case: convert lowercase letters to uppercase
merg["label3"] = merg["label3"].str.upper()
print(len(merg["label3"].unique()))  # author's run: 17
# Remove every space character from the values
merg["label3"] = merg["label3"].str.replace(" ", "")
print(len(merg["label3"].unique()))  # author's run: 16
# Sort
# Produces a new frame ordered by label1 (ascending); the result is not
# assigned, so merg itself is left unchanged.
merg.sort_values("label1", ascending=True)
# Recommended Posts