import pandas as pd
from sklearn.datasets import load_iris
# Load the iris dataset bundled with scikit-learn and wrap its feature matrix
# in a DataFrame whose columns are labelled with the feature names.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
import pandas as pd
# Build a DataFrame from a dict that maps column names to equal-length lists.
# The dict is named `data` rather than `input` so the builtin input() is not shadowed.
data = {'a': ['red', 'yellow', 'blue'], 'b': [0.5, 0.25, 0.125]}
df = pd.DataFrame(data)
import pandas as pd
# Excel
df = pd.read_excel('file name.xlsx')
# CSV
# `header` must be a row number (or None), not a bool: header=0 treats the first
# row of the file as a header and replaces it with `names`. Use header=None
# instead if the file has no header row. `col_names` must be defined beforehand.
df = pd.read_csv(
    'filename.csv',
    sep=',',
    names=col_names,
    header=0,
    low_memory=False,
)
# Summary statistics for all columns of `train` (include='all'), not only the
# numeric ones. `train` is assumed to be a DataFrame defined elsewhere.
train.describe(include='all')
import seaborn as sns
# Pairwise scatter plots coloured by the "target" column. The hue column is
# excluded from `vars`; passing every column would also plot "target" itself
# against the features.
sns.pairplot(df, vars=df.columns.drop("target"), hue="target")
# Number of missing values in each column
df.isnull().sum()
# Number of distinct values in each column
df.nunique()
# Frequency of each distinct row
df.value_counts()
# Histogram of a single column using 40 bins (`df3` is not defined in this snippet)
df3['Column name'].plot.hist(bins=40)
# Sort the rows in index order
df.sort_index()
# One Hot Encoding
import pandas as pd
import numpy as np
# Toy frame: a categorical column, a year column, a float column containing
# NaN and inf, and a plain integer column.
df = pd.DataFrame(
    data={
        'A': ['Senior citizens', 'adult', 'adult', "Toddler", "Toddler"],
        'B': [2020, 2020, 2021, 2021, 1993],
        'C': [1.0, 2.0, 1.0, np.nan, np.inf],
        "D": [0, 1, 2, 3, 4],
    }
)
# One-hot encode columns A and B. The encoded frame is returned (and discarded
# here); `df` itself is left unchanged.
pd.get_dummies(df, columns=["A", "B"])
# One-hot encode a single column; drop_first=True omits the indicator column
# of the first category.
df = pd.get_dummies(df, columns=["Column name"], drop_first=True)
# Keep only the rows whose column equals `value` (`value` must be defined beforehand)
df = df[df['Column name'] ==value]
# Flag rows whose name contains the substring "curry": 1 if present, 0 otherwise.
# The match is a literal, case-sensitive substring test (regex=False), done
# vectorized instead of with a per-row lambda. na=False makes missing names
# count as 0 rather than raising.
train['curry'] = train['name'].str.contains("curry", regex=False, na=False).astype(int)
# Stack DataFrames vertically (row-wise) and renumber the resulting index from 0
pd.concat([df1, df2, df3], axis=0, ignore_index=True)
# Join DataFrames horizontally (column-wise)
pd.concat([df1, df2, df3], axis=1)
# Rename a column: the dict maps the current name to the new name
df = df.rename(columns={'Change before':'After change'})
# Add column
# A quoted string cannot be used as a keyword argument name, so the column
# name and value are passed by unpacking a dict.
df = df.assign(**{'Column name': 'value'})
# Delete a column
df = df.drop('Column name', axis=1)
# Drop every row that contains at least one missing value
df = df.dropna(how='any')
# Replace missing values in the given column with `value` (`value` must be defined beforehand)
df = df.fillna({'Column name':value})
# One Hot Decode
# Example one-hot encoded frame: one indicator column per animal.
animals = pd.DataFrame({"monkey":[0,1,0,0,0],"rabbit":[1,0,0,0,0],"fox":[0,0,1,0,0]})


def get_animal(row):
    """Return the name of the first ``animals`` column whose value in *row* is 1.

    *row* is indexed by column name (e.g. a row Series passed by
    ``DataFrame.apply(..., axis=1)``). Returns None when no column equals 1.
    """
    for c in animals.columns:
        if row[c] == 1:
            return c
    return None


# Decode each row back to its animal name (None for all-zero rows).
animals.apply(get_animal, axis=1)
# Write the DataFrame to a CSV file without the index column
df.to_csv('file name.csv', index=False)
# Recommended Posts