A summary for my own reference, updated from time to time. It describes the commands I have used and looked into. ** These are personal notes, so the terminology may be wrong in some places. **
!! View
#command
#Argument option description
import pandas
Series
#One-dimensional data object
#I think of it as an array
ser = pandas.Series()
DataFrame
#Two-dimensional data object
#I think of it as similar to a DB table
df = pandas.DataFrame()
#Sort by x, then y, ... in that order
df.sort_values([x, y, ...])
#Remove the rows whose index label is given as the argument.
df.drop(x)
axis = 1 #Delete column
#Combine data frames
pandas.merge(x, y, on=z) #Join the x and y tables with the z column as key
suffixes=() #Suffixes added to column names that exist in both tables, given as a pair: the first is the suffix for the left df, the second for the right df.
#Swap rows and columns
df.transpose()
pandas.concat([x, y, z, ...])
#The argument is a list of the dfs you want to combine
#Maximum value
df.max()
#minimum value
df.min()
#Extract information for each item in the data frame
df.info()
#No special arguments are required
#Extract by row number / column number
df.iloc[row, column] #Argument: ":" selects all
#Extract by row name / column name
df.loc[row, column] #Argument: ":" selects all
#WHERE IN in SQL
df.isin()
#Arguments are lists, etc.
#Returns the first n rows, where n is the argument (default 5)
df.head()
#Get median value
df.median()
#Replace Nan
df.fillna()
#Get summary statistics
df.describe()
#Returns the following statistics as a DataFrame (numeric columns by default; unique, top and freq are reported for non-numeric columns)
#count:Number of elements
#unique:Number of unique (unique) value elements
#top:Mode
#freq:Mode frequency (number of occurrences)
#mean:Arithmetic mean
#std:standard deviation
#min:minimum value
#max:Maximum value
#50%:Median
#25%: First quartile
#75%: Third quartile
Groupby
#groupby
group = df.groupby()
as_index=False #If False, the grouping keys are kept as ordinary columns instead of becoming the index of the result
how = 'left', 'right', 'outer' #Join type; note this is an option of merge, not of groupby
#The argument is the name of the column(s) to group by
#Number of rows in each group
group.size()
#Aggregate specific items in various ways
df.agg({'Items to be aggregated': ['Aggregation method list']})
#Read a CSV file, i.e. data whose delimiter is a comma
pandas.read_csv()
encoding= #Specify the character encoding
header= #Row number that holds the column names
names= #Set the column names
dtype= #Specify data type with dictionary type
sep= #Specifying the delimiter
engine=
usecols = #Specify the column to read in the list.
#Read a table, i.e. data whose delimiter is a tab
pandas.read_table()
encoding= #Specify the character encoding
header= #Row number that holds the column names
names= #Set the column names
#Read from a DB
pandas.read_sql()
#The first argument is SQL
#The second argument is the connection object
df.to_csv()
encoding= #Character code
index= #Whether to write the index as well; the default is True
#Overwrite column name
df.columns = [list]
df.rename(columns={Current column name:New column name})
#Overwrite index
df.index = [list]
#Column name / index name change
df.rename({Current name: New name})
axis=1 #Change column names. If not specified, the row (index) names are changed.
#Reindex
df.reset_index()
drop=True #Delete existing index
#Insert (or overwrite) a column
df[Column name] = x
#Replace
df.replace({Current character:New character}) #The argument is dictionary type{Character to replace:Character after replacement}
#Append the argument as a new row; the row can be a list, Series, or numpy.array
df.append() #Note: removed in pandas 2.0; use pandas.concat() instead
#Add column
df.assign()
#Apply function to each column / row
df.apply()
axis=1 #Apply the function to each row
axis=0 #Apply the function to each column
#Function as an argument. Lambda style is fine.
#Extract the DataFrame line by line and apply it to for.
for index, row in df.iterrows():
#Each iteration yields the index label and the row's values as a Series
This article is very easy to understand. ↓ Recursion Substitution Eradication Committee for Data Processing in Python / pandas
#bar graph
df.plot.bar()
#Distinguishing Nan
df.isnull()
#Remove Nan
df.dropna()
axis=1 #Delete column.
#Replace nan
df.fillna()
#Find duplicate lines
#The return value is a boolean Series: True for a row that duplicates another row, otherwise False
df.duplicated()
keep = False #Marks every duplicated row as True. If not specified (default 'first'), the first occurrence is not marked as a duplicate.
#Remove completely duplicate lines
df.drop_duplicates()
#Pair plot
grr = pandas.plotting.scatter_matrix(df)
#df is the data to plot
c= #Value to scale
figsize=(x,y) #Figure size
marker= #Marker shape
hist_kwds={} #Histogram settings
s= #Marker size
alpha= #Transparency
Recommended Posts