- This is a personal memo summarizing, purpose by purpose, the Pandas operations I use regularly. - It will be updated periodically.
# Extract a single column from the DataFrame as a Series
aaa = df['sts']
# Print "first time" for the first element only; every later element
# prints "second and subsequent times"
for index, item in enumerate(aaa):
    if index < 1:
        print("first time")
    else:
        print("second and subsequent times")
# Labels of the columns to keep, in the order they should appear
use_idx = ['aaa', 'bbb', 'ccc']
# Build a new DataFrame that contains only those columns
df2 = df.loc[:, use_idx]
# Convert the 'date' column to datetime type.
# The result is stored back on the same frame (`date`) that the next
# statement reads from, so the .dt accessor below works.
date['date'] = pd.to_datetime(date['date'])
# Extract the day of the week from the date (MON = 0 ... SUN = 6)
date['day_of_week'] = date['date'].dt.dayofweek
data = [1, 8, 14, 56]
# Label each value with a name of your choice.
# Per-element labels go in `index`; `name` is a single hashable label for the
# whole Series, so passing a list there raises TypeError.
s = pd.Series(data, index=['day', 'hour', 'c1', 'c2'])
# Get the column names as an array
name = df.columns.values
# Drop the 'AAAA' column (axis=1 means columns)
df = df.drop('AAAA', axis=1)
# Append a suffix to every existing column name
newcol = []
for i in df.columns:
    print(i + "_2day")
    newcol.append(i + "_2day")
# Reassign the column names
df.columns = newcol
# Sort rows by a single column
dfl = df.sort_values("A")
# Sort rows by several columns, in priority order
dfl = df.sort_values(["A", "B", "C", "D"])
# Concatenate side by side (adds columns)
df_union = pd.concat([df, df2], axis="columns")
# Concatenate top to bottom (adds rows)
df_union = pd.concat([df, df2], axis="index")
* To keep the column order from changing when joining, pass sort=False to pd.concat.
# Inner join on the 'key' column ('inner' is the default for how)
df = pd.merge(left, right, how='inner', on='key')
# Left outer join on the 'key' column
df = pd.merge(left, right, how='left', on='key')
"""
The join method is specified with the how keyword.
inner: Default. Inner join. Keeps only the keys present in both frames.
left: Left outer join. Keeps all keys from the first (left) frame.
right: Right outer join. Keeps all keys from the second (right) frame.
outer: Full outer join. Keeps all keys from both frames.
"""
# Set the xxxx column as the index.
# set_index / reset_index return a new DataFrame rather than modifying df,
# so the result has to be assigned for the change to take effect.
df = df.set_index('xxxx')
# Reset index
# A sequential number becomes the index and the original index (xxxx) is
# moved back into a data column
df = df.reset_index('xxxx')
# Reset index
# The old index is discarded and a new sequential index is set
df = df.reset_index(drop=True)
"""
df2 = df.reset_index(drop=True)
Note that reset_index returns a new DataFrame; the change is not reflected unless you assign the result as shown above.
"""
# Combine the DataFrame df with the Series df_ser.
# The Series is first converted to a DataFrame with to_frame().
ser_frame = df_ser.to_frame()
df2 = pd.concat([df, ser_frame], axis="columns")
- The datetime type in pandas is Timestamp, which can be used in almost the same way as Python's datetime.
- To convert a Timestamp to a Python datetime, call the following method on it:
to_pydatetime()
# Wrap the existing data in a DataFrame
hoge = pd.DataFrame(data=hoge)
# Sort by index, ascending
hoge = hoge.sort_index(ascending=True)
# Sort by index, descending
hoge = hoge.sort_index(ascending=False)
# One parameter
# params must be a sequence: the trailing comma makes a one-element tuple
# (without it, ("2020-02-13") is just a string).
sql = "select * from aaa where opstime >= %s;"
df = pd.read_sql(sql, con=conn, params=("2020-02-13",))
# Two parameters
sql = "select * from aaa where opstime >= %s and opstime <= %s;"
df = pd.read_sql(sql, con=conn, params=("2020-02-13", "2020-02-15"))
# Build a 3-row x 3-column DataFrame in which every cell is missing (NaN)
df = pd.DataFrame(columns=list("abc"), index=pd.RangeIndex(3))
Recommended Posts