#python
--sql
[]( "Order to display on this page" python pandas numpy matplotlib Other sql R )
[]( How to write this page
‘‘‘ python code comment ・ Write all comments here, not in the text )
python
#python
df_used_method1_used_method2\
= df.copy()\
.used_method1()\
.used_method2()\
1. Description of the module itself
"""Write a description here"""
2. Library imports
import pandas as pd
3. Constant definitions
START_DATE = 20200527
4. Definitions of the functions used in main()
def func1(parameter1: str) -> pd.DataFrame:
5. Definition of main()
def main():
6. Entry point
if __name__ == "__main__":
    main()
def func1(parameter_path:str, parameter_date:int, parameter_df:pd.DataFrame) -> pd.DataFrame:
"""
Write a docstring
"""
#Needed when main() takes arguments
SQL
--sql
select
datamart1.column1
, datamart1.column2
, datamart2.column3
from
datamart1
left join
datamart2
on datamart1.column2 = datamart2.column2
where
datamart1.column1 > 10
;
select
column1
, column2
, sum(column3)
, avg(column4)
from
datamart1
group by
column1
, column2
;
SQL rules
#python
--sql
#python
--sql
#python
#Extracted in series format
df.loc[:,'column']
df['column']
df.column
#Extract in data frame format
df.loc[:,['column1','column2']]
df[['column1','column2']]
--sql
select
column1
from
datamart
;
select
*
from
datamart
;
#python
df.head(10)
--sql
select top(10)
column1
from
datamart
;
select
column1
from
datamart
limit
10
;
#python
df.drop_duplicates()
--sql
select distinct
*
from
datamart
;
#python
df_assign\
= df.copy()\
.assign(column = 10)
df['column'] = 10
--sql
select
10 as column1
from
dataframe
;
#python
df_query_loc\
= df.copy()\
.query('column1 == 10')\
.loc[:,['column1','column2']]
df.loc[df.column1 == 10, ['column1','column2']]
--sql
select
column1
from
datamart1
where
column1 = 10
;
select
column1
, count(column1)
from
datamart1
group by
column1
having
count(column1) = 10
;
#python
df.rename(columns = {'column1':'new_column1','column2':'new_column2'})
pd.DataFrame(df.values, columns = ['new_column1','new_column2'])
df.columns = ['new_column1','new_column2']
df.columns = 'new_' + df.columns
--sql
select
column1 as new_column1
from
datamart1
;
#python
pd.merge(
df1,
df2,
on = ['column1','column2'],
how = 'left')
pd.merge(
df1,
df2,
left_on = ['column1','column2'],
right_on = ['column3','column4'],
how = 'left')
df1.merge(
df2,
on = 'column1',
how = 'left')
--sql
select
datamart1.column1
, datamart1.column2
, datamart2.column3
from
datamart1
left join
datamart2
on datamart1.column2 = datamart2.column2
#python_pandas
#Vertical
pd.concat([df1,df2],axis=0)
df1.append(df2)
#side
pd.concat([df1,df2],axis=1)
#python_numpy
#Vertical
np.concatenate([nparray1,nparray2], axis=0)
#side
np.concatenate([nparray1,nparray2], axis=1)
--sql
select
column1
from
datamart1
union -- union all / intersect / except
select
column2
from
datamart2
#python
len(df)
--sql
select
count(*)
from
datamart1
;
#python
#Shape check
np.array().shape
#python
df.sort_values()
--sql
select
*
from
datamart1
order by
column1
#python
--sql
#python
#Generate nparray with all the same values
#Example, 3D
np.tile(Numbers, (n, n, n))
[i for i in range(100)]
#When using an if statement
[i for i in range(100) if i > 50]
#python
df\
.groupby(['column1','column2'])\
.agg({'column3':'sum',
'column4':'mean'})\
.reset_index()
-- sql
select
column1
, column2
, sum(column3)
, avg(column4)
from
datamart1
group by
column1
, column2
;
df_tmp\
.groupby('column1')\
.agg({'column2':'nunique'})
df_tmp\
.groupby('column1')\
.agg({'column2':['sum', 'min', 'max']})
#python
#average
np.average(nparray, axis = n)
#For data of two or more dimensions, the result depends on the average dimension
np.array().mean()
#division
np.divide(nparray1, nparray2)
#Get in list format
str_data.split('Specific string')
str_data[-n]
str_data[:-n]
str_data.find('The string to look for')
#find returns the index of the substring if it is present, otherwise -1
#python
import cv2
cv2.imread('filename.png')
#python
import PIL
PIL.Image.open('filename.png')
#python
cv2.imshow("image_name", cv2.imread('filename.png'))
#"image_name" is an arbitrary window name
#python
PIL.Image.open('filename.png').show()
#python
#Image data np conversion
image = PIL.Image.open('filename.png')
np.array(image)
#python
np.array(image)[Top pixel:Lower pixel,Left pixel:Right pixel]
#python
#Add a square
plt.figure()
plt.grid(False)
cv2.rectangle(np.array(image), (Right pixel,Top pixel), (Left pixel,Lower pixel), (255, 255, 255), 4)
plt.imshow(np.array(image),cmap='gray',vmin=0,vmax=65555)
signal.convolve2d
#Change the number of images to extract with the last number
Image.open('~.tif').seek(0)
#python
plt.figure(figsize = (6,4))
#python
plt.xlim([left, right])
plt.ylim([bottom, top])
#python
aspect = (ax.get_xlim()[1] - ax.get_xlim()[0]) / (ax.get_ylim()[1] - ax.get_ylim()[0])
ax.set_aspect(aspect)
#python
fig.patch.set_alpha(0)
#Only fig is supported
#python
#Do not display grid lines
#python
#Use 1, subplot,
plt.subplot(Vertical,side,1)
plt.plot(~)
plt.subplot(Vertical,side,2)
plt.plot(~)
↓
plt.subplot(Vertical,side,Vertical×side)
plt.show(~)
#2、add_Use subplot,
ax1 = plt.figure().add_subplot(Vertical,side,1)
ax1 = plot(~)
↓
plt.show()
#3, use subplots,
fig, axes = plt.subplots(~, figsize=(6,4)) #fig, is not used
axes[1].plot
#python
#Process using the library
#python
import time
#Beginning of processing
time_start = time.time()
#End of processing
time_end = time.time()
print(time_end-time_start)
import datetime
from dateutil.relativedelta import relativedelta
date_datetime = datetime.datetime.strptime('2020-05-24', '%Y-%m-%d')
import datetime
from dateutil.relativedelta import relativedelta
datetime.datetime.strftime(date_datetime, '%Y-%m-%d')
date_datetime + datetime.timedelta(days=7)
date_datetime + relativedelta(weeks = 1)
datetime.weekday(date_datetime)
#python
#Read the function from a file under the "src" directory
import sys
sys.path.append('src/')
from filename import func_name
#python
import pathlib
filepath = r'path_file name.csv'
read_file = pathlib.WindowsPath()
import glob
glob.glob('path/*')
import os
if not os.path.exists(path_name):
    os.makedirs(path_name)
#python_pandas
#Read df
pd.DataFrame(data, columns=['column1','column2'])
pd.read_csv('filename')
#python_numpy
#Loading npz
np.load(filename)
#python_pandas
#Read df
pd.DataFrame().to_csv('filename', index=False)
#python_numpy
#Save npz
np.savez(pathname, filename1 = np.array('1'), filename2 = np.array('2'))
#python_matplotlib
#Save image
plt.savefig('filename')
#python_sklearn
#Save model
import pickle
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(x, y)
with open('filename.pickle', mode='wb') as f:
    pickle.dump(reg, f)
#zip
for i_column1, i_column2 in zip(df_tmp.column1, df_tmp.column2):
#itertools
for i,j in
permutations(i,j):permutation
combinations(i,j):combination
product(i,j):Cartesian product
for i, value in enumerate(df_tmp.column):
#def, use
def func_name(parameter1, parameter2):
processing
return Return value
#Use lambda,
lambda parameter:Return value formula(parameter)
#Use lambda,_Expressed in one line
(lambda parameter:Return value formula(parameter))(parameter_instance)