http://rest-term.com/archives/2999/ et http://algorithm.joho.info/programming/python-numpy-sample-code/ proposent déjà de bons résumés, il suffirait donc de les consulter ; je rédige néanmoins cette note pour mieux mémoriser le contenu. (Une version en anglais est également donnée par endroits, pour diverses raisons.)
Numpy
>>> import numpy as np
>>> x = np.array([1, 2, 3])
>>> x
array([1, 2, 3])
>>> y = np.array([[1, 2, 3], [4, 5, 6]])
>>> y
array([[1, 2, 3],
[4, 5, 6]])
>>> y.shape
(2, 3)
>>> m = np.arange(0, 30, 2)
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> np.linspace(1, 4, 9)
array([ 1. , 1.375, 1.75 , 2.125, 2.5 , 2.875, 3.25 , 3.625, 4. ])
>>> m = np.arange(0, 30, 2)
>>> m.reshape(3, 5)
array([[ 0, 2, 4, 6, 8],
[10, 12, 14, 16, 18],
[20, 22, 24, 26, 28]])
>>> m
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28])
>>> m = np.arange(0, 30, 2)
>>> m.resize(3, 3)
>>> m
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16]])
>>> np.ones((4, 3))
array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
>>>
>>> np.ones((2, 3), int)
array([[1, 1, 1],
[1, 1, 1]])
>>> np.zeros((4, 3))
array([[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.],
[ 0., 0., 0.]])
>>> np.eye(5)
array([[ 1., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0.],
[ 0., 0., 1., 0., 0.],
[ 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 1.]])
>>> np.diag([[ 1, 3, 5], [ 7, 9, 11], [13, 15, 17]])
array([ 1, 9, 17])
>>> np.array([1, 2, 3] * 3)
array([1, 2, 3, 1, 2, 3, 1, 2, 3])
>>> np.repeat([1, 2, 3], 3)
array([1, 1, 1, 2, 2, 2, 3, 3, 3])
>>> x = np.array([[1, 2, 3]])
>>> y = np.array([[4, 5, 6], [7, 8, 9]])
>>> np.vstack([x, y])
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
>>> x = np.array([[1, 2], [3, 4]])
>>> y = np.array([[5, 6, 7], [8, 9, 0]])
>>> np.hstack([x, y])
array([[1, 2, 5, 6, 7],
[3, 4, 8, 9, 0]])
>>> np.random.randint(0, 10, (4, 3))
array([[6, 7, 8],
[5, 4, 9],
[5, 4, 9],
[5, 9, 2]])
>>> np.random.randint(0, 10, (4, 3))
array([[5, 7, 5],
[8, 4, 3],
[2, 9, 6],
[7, 9, 5]])
>>> x = np.array([[1, 2, 3], [4, 5, 6]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> y = np.array([[7, 8, 9], [10, 11, 12]])
>>> y
array([[ 7, 8, 9],
[10, 11, 12]])
>>> x + y
array([[ 8, 10, 12],
[14, 16, 18]])
>>> x + x + y
array([[ 9, 12, 15],
[18, 21, 24]])
>>> x * y
array([[ 7, 16, 27],
[40, 55, 72]])
>>> x ** 2
array([[ 1, 4, 9],
[16, 25, 36]])
>>> x ** 3
array([[ 1, 8, 27],
[ 64, 125, 216]])
>>> x.dot(y)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)
>>>
>>> z = np.array([[1], [2], [3]])
>>> z
array([[1],
[2],
[3]])
>>> x.dot(z)
array([[14],
[32]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.T
array([[1, 4],
[2, 5],
[3, 6]])
>>> x.T.T
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> z
array([[1],
[2],
[3]])
>>> z.T
array([[1, 2, 3]])
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>>
>>> x.dtype
dtype('int64')
>>>
>>> x.astype('f')
array([[ 1., 2., 3.],
[ 4., 5., 6.]], dtype=float32)
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.max()
6
>>> np.max(x)
6
>>> x.min()
1
>>> np.min(x)
1
>>> x.sum()
21
>>> np.sum(x)
21
>>> x.mean()
3.5
>>> np.mean(x)
3.5
>>> np.average(x)
3.5
>>> x.std()
1.707825127659933
>>> np.std(x)
1.707825127659933
>>> x
array([[1, 2, 3],
[4, 5, 6]])
>>> x.argmax()
5
>>> x.argmin()
0
>>>
>>> y = np.array([[1, 2, 3], [1, 2, 3]])
>>> y
array([[1, 2, 3],
[1, 2, 3]])
>>> y.argmax()
2
>>> y.argmin()
0
>>> s = np.arange(13) ** 2
>>> s
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100, 121, 144])
>>> s[0]
0
>>> s[11]
121
>>> s[0:3]
array([0, 1, 4])
>>> s[0], s[11], s[0:3]
(0, 121, array([0, 1, 4]))
>>> s[-4:]
array([ 81, 100, 121, 144])
>>> s[-4:-1]
array([ 81, 100, 121])
>>> s[-4::-1]
array([81, 64, 49, 36, 25, 16, 9, 4, 1, 0])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r[2, 2]
14
>>> r[3, 3:6]
array([21, 22, 23])
>>> r[3, 3:7]
array([21, 22, 23])
>>> r[:2, :-1]
array([[ 0, 1, 2, 3, 4],
[ 6, 7, 8, 9, 10]])
>>> r[:-1, ::2]
array([[ 0, 2, 4],
[ 6, 8, 10],
[12, 14, 16],
[18, 20, 22],
[24, 26, 28]])
>>> r[r > 30]
array([31, 32, 33, 34, 35])
>>> r[r > 20]
array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])
>>> r[r > 20] = 20
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20],
[20, 20, 20, 20, 20, 20]])
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4]
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, -1, -1, 16, 17],
[18, 19, -1, -1, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
Avec r2 = r[2:4, 2:4], r2 est une vue (une référence) sur les données de r ; modifier r2 revient donc à modifier r.
>>> r = np.arange(36)
>>> r.resize((6, 6))
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
>>>
>>> r2 = r[2:4, 2:4].copy()
>>> r2
array([[14, 15],
[20, 21]])
>>>
>>> r2[:] = -1
>>> r2
array([[-1, -1],
[-1, -1]])
>>> r
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23],
[24, 25, 26, 27, 28, 29],
[30, 31, 32, 33, 34, 35]])
Avec r2 = r[2:4, 2:4].copy(), r2 reçoit un nouveau tableau copié depuis r ; r2 et r sont donc des objets distincts. La modification de r2 n'affecte pas r.
>>> r = np.random.randint(0, 10, (4, 3))
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>>
>>> for row in r:
... print(row)
...
[1 6 3]
[3 6 0]
[4 9 3]
[5 9 3]
>>>
>>> for i, row in enumerate(r):
... print(i, ' : ', row)
...
0 : [1 6 3]
1 : [3 6 0]
2 : [4 9 3]
3 : [5 9 3]
>>> r
array([[1, 6, 3],
[3, 6, 0],
[4, 9, 3],
[5, 9, 3]])
>>> r2 = r ** 2
>>> r2
array([[ 1, 36, 9],
[ 9, 36, 0],
[16, 81, 9],
[25, 81, 9]])
>>> for x, y, z in zip(r, r2, r):
... print(x, y, z)
...
[1 6 3] [ 1 36 9] [1 6 3]
[3 6 0] [ 9 36 0] [3 6 0]
[4 9 3] [16 81 9] [4 9 3]
[5 9 3] [25 81 9] [5 9 3]
Pandas
Series
>>> s = pd.Series([168, 180, 174, 190, 170, 185, 179, 181, 175, 169, 182, 177, 180, 171])
>>>
>>> pd.cut(s, 3)
0 (167.978, 175.333]
1 (175.333, 182.667]
2 (167.978, 175.333]
3 (182.667, 190]
4 (167.978, 175.333]
5 (182.667, 190]
6 (175.333, 182.667]
7 (175.333, 182.667]
8 (167.978, 175.333]
9 (167.978, 175.333]
10 (175.333, 182.667]
11 (175.333, 182.667]
12 (175.333, 182.667]
13 (167.978, 175.333]
dtype: category
Categories (3, object): [(167.978, 175.333] < (175.333, 182.667] < (182.667, 190]]
>>>
>>> pd.cut(s, 3, labels=['Small', 'Medium', 'Large'])
0 Small
1 Medium
2 Small
3 Large
4 Small
5 Large
6 Medium
7 Medium
8 Small
9 Small
10 Medium
11 Medium
12 Medium
13 Small
dtype: category
Categories (3, object): [Small < Medium < Large]
Dataframe
Le tableau des médailles des Jeux Olympiques de tous les temps est utilisé comme exemple de données.
>>> df[df['Gold'] == max(df['Gold'])].index[0]
'United States'
>>> df[(df['Gold'] > 0) & (df['Gold.1'] > 0)]
Les données suivantes servent d'exemple :
>>> import pandas as pd
>>> staff_df = pd.DataFrame([{'Name': 'Kelly', 'Role': 'Director of HR'},
... {'Name': 'Sally', 'Role': 'Course liasion'},
... {'Name': 'James', 'Role': 'Grader'}])
>>> staff_df = staff_df.set_index('Name')
>>> student_df = pd.DataFrame([{'Name': 'James', 'School': 'Business'},
... {'Name': 'Mike', 'School': 'Law'},
... {'Name': 'Sally', 'School': 'Engineering'}])
>>> student_df = student_df.set_index('Name')
>>>
>>> staff_df
Role
Name
Kelly Director of HR
Sally Course liasion
James Grader
>>>
>>> student_df
School
Name
James Business
Mike Law
Sally Engineering
Obtenir les données de toutes les personnes qui sont étudiantes ou membres du personnel (jointure externe, how='outer') :
>>> pd.merge(staff_df, student_df, how='outer', left_index=True, right_index=True)
Role School
Name
James Grader Business
Kelly Director of HR NaN
Mike NaN Law
Sally Course liasion Engineering
Obtenir les données des personnes qui sont à la fois étudiantes et membres du personnel (jointure interne, how='inner') :
>>> pd.merge(staff_df, student_df, how='inner', left_index=True, right_index=True)
Role School
Name
James Grader Business
Sally Course liasion Engineering
Obtenir les données de tous les membres du personnel ; si un membre du personnel est aussi étudiant, récupérer également les données de son école (jointure gauche, how='left') :
>>> pd.merge(staff_df, student_df, how='left', left_index=True, right_index=True)
Role School
Name
Kelly Director of HR NaN
Sally Course liasion Engineering
James Grader Business
Obtenir les données de tous les étudiants ; si un étudiant fait aussi partie du personnel, récupérer également les données de son rôle (jointure droite, how='right') :
>>> pd.merge(staff_df, student_df, how='right', left_index=True, right_index=True)
Role School
Name
James Grader Business
Mike NaN Law
Sally Course liasion Engineering
>>> products = pd.DataFrame([{'Product ID': 4109, 'Price': 5.0, 'Product': 'Suchi Roll'},
... {'Product ID': 1412, 'Price': 0.5, 'Product': 'Egg'},
... {'Product ID': 8931, 'Price': 1.5, 'Product': 'Bagel'}])
>>> products = products.set_index('Product ID')
>>> products
Price Product
Product ID
4109 5.0 Suchi Roll
1412 0.5 Egg
8931 1.5 Bagel
>>> invoices = pd.DataFrame([{'Customer': 'Ali', 'Product ID': 4109, 'Quantity': 1},
... {'Customer': 'Eric', 'Product ID': 1412, 'Quantity': 12},
... {'Customer': 'Anda', 'Product ID': 8931, 'Quantity': 6},
... {'Customer': 'Sam', 'Product ID': 4109, 'Quantity': 2}])
>>> invoices
Customer Product ID Quantity
0 Ali 4109 1
1 Eric 1412 12
2 Anda 8931 6
3 Sam 4109 2
>>>
>>> pd.merge(products, invoices, how='right', left_index=True, right_on='Product ID')
Price Product Customer Product ID Quantity
0 5.0 Suchi Roll Ali 4109 1
1 0.5 Egg Eric 1412 12
2 1.5 Bagel Anda 8931 6
3 5.0 Suchi Roll Sam 4109 2
>>> staff_df = pd.DataFrame([{'First Name': 'Kelly', 'Last Name': 'Desjardins', 'Role': 'Director of HR'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'Role': 'Course liasion'},
... {'First Name': 'James', 'Last Name': 'Wilde', 'Role': 'Grader'}])
>>> student_df = pd.DataFrame([{'First Name': 'James', 'Last Name': 'Hammond', 'School': 'Business'},
... {'First Name': 'Mike', 'Last Name': 'Smith', 'School': 'Law'},
... {'First Name': 'Sally', 'Last Name': 'Brooks', 'School': 'Engineering'}])
>>> staff_df
First Name Last Name Role
0 Kelly Desjardins Director of HR
1 Sally Brooks Course liasion
2 James Wilde Grader
>>> student_df
First Name Last Name School
0 James Hammond Business
1 Mike Smith Law
2 Sally Brooks Engineering
>>> pd.merge(staff_df, student_df, how='inner', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])
First Name Last Name Role School
0 Sally Brooks Course liasion Engineering
>>> df.groupby('A').agg('sum')
>>> df.groupby('A').agg({'B': sum})
Recommended Posts