--NumPy's ndarray lets you compute on entire multidimensional arrays at once, at high speed.
--Example of ndarray
import numpy as np
# Basic ndarray demo: create a random 2-D array and apply element-wise operations.
# The printed values below are sample output; they differ on every run because
# the data is random.
data = np.random.randn(2,3) # Random numbers in 2 rows and 3 columns
print(data)
# [[-0.4440664 -0.07889544 -0.84781375]
# [ 0.59333292 -0.03008522 1.54106015]]
print(data * 10) # Multiply every element by 10
# [[-4.44066398 -0.78895438 -8.47813747]
# [ 5.93332925 -0.3008522 15.41060155]]
print(data + data) # Element-wise addition
# [[-0.8881328 -0.15779088 -1.69562749]
# [ 1.18666585 -0.06017044 3.08212031]]
print(data.shape) # Number of elements along each axis: (rows, columns)
# (2, 3)
print(data.dtype) # Element type
# float64
--np.array(list) generates an ndarray from a list.
--np.zeros(10) or np.zeros((3, 6)) creates an ndarray with all elements 0.
--Similarly, np.ones() creates an ndarray with all elements 1.
--np.full() creates an ndarray filled with the specified value.
--np.arange(10) generates an ndarray whose elements are 0 to 9 in order.
import numpy as np
# Build a 2-D ndarray from a nested Python list and inspect its dimensions.
data = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr = np.array(data) # Generate an ndarray from the nested list
print(arr)
# [[1 2 3 4]
# [5 6 7 8]]
print(arr.ndim) # Number of dimensions
# 2
print(arr.shape) # Number of elements along each axis: (rows, columns)
# (2, 4)
import numpy as np
# Demonstrate dtype conversion (casting) with ndarray.astype().
arr1 = np.array([-3.7, -1.2, 0.5, 4.5])
print(arr1)
# [-3.7 -1.2 0.5 4.5]
arr2 = arr1.astype(np.int32)  # Cast float -> int32; the fractional part is dropped
print(arr2)
# [-3 -1 0 4]
# np.bytes_ is used instead of np.string_: the np.string_ alias was removed in
# NumPy 2.0, while np.bytes_ names the same byte-string type on old and new releases.
arr3 = np.array(['-3.7', '-1.2', '0.5', '4.5'], dtype=np.bytes_)
print(arr3)
# [b'-3.7' b'-1.2' b'0.5' b'4.5']
print(arr3.dtype)
# |S4
arr4 = arr3.astype(np.float64)  # Cast byte strings holding numbers -> float64
print(arr4)
# [-3.7 -1.2 0.5 4.5]
--Arithmetic between ndarrays of the same shape is applied element-wise, position by position.
--An operation between ndarrays of different shapes is called broadcasting.
import numpy as np
# Element-wise arithmetic and comparison between ndarrays (and with scalars).
arr1 = np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])
print(arr1 ** 2)  # Square every element
# [[ 1. 4. 9. 16.]
# [25. 36. 49. 64.]]
print(arr1 - arr1)  # Subtract elements at the same position
# [[0. 0. 0. 0.]
# [0. 0. 0. 0.]]
print(1 / arr1)  # The scalar 1 is divided by every element
# [[1. 0.5 0.33333333 0.25 ]
# [0.2 0.16666667 0.14285714 0.125 ]]
arr2 = np.array([[0., 4., 1., 5.], [3., 9., 4., 9.]])
print(arr1 < arr2)  # Element-wise comparison yields a boolean array
# [[False True False True]
# [False True False True]]
--Indexing and slicing cut out a part of the data.
--You can assign a scalar to a slice, and the value is propagated to the whole slice (broadcast).
--Slices are views, not copies, so modifying a slice modifies the original array.
import numpy as np
# One-dimensional slicing: scalar assignment to a slice, and slices as views.
arr1 = np.arange(10)
print(arr1)
# [0 1 2 3 4 5 6 7 8 9]
print(arr1[5:8])  # Elements at indices 5, 6 and 7
# [5 6 7]
arr1[5:8] = 12  # The scalar is written to every element of the slice
print(arr1)
# [ 0 1 2 3 4 12 12 12 8 9]
arr_slice = arr1[5:8]  # A view onto arr1, not a copy
arr_slice[1] = 12345  # Writing through the view ...
print(arr_slice)
# [ 12 12345 12]
print(arr1)  # ... also changes the original array
# [ 0 1 2 3 4 12 12345 12 8 9]
--Two-dimensional slice
# Two-dimensional slicing: the first index selects rows, the second selects columns.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) # 2-D array
print(arr2d)
# [[1 2 3]
# [4 5 6]
# [7 8 9]]
print(arr2d[:2]) # Take rows 0 and 1
# [[1 2 3]
# [4 5 6]]
print(arr2d[:2, 1:]) # Take rows 0 and 1, columns 1 and 2
# [[2 3]
# [5 6]]
print(arr2d[1, :2]) # Take row 1, columns 0 and 1
# [4 5]
print(arr2d[:, :1]) # Take all rows, column 0 only (the result stays two-dimensional)
# [[1]
# [4]
# [7]]
arr2d[:2, 1:] = 0 # Assign 0 to every element in that range
print(arr2d)
# [[1 0 0]
# [4 0 0]
# [7 8 9]]
--A boolean array with 7 elements can be used as a condition to extract rows of a two-dimensional array of shape (7, 4).
--The length of the boolean array must match the number of elements on the axis of the array being indexed.
import numpy as np
# Boolean indexing: select and overwrite rows of `data` using conditions on `names`.
# The printed values below are sample output; the random data differs on every run.
names = np.array(['Yamada', 'Suzuki', 'Sato', 'Yamada', 'Tanaka', 'Tanaka', 'Sato'])
data = np.random.randn(7, 4)  # Random array with 7 rows and 4 columns
print(data)
# [[-0.92866442 -0.81744986 1.11821763 -0.55440628]
# [-0.09511771 0.99145963 0.38475434 0.59748055]
# [ 0.0444708 -0.00381292 0.97888419 1.242504 ]
# [ 0.89214068 -1.0411466 0.90850611 -2.02933442]
# [ 0.78789041 -0.84593788 -0.5624772 0.32488453]
# [ 0.50153002 -0.25411512 0.30855623 -1.31825153]
# [-0.6596584 1.53735231 -0.37044833 1.93782111]]
print(names == 'Yamada')  # True at indices 0 and 3, where the name matches 'Yamada'
# [ True False False True False False False]
print(data[names == 'Yamada'])  # Take rows 0 and 3 out of data
# [[-0.92866442 -0.81744986 1.11821763 -0.55440628]
# [ 0.89214068 -1.0411466 0.90850611 -2.02933442]]
mask = (names == 'Yamada') | (names == 'Sato')  # Conditions can be combined with | (or)
print(data[mask])  # Reuse the mask instead of repeating the condition
# [[-0.92866442 -0.81744986 1.11821763 -0.55440628]
# [ 0.0444708 -0.00381292 0.97888419 1.242504 ]
# [ 0.89214068 -1.0411466 0.90850611 -2.02933442]
# [-0.6596584 1.53735231 -0.37044833 1.93782111]]
data[names == 'Yamada'] = 0  # Set every element of rows 0 and 3 to 0
print(data)
# [[ 0. 0. 0. 0. ]
# [-0.09511771 0.99145963 0.38475434 0.59748055]
# [ 0.0444708 -0.00381292 0.97888419 1.242504 ]
# [ 0. 0. 0. 0. ]
# [ 0.78789041 -0.84593788 -0.5624772 0.32488453]
# [ 0.50153002 -0.25411512 0.30855623 -1.31825153]
# [-0.6596584 1.53735231 -0.37044833 1.93782111]]
--Fancy indexing is indexing with an integer array.
--Unlike slicing, fancy indexing always returns a copy of the original data.
import numpy as np
# Fancy indexing: select rows, or individual elements, with lists of integers.
arr = np.arange(32).reshape(8, 4) # Create an array with 8 rows and 4 columns
print(arr)
# [[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]
# [12 13 14 15]
# [16 17 18 19]
# [20 21 22 23]
# [24 25 26 27]
# [28 29 30 31]]
print(arr[[0, 4, 5, -1]]) # Return rows 0, 4, 5 and -1 (the last row), in that order
# [[ 0 1 2 3]
# [16 17 18 19]
# [20 21 22 23]
# [28 29 30 31]]
print(arr[[1, 5, 7, 2], [0, 3, 1, 2]]) # Return the elements at (1,0), (5,3), (7,1), (2,2)
# [ 4 23 29 10]
--Transposing an ndarray returns a special view that rearranges the original matrix; it does not make a copy.
--There are two ways to transpose: call the transpose function, or refer to T, which is one of the attributes of ndarray.
import numpy as np
# Transposition: the .T attribute for 2-D arrays and transpose() with an axis order.
arr1 = np.arange(15).reshape(3, 5) # Create an array with 3 rows and 5 columns
print(arr1)
# [[ 0 1 2 3 4]
# [ 5 6 7 8 9]
# [10 11 12 13 14]]
print(arr1.T) # Rows and columns are swapped
# [[ 0 5 10]
# [ 1 6 11]
# [ 2 7 12]
# [ 3 8 13]
# [ 4 9 14]]
arr2 = np.arange(24).reshape((2, 3, 4)) # Create an array of shape (2, 3, 4)
print(arr2)
# [[[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]]
#
# [[12 13 14 15]
# [16 17 18 19]
# [20 21 22 23]]]
print(arr2.transpose((1, 0, 2))) # Swap axes 0 and 1: shape (2, 3, 4) becomes (3, 2, 4)
# [[[ 0 1 2 3]
# [12 13 14 15]]
#
# [[ 4 5 6 7]
# [16 17 18 19]]
#
# [[ 8 9 10 11]
# [20 21 22 23]]]
--A universal function (ufunc) is a function that performs an element-wise operation on an ndarray and returns the result for each element.
--Unary ufuncs take one ndarray.
--abs, fabs, sqrt, square, exp, log, log10, log2, log1p, sign, ceil, floor, rint, modf, isnan, isfinite, isinf, cos, sin, tan, etc.
--Binary ufuncs take two ndarrays.
--add, subtract, multiply, divide, floor_divide, power, maximum, fmax, minimum, fmin, mod, copysign, greater, less, equal, logical_and, etc.
--Display the result of sqrt(x^2 + y^2) for grid point data.
--np.meshgrid takes two one-dimensional arrays and enumerates all combinations of their elements.
import numpy as np
import matplotlib.pyplot as plt
# Evaluate sqrt(x^2 + y^2) on a square grid and show the result as a grayscale image.
points = np.arange(-5, 5, 0.01)  # 1000 grid points from -5 up to (not including) 5
xs, ys = np.meshgrid(points, points)  # Coordinate matrices covering every (x, y) combination
z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# The title now matches the plotted expression, which uses both xs and ys
# (it previously read "x^2 + x^2").
plt.title("Image plot of $\\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()
--np.where() returns the second argument where the first argument is True, and the third argument otherwise.
--Each argument can be an array (or list) or a scalar value.
import numpy as np
import matplotlib.pyplot as plt
# Conditional selection: take the value from xarr where cond is True, from yarr otherwise.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# List comprehension (slow: the loop runs in Python and the result is a list, not an ndarray)
result1 = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
# Vectorized equivalent using np.where
result2 = np.where(cond, xarr, yarr)
print(result2)
# [1.1 2.2 1.3 1.4 2.5]
--Specify which axis to process with `axis`.
--`arr.sum()`, `arr.mean()`: sum and average over the whole array.
--`arr.sum(axis=1)`, `arr.mean(axis=0)`: in two dimensions, the sum of each row and the average of each column, respectively.
--When `arr` is an np.array, `(arr > 0).sum()` is the number of positive elements (True values).
--When `bools` is a boolean array:
--`bools.any()` is True if at least one element is True.
--`bools.all()` is True if every element is True.
--When `arr` is an np.array, `arr.sort()` sorts the array itself in place (destructive).
--For multidimensional arrays, specify the axis to sort along, such as `arr.sort(1)`.
--`np.unique(arr)` removes duplicates and returns the sorted result.
--`np.in1d(arr, [2, 3, 6])` returns a boolean array of the same length as arr: True where the element of arr is one of 2, 3, 6, and False otherwise. (Newer NumPy versions recommend `np.isin` instead.)
--pandas is usually used to read text files and tabular data.
--Here we focus on the binary format.
--To save `arr`, use `np.save('some_array', arr)`, which saves it uncompressed. The extension `.npy` is added automatically.
--Similarly, read it back with `arr = np.load('some_array.npy')`.
--Save multiple arrays uncompressed with `np.savez('array_archive.npz', a=arr1, b=arr2)`. `a` and `b` are the keys of a dictionary-like object.
--Read with `arch = np.load('array_archive.npz')`. (It is np.load, not loadz.)
--Extract `arr1` with `arch['a']`. Extract `arr2` with `arch['b']`.
--Compress and save with `np.savez_compressed('arrays_compressed.npz', a=arr1, b=arr2)`. Reading is the same as above.
--To calculate the inner product (matrix product), use dot.
import numpy as np
# Matrix product of a 2x3 array and a 3x2 array, written three equivalent ways.
x = np.array([[1., 2., 3.], [4., 5., 6., ]])
y = np.array([[6., 23.], [-1., 7.], [8., 9.]])
print(x.dot(y)) # Matrix (inner) product via the ndarray method
# [[ 28. 64.]
# [ 67. 181.]]
print(np.dot(x, y)) # Same product via the module-level function
# [[ 28. 64.]
# [ 67. 181.]]
print(x @ y) # The @ operator can also be used
# [[ 28. 64.]
# [ 67. 181.]]
--np.random.normal(size=(4, 4)) generates a 4x4 matrix of random numbers drawn from a normal distribution.
--randint returns random integers within the given integer range.
import numpy as np
import matplotlib.pyplot as plt
# Draw a 4x4 array of normally distributed random numbers.
# The printed values are sample output; they differ on every run.
samples = np.random.normal(size=(4,4))
print(samples)
# [[ 1.45907882 1.78873804 -0.52480754 0.20770224]
# [-1.55474475 -1.67045483 -1.3589208 1.25584424]
# [ 0.90562937 -1.50742692 1.48579887 1.48081589]
# [ 1.34785606 -0.20653648 0.13308665 -0.24455952]]
--Random walk: simulate 5000 walks at once and, for each, find the index of the step where it first reached 30 or -30.
import numpy as np
import matplotlib.pyplot as plt
# Simulate many random walks at once: one walk per row, one step per column.
# The printed values below are sample output; they differ on every run.
nwalks = 5000
nsteps = 1000
# Randomly generate 0 or 1 for every step of every walk
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
print(draws)
# [[1 1 1 ... 0 1 1]
# [1 1 0 ... 0 0 1]
# [0 0 1 ... 1 1 0]
# ...
# [0 0 1 ... 0 0 0]
# [0 0 1 ... 1 0 0]
# [1 0 1 ... 1 1 0]]
# Map the draws to steps: 0 becomes -1, 1 stays +1
steps = np.where(draws > 0, 1, -1)
print(steps)
# [[ 1 1 1 ... -1 1 1]
# [ 1 1 -1 ... -1 -1 1]
# [-1 -1 1 ... 1 1 -1]
# ...
# [-1 -1 1 ... -1 -1 -1]
# [-1 -1 1 ... 1 -1 -1]
# [ 1 -1 1 ... 1 1 -1]]
# Cumulative sum along axis 1 (across the steps) gives the position after each step
walks = steps.cumsum(1)
print(walks)
# [[ 1 2 3 ... 10 11 12]
# [ 1 2 1 ... -44 -45 -44]
# [ -1 -2 -1 ... -28 -27 -28]
# ...
# [ -1 -2 -1 ... 6 5 4]
# [ -1 -2 -1 ... -6 -7 -8]
# [ 1 0 1 ... 28 29 28]]
print(walks.max())
# 128
print(walks.min())
# -123
# For each walk (row): True if it ever reached 30 or -30, False otherwise
hits30 = (np.abs(walks) >= 30).any(1)
print(hits30)
# [False False True ... True True True]
# Number of walks that reached 30 or -30
print(hits30.sum())
# 3377
# Keep only the walks that reached 30 or -30 and find the first step index where
# that happened (argmax on a boolean array returns the index of the first True)
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
print(crossing_times)
# [671 313 161 ... 307 289 89]
# Average first-crossing step index
print(crossing_times.mean())
# 500.09327805744744
# Plot the first walk that attains the overall maximum and the first that attains
# the overall minimum
max_row = walks[(walks == walks.max()).any(1)][0]
min_row = walks[(walks == walks.min()).any(1)][0]
plt.plot(max_row)
plt.plot(min_row)
plt.show()
--Maximum and minimum graph of random walk
reference
--Introduction to Data Analysis with Python 2nd Edition
Recommended Posts