distances = [1, 1.6, 3, 5, 10, 21.097, 42.195]
print(dist_in_mile)
[0.62137, 0.994192, 1.86411, 3.1068499999999997, 6.213699999999999, 13.10904289, 26.21870715]
# list comprehension: builds the complete converted list in one expression.
# Each element of `distances` is multiplied by 0.62137
# (presumably the kilometre-to-mile factor -- confirm against the lesson text).
dist_in_mile = [d * 0.62137 for d in distances]
print(dist_in_mile)
[0.62137, 0.994192, 1.86411, 3.1068499999999997, 6.213699999999999, 13.10904289, 26.21870715]
# generator: same per-element multiplication, but written as a generator
# expression, so values are only produced when something iterates over it.
dist_in_mile = (d * 0.62137 for d in distances)
# list() consumes the generator; a second list(dist_in_mile) would be empty.
print(list(dist_in_mile))
[0.62137, 0.994192, 1.86411, 3.1068499999999997, 6.213699999999999, 13.10904289, 26.21870715]
# lambda expression + map: map() applies the anonymous function to every
# element of `distances`; list() materialises the resulting iterator.
dist_in_mile = list(map(lambda d : d * 0.62137, distances))
print(dist_in_mile)
[0.62137, 0.994192, 1.86411, 3.1068499999999997, 6.213699999999999, 13.10904289, 26.21870715]
A = [
[1, 2],
[4, 5]
]
B = [
[4, 3],
[2, 1]
]
# C = ?
print(C)
[[8, 5], [26, 17]]
def get_mat_dot(A, B):
    """Return the matrix product of A and B as a new list of lists.

    Parameters
    ----------
    A : sequence of rows, shape (I, K)
        Left operand.
    B : sequence of rows, shape (K, J)
        Right operand.

    Returns
    -------
    list of list
        I rows of J entries where C[i][j] is the sum over k of
        A[i][k] * B[k][j].

    Raises
    ------
    ValueError
        If either matrix has no rows, if the rows of a matrix differ in
        length, or if the number of columns of A differs from the number
        of rows of B.
    """
    # Use len() instead of truthiness so array-like inputs are accepted too.
    if len(A) == 0 or len(B) == 0:
        raise ValueError("A and B must each contain at least one row")
    I = len(A)
    K_A = len(A[0])
    K_B = len(B)
    J = len(B[0])
    # Ragged input would otherwise be silently truncated or fail mid-loop.
    if any(len(row) != K_A for row in A) or any(len(row) != J for row in B):
        raise ValueError("all rows of a matrix must have the same length")
    if K_A != K_B:
        raise ValueError("shapes ({},{}) and ({},{}) not aligned: {} (dim 1) != {} (dim 0)".format(I, K_A, K_B, J, K_A, K_B))
    # Transpose B once so each output entry is a row-by-column dot product.
    columns = list(zip(*B))
    return [[sum(a * b for a, b in zip(row, col)) for col in columns] for row in A]
A = [
[1, 2],
[4, 5]
]
B = [
[4, 3],
[2, 1]
]
get_mat_dot(A, B)
[[8, 5], [26, 17]]
A = [
[1, 2],
[4, 5]
]
B = [
[4, 3],
[2, 1],
[4, 9]
]
get_mat_dot(A, B)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-12-1100e305aa2a> in <module> 9 ] 10 ---> 11 get_mat_dot(A, B) <ipython-input-10-1982f18a70b8> in get_mat_dot(A, B) 5 J = len(B[0]) 6 if K_A != K_B: ----> 7 raise ValueError("shapes ({},{}) and ({},{}) not aligned: {} (dim 1) != {} (dim 0)".format(I, K_A, K_B, J, K_A, K_B)) 8 C = [[0 for j in range(J)] for i in range(I)] 9 for i in range(I): ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)
import numpy as np
distances = [1, 1.6, 3, 5, 10, 21.097, 42.195]
# Convert the list to an ndarray so arithmetic applies to every element.
distances = np.array(distances)
# One vectorized multiplication replaces the loop / comprehension / map
# variants shown earlier in the notebook.
dist_in_mile = distances * 0.62137
print(dist_in_mile)
[ 0.62137 0.994192 1.86411 3.10685 6.2137 13.10904289 26.21870715]
import numpy as np
A = [
[1, 2],
[4, 5]
]
B = [
[4, 3],
[2, 1]
]
A = np.array(A)
B = np.array(B)
C = A.dot(B)
print(C)
[[ 8 5] [26 17]]
NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.
創建一種稱為 ndarray 的類別,彌補了原生 list 缺少的向量化運算(vectorization)功能
list 的差異

At the core of the NumPy package, is the ndarray object. This encapsulates n-dimensional arrays of homogeneous data types, with many operations being performed in compiled code for performance.
np.array() 函數由既有的 list 轉換而成
np.array() 函數由既有的 list 轉換而成
import numpy as np
homogeneous_list = [1, 2, 3, 4, 5]
print(type(homogeneous_list))
arr = np.array(homogeneous_list)
print(type(arr))
print(arr)
print(arr.dtype)
<class 'list'> <class 'numpy.ndarray'> [1 2 3 4 5] int64
dtype 參數指定資料型態
- int:可指定 8 到 64 位元的整數型態
- float:可指定 16 到 64 位元的浮點數型態
- bool:布林型態
import numpy as np
homogeneous_list = [1, 2, 3, 4, 5]
arr = np.array(homogeneous_list, dtype=int)
print(arr.dtype)
arr = np.array(homogeneous_list, dtype=float)
print(arr.dtype)
int64 float64
#arr = np.array(1, 2, 3, 4) # WRONG
arr = np.array([1, 2, 3, 4]) # RIGHT
- np.zeros(shape):創建指定外觀充滿 0 的數值陣列
- np.ones(shape):創建指定外觀充滿 1 的數值陣列
- np.full(shape, fill_value):創建指定外觀充滿 fill_value 的數值陣列
- np.arange(start, stop, step):創建從 start (包含)間隔 step 至 stop (不包含)的等差數列,使用方式同內建函數 range()
- np.linspace(start, stop, num):創建從 start (包含)至 stop (包含)的均勻切割為 num 個資料點的數值陣列
- np.random.random(size):創建指定外觀介於 0, 1 之間、並符合均勻分佈的數值陣列
- np.random.normal(loc, scale, size):創建指定外觀以 loc 為平均數、 scale 為標準差常態分佈的數值陣列
- np.random.randint(low, high, size):創建指定外觀於 low (包含)到 high (不包含)之間隨機抽樣之整數的數值陣列
np.zeros(shape)
np.ones(shape)
np.full(shape, fill_value)
import numpy as np
print(np.zeros(5, dtype=int))
print(np.ones((2, 2), dtype=float))
print(np.full((2, 2), 5566, dtype=int))
[0 0 0 0 0] [[1. 1.] [1. 1.]] [[5566 5566] [5566 5566]]
- np.arange(start, stop, step):創建從 start (包含)間隔 step 至 stop (不包含)的等差數列,使用方式同內建函數 range()
- np.linspace(start, stop, num):創建從 start (包含)至 stop (包含)的均勻切割為 num 個資料點的數值陣列
import numpy as np
print(np.arange(1, 10, 2))
print(np.linspace(1, 9, 5, dtype=int))
[1 3 5 7 9] [1 3 5 7 9]
np.random.random(size)
np.random.normal(loc, scale, size)
np.random.randint(low, high, size)
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
arr = np.random.random(10000)
fig = plt.figure()
ax = plt.axes()
ax.hist(arr)
plt.show()
arr = np.random.normal(0, 1, 10000)
fig = plt.figure()
ax = plt.axes()
ax.hist(arr)
plt.show()
print(np.random.randint(1, 7, size=6))
[1 2 1 5 1 6]
- arr.ndim:檢視 arr 有幾個維度
- arr.shape:檢視 arr 的外型
- arr.size:檢視 arr 的資料筆數,對一維陣列的意涵就像內建函數 len() 作用在 list 上一般
- arr.dtype:檢視 arr 中同質資料的型態
import numpy as np
zero_d = np.array(5566) # 零維陣列,純量
one_d = np.array([55, 66, 5566]) # 一維陣列
two_d = np.ones((3, 3), dtype=int) # 二維陣列
print("ndim:")
print(zero_d.ndim)
print(one_d.ndim)
print(two_d.ndim)
print("shape:")
print(zero_d.shape)
print(one_d.shape)
print(two_d.shape)
print("size:")
print(zero_d.size)
print(one_d.size)
print(two_d.size)
print("dtype:")
print(zero_d.dtype)
print(one_d.dtype)
print(two_d.dtype)
ndim: 0 1 2 shape: () (3,) (3, 3) size: 1 3 9 dtype: int64 int64 int64
- arr.itemsize:檢視 arr 中每一筆資料所使用的位元組(bytes)多寡
- arr.nbytes:檢視 arr 整個物件所使用的位元組(bytes)多寡,一般來說我們期待 arr.nbytes = arr.itemsize × arr.size
import numpy as np
arr = np.arange(1, 6)
print(arr.dtype) # 64 bits
print(arr.itemsize) # 8 bytes = 64 bits
print(arr.itemsize * arr.size)
print(arr.nbytes)
int64 8 40 40
從 ndarray 中取出單個資料值的方式與 list 相同,使用 arr[INDEX]
取值
import numpy as np
arr = np.array([55, 66, 56, 5566])
print("From start to stop:")
print(arr[0])
print(arr[1])
print(arr[2])
print(arr[arr.size - 1])
print("From stop to start:")
print(arr[-1])
print(arr[-2])
print(arr[-3])
print(arr[-arr.size])
From start to stop: 55 66 56 5566 From stop to start: 5566 56 66 55
[i, j, …] 的方式取出位於第 i 列(row)、第 j 欄(column)… 的資料
import numpy as np
np.random.seed(42)
arr = np.random.randint(1, 10, size=(3, 4))
print(arr)
print(arr[1, 1]) # 3 located at (1, 1)
print(arr[2, -3]) # 4 located at (2, -3)
[[7 4 8 5] [7 3 7 8] [5 4 8 8]] 3 4
import numpy as np
# Fixed seed so the sampled matrix is the same on every run.
np.random.seed(42)
arr = np.random.randint(1, 10, size=(3, 4))
print(arr)
# Overwrite a single element addressed by [row, column].
arr[2, 3] = 5566
print(arr)
# The array holds integers, so the printed result shows 55, not 55.66:
# the assigned value is converted to the array's dtype without any warning.
arr[2, 3] = 55.66
print(arr)
# Likewise a boolean is stored as an integer; False prints as 0.
arr[2, 3] = False
print(arr)
[[7 4 8 5] [7 3 7 8] [5 4 8 8]] [[ 7 4 8 5] [ 7 3 7 8] [ 5 4 8 5566]] [[ 7 4 8 5] [ 7 3 7 8] [ 5 4 8 55]] [[7 4 8 5] [7 3 7 8] [5 4 8 0]]
與 list 相同,使用 arr[start:stop:step]
取出陣列的片段
import numpy as np
arr = np.arange(10, 20)
print(arr[::]) # all defaults
print(arr[::2]) # step=2
print(arr[:5]) # stop=5, exclusive
print(arr[5:]) # start=5, inclusive
print(arr[::-1]) # step=-1, reverse
[10 11 12 13 14 15 16 17 18 19] [10 12 14 16 18] [10 11 12 13 14] [15 16 17 18 19] [19 18 17 16 15 14 13 12 11 10]
arr[start:stop:step] 的規則
import numpy as np
# Fixed seed so the sampled values (and therefore the indices below) are stable.
np.random.seed(0)
arr = np.random.randint(1, 100, size=(10))
# Despite the name, these are not odd-numbered positions: they are the
# positions at which this seeded sample holds odd values (45, 65, 37).
odd_indices = [0, 2, 8]
print(arr)
# Indexing with a list of positions picks exactly those elements, in order.
print(arr[odd_indices])
[45 48 65 68 68 10 84 22 37 88] [45 65 37]
import numpy as np
# Same seed as the list-of-indices example, so the sample is identical.
np.random.seed(0)
arr = np.random.randint(1, 100, size=(10))
# Element-wise comparison yields a boolean array of the same length as arr.
is_odd = arr % 2 == 1
print(arr)
print(is_odd)
# Indexing with the boolean array keeps only the elements where it is True,
# selecting the odd values without hard-coding their positions.
print(arr[is_odd])
[45 48 65 68 68 10 84 22 37 88] [ True False True False False False False False True False] [45 65 37]
- arr.reshape(m, n, ...):將數值陣列重塑成運算所需要的外觀
- arr.ravel():將外觀為 (m, n, …) 的數值陣列調整回一維
# reshape
import numpy as np
arr = np.arange(1, 10)
print(arr)
print(arr.shape)
print(arr.reshape(3, 3))
print(arr.reshape(3, 3).shape)
[1 2 3 4 5 6 7 8 9] (9,) [[1 2 3] [4 5 6] [7 8 9]] (3, 3)
# reshape conveniently: pass -1 for one dimension and let NumPy work it out
# from the number of elements and the other dimension.
import numpy as np
arr = np.arange(1, 10)
# 9 elements with 3 rows -> the -1 resolves to 3 columns.
print(arr.reshape(3, -1))
# 9 elements with 3 columns -> the -1 resolves to 3 rows.
print(arr.reshape(-1, 3))
[[1 2 3] [4 5 6] [7 8 9]] [[1 2 3] [4 5 6] [7 8 9]]
import numpy as np
arr = np.arange(1, 10).reshape(3, 3)
print(arr.shape)
print(arr.ndim)
print(arr.ravel().shape)
print(arr.ravel().ndim)
(3, 3) 2 (9,) 1
np.set_printoptions(threshold=...) 印出更多內容
print(np.arange(10000))
print(np.arange(10000).reshape(100,100))
[ 0 1 2 ... 9997 9998 9999] [[ 0 1 2 ... 97 98 99] [ 100 101 102 ... 197 198 199] [ 200 201 202 ... 297 298 299] ... [9700 9701 9702 ... 9797 9798 9799] [9800 9801 9802 ... 9897 9898 9899] [9900 9901 9902 ... 9997 9998 9999]]
# 還原預設值
#np.set_printoptions(edgeitems=3,infstr='inf',
# linewidth=75, nanstr='nan', precision=8,
# suppress=False, threshold=1000, formatter=None)
ndarray 有一個重要的預設特性稱為「不複製」,因此不論在切割或重新宣告的情境中都是創建陣列的 View,而非複製另一個陣列,這代表著對以 View 型式存在的子陣列(Sub-array)更新會改動到原始陣列
import numpy as np
arr = np.arange(1, 10)
# No copy is requested here; mat is just arr seen with a 3x3 shape.
mat = arr.reshape(3, 3)
# Writing through mat ...
mat[1, 1] = 5566
print(mat)
# ... also changes arr, as the printed output shows (5566 at position 4).
print(arr)
[[ 1 2 3] [ 4 5566 6] [ 7 8 9]] [ 1 2 3 4 5566 6 7 8 9]
import numpy as np
arr = np.arange(1, 10)
# Take an explicit copy first so later writes cannot reach arr.
mat = arr.copy()
mat = mat.reshape(3, 3)
mat[1, 1] = 5566
print(mat)
# arr is unchanged in the printed output: the copy owns separate data.
print(arr)
[[ 1 2 3] [ 4 5566 6] [ 7 8 9]] [1 2 3 4 5 6 7 8 9]
np.concatenate([arr0, arr1, ...], axis)
np.vstack([arr0, arr1, ...])
np.hstack([arr0, arr1, ...])
import numpy as np
upper_arr = np.arange(1, 5).reshape(2, 2)
lower_arr = np.arange(5, 9).reshape(2, 2)
print("Merge with np.concatenate():")
print(np.concatenate([upper_arr, lower_arr])) # default axis=0
print("Merge with np.vstack():")
print(np.vstack([upper_arr, lower_arr]))
Merge with np.concatenate(): [[1 2] [3 4] [5 6] [7 8]] Merge with np.vstack(): [[1 2] [3 4] [5 6] [7 8]]
axis=1 的時候效果與 np.hstack() 函數相同,是以水平方向進行合併
import numpy as np
left_arr = np.zeros(4, dtype=int).reshape(-1, 1)
right_arr = np.ones(4, dtype=int).reshape(-1, 1)
print("Merge with np.concatenate():")
print(np.concatenate([left_arr, right_arr], axis=1))
print("Merge with np.hstack():")
print(np.hstack([left_arr, right_arr]))
Merge with np.concatenate(): [[0 1] [0 1] [0 1] [0 1]] Merge with np.hstack(): [[0 1] [0 1] [0 1] [0 1]]
np.split()
np.vsplit()
np.hsplit()
import numpy as np
arr = np.arange(11, 21)
# [2, 5] are cut positions, not piece sizes: the three pieces are
# arr[:2], arr[2:5] and arr[5:], matching the printed output.
arr0, arr1, arr2 = np.split(arr, [2, 5])
print(arr0, arr1, arr2)
[11 12] [13 14 15] [16 17 18 19 20]
import numpy as np
arr = np.arange(24).reshape(6, 4)
print(arr)
print("======")
arr0, arr1, arr2 = np.vsplit(arr, [1, 3])
print(arr0)
print("======")
print(arr1)
print("======")
print(arr2)
[[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11] [12 13 14 15] [16 17 18 19] [20 21 22 23]] ====== [[0 1 2 3]] ====== [[ 4 5 6 7] [ 8 9 10 11]] ====== [[12 13 14 15] [16 17 18 19] [20 21 22 23]]
import numpy as np
arr = np.arange(24).reshape(4, 6)
print(arr)
print("======")
arr0, arr1, arr2 = np.hsplit(arr, [1, 3])
print(arr0)
print("======")
print(arr1)
print("======")
print(arr2)
[[ 0 1 2 3 4 5] [ 6 7 8 9 10 11] [12 13 14 15 16 17] [18 19 20 21 22 23]] ====== [[ 0] [ 6] [12] [18]] ====== [[ 1 2] [ 7 8] [13 14] [19 20]] ====== [[ 3 4 5] [ 9 10 11] [15 16 17] [21 22 23]]
%timeit 得知若想以迭代對一百萬筆隨機整數進行「倒數」的運算要花多少時間
import numpy as np
long_arr = np.random.randint(1, 101, size=1000000)
%timeit [1/i for i in long_arr]
323 ms ± 3.47 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
long_arr = np.random.randint(1, 101, size=1000000)
%timeit 1 / long_arr
2.17 ms ± 254 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
- np.add():同 + 運算符
- np.subtract():同 - 運算符
- np.multiply():同 * 運算符
- np.divide():同 / 運算符
- np.power():同 ** 運算符
- np.floor_divide():同 // 運算符
- np.mod():同 % 運算符
import numpy as np
arr = np.array([2, 2, 2, 2, 2])
powers = np.arange(1, 6)
print(arr)
print(powers)
print(arr**powers)
print("Broadcasting:")
print(2 ** powers)
[2 2 2 2 2] [1 2 3 4 5] [ 2 4 8 16 32] Broadcasting: [ 2 4 8 16 32]
import numpy as np
arr = np.arange(9)
print(arr % 2 == 0)
print(arr[arr % 2 == 0])
[ True False True False True False True False True] [0 2 4 6 8]
import numpy as np
arr = np.floor(100* np.random.random(20))
print(arr)
[61. 34. 49. 86. 81. 34. 39. 18. 40. 22. 22. 67. 66. 28. 30. 23. 51. 65. 34. 1.]
print(ans)
[34. 86. 34. 18. 40. 22. 22. 66. 28. 30. 34.]
import numpy as np
arr = np.floor(100* np.random.random(50))
arr = arr.astype(int)
print(arr)
[ 9 77 75 40 11 32 69 49 48 99 41 95 45 42 27 88 9 40 77 35 90 59 64 14 40 32 43 45 60 5 37 73 14 9 30 64 51 17 54 35 87 60 73 8 70 87 16 73 48 15]
print(primes)
[11 41 59 43 5 37 73 17 73 73]
np.vectorize() 轉換為一個通用函數
import numpy as np
def is_prime(x):
    """Return True when the integer x is a prime number, otherwise False.

    Values below 2 (including 0, 1 and negatives) are never prime.
    Divisibility is only tested up to the square root of x: any divisor
    larger than that is paired with one smaller than it, so nothing is
    missed and far fewer candidates are tried than scanning 1..x.
    """
    if x < 2:
        return False
    candidate = 2
    # candidate * candidate <= x is the integer form of candidate <= sqrt(x).
    while candidate * candidate <= x:
        if x % candidate == 0:
            return False
        candidate += 1
    return True
# Wrap the scalar predicate so it can be called on a whole array at once.
is_prime_ufunc = np.vectorize(is_prime)
# Evaluate the mask a single time and reuse it for printing and filtering,
# instead of running the Python-level predicate over the array twice.
prime_mask = is_prime_ufunc(arr)
print(arr)
print("Whether or not it is a prime:")
print(prime_mask)
# Boolean indexing keeps only the elements flagged as prime.
print(arr[prime_mask])
[ 9 77 75 40 11 32 69 49 48 99 41 95 45 42 27 88 9 40 77 35 90 59 64 14 40 32 43 45 60 5 37 73 14 9 30 64 51 17 54 35 87 60 73 8 70 87 16 73 48 15] Whether or not it is a prime: [False False False False True False False False False False True False False False False False False False False False False True False False False False True False False True True True False False False False False True False False False False True False False False False True False False] [11 41 59 43 5 37 73 17 73 73]
通用與聚合函數最大差異點就在於輸入與輸出的數值陣列長度,不同於通用函數,聚合函數所輸出的數值陣列多數僅有長度 1,或遠小於輸入數值陣列的長度。
import numpy as np
mat = np.arange(1, 16).reshape(3, 5).astype(float)
print(mat)
print(np.sum(mat)) # 1 個輸出
print(np.sum(mat, axis=0)) # 5 個輸出
print(np.sum(mat, axis=1)) # 3 個輸出
[[ 1. 2. 3. 4. 5.] [ 6. 7. 8. 9. 10.] [11. 12. 13. 14. 15.]] 120.0 [18. 21. 24. 27. 30.] [15. 40. 65.]
# Introduce a missing value into the float matrix built in the previous cell.
mat[2, 4] = np.nan
print(mat)
# A single nan makes the plain sum nan (see the printed output) ...
print(np.sum(mat))
# ... while nansum leaves the nan out and prints the sum of the rest (105.0).
print(np.nansum(mat))
[[ 1. 2. 3. 4. 5.] [ 6. 7. 8. 9. 10.] [11. 12. 13. 14. nan]] nan 105.0
hw()
array([[ 1, 2, 3, 4, 5, 6, 7, 8, 9], [ 2, 4, 6, 8, 10, 12, 14, 16, 18], [ 3, 6, 9, 12, 15, 18, 21, 24, 27], [ 4, 8, 12, 16, 20, 24, 28, 32, 36], [ 5, 10, 15, 20, 25, 30, 35, 40, 45], [ 6, 12, 18, 24, 30, 36, 42, 48, 54], [ 7, 14, 21, 28, 35, 42, 49, 56, 63], [ 8, 16, 24, 32, 40, 48, 56, 64, 72], [ 9, 18, 27, 36, 45, 54, 63, 72, 81]])
arr = np.arange(10)
hw(arr)
3.0276503540974917
arr = np.arange(1)
hw(arr)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-62-5f30b5046560> in <module> 1 arr = np.arange(1) ----> 2 hw(arr) <ipython-input-60-b3445b322417> in hw(x) 3 def hw(x): 4 if x.size == 1: ----> 5 raise ValueError("The length of array must be larger than 1.") 6 return x.std(ddof=1) ValueError: The length of array must be larger than 1.