Tips for Numpy

作者 Lutein 日期 2018-04-20
Tips for Numpy

create array

# Integers 0 through 9 as a 1-D array
arr = np.arange(10)

# Two equivalent ways to get a 3x3 boolean array filled with True
np.full(shape=(3, 3), fill_value=True, dtype=bool)
np.ones(shape=(3, 3), dtype=bool)
#> array([[ True,  True,  True],
#>        [ True,  True,  True],
#>        [ True,  True,  True]])

# Values that occur in both arrays (each reported once)
a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])
np.intersect1d(a, b)
#> array([2, 4])

# Values of `a` that do not occur in `b`
a = np.arange(1, 6)   # [1, 2, 3, 4, 5]
b = np.arange(5, 10)  # [5, 6, 7, 8, 9]
np.setdiff1d(a, b)
#> array([1, 2, 3, 4])

# 5x3 array of random floats drawn uniformly from the interval [5, 10)
rand_arr = np.random.uniform(low=5, high=10, size=(5, 3))

condition

# Select the odd numbers with a boolean mask
arr = np.arange(10)
arr[arr % 2 != 0]
#> array([1, 3, 5, 7, 9])

# Overwrite the odd numbers with -1 (this modifies arr in place)
arr[arr % 2 != 0] = -1
#> array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])

# Replace the odd numbers with -1 without changing the original array:
# np.where builds a new array and leaves arr untouched
arr = np.arange(10)
out = np.where(arr % 2 != 0, -1, arr)

# Positions at which the two arrays hold the same value
a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])
np.where(b == a)
#> (array([1, 3, 5, 7]),)

# Select the values that fall inside the closed range [5, 10]
a = np.arange(15)

# Method 1: np.where on a combined boolean mask yields the matching indices
index = np.where((a >= 5) & (a <= 10))
a[index]
#> array([ 5,  6,  7,  8,  9, 10])

# Method 2: the same mask built with np.logical_and
index = np.where(np.logical_and(a >= 5, a <= 10))
a[index]
#> array([ 5,  6,  7,  8,  9, 10])

# Method 3: index with the boolean mask directly (thanks loganzk!)
a[(a >= 5) & (a <= 10)]
#> array([ 5,  6,  7,  8,  9, 10])

shape

# Reshape a flat array into 2 rows; the -1 lets NumPy work out the column count.
# The reshaped array is returned, arr itself stays 1-D.
arr = np.arange(10)
arr.reshape((2, -1))
#> array([[0, 1, 2, 3, 4],
#>        [5, 6, 7, 8, 9]])

# Two 2x5 arrays to stack
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

# Stack vertically (rows of b go below the rows of a, i.e. along axis 0)
# Method 1:
np.concatenate([a, b], axis=0)

# Method 2:
np.vstack([a, b])

# Method 3:
np.r_[a, b]
#> array([[0, 1, 2, 3, 4],
#>        [5, 6, 7, 8, 9],
#>        [1, 1, 1, 1, 1],
#>        [1, 1, 1, 1, 1]])

# Stack horizontally (columns of b go to the right of a, i.e. along axis 1)
# Method 1:
np.concatenate([a, b], axis=1)

# Method 2:
np.hstack([a, b])

# Method 3:
np.c_[a, b]
#> array([[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
#>        [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]])

# Build a longer array from a short one: every element repeated 3 times,
# followed by the whole array tiled 3 times
a = np.array([1, 2, 3])
np.r_[a.repeat(3), np.tile(a, 3)]
#> array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])

# A 2-D example array: the column and row operations below need two axes
# (indexing a 1-D array with [:, ...] raises IndexError)
arr = np.arange(9).reshape(3, 3)
#> array([[0, 1, 2],
#>        [3, 4, 5],
#>        [6, 7, 8]])

# Swap the first two columns
arr[:, [1, 0, 2]]
#> array([[1, 0, 2],
#>        [4, 3, 5],
#>        [7, 6, 8]])

# Reverse the rows
arr[::-1]
#> array([[6, 7, 8],
#>        [3, 4, 5],
#>        [0, 1, 2]])

# Reverse the columns
arr[:, ::-1]
#> array([[2, 1, 0],
#>        [5, 4, 3],
#>        [8, 7, 6]])

function

# Vectorize a function written for scalars so that it can be applied to arrays.
# Example: maxx returns the larger of x and y.
def maxx(x, y):
    """Return the larger of the two scalars x and y (x when they are equal)."""
    if x >= y:
        return x
    else:
        return y
maxx(1, 5)  # output 5

# np.vectorize applies maxx element by element; the keyword is `otypes`
# (plural) and it fixes the dtype of the returned array.
pair_max = np.vectorize(maxx, otypes=[float])
a = np.array([3, 1, 4])
b = np.array([4, 2, 0])
pair_max(a, b)
#> array([4., 2., 4.])

print

# Printing is controlled mainly through set_printoptions:
#   threshold=6 -> arrays with more than 6 elements are printed in summarized form
#                  (to always print in full pass sys.maxsize; np.nan is rejected
#                  by current NumPy releases)
#   precision=3 -> floats are shown with 3 digits after the decimal point
np.set_printoptions(threshold=6, precision=3)

tips for DL

# How to keep every field exactly as read when loading a dataset: dtype=object
# Solution
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(fname=url, dtype='object', delimiter=',')

# Show the first 3 rows
iris[:3]
#> array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
#>        [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
#>        [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)

# The arrays used below were never defined in this file, so derive them from
# the `iris` object array loaded earlier (columns follow the order of `names`).
sepallength = iris[:, 0].astype(float)  # first column, 'sepallength'
# NOTE(review): iris_2d is assumed to be the four numeric columns — confirm
iris_2d = iris[:, :4].astype(float)
# NOTE(review): S is assumed to be the sepal-length column — confirm
S = sepallength

# Mean, median and standard deviation
mu, med, sd = np.mean(sepallength), np.median(sepallength), np.std(sepallength)

# Min-max normalization to the range [0, 1].
# np.ptp (max - min) is used because the ndarray.ptp() method was removed in NumPy 2.0.
S = (S - S.min()) / np.ptp(S)

# Check whether any element is missing (NaN)
np.isnan(iris_2d).any()