【Scientific Problem Grinding】Algorithm Questions Specific to Machine Learning Roles

Alias Sampling

Alias Method: a discrete sampling technique with O(1) time complexity

A Python 3 implementation of alias sampling:

import random
from typing import List


class Solution:

    def __init__(self, w: List[int]):
        N = len(w)
        sum_ = sum(w)
        # normalize the weights into probabilities
        prob = [p / sum_ for p in w]
        alias = [0] * N
        # scale by N so that the average bucket height is exactly 1
        alias_prob = [p * N for p in prob]
        small_q = []   # buckets with height < 1
        large_q = []   # buckets with height >= 1
        for i, p in enumerate(alias_prob):
            if p < 1:
                small_q.append(i)
            else:
                large_q.append(i)
        # pair each small bucket with a large bucket that tops it up to height 1
        while small_q and large_q:
            small = small_q.pop(0)
            large = large_q.pop(0)
            alias[small] = large
            alias_prob[large] -= (1 - alias_prob[small])
            if alias_prob[large] < 1:
                small_q.append(large)
            else:
                large_q.append(large)
        self.alias = alias
        self.N = N
        self.alias_prob = alias_prob

    def pickIndex(self) -> int:
        # pick a bucket uniformly, then return either its own index or its alias
        ix = random.randint(0, self.N - 1)
        p = random.random()
        if p < self.alias_prob[ix]:
            return ix
        else:
            return self.alias[ix]
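
A quick sanity check of the class above with illustrative weights; the empirical pick frequencies should approach $w / \sum w = [0.1, 0.2, 0.3, 0.4]$:

from collections import Counter

s = Solution([1, 2, 3, 4])
counts = Counter(s.pickIndex() for _ in range(100000))
print({i: round(counts[i] / 100000, 3) for i in range(4)})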

Additional random-number-related algorithms:

  • Linear congruential method

linear congruential generator (LCG)

$RandSeed = (A \cdot RandSeed + B) \% M$

The maximum period of an LCG is M, but in most cases the actual period is shorter than M. For the LCG to reach the full period M, the following conditions (the Hull–Dobell theorem) must hold:

  • B and M are coprime;
  • every prime factor of M also divides A − 1;
  • if M is a multiple of 4, then A − 1 is also a multiple of 4.
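
A minimal sketch of an LCG in Python (the constants below are the common Numerical Recipes parameters, used here only as an example):

def lcg(seed, a=1664525, b=1013904223, m=2 ** 32):
    """Yield pseudo-random floats in [0, 1) from a linear congruential generator."""
    x = seed
    while True:
        x = (a * x + b) % m
        yield x / m


gen = lcg(seed=42)
print([round(next(gen), 4) for _ in range(5)])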

  • Generating a Gaussian distribution via the central limit theorem

Inverse transform method

In general, for a probability distribution with CDF $y = F(x)$, y ranges over (0, 1). Find the inverse function $G = F^{-1}$, feed uniform random numbers on (0, 1) in as input, and the outputs follow the target distribution:

$y = G(x)$
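
For example, the exponential distribution has CDF $F(x) = 1 - e^{-\lambda x}$, whose inverse is $G(u) = -\ln(1 - u) / \lambda$; a minimal sketch:

import numpy as np

lam = 2.0
u = np.random.rand(10000)       # uniform samples on [0, 1)
x = -np.log(1 - u) / lam        # inverse CDF turns them into Exp(lambda) samples
print(x.mean())                 # should be close to 1 / lam = 0.5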

Central limit theorem

Generate many batches; in each batch draw 12 random numbers on $[0, 1]$ and sum them. The sum has variance $1/12 \times 12 = 1$ and mean $1/2 \times 12 = 6$, so subtract 6. With enough batches, the results are approximately distributed as $\mathcal{N}(0,1)$.

import numpy as np
import matplotlib.pyplot as plt

n = 12     # numbers summed per batch
N = 5000   # number of batches
x = np.zeros([N])
for j in range(N):
    a = np.random.rand(n)      # 12 samples from U(0, 1)
    u = sum(a)
    x[j] = u - n * 0.5         # subtract the mean (6) to center at 0
plt.hist(x)
plt.show()

Box Muller
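
The Box-Muller transform maps two independent uniforms $x_1, x_2 \sim U(0, 1)$ to two independent standard normal samples:

$y_1 = \sqrt{-2 \ln x_1} \cos(2\pi x_2)$

$y_2 = \sqrt{-2 \ln x_1} \sin(2\pi x_2)$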

import numpy as np
import matplotlib.pyplot as plt

N = 1000
x1 = np.random.rand(N)    # two independent batches of U(0, 1) samples
x2 = np.random.rand(N)
# Box-Muller transform: two uniforms -> two independent N(0, 1) samples
y1 = np.sqrt(-2 * np.log(x1)) * np.cos(2 * np.pi * x2)
y2 = np.sqrt(-2 * np.log(x1)) * np.sin(2 * np.pi * x2)
y = np.hstack([y1, y2])
plt.hist(y)
plt.show()
  • Rejection sampling to generate a Gaussian distribution

Reference: a detailed explanation of the principles of Inverse Sampling and Reject Sampling

  • Accept-reject sampling to estimate $\pi$
from math import sqrt
from random import random


def sample_pi(max_cnt=100000):
    acc_cnt = 0
    for i in range(max_cnt):
        x = random()
        y = random()
        # accept points falling inside the quarter unit circle;
        # the acceptance rate approaches its area, pi / 4
        if sqrt(x ** 2 + y ** 2) < 1:
            acc_cnt += 1
    print("pi =", acc_cnt / max_cnt * 4)


sample_pi()
  • Generating a uniform distribution from a Gaussian distribution

Can a normal distribution be used to generate a uniform distribution?

$\arctan(\frac{z_0}{z_1}) / \pi + 0.5$

To generate a 2D standard normal distribution, simply take two independent standard normal variables X and Y and pair them as (X, Y).

The 2D standard normal distribution is isotropic in Cartesian coordinates, so the angle between the direction of the point (X, Y) and the X-axis (or any fixed direction) is uniformly distributed.

This is easy to see, since $\tan(\theta) = \frac{y}{x}$.

In effect, this is Box-Muller run in reverse.

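A quick numerical sketch of this claim, applying the Cauchy CDF form of the formula above:

import numpy as np
import matplotlib.pyplot as plt

N = 100000
z0 = np.random.randn(N)
z1 = np.random.randn(N)
# z0 / z1 follows a standard Cauchy distribution, whose CDF is arctan(x) / pi + 0.5,
# so applying the CDF maps the samples back to U(0, 1)
u = np.arctan(z0 / z1) / np.pi + 0.5
plt.hist(u, bins=50)
plt.show()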

Computing AUC


Sort the (label, sample) pairs by predicted probability. If the model ranks well, the sorted labels should be $[0, 0, 1, 1, 1]$, with $M = 3$ positive samples and $N = 2$ negative samples.

Taking only the positive samples, their rank values form rankList $= [3, 4, 5]$, which sums to 12. Plugging into the rank formula $AUC = \frac{\sum_{i \in pos} rank_i - M(M+1)/2}{MN}$, the subtracted term is $3 \times 4 / 2 = 6$ and the denominator is $3 \times 2 = 6$, so the result is 1, consistent with the definition.

def calAUC(prob, labels):
    # sort (prob, label) pairs by score; ranks are 1-based positions after sorting
    f = list(zip(prob, labels))
    rank = [label for score, label in sorted(f, key=lambda x: x[0])]
    # ranks of the positive samples
    rankList = [i + 1 for i in range(len(rank)) if rank[i] == 1]
    posNum = 0
    negNum = 0
    for i in range(len(labels)):
        if labels[i] == 1:
            posNum += 1
        else:
            negNum += 1
    # AUC = (sum of positive ranks - M(M+1)/2) / (M*N)
    auc = (sum(rankList) - (posNum * (posNum + 1)) / 2) / (posNum * negNum)
    print(auc)
    return auc
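
Two illustrative calls (the scores are assumed for demonstration): a perfect ranking gives 1.0, and one inversion gives $(2 + 4 + 5 - 6) / 6 \approx 0.833$.

calAUC([0.1, 0.2, 0.6, 0.7, 0.8], [0, 0, 1, 1, 1])   # perfect ranking -> 1.0
calAUC([0.1, 0.4, 0.35, 0.8, 0.9], [0, 0, 1, 1, 1])  # one inversion -> 0.8333...
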
Reservoir Sampling

Reference: Reservoir Sampling Algorithm (cnblogs)

Sample $K$ numbers uniformly, without replacement, from $N$ numbers in $O(N)$ time.

Used when the size of the data is not known in advance, while guaranteeing every sample has an equal probability of being chosen.

  • Algorithm steps

Suppose the data sequence has size $n$ and we need to sample $k$ elements.

First build an array that can hold $k$ elements and put the first $k$ elements of the sequence into it.

Then, starting from the $(k+1)$-th element, decide with probability $k/n$ (where $n$ is the number of elements seen so far, which grows as the stream advances) whether the element replaces one in the array; each element already in the array is replaced with equal probability. After all elements have been processed, the elements remaining in the array are the required sample (see the sketch below).
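A minimal standalone sketch of these steps for a generic stream; the linked-list version below applies the same idea with k = 1:

import random


def reservoir_sample(stream, k):
    """Uniformly sample k items from an iterable of unknown length in O(n)."""
    reservoir = []
    for i, item in enumerate(stream):
        if i < k:
            reservoir.append(item)        # fill the reservoir with the first k items
        else:
            j = random.randint(0, i)      # keep item i with probability k / (i + 1)
            if j < k:
                reservoir[j] = item
    return reservoir


print(reservoir_sample(range(100), 5))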

import random


# ListNode is assumed to be provided by the judge (LeetCode-style linked-list node)
class Solution:

    def __init__(self, head: ListNode):
        """ @param head The linked list's head. Note that the head is guaranteed to be not null, so it contains at least one node. """
        self.K = 1
        self.head = head

    def getRandom(self) -> int:
        """ Returns a random node's value, chosen uniformly via reservoir sampling (K = 1). """
        res = []
        p = self.head
        i = 0
        while p:
            if i < self.K:
                # fill the reservoir with the first K values
                res.append(p.val)
            else:
                # replace a reservoir slot with probability K / (i + 1)
                ix = random.randint(0, i)
                if ix < self.K:
                    res[ix] = p.val
            i += 1
            p = p.next
        return res[0]

ByteDance recommendation-algorithm interview experience: passed three technical interview rounds

That question is also reservoir sampling.

The shuffle algorithm is essentially reservoir sampling as well, with the replacement action changed to a swap.

Full shuffle and sample-m shuffle:

from random import randint


def shuffle(nums):
    # forward Fisher-Yates: after step i, nums[:i+1] is a uniform permutation
    n = len(nums)
    for i in range(n):
        ri = randint(0, i)
        nums[i], nums[ri] = nums[ri], nums[i]
    return nums


def shuffle_m(nums, m):
    # reservoir-style variant: only swaps that land in the first m slots are kept,
    # so nums[:m] is a uniform sample of m elements
    n = len(nums)
    for i in range(n):
        ri = randint(0, i)
        if ri < m:
            nums[i], nums[ri] = nums[ri], nums[i]
    return nums[:m]


print(shuffle(list(range(10))))
print(shuffle_m(list(range(10)),3))
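
A quick empirical check (illustrative) that shuffle_m behaves like reservoir sampling: every element should appear in the sample with probability m / n = 0.3.

from collections import Counter

trials = 20000
counts = Counter()
for _ in range(trials):
    counts.update(shuffle_m(list(range(10)), 3))
print({k: round(v / trials, 3) for k, v in sorted(counts.items())})   # each value ≈ 0.3
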
1 Hand-Deriving Formulas and Implementing Simple Models

1.1 Implementing Logistic Regression

The main pieces of code to memorize:

  • Cross entropy

$L = -\frac{1}{N}\Sigma[y \cdot \log(\hat{y}) + (1 - y) \cdot \log(1 - \hat{y})]$

Essentially this is the $-p \log p$ form from information entropy, so it is not hard to remember.

loss = -(y @ np.log(y_hat) + (1 - y) @ np.log(1 - y_hat)) / n
  • Computing y_hat

Just the sigmoid: $\sigma(XW)$

return 1 / (1 + np.exp(-X @ self.w))
  • Gradient computation

$\frac{\partial{L}}{\partial{W}} = (\hat{y} - y)X$ (for the sigmoid plus cross-entropy combination, $\frac{\partial{L}}{\partial{z}} = \hat{y} - y$ with $z = XW$, and $\frac{\partial{z}}{\partial{W}} = X$)

dw = (y_hat - y) @ X
dw /= n

Putting the pieces together:

import numpy as np


class LR:
    def __init__(self, lr=0.1, max_iter=1000, eps=1e-5):
        self.eps = eps
        self.max_iter = max_iter
        self.lr = lr

    def predict_proba(self, X):
        return 1 / (1 + np.exp(-X @ self.w))

    def fit(self, X: np.ndarray, y: np.ndarray):
        n, m = X.shape
        X = np.hstack([X, np.ones([n, 1])])
        m += 1
        self.w = np.zeros([m])
        pre_loss = 0
        for i in range(self.max_iter):
            y_hat = self.predict_proba(X)
            # cross-entropy loss
            loss = -(y @ np.log(y_hat) + (1 - y) @ np.log(1 - y_hat)) / n
            if abs(loss - pre_loss) < self.eps:
                print('loss is no longer decreasing')
                break
            pre_loss = loss
            print(loss)
            # compute the gradient
            dw = (y_hat - y) @ X
            dw /= n
            # gradient descent update
            self.w -= dw * self.lr
            print(self.w)
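
A toy run of the LR class above on synthetic, linearly separable data (the data and hyperparameters are illustrative assumptions):

import numpy as np

np.random.seed(0)
X = np.random.randn(200, 2)
y = (X[:, 0] + X[:, 1] > 0).astype(float)   # labels from a known linear rule

model = LR(lr=0.5, max_iter=200)
model.fit(X, y)

# fit() appends a bias column internally, so do the same before predicting
X_aug = np.hstack([X, np.ones([200, 1])])
acc = ((model.predict_proba(X_aug) > 0.5) == y).mean()
print('train accuracy:', acc)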

1.2 Implementing Linear Regression
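
A minimal gradient-descent sketch in the same style as the LR class above, assuming a mean-squared-error loss with the bias folded into the weights:

import numpy as np


class LinearRegression:
    def __init__(self, lr=0.1, max_iter=1000):
        self.lr = lr
        self.max_iter = max_iter

    def predict(self, X):
        return X @ self.w

    def fit(self, X, y):
        n, m = X.shape
        X = np.hstack([X, np.ones([n, 1])])   # append a bias column
        self.w = np.zeros([m + 1])
        for _ in range(self.max_iter):
            y_hat = self.predict(X)
            # gradient of 0.5 * mean((y_hat - y)^2) with respect to w
            dw = (y_hat - y) @ X / n
            self.w -= self.lr * dw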

1.3 Implementing K-means
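
A minimal sketch of Lloyd's algorithm for k-means, assuming Euclidean distance and initialization from k random data points:

import numpy as np


def kmeans(X, k, n_iter=100, seed=0):
    rng = np.random.default_rng(seed)
    centers = X[rng.choice(len(X), size=k, replace=False)]   # pick k points as initial centers
    labels = np.zeros(len(X), dtype=int)
    for _ in range(n_iter):
        # assignment step: each point goes to its nearest center
        dists = np.linalg.norm(X[:, None, :] - centers[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        # update step: move each center to the mean of its assigned points
        new_centers = np.array([
            X[labels == j].mean(axis=0) if np.any(labels == j) else centers[j]
            for j in range(k)
        ])
        if np.allclose(new_centers, centers):
            break
        centers = new_centers
    return centers, labels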

1.4 Implementing UserCF

1.5 Implementing a Neural Network

Code: https://github.com/auto-flow/numpy-machine-learning/

1.5.1 Linear Layer

Symbol definitions:

$Z$: output of the linear transform
$A$: output of the activation layer
$L$: loss value
$\mathcal{L}$: loss function

Forward pass:

$Z = XW + b$

$A = \sigma(Z)$

$L = \mathcal{L}(Y, A)$

Backward pass:

Gradient on the neurons:

$\frac{\partial{L}}{\partial{Z}} = \frac{\partial{L}}{\partial{A}} \frac{\partial{A}}{\partial{Z}}$

Gradient on the weights:

$\frac{\partial{L}}{\partial{W}} = \frac{\partial{L}}{\partial{A}} \frac{\partial{A}}{\partial{Z}} \frac{\partial{Z}}{\partial{W}}$

Once these concepts are clear, we can write a simple neural network with a PyTorch-style interface:


Forward and backward passes of the Linear layer

In short, $Z = XW + b$:

class Linear(Module):
    def forward(self, x):
        self.input = x
        # Z = XW + b
        self.output = np.dot(self.input, self.weight) + self.bias
        return self.output

For the backward pass, first compute the gradients of $W$ and $b$, then the gradient on the input neurons.

Gradient of $b$:

$\frac{\partial{L}}{\partial{b}} = \frac{\partial{L}}{\partial{Z}} \frac{\partial{Z}}{\partial{b}}$, where $\frac{\partial{Z}}{\partial{b}} = 1$, so the gradient of $b$ is simply the gradient on the output neurons $\frac{\partial{L}}{\partial{Z}}$, i.e. the grad variable in the code.

Gradient of $W$:

$\frac{\partial{L}}{\partial{W}} = \frac{\partial{L}}{\partial{Z}} \frac{\partial{Z}}{\partial{W}}$, where $\frac{\partial{Z}}{\partial{W}} = X$.

One thing to note here. For example, if the current layer is a $4 \times 9$ linear map, then grad has shape $[B, 9]$, $W$ has shape $[4, 9]$, and $b$ has shape $[9]$.

bgrad: take the mean over axis=0 directly, giving $bgrad = [9]$.

wgrad: $X \times grad = [B, 4] \times [B, 9] = [B, 4, 9]$ (a per-sample outer product), then take the mean over axis=0 to get $wgrad = [4, 9]$.

Gradient for the previous layer: $\frac{\partial{L}}{\partial{X}} = \frac{\partial{L}}{\partial{Z}} \frac{\partial{Z}}{\partial{X}} = \frac{\partial{L}}{\partial{Z}} W^T$, where $\frac{\partial{L}}{\partial{Z}}$ is the grad coming from the next layer.

    def backward(self, grad):
        self.bgrad = np.mean(grad, axis=0)
        # dL/dW = dL/dZ * dZ/dW = X * dL/dZ (per-sample outer product, then mean)
        self.wgrad += np.mean(mul(self.input, grad), axis=0)
        # dL/dX = dL/dZ * W^T
        # i.e. use the weights W to map the gradient back to the previous layer's neurons
        grad = np.dot(grad, self.weight.T)
        return grad

Note that the $[B, 4] \times [B, 9] = [B, 4, 9]$ product above is computed in numpy like this:

def mul(A: np.ndarray, B: np.ndarray):
    # A: 100x9 B: 100x1
    # AxB = 100x9x1
    na = A.shape[1]
    nb = B.shape[1]
    A = np.repeat(A[:, :, np.newaxis], nb, 2)
    B = np.repeat(B[:,  np.newaxis,:], na, 1)
    return A * B
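
Equivalently, this per-sample outer product can be written with np.einsum:

import numpy as np

A = np.random.rand(100, 4)
B = np.random.rand(100, 9)
# same result as mul(A, B): one outer product per row of the batch
assert np.allclose(mul(A, B), np.einsum('bi,bj->bij', A, B))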

1.5.2 Activation Functions

1.5.2.1 ReLU

ReLU forward pass: set the parts where input < 0 to 0, which introduces sparsity; for inputs following a standard normal distribution, half of the neurons are zeroed out.

ReLU backward pass: record which neurons had input < 0 and set the gradient of those neurons to 0.

class Relu(Module):
    def __init__(self):
        super(Relu, self).__init__()
        self.input = None
        self.output = None

    def forward(self, x):
        self.input = x
        # zero out the negative part: output = max(x, 0)
        self.output = np.maximum(x, 0)
        return self.output

    def backward(self, grad):
        # the gradient passes through unchanged where input > 0,
        # and is zeroed where input <= 0
        grad[self.input <= 0] = 0
        return grad

1.5.2.2 Sigmoid

$\sigma(x) = \frac{1}{1 + exp(-x)}$

$\sigma^{\prime}(x) = \sigma(x) \cdot (1 - \sigma(x))$

So the output stored in the code is just the value of $\sigma(x)$; the backward pass is essentially multiplying grad by $\sigma(x) \cdot (1 - \sigma(x))$.

class Sigmoid(Module):
    def __init__(self):
        super(Sigmoid, self).__init__()
        self.input = None
        self.output = None

    def forward(self, x):
        self.input = x
        self.output = 1 / (1 + np.exp(-self.input))
        return self.output

    def backward(self, grad):
        grad *= self.output * (1 - self.output)
        return grad

1.5.2.3 Tanh

$tanh(x) = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}$

Mnemonic: minus on top, plus on the bottom; positive exponent on the left, negative exponent on the right.

$tanh^{\prime}(x) = 1 - tanh^2(x)$

class Tanh(Module):
    def __init__(self):
        super(Tanh, self).__init__()
        self.input = None
        self.output = None

    def forward(self, x):
        self.input = x
        self.output = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
        return self.output

    def backward(self, grad):
        grad *= (1 - np.square(self.output))
        return grad

1.5.3 Loss Functions

1.5.3.1 MSE

$L = \mathcal{L}(y, \hat{y}) = \frac{1}{2N}\Sigma(y - \hat{y})^2$

$\frac{\partial{L}}{\partial{\hat{y}}} = \hat{y} - y$

Note that $\hat{y}$ comes first, because of the negative sign picked up when differentiating $(y - \hat{y})^2$.

class MSE(object):
    def __init__(self):
        self.label = None
        self.pred = None
        self.grad = None
        self.loss = None

    def __call__(self, pred, label):
        return self.forward(pred, label)

    def forward(self, pred, label):
        self.pred, self.label = pred, label
        self.loss = np.mean(0.5 * np.square(self.pred - self.label))
        return self.loss

    def backward(self, grad=None):
        self.grad = (self.pred - self.label)
        return self.grad
2 Probability and Statistics

2.1 Sampling Algorithms

2.1.1 Uniform sampling: linear congruential generator

2.1.2 Generating a Gaussian distribution from a uniform distribution

2.1.3 Generating a uniform distribution from a Gaussian distribution

2.1.4 Reservoir sampling

2.1.5 Alias Sampling

2.1.6 Generating randN from randM

2.2 Permutations, combinations, and classical probability

2.3 Geometric probability

2.4 Expectation and variance

3 Advanced Mathematics

3.1 Gradients

3.2 Taylor expansion

3.3 Series
