本博客讲解的案例来源于Journal of the American Statistical Association期刊(顶刊)上的内容:
Blei D M, Kucukelbir A, McAuliffe J D. Variational inference: A review for statisticians[J]. Journal of the American Statistical Association, 2017, 112(518): 859-877.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
classgmm():
def__init__(self, data, num_clusters, sigma=1):
'''initialization
model parameters(m,s2,phi)--need update
'''self.data = data
self.K = num_clusters
self.n = data.shape[0]
self.sigma = sigma
self.phi = np.random.random([self.n,self.K]) # phi(matrix n*k)self.m = np.random.randint(np.min(data), np.max(data), self.K).astype(float) # m(matrix 1*k)self.s2 = np.random.random(self.K) # s2(matrix 1*k)defcompute_elbo(self):
'''calculate ELOB '''
p1 = -np.sum((self.m**2 + self.s2) / (2 * self.sigma**2))
p2 = (-0.5 * np.add.outer(self.data**2, self.m**2 + self.s2) + np.outer(self.data, self.m))*(self.phi)
p3 = -np.sum(np.log(self.phi))
p4 = np.sum(0.5 * np.sum(np.log(self.s2)))
elbo_c = p1 + np.sum(p2) + p3 + p4
return elbo_c
defupdate_bycavi(self):
'''update (m,s2,phi)'''# phi update
e = np.outer(self.data, self.m) + (-0.5 * (self.m**2 + self.s2))[np.newaxis,:] #[np.newaxis,:] is to matrixself.phi = np.exp(e) / np.sum(np.exp(e), axis=1)[:, np.newaxis] #normalization K*N matrix#m updateself.m = np.sum(self.data[:, np.newaxis] * self.phi, axis=0)/(1.0 / self.sigma**2 + np.sum(self.phi, axis=0))
# s2 updateself.s2 = 1.0 / (1.0 / self.sigma**2 + np.sum(self.phi, axis = 0))
deftrainmodel(self, epsilon, iters):
'''train model
epsilon: epsilon-convergence
iters: iteration number
'''
elbo = []
elbo.append(self.compute_elbo())
# use cavi to update elbo until epsilon-convergencefor i inrange(iters):
self.update_bycavi()
elbo.append(self.compute_elbo())
print("elbo is: ", elbo[i])
if np.abs(elbo[-1] - elbo[-2]) <= epsilon:
print("iter of convergence:",i)
breakreturn elbo
defplot(self,size):
sns.set_style("whitegrid")
for i inrange(int(self.n/size)):
sns.distplot(data[size*i: (i+1)*size], rug=True)
x = np.linspace(self.m[i] - 3*self.sigma, self.m[i] + 3*self.sigma, 100)
plt.plot(x,mlab.normpdf(x, self.m[i], self.sigma),color='black')
plt.show()
if __name__ == "__main__":
# generate data
number = 1000
clusters = 5
mu = np.array([1, 5, 7, 9, 15])
data = []
# x-N(Mu,1)for i inrange(clusters):
data.append(np.random.normal(mu[i], 1, number))
# concatenate data
data = np.concatenate(np.array(data))
model = gmm(data, clusters)
model.trainmodel(1e-3, 1000)
print("converged_means:", sorted(model.m))
model.plot(number)
执行该程序,控制台输出的结果为:
在这里插入图片描述
另外,matplotlib.pyplot绘图的结果为:
在这里插入图片描述
参考论文
Blei D M, Kucukelbir A, McAuliffe J D. Variational inference: A review for statisticians[J]. Journal of the American Statistical Association, 2017, 112(518): 859-877.