SlideShare une entreprise Scribd logo
1  sur  10
Télécharger pour lire hors ligne
R Code
For
Expectation-Maximization (EM)
Algorithm for Gaussian Mixtures
Avjinder Singh Kaler
This is the R code for EM algorithm. Here, R code is used for 1D, 2D and 3 clusters dataset.
One can modify this code and use for his own project.
Expectation-Maximization (EM) is an iterative
algorithm for finding maximum likelihood estimates of
parameters in statistical models, where the model depends
on unobserved latent variables. The EM iteration alternates
between performing an expectation (E) step, which creates a
function for the expectation of the log-likelihood evaluated
using the current estimate for the parameters, and a
maximization (M) step, which computes parameters
maximizing the expected log-likelihood found on the E step.
Iterate these steps until convergence detect. In simple
language, each iteration consists of an E-step and an M-step.
Convergence is generally detected by computing the value of
the log-likelihood after each iteration and halting when it
appears not to be changing in a significant manner from one
iteration to the next.
#FaithFUL Dataset 1D
#
#load data
#
data(faithful)
Y<-faithful$waiting
#
#plot histogram of waiting times
#
postscript("geyser.eps",width=5,height=3,onefile=F)
par(mar=c(2,2,1,1),mgp=c(1,0.5,0),cex=0.7,lwd=0.5,las=1)
hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)",
xlim=c(40,100),main="",axes=F)
axis(1,seq(40,100,6),pos=0)
axis(2,seq(0,35,5),pos=40,las=1)
dev.off()
#
#log-likelihood function
#
ln<-function(p,Y) {
-sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))))
}
#
#EM algorithm
#
emstep<-function(Y,p) {
EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/
(p[1]*dnorm(Y,p[2],sqrt(p[4]))
+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))
p[1]<-mean(EZ)
p[2]<-sum(EZ*Y)/sum(EZ)
p[3]<-sum((1-EZ)*Y)/sum(1-EZ)
p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ)
p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ)
p
}
emiteration<-function(Y,p,n=10) {
for (i in (1:n)) {
p<-emstep(Y,p)
}
p
}
#
#starting values
#
p<-c(0.5,40,90,16,16)
#
#2 iterations of EM algorithm
#
p<-emiteration(Y,p,2)
p
#check for convergence
p<-emstep(Y,p)
p
#
#plot histogram with fitted distribution
#
hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)",
xlim=c(40,100),
main="",axes=F)
axis(1,seq(40,100,6),pos=0)
axis(2,seq(0,35,5),pos=40,las=1)
x<-seq(40,100,0.1)
y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5]))
lines(x,y*3*length(Y))
#5 iterations of EM algorithm
#log-likelihood function
#
ln<-function(p,Y) {
-sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))))
}
#
#EM algorithm
#
emstep<-function(Y,p) {
EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/
(p[1]*dnorm(Y,p[2],sqrt(p[4]))
+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))
p[1]<-mean(EZ)
p[2]<-sum(EZ*Y)/sum(EZ)
p[3]<-sum((1-EZ)*Y)/sum(1-EZ)
p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ)
p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ)
p
}
emiteration<-function(Y,p,n=10) {
for (i in (1:n)) {
p<-emstep(Y,p)
}
p
}
#
#starting values
#
p<-c(0.5,40,90,16,16)
#
#5 iterations of EM algorithm
#
p<-emiteration(Y,p,5)
p
#check for convergence
p<-emstep(Y,p)
p
#
#plot histogram with fitted distribution
#
hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)",
xlim=c(40,100),
main="",axes=F)
axis(1,seq(40,100,6),pos=0)
axis(2,seq(0,35,5),pos=40,las=1)
x<-seq(40,100,0.1)
y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5]))
lines(x,y*3*length(Y))
#10 iterations of EM algorithm
#log-likelihood function
#
ln<-function(p,Y) {
-sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))))
}
#
#EM algorithm
#
emstep<-function(Y,p) {
EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/
(p[1]*dnorm(Y,p[2],sqrt(p[4]))
+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))
p[1]<-mean(EZ)
p[2]<-sum(EZ*Y)/sum(EZ)
p[3]<-sum((1-EZ)*Y)/sum(1-EZ)
p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ)
p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ)
p
}
emiteration<-function(Y,p,n=10) {
for (i in (1:n)) {
p<-emstep(Y,p)
}
p
}
#
#starting values
#
p<-c(0.5,40,90,16,16)
#
#10 iterations of EM algorithm
#
p<-emiteration(Y,p,10)
p
#check for convergence
p<-emstep(Y,p)
p
#
#plot histogram with fitted distribution
#
hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)",
xlim=c(40,100),
main="",axes=F)
axis(1,seq(40,100,6),pos=0)
axis(2,seq(0,35,5),pos=40,las=1)
x<-seq(40,100,0.1)
y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5]))
lines(x,y*3*length(Y))
#Faithful Dataset 2D
#load data
#
#
#Bivariate mixture model
#
data(faithful)
Y<-faithful$waiting
X<-faithful$eruptions
plot(Y,X,xlab="Waiting time between eruptions (min)",
ylab="Eruption times (min)",
xlim=c(40,100),ylim=c(0,6),
main="",axes=F,cex=0.7)
axis(1,seq(40,100,6),pos=0)
axis(2,seq(0,6,1),pos=40,las=1)
#
#density of bivariate normal
#
dbinorm<-function(x,m,s){
1/sqrt(det(2*pi*s))*exp(-0.5*t(x-m)%*%solve(s)%*%(x-m))
}
f.hat<-function(x,p) {
p$p*dbinorm(x,p$m1,p$s1)+(1-p$p)*dbinorm(x,p$m2,p$s2)
}
Estep<-function(x,p) {
p$p*dbinorm(x,p$m1,p$s1)/
(p$p*dbinorm(x,p$m1,p$s1)
+(1-p$p)*dbinorm(x,p$m2,p$s2))
}
emstep<-function(Y,p) {
EZ<-apply(Y,1,Estep,p)
p$p<-mean(EZ)
p$m1<-(t(Y)%*%diag(EZ)%*%rep(1,nrow(Y)))/sum(EZ)
p$m2<-(t(Y)%*%diag(1-EZ)%*%rep(1,nrow(Y)))/sum(1-EZ)
MY<-matrix(p$m1,nrow(Y),2,byrow=T)
p$s1<-(t(Y-MY)%*%diag(EZ)%*%(Y-MY))/sum(EZ)
MY<-matrix(p$m2,nrow(Y),2,byrow=T)
p$s2<-(t(Y-MY)%*%diag(1-EZ)%*%(Y-MY))/sum(1-EZ)
p
}
#----------------------------------
X<-faithful$waiting
Y<-faithful$eruptions
Z<-cbind(X,Y)
#starting values
#
p=p.0<-list(p=0.5,m1=c(20,6),m2=c(70,6),
s1=matrix(c(10,0,0,1),2,2),s2=matrix(c(10,0,0,1),2,2))
#--------------------------------------
lh=NULL
P=matrix(0,20,length(p))
for(i in 1:20){
p<-emiteration(Z,p,i)
lh[i]=sum(log(apply(Z,1,f.hat,p)))
}
plot(lh,type='o',xlab='iterations',ylab='likelihood')
#---------------------------------------
require(ggplot2)
#---Graph the mixture:
n.l=m.l=30
x=seq(40,100,length= n.l)
y=seq(0,6,length = m.l)
xy=as.matrix(expand.grid(x,y))
# 2 Number of Iterations
p=p.0
p<-emiteration(Z,p,2)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
class.lab1=round(apply(Z,1,Estep,p))
plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)",
ylab='Eruption time (min)',col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list('iteration = 2',font=3))
# 5 Number of Iterations
p=p.0
p<-emiteration(Z,p,5)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
class.lab1=round(apply(Z,1,Estep,p))
plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)",
ylab='Eruption time (min)',col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list('iteration = 5',font=3))
# 10 Number of Iterations
p=p.0
p<-emiteration(Z,p,10)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
class.lab1=round(apply(Z,1,Estep,p))
plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)",
ylab='Eruption time (min)',col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list('iteration = 10',font=3))
# 20 Number of Iterations
p=p.0
p<-emiteration(Z,p,20)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
class.lab1=round(apply(Z,1,Estep,p))
plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)",
ylab='Eruption time (min)',col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list('iteration = 20',font=3))
# 50 Number of Iterations
p=p.0
p<-emiteration(Z,p,50)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
class.lab1=round(apply(Z,1,Estep,p))
plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)",
ylab='Eruption time (min)',col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list('iteration = 50',font=3))
# Simulated Data for 3 Clusters
#load data
library(mvtnorm)
library(graphics)
library(MASS)
require(ggplot2)
X=rmvnorm(100, mean=c(45,4), sigma=diag(c(2,1)))
Y=rmvnorm(100, mean=c(55,6), sigma=diag(c(2,1)))
U=rmvnorm(100, mean=c(65,7), sigma=diag(c(3,1)))
Z=rbind(X,Y, U)
X11()
km_Z=kmeans(Z,3)$cluster
plot(Z,col=km_Z)
#log-likelihood function
#
ln<-function(p,Y) {
-sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))))
}
#
#EM algorithm
#
emstep<-function(Y,p) {
EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/
(p[1]*dnorm(Y,p[2],sqrt(p[4]))
+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))
p[1]<-mean(EZ)
p[2]<-sum(EZ*Y)/sum(EZ)
p[3]<-sum((1-EZ)*Y)/sum(1-EZ)
p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ)
p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ)
p
}
emiteration<-function(Y,p,n=2) {
for (i in (1:n)) {
p<-emstep(Y,p)
}
p
}
#density of bivariate normal
#
dbinorm<-function(x,m,s){
1/sqrt(det(2*pi*s))*exp(-0.5*t(x-m)%*%solve(s)%*%(x-m))
}
f.hat<-function(x,p) {
p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+
(1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3)
}
Estep1<-function(x,p) {
p$p1*dbinorm(x,p$m1,p$s1)/
(p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+
(1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3))
}
Estep2<-function(x,p){
p$p2*dbinorm(x,p$m2,p$s2)/
(p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+
(1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3))
}
Estep3<-function(x,p){
p$p3*dbinorm(x,p$m3,p$s3)/
(p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+
p$p3*dbinorm(x, p$m3, p$s3))
}
emstep<-function(Y,p){
EZ1<-apply(Y,1,Estep1,p)
EZ2<-apply(Y,1,Estep2,p)
p$p1<-mean(EZ1)
p$p2<-mean(EZ2)
p$m1<-(t(Y)%*%diag(EZ1)%*%rep(1,nrow(Y)))/sum(EZ1)
p$m2<-(t(Y)%*%diag(EZ2)%*%rep(1,nrow(Y)))/sum(EZ2)
p$m3<-(t(Y)%*%diag(1-(EZ1+EZ2))%*%rep(1,nrow(Y)))/sum(1-(EZ1+EZ2))
MY<-matrix(p$m1,nrow(Y),2,byrow=T)
p$s1<-(t(Y-MY)%*%diag(EZ1)%*%(Y-MY))/sum(EZ1)
MY<-matrix(p$m2,nrow(Y),2,byrow=T)
p$s2<-(t(Y-MY)%*%diag(EZ2)%*%(Y-MY))/sum(EZ2)
MY<-matrix(p$m3,nrow(Y),2,byrow=T)
p$s3<-(t(Y-MY)%*%diag(1-(EZ1+EZ2))%*%(Y-MY))/sum(1-(EZ1+EZ2))
p
}
emiteration<-function(Y,p,n=10) {
for (i in (1:n)) {
p<-emstep(Y,p)
}
p
}
#starting values
#
p=p.0<-list(p1=1/3,p2=1/3,p3=1/3,m1=c(45,2),m2=c(60,4),m3=c(70,8),
s1=matrix(c(1,0,0,1),2,2),s2=matrix(c(1,0,0,1),2,2),
s3=matrix(c(1,0,0,1),2,2))
#--------------------------------------
lh=NULL
P=matrix(0,20,length(p))
for(i in 1:20){
p<-emiteration(Z,p,i)
lh[i]=sum(log(apply(Z,1,f.hat,p)))
}
X11()
plot(lh,type='o',xlab='iterations',ylab='likelihood')
#---------------------------------------
#---Graph the mixture:
n.l=m.l=30
x=seq(30,70,length= n.l)
y=seq(0,10,length = m.l)
xy=as.matrix(expand.grid(x,y))
# 1 Number of iterations
p=p.0
p<-emiteration(Z,p,1)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 1, sep=" "),font=3))
# 2 Number of iterations
p=p.0
p<-emiteration(Z,p,2)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 2, sep=" "),font=3))
# 5 Number of iterations
p=p.0
p<-emiteration(Z,p,5)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 5, sep=" "),font=3))
# 10 Number of iterations
p=p.0
p<-emiteration(Z,p,10)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 10, sep=" "),font=3))
# 20 Number of iterations
p=p.0
p<-emiteration(Z,p,20)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 20, sep=" "),font=3))
# 50 Number of iterations
p=p.0
p<-emiteration(Z,p,50)
z=apply(xy,1,f.hat,p)
z.mat1=matrix(z,n.l,m.l)
#postrior
X11()
class.lab1=round(apply(Z,1,Estep1,p))
class.lab2=round(apply(Z,1,Estep2,p))
plot(Z, col=class.lab1+2)
contour(x,y,z.mat1,label="", add=T)
title(main=list(paste('iteration =', 50, sep=" "),font=3))

Contenu connexe

Tendances

Dijkstra’s algorithm
Dijkstra’s algorithmDijkstra’s algorithm
Dijkstra’s algorithm
faisal2204
 
Classical relations and fuzzy relations
Classical relations and fuzzy relationsClassical relations and fuzzy relations
Classical relations and fuzzy relations
Baran Kaynak
 
Graph theory narsingh deo
Graph theory narsingh deoGraph theory narsingh deo
Graph theory narsingh deo
Umang Gupta
 
Lecture 18: Gaussian Mixture Models and Expectation Maximization
Lecture 18: Gaussian Mixture Models and Expectation MaximizationLecture 18: Gaussian Mixture Models and Expectation Maximization
Lecture 18: Gaussian Mixture Models and Expectation Maximization
butest
 
Multivariate data analysis
Multivariate data analysisMultivariate data analysis
Multivariate data analysis
Setia Pramana
 

Tendances (20)

Dijkstra’s algorithm
Dijkstra’s algorithmDijkstra’s algorithm
Dijkstra’s algorithm
 
Introduction to principal component analysis (pca)
Introduction to principal component analysis (pca)Introduction to principal component analysis (pca)
Introduction to principal component analysis (pca)
 
Approximation Algorithms TSP
Approximation Algorithms   TSPApproximation Algorithms   TSP
Approximation Algorithms TSP
 
Classical relations and fuzzy relations
Classical relations and fuzzy relationsClassical relations and fuzzy relations
Classical relations and fuzzy relations
 
Iris - Most loved dataset
Iris - Most loved datasetIris - Most loved dataset
Iris - Most loved dataset
 
Principal Component Analysis (PCA) and LDA PPT Slides
Principal Component Analysis (PCA) and LDA PPT SlidesPrincipal Component Analysis (PCA) and LDA PPT Slides
Principal Component Analysis (PCA) and LDA PPT Slides
 
Graph theory narsingh deo
Graph theory narsingh deoGraph theory narsingh deo
Graph theory narsingh deo
 
Stepwise regression Backward selection
Stepwise regression Backward selectionStepwise regression Backward selection
Stepwise regression Backward selection
 
Exploratory data analysis in R - Data Science Club
Exploratory data analysis in R - Data Science ClubExploratory data analysis in R - Data Science Club
Exploratory data analysis in R - Data Science Club
 
Statistical inference: Probability and Distribution
Statistical inference: Probability and DistributionStatistical inference: Probability and Distribution
Statistical inference: Probability and Distribution
 
Graph Theory: Trees
Graph Theory: TreesGraph Theory: Trees
Graph Theory: Trees
 
Asymptotic notation
Asymptotic notationAsymptotic notation
Asymptotic notation
 
Lecture 18: Gaussian Mixture Models and Expectation Maximization
Lecture 18: Gaussian Mixture Models and Expectation MaximizationLecture 18: Gaussian Mixture Models and Expectation Maximization
Lecture 18: Gaussian Mixture Models and Expectation Maximization
 
chapter 1
chapter 1chapter 1
chapter 1
 
Machine Learning Strategies for Time Series Prediction
Machine Learning Strategies for Time Series PredictionMachine Learning Strategies for Time Series Prediction
Machine Learning Strategies for Time Series Prediction
 
Poisson distribution
Poisson distributionPoisson distribution
Poisson distribution
 
Multivariate data analysis
Multivariate data analysisMultivariate data analysis
Multivariate data analysis
 
Linear Algebra – A Powerful Tool for Data Science
Linear Algebra – A Powerful Tool for Data ScienceLinear Algebra – A Powerful Tool for Data Science
Linear Algebra – A Powerful Tool for Data Science
 
Prim's algorithm
Prim's algorithmPrim's algorithm
Prim's algorithm
 
Graph theory
Graph theoryGraph theory
Graph theory
 

Similaire à R Code for EM Algorithm

Multistage condition-monitoring-system-of-aircraft-gas-turbine-engine
Multistage condition-monitoring-system-of-aircraft-gas-turbine-engineMultistage condition-monitoring-system-of-aircraft-gas-turbine-engine
Multistage condition-monitoring-system-of-aircraft-gas-turbine-engine
Cemal Ardil
 
Data structures notes for college students btech.pptx
Data structures notes for college students btech.pptxData structures notes for college students btech.pptx
Data structures notes for college students btech.pptx
KarthikVijay59
 
Company_X_Data_Analyst_Challenge
Company_X_Data_Analyst_ChallengeCompany_X_Data_Analyst_Challenge
Company_X_Data_Analyst_Challenge
Mark Yashar
 
Pairwise testing sagar_hadawale
Pairwise  testing sagar_hadawalePairwise  testing sagar_hadawale
Pairwise testing sagar_hadawale
Sagar Hadawale
 
c++ program I need to sort arrays using an insertion sort and a mer.pdf
c++ program I need to sort arrays using an insertion sort and a mer.pdfc++ program I need to sort arrays using an insertion sort and a mer.pdf
c++ program I need to sort arrays using an insertion sort and a mer.pdf
dhavalbl38
 

Similaire à R Code for EM Algorithm (20)

Multistage condition-monitoring-system-of-aircraft-gas-turbine-engine
Multistage condition-monitoring-system-of-aircraft-gas-turbine-engineMultistage condition-monitoring-system-of-aircraft-gas-turbine-engine
Multistage condition-monitoring-system-of-aircraft-gas-turbine-engine
 
Debs 2011 pattern rewritingforeventprocessingoptimization
Debs 2011  pattern rewritingforeventprocessingoptimizationDebs 2011  pattern rewritingforeventprocessingoptimization
Debs 2011 pattern rewritingforeventprocessingoptimization
 
Chap08alg
Chap08algChap08alg
Chap08alg
 
Chap08alg
Chap08algChap08alg
Chap08alg
 
Data structures notes for college students btech.pptx
Data structures notes for college students btech.pptxData structures notes for college students btech.pptx
Data structures notes for college students btech.pptx
 
Company_X_Data_Analyst_Challenge
Company_X_Data_Analyst_ChallengeCompany_X_Data_Analyst_Challenge
Company_X_Data_Analyst_Challenge
 
The language is C. Thanks. Write a function that computes the intege.pdf
The language is C. Thanks. Write a function that computes the intege.pdfThe language is C. Thanks. Write a function that computes the intege.pdf
The language is C. Thanks. Write a function that computes the intege.pdf
 
Rsa documentation
Rsa documentationRsa documentation
Rsa documentation
 
Sorting pnk
Sorting pnkSorting pnk
Sorting pnk
 
Interm codegen
Interm codegenInterm codegen
Interm codegen
 
Introduction to Algorithms
Introduction to AlgorithmsIntroduction to Algorithms
Introduction to Algorithms
 
Conference poster 6
Conference poster 6Conference poster 6
Conference poster 6
 
Performance Portability Through Descriptive Parallelism
Performance Portability Through Descriptive ParallelismPerformance Portability Through Descriptive Parallelism
Performance Portability Through Descriptive Parallelism
 
Pairwise testing sagar_hadawale
Pairwise  testing sagar_hadawalePairwise  testing sagar_hadawale
Pairwise testing sagar_hadawale
 
Brief introduction to Algorithm analysis
Brief introduction to Algorithm analysis Brief introduction to Algorithm analysis
Brief introduction to Algorithm analysis
 
Unit ii algorithm
Unit   ii algorithmUnit   ii algorithm
Unit ii algorithm
 
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
[FT-11][suhorng] “Poor Man's” Undergraduate Compilers
 
TIME EXECUTION OF DIFFERENT SORTED ALGORITHMS
TIME EXECUTION   OF  DIFFERENT SORTED ALGORITHMSTIME EXECUTION   OF  DIFFERENT SORTED ALGORITHMS
TIME EXECUTION OF DIFFERENT SORTED ALGORITHMS
 
Algorithm Analysis.pdf
Algorithm Analysis.pdfAlgorithm Analysis.pdf
Algorithm Analysis.pdf
 
c++ program I need to sort arrays using an insertion sort and a mer.pdf
c++ program I need to sort arrays using an insertion sort and a mer.pdfc++ program I need to sort arrays using an insertion sort and a mer.pdf
c++ program I need to sort arrays using an insertion sort and a mer.pdf
 

Plus de Avjinder (Avi) Kaler

Plus de Avjinder (Avi) Kaler (20)

Unleashing Real-World Simulations: A Python Tutorial by Avjinder Kaler
Unleashing Real-World Simulations: A Python Tutorial by Avjinder KalerUnleashing Real-World Simulations: A Python Tutorial by Avjinder Kaler
Unleashing Real-World Simulations: A Python Tutorial by Avjinder Kaler
 
Tutorial for Deep Learning Project with Keras
Tutorial for Deep Learning Project  with KerasTutorial for Deep Learning Project  with Keras
Tutorial for Deep Learning Project with Keras
 
Tutorial for DBSCAN Clustering in Machine Learning
Tutorial for DBSCAN Clustering in Machine LearningTutorial for DBSCAN Clustering in Machine Learning
Tutorial for DBSCAN Clustering in Machine Learning
 
Python Code for Classification Supervised Machine Learning.pdf
Python Code for Classification Supervised Machine Learning.pdfPython Code for Classification Supervised Machine Learning.pdf
Python Code for Classification Supervised Machine Learning.pdf
 
Sql tutorial for select, where, order by, null, insert functions
Sql tutorial for select, where, order by, null, insert functionsSql tutorial for select, where, order by, null, insert functions
Sql tutorial for select, where, order by, null, insert functions
 
Kaler et al 2018 euphytica
Kaler et al 2018 euphyticaKaler et al 2018 euphytica
Kaler et al 2018 euphytica
 
Association mapping identifies loci for canopy coverage in diverse soybean ge...
Association mapping identifies loci for canopy coverage in diverse soybean ge...Association mapping identifies loci for canopy coverage in diverse soybean ge...
Association mapping identifies loci for canopy coverage in diverse soybean ge...
 
Genome-Wide Association Mapping of Carbon Isotope and Oxygen Isotope Ratios i...
Genome-Wide Association Mapping of Carbon Isotope and Oxygen Isotope Ratios i...Genome-Wide Association Mapping of Carbon Isotope and Oxygen Isotope Ratios i...
Genome-Wide Association Mapping of Carbon Isotope and Oxygen Isotope Ratios i...
 
Genome-wide association mapping of canopy wilting in diverse soybean genotypes
Genome-wide association mapping of canopy wilting in diverse soybean genotypesGenome-wide association mapping of canopy wilting in diverse soybean genotypes
Genome-wide association mapping of canopy wilting in diverse soybean genotypes
 
Tutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using RTutorial for Estimating Broad and Narrow Sense Heritability using R
Tutorial for Estimating Broad and Narrow Sense Heritability using R
 
Tutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plotsTutorial for Circular and Rectangular Manhattan plots
Tutorial for Circular and Rectangular Manhattan plots
 
Genomic Selection with Bayesian Generalized Linear Regression model using R
Genomic Selection with Bayesian Generalized Linear Regression model using RGenomic Selection with Bayesian Generalized Linear Regression model using R
Genomic Selection with Bayesian Generalized Linear Regression model using R
 
Genome wide association mapping
Genome wide association mappingGenome wide association mapping
Genome wide association mapping
 
Nutrient availability response to sulfur amendment in histosols having variab...
Nutrient availability response to sulfur amendment in histosols having variab...Nutrient availability response to sulfur amendment in histosols having variab...
Nutrient availability response to sulfur amendment in histosols having variab...
 
Sugarcane yield and plant nutrient response to sulfur amended everglades hist...
Sugarcane yield and plant nutrient response to sulfur amended everglades hist...Sugarcane yield and plant nutrient response to sulfur amended everglades hist...
Sugarcane yield and plant nutrient response to sulfur amended everglades hist...
 
R code descriptive statistics of phenotypic data by Avjinder Kaler
R code descriptive statistics of phenotypic data by Avjinder KalerR code descriptive statistics of phenotypic data by Avjinder Kaler
R code descriptive statistics of phenotypic data by Avjinder Kaler
 
Population genetics
Population geneticsPopulation genetics
Population genetics
 
Quantitative genetics
Quantitative geneticsQuantitative genetics
Quantitative genetics
 
Abiotic stresses in plant
Abiotic stresses in plantAbiotic stresses in plant
Abiotic stresses in plant
 
Seed rate calculation for experiment
Seed rate calculation for experimentSeed rate calculation for experiment
Seed rate calculation for experiment
 

Dernier

如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
wsppdmt
 
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
gajnagarg
 
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
gajnagarg
 
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
gajnagarg
 
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Klinik kandungan
 
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
ZurliaSoop
 
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
HyderabadDolls
 
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
gajnagarg
 
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
nirzagarg
 
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
nirzagarg
 

Dernier (20)

7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.ppt7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.ppt
 
如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
如何办理英国诺森比亚大学毕业证(NU毕业证书)成绩单原件一模一样
 
Ranking and Scoring Exercises for Research
Ranking and Scoring Exercises for ResearchRanking and Scoring Exercises for Research
Ranking and Scoring Exercises for Research
 
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In bhavnagar [ 7014168258 ] Call Me For Genuine Models...
 
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
Digital Advertising Lecture for Advanced Digital & Social Media Strategy at U...
 
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
Charbagh + Female Escorts Service in Lucknow | Starting ₹,5K To @25k with A/C...
 
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
Top profile Call Girls In Indore [ 7014168258 ] Call Me For Genuine Models We...
 
5CL-ADBA,5cladba, Chinese supplier, safety is guaranteed
5CL-ADBA,5cladba, Chinese supplier, safety is guaranteed5CL-ADBA,5cladba, Chinese supplier, safety is guaranteed
5CL-ADBA,5cladba, Chinese supplier, safety is guaranteed
 
Fun all Day Call Girls in Jaipur 9332606886 High Profile Call Girls You Ca...
Fun all Day Call Girls in Jaipur   9332606886  High Profile Call Girls You Ca...Fun all Day Call Girls in Jaipur   9332606886  High Profile Call Girls You Ca...
Fun all Day Call Girls in Jaipur 9332606886 High Profile Call Girls You Ca...
 
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
 
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book nowVadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
Vadodara 💋 Call Girl 7737669865 Call Girls in Vadodara Escort service book now
 
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
 
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
Jual Obat Aborsi Surabaya ( Asli No.1 ) 085657271886 Obat Penggugur Kandungan...
 
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
Jodhpur Park | Call Girls in Kolkata Phone No 8005736733 Elite Escort Service...
 
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24  Building Real-Time Pipelines With FLaNKDATA SUMMIT 24  Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
 
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Latur [ 7014168258 ] Call Me For Genuine Models We ...
 
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
Top profile Call Girls In Begusarai [ 7014168258 ] Call Me For Genuine Models...
 
TrafficWave Generator Will Instantly drive targeted and engaging traffic back...
TrafficWave Generator Will Instantly drive targeted and engaging traffic back...TrafficWave Generator Will Instantly drive targeted and engaging traffic back...
TrafficWave Generator Will Instantly drive targeted and engaging traffic back...
 
Gulbai Tekra * Cheap Call Girls In Ahmedabad Phone No 8005736733 Elite Escort...
Gulbai Tekra * Cheap Call Girls In Ahmedabad Phone No 8005736733 Elite Escort...Gulbai Tekra * Cheap Call Girls In Ahmedabad Phone No 8005736733 Elite Escort...
Gulbai Tekra * Cheap Call Girls In Ahmedabad Phone No 8005736733 Elite Escort...
 
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
 

R Code for EM Algorithm

  • 1. R Code For Expectation-Maximization (EM) Algorithm for Gaussian Mixtures Avjinder Singh Kaler This is the R code for EM algorithm. Here, R code is used for 1D, 2D and 3 clusters dataset. One can modify this code and use for his own project.
  • 2. Expectation-Maximization (EM) is an iterative algorithm for finding maximum likelihood estimates of parameters in statistical models, where the model depends on unobserved latent variables. The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E step. Iterate these steps until convergence detect. In simple language, each iteration consists of an E-step and an M-step. Convergence is generally detected by computing the value of the log-likelihood after each iteration and halting when it appears not to be changing in a significant manner from one iteration to the next.
  • 3. #FaithFUL Dataset 1D # #load data # data(faithful) Y<-faithful$waiting # #plot histogram of waiting times # postscript("geyser.eps",width=5,height=3,onefile=F) par(mar=c(2,2,1,1),mgp=c(1,0.5,0),cex=0.7,lwd=0.5,las=1) hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)", xlim=c(40,100),main="",axes=F) axis(1,seq(40,100,6),pos=0) axis(2,seq(0,35,5),pos=40,las=1) dev.off() # #log-likelihood function # ln<-function(p,Y) { -sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))) } # #EM algorithm # emstep<-function(Y,p) { EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/ (p[1]*dnorm(Y,p[2],sqrt(p[4])) +(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))) p[1]<-mean(EZ) p[2]<-sum(EZ*Y)/sum(EZ) p[3]<-sum((1-EZ)*Y)/sum(1-EZ) p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ) p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ) p } emiteration<-function(Y,p,n=10) { for (i in (1:n)) { p<-emstep(Y,p) } p } # #starting values # p<-c(0.5,40,90,16,16) # #2 iterations of EM algorithm # p<-emiteration(Y,p,2) p #check for convergence p<-emstep(Y,p) p # #plot histogram with fitted distribution # hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)", xlim=c(40,100), main="",axes=F) axis(1,seq(40,100,6),pos=0)
  • 4. axis(2,seq(0,35,5),pos=40,las=1) x<-seq(40,100,0.1) y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5])) lines(x,y*3*length(Y)) #5 iterations of EM algorithm #log-likelihood function # ln<-function(p,Y) { -sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))) } # #EM algorithm # emstep<-function(Y,p) { EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/ (p[1]*dnorm(Y,p[2],sqrt(p[4])) +(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))) p[1]<-mean(EZ) p[2]<-sum(EZ*Y)/sum(EZ) p[3]<-sum((1-EZ)*Y)/sum(1-EZ) p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ) p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ) p } emiteration<-function(Y,p,n=10) { for (i in (1:n)) { p<-emstep(Y,p) } p } # #starting values # p<-c(0.5,40,90,16,16) # #5 iterations of EM algorithm # p<-emiteration(Y,p,5) p #check for convergence p<-emstep(Y,p) p # #plot histogram with fitted distribution # hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)", xlim=c(40,100), main="",axes=F) axis(1,seq(40,100,6),pos=0) axis(2,seq(0,35,5),pos=40,las=1) x<-seq(40,100,0.1) y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5])) lines(x,y*3*length(Y)) #10 iterations of EM algorithm #log-likelihood function # ln<-function(p,Y) { -sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))) } # #EM algorithm # emstep<-function(Y,p) { EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/ (p[1]*dnorm(Y,p[2],sqrt(p[4])) +(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))
  • 5. p[1]<-mean(EZ) p[2]<-sum(EZ*Y)/sum(EZ) p[3]<-sum((1-EZ)*Y)/sum(1-EZ) p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ) p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ) p } emiteration<-function(Y,p,n=10) { for (i in (1:n)) { p<-emstep(Y,p) } p } # #starting values # p<-c(0.5,40,90,16,16) # #10 iterations of EM algorithm # p<-emiteration(Y,p,10) p #check for convergence p<-emstep(Y,p) p # #plot histogram with fitted distribution # hist(Y,breaks=seq(40,100,3),xlab="Waiting time between eruptions (min)", xlim=c(40,100), main="",axes=F) axis(1,seq(40,100,6),pos=0) axis(2,seq(0,35,5),pos=40,las=1) x<-seq(40,100,0.1) y<-p[1]*dnorm(x,p[2],sqrt(p[4]))+(1-p[1])*dnorm(x,p[3],sqrt(p[5])) lines(x,y*3*length(Y)) #Faithful Dataset 2D #load data # # #Bivariate mixture model # data(faithful) Y<-faithful$waiting X<-faithful$eruptions plot(Y,X,xlab="Waiting time between eruptions (min)", ylab="Eruption times (min)", xlim=c(40,100),ylim=c(0,6), main="",axes=F,cex=0.7) axis(1,seq(40,100,6),pos=0) axis(2,seq(0,6,1),pos=40,las=1) # #density of bivariate normal # dbinorm<-function(x,m,s){ 1/sqrt(det(2*pi*s))*exp(-0.5*t(x-m)%*%solve(s)%*%(x-m)) } f.hat<-function(x,p) { p$p*dbinorm(x,p$m1,p$s1)+(1-p$p)*dbinorm(x,p$m2,p$s2) } Estep<-function(x,p) { p$p*dbinorm(x,p$m1,p$s1)/
  • 6. (p$p*dbinorm(x,p$m1,p$s1) +(1-p$p)*dbinorm(x,p$m2,p$s2)) } emstep<-function(Y,p) { EZ<-apply(Y,1,Estep,p) p$p<-mean(EZ) p$m1<-(t(Y)%*%diag(EZ)%*%rep(1,nrow(Y)))/sum(EZ) p$m2<-(t(Y)%*%diag(1-EZ)%*%rep(1,nrow(Y)))/sum(1-EZ) MY<-matrix(p$m1,nrow(Y),2,byrow=T) p$s1<-(t(Y-MY)%*%diag(EZ)%*%(Y-MY))/sum(EZ) MY<-matrix(p$m2,nrow(Y),2,byrow=T) p$s2<-(t(Y-MY)%*%diag(1-EZ)%*%(Y-MY))/sum(1-EZ) p } #---------------------------------- X<-faithful$waiting Y<-faithful$eruptions Z<-cbind(X,Y) #starting values # p=p.0<-list(p=0.5,m1=c(20,6),m2=c(70,6), s1=matrix(c(10,0,0,1),2,2),s2=matrix(c(10,0,0,1),2,2)) #-------------------------------------- lh=NULL P=matrix(0,20,length(p)) for(i in 1:20){ p<-emiteration(Z,p,i) lh[i]=sum(log(apply(Z,1,f.hat,p))) } plot(lh,type='o',xlab='iterations',ylab='likelihood') #--------------------------------------- require(ggplot2) #---Graph the mixture: n.l=m.l=30 x=seq(40,100,length= n.l) y=seq(0,6,length = m.l) xy=as.matrix(expand.grid(x,y)) # 2 Number of Iterations p=p.0 p<-emiteration(Z,p,2) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior class.lab1=round(apply(Z,1,Estep,p)) plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)", ylab='Eruption time (min)',col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list('iteration = 2',font=3)) # 5 Number of Iterations p=p.0 p<-emiteration(Z,p,5) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior class.lab1=round(apply(Z,1,Estep,p)) plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)", ylab='Eruption time (min)',col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list('iteration = 5',font=3))
  • 7. # 10 Number of Iterations p=p.0 p<-emiteration(Z,p,10) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior class.lab1=round(apply(Z,1,Estep,p)) plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)", ylab='Eruption time (min)',col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list('iteration = 10',font=3)) # 20 Number of Iterations p=p.0 p<-emiteration(Z,p,20) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior class.lab1=round(apply(Z,1,Estep,p)) plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)", ylab='Eruption time (min)',col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list('iteration = 20',font=3)) # 50 Number of Iterations p=p.0 p<-emiteration(Z,p,50) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior class.lab1=round(apply(Z,1,Estep,p)) plot(X,Y,xlim=c(40,100),ylim=c(0,6),pch=20,cex=.75,xlab="Waiting time (min)", ylab='Eruption time (min)',col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list('iteration = 50',font=3)) # Simulated Data for 3 Clusters #load data library(mvtnorm) library(graphics) library(MASS) require(ggplot2) X=rmvnorm(100, mean=c(45,4), sigma=diag(c(2,1))) Y=rmvnorm(100, mean=c(55,6), sigma=diag(c(2,1))) U=rmvnorm(100, mean=c(65,7), sigma=diag(c(3,1))) Z=rbind(X,Y, U) X11() km_Z=kmeans(Z,3)$cluster plot(Z,col=km_Z) #log-likelihood function # ln<-function(p,Y) { -sum(log(p[1]*dnorm(Y,p[2],sqrt(p[4]))+(1-p[1])*dnorm(Y,p[3],sqrt(p[5])))) }
  • 8. # #EM algorithm # emstep<-function(Y,p) { EZ<-p[1]*dnorm(Y,p[2],sqrt(p[4]))/ (p[1]*dnorm(Y,p[2],sqrt(p[4])) +(1-p[1])*dnorm(Y,p[3],sqrt(p[5]))) p[1]<-mean(EZ) p[2]<-sum(EZ*Y)/sum(EZ) p[3]<-sum((1-EZ)*Y)/sum(1-EZ) p[4]<-sum(EZ*(Y-p[2])^2)/sum(EZ) p[5]<-sum((1-EZ)*(Y-p[3])^2)/sum(1-EZ) p } emiteration<-function(Y,p,n=2) { for (i in (1:n)) { p<-emstep(Y,p) } p } #density of bivariate normal # dbinorm<-function(x,m,s){ 1/sqrt(det(2*pi*s))*exp(-0.5*t(x-m)%*%solve(s)%*%(x-m)) } f.hat<-function(x,p) { p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+ (1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3) } Estep1<-function(x,p) { p$p1*dbinorm(x,p$m1,p$s1)/ (p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+ (1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3)) } Estep2<-function(x,p){ p$p2*dbinorm(x,p$m2,p$s2)/ (p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+ (1-(p$p1+p$p2))*dbinorm(x, p$m3, p$s3)) } Estep3<-function(x,p){ p$p3*dbinorm(x,p$m3,p$s3)/ (p$p1*dbinorm(x,p$m1,p$s1)+p$p2*dbinorm(x,p$m2,p$s2)+ p$p3*dbinorm(x, p$m3, p$s3)) } emstep<-function(Y,p){ EZ1<-apply(Y,1,Estep1,p) EZ2<-apply(Y,1,Estep2,p) p$p1<-mean(EZ1) p$p2<-mean(EZ2) p$m1<-(t(Y)%*%diag(EZ1)%*%rep(1,nrow(Y)))/sum(EZ1) p$m2<-(t(Y)%*%diag(EZ2)%*%rep(1,nrow(Y)))/sum(EZ2) p$m3<-(t(Y)%*%diag(1-(EZ1+EZ2))%*%rep(1,nrow(Y)))/sum(1-(EZ1+EZ2)) MY<-matrix(p$m1,nrow(Y),2,byrow=T) p$s1<-(t(Y-MY)%*%diag(EZ1)%*%(Y-MY))/sum(EZ1) MY<-matrix(p$m2,nrow(Y),2,byrow=T) p$s2<-(t(Y-MY)%*%diag(EZ2)%*%(Y-MY))/sum(EZ2) MY<-matrix(p$m3,nrow(Y),2,byrow=T) p$s3<-(t(Y-MY)%*%diag(1-(EZ1+EZ2))%*%(Y-MY))/sum(1-(EZ1+EZ2)) p } emiteration<-function(Y,p,n=10) { for (i in (1:n)) { p<-emstep(Y,p) }
  • 9. p } #starting values # p=p.0<-list(p1=1/3,p2=1/3,p3=1/3,m1=c(45,2),m2=c(60,4),m3=c(70,8), s1=matrix(c(1,0,0,1),2,2),s2=matrix(c(1,0,0,1),2,2), s3=matrix(c(1,0,0,1),2,2)) #-------------------------------------- lh=NULL P=matrix(0,20,length(p)) for(i in 1:20){ p<-emiteration(Z,p,i) lh[i]=sum(log(apply(Z,1,f.hat,p))) } X11() plot(lh,type='o',xlab='iterations',ylab='likelihood') #--------------------------------------- #---Graph the mixture: n.l=m.l=30 x=seq(30,70,length= n.l) y=seq(0,10,length = m.l) xy=as.matrix(expand.grid(x,y)) # 1 Number of iterations p=p.0 p<-emiteration(Z,p,1) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 1, sep=" "),font=3)) # 2 Number of iterations p=p.0 p<-emiteration(Z,p,2) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 2, sep=" "),font=3)) # 5 Number of iterations p=p.0 p<-emiteration(Z,p,5) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 5, sep=" "),font=3))
  • 10. # 10 Number of iterations p=p.0 p<-emiteration(Z,p,10) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 10, sep=" "),font=3)) # 20 Number of iterations p=p.0 p<-emiteration(Z,p,20) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 20, sep=" "),font=3)) # 50 Number of iterations p=p.0 p<-emiteration(Z,p,50) z=apply(xy,1,f.hat,p) z.mat1=matrix(z,n.l,m.l) #postrior X11() class.lab1=round(apply(Z,1,Estep1,p)) class.lab2=round(apply(Z,1,Estep2,p)) plot(Z, col=class.lab1+2) contour(x,y,z.mat1,label="", add=T) title(main=list(paste('iteration =', 50, sep=" "),font=3))