SlideShare a Scribd company logo
1 of 6
Prepared by Volkan OBAN
CLUSTERGRAM
SOURCE:
 https://gist.github.com/hadley/439761 (hadley/clustergram-had.r)
 http://www.r-statistics.com/tag/large-data/
CODES:
#' k-means clustering backend for clustergram()
#'
#' This is the type of function that clustergram() takes for the clustering.
#' A function with the same arguments and return structure allows a different
#' clustering algorithm to be plugged in.
#'
#' @param Data Numeric data, one row per item and one column per dimension.
#' @param k Number of clusters; passed to kmeans() as its second argument.
#' @param ... Further arguments forwarded to kmeans().
#' @return A list with two elements:
#'   `cluster`, a vector of length n (the number of items) indicating to which
#'   cluster each item belongs, and `centers`, a k x 1 matrix with one number
#'   per cluster: the cluster center weighted by the loadings of the first
#'   principal component of `Data`.
clustergram.kmeans <- function(Data, k, ...) {
  cl <- kmeans(Data, k, ...)

  # Reduce every k-means center to a single number by weighting its
  # coordinates with the first principal-component loadings of the data.
  first.loading <- princomp(Data)$loadings[, 1]
  centers <- cl$centers %*% first.loading

  list(
    cluster = cl$cluster,
    centers = centers
  )
}
#' Draw a clustergram with matlines()
#'
#' Default plotting backend for clustergram(): one line per observation,
#' running across the requested numbers of clusters, optionally with the
#' cluster centers overlaid as red points.
#'
#' @param X Matrix of x coordinates, one row per observation and one column
#'   per value in `k.range`.
#' @param Y Matrix of y coordinates with the same layout as `X`.
#' @param k.range Vector of cluster counts; used for the x-axis ticks and the
#'   grey vertical guide lines.
#' @param x.range,y.range Axis limits passed to plot() as `xlim` / `ylim`.
#' @param COL Vector of line colours, one per observation.
#' @param add.center.points Logical; if `TRUE` the cluster centers are drawn.
#' @param centers.points List of data frames with columns `y` and `x` holding
#'   the center coordinates. `NULL` elements are skipped.
#' @return `NULL`, invisibly; called for its plotting side effect.
clustergram.plot.matlines <- function(X, Y, k.range,
                                      x.range, y.range, COL,
                                      add.center.points, centers.points) {
  # Start from an empty canvas; the axes are drawn by hand below so that the
  # x-axis ticks sit exactly on the k values.
  plot(0, 0,
    col = "white", xlim = x.range, ylim = y.range,
    axes = FALSE,
    xlab = "Number of clusters (k)",
    ylab = "PCA weighted Mean of the clusters",
    main = c(
      "Clustergram of the PCA-weighted Mean of",
      "the clusters k-mean clusters vs number of clusters (k)"
    )
  )
  axis(side = 1, at = k.range)
  axis(side = 2)
  abline(v = k.range, col = "grey")

  # matlines() draws one line per column, hence the transposes: after t()
  # every column is a single observation.
  matlines(t(X), t(Y), pch = 19, col = COL, lty = 1, lwd = 1.5)

  if (add.center.points) {
    # The list may be indexed by k, which leaves NULL slots; drop them before
    # stacking all centers into one data frame.
    filled <- Filter(Negate(is.null), centers.points)
    if (length(filled) > 0) {
      pts <- do.call(rbind, filled)
      points(pts[["x"]], pts[["y"]], pch = 19, col = "red", cex = 1.3)
    }
  }

  invisible(NULL)
}
#' Clustergram: follow observations across increasing numbers of clusters
#'
#' For every k in `k.range` the data are clustered, each observation is placed
#' at the (PCA-weighted) center of its cluster plus a small per-observation
#' offset, and the result is handed to the plotting function.
#'
#' @param Data Should be a scaled matrix in which each column belongs to a
#'   different dimension of the observations. Data frames are converted with
#'   as.matrix().
#' @param k.range Vector with the numbers of clusters to plot the clustergram
#'   for.
#' @param clustering.function Function called as `f(Data, k)` that returns a
#'   list with elements `cluster` and `centers` (one number per cluster).
#' @param clustergram.plot Function that draws the result; it receives
#'   `X, Y, k.range, x.range, y.range, COL, add.center.points, centers.points`.
#' @param line.width The amount to lift each line in the plot so the lines of
#'   one cluster do not superimpose each other.
#' @param add.center.points Whether to plot points at the cluster means.
#' @return Whatever `clustergram.plot` returns.
clustergram <- function(Data, k.range = 2:10,
                        clustering.function = clustergram.kmeans,
                        clustergram.plot = clustergram.plot.matlines,
                        line.width = .004, add.center.points = TRUE) {
  stopifnot(length(k.range) > 0)

  Data <- as.matrix(Data)
  n <- nrow(Data)

  # Projection of every observation on the first principal component.
  PCA.1 <- Data %*% princomp(Data)$loadings[, 1]

  # Colour each line by the rank of its observation along the first
  # component. Indexing the palette with order() instead of rank() would hand
  # observation i the colour of an unrelated observation.
  colour.rank <- rank(PCA.1, ties.method = "first")
  if (requireNamespace("colorspace", quietly = TRUE)) {
    COL <- colorspace::heat_hcl(n)[colour.rank] # line colors
  } else {
    COL <- rainbow(n)[colour.rank] # line colors
    warning('Please consider installing the package "colorspace" for prittier colors')
  }

  line.width <- rep_len(line.width, n)

  # One row per observation, one column per value of k.
  n.k <- length(k.range)
  X <- matrix(NA_real_, nrow = n, ncol = n.k)
  Y <- matrix(NA_real_, nrow = n, ncol = n.k)

  # Indexed by k (not by position), so slots below min(k.range) stay NULL.
  centers.points <- list()

  for (j in seq_len(n.k)) {
    k <- k.range[j]
    k.clusters <- clustering.function(Data, k)

    clusters.vec <- k.clusters$cluster
    the.centers <- k.clusters$centers

    # Within each cluster, stack the lines on top of each other: the i-th
    # member of a cluster is lifted by the cumulative line width so far.
    noise <- ave(line.width, clusters.vec, FUN = cumsum)

    Y[, j] <- the.centers[clusters.vec] + noise
    X[, j] <- k

    centers.points[[k]] <- data.frame(y = as.vector(the.centers), x = k)
  }

  x.range <- range(k.range)
  y.range <- range(PCA.1)

  clustergram.plot(
    X, Y, k.range,
    x.range, y.range, COL,
    add.center.points, centers.points
  )
}
# Examples. They are wrapped in `if (FALSE)` so that sourcing this code only
# defines the functions; run the statements by hand to produce the plots.
if (FALSE) {
  png("d:clustergram_plots_%03d.png", 650, 650, pointsize = 15)

  data(iris)
  set.seed(250)
  par(cex.lab = 1.5, cex.main = 1.2)
  Data <- scale(iris[, -5]) # notice I am scaling the vectors
  # Notice how line.width is used. Play with it on your problem, according to
  # the scale of Y.
  clustergram(Data, k.range = 2:8, line.width = 0.004)

  # Repeat the clustergram six times to see how the k-means result varies
  # from run to run.
  set.seed(500)
  Data <- scale(iris[, -5]) # notice I am scaling the vectors
  par(cex.lab = 1.2, cex.main = .7)
  par(mfrow = c(3, 2))
  for (i in 1:6) {
    clustergram(Data, k.range = 2:8, line.width = .004, add.center.points = TRUE)
  }
  par(mfrow = c(1, 1))

  # Three simulated groups in three dimensions.
  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 1, sd = 0.3), rnorm(100, 1, sd = 0.3)),
    cbind(rnorm(100, 2, sd = 0.3), rnorm(100, 2, sd = 0.3), rnorm(100, 2, sd = 0.3))
  )
  clustergram(Data, k.range = 2:5, line.width = .004, add.center.points = TRUE)

  # Four simulated groups in four dimensions.
  set.seed(250)
  Data <- rbind(
    cbind(rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3), rnorm(100, 1, sd = 0.3), rnorm(100, 0, sd = 0.3)),
    cbind(rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 0, sd = 0.3), rnorm(100, 1, sd = 0.3))
  )
  clustergram(Data, k.range = 2:8, line.width = .004, add.center.points = TRUE)

  dev.off()
}
# Demo 1: load the clustergram code from the web and plot the iris data.
# NOTE(review): both calls below download and execute remote code (the first
# one over plain http) - inspect the scripts before running them.
# Making sure we can source code from github
source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt")
source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")
data(iris)
set.seed(250)
par(cex.lab = 1.5, cex.main = 1.2)
Data <- scale(iris[, -5]) # notice I am scaling the vectors
clustergram(Data, k.range = 2:8, line.width = 0.004)

# Demo 2: six repeated clustergrams of the same data on a 3 x 2 grid.
# Making sure we can source code from github
source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_https.r.txt")
source_https("https://raw.github.com/talgalili/R-code-snippets/master/clustergram.r")
set.seed(500)
Data <- scale(iris[, -5]) # notice I am scaling the vectors
par(cex.lab = 1.2, cex.main = .7)
par(mfrow = c(3, 2))
for (i in 1:6) {
  clustergram(Data, k.range = 2:8, line.width = .004, add.center.points = TRUE)
}
CLUSTERGRAM

More Related Content

What's hot

R-ggplot2 package Examples
R-ggplot2 package ExamplesR-ggplot2 package Examples
R-ggplot2 package ExamplesDr. Volkan OBAN
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting SpatialFAO
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Dr. Volkan OBAN
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.Dr. Volkan OBAN
 
Programs in array using SWIFT
Programs in array using SWIFTPrograms in array using SWIFT
Programs in array using SWIFTvikram mahendra
 
Pandas pythonfordatascience
Pandas pythonfordatasciencePandas pythonfordatascience
Pandas pythonfordatascienceNishant Upadhyay
 
Statistics - ArgMax Equation
Statistics - ArgMax EquationStatistics - ArgMax Equation
Statistics - ArgMax EquationAndrew Ferlitsch
 
K10692 control theory
K10692 control theoryK10692 control theory
K10692 control theorysaagar264
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat SheetACASH1011
 
Day 4b iteration and functions for-loops.pptx
Day 4b   iteration and functions  for-loops.pptxDay 4b   iteration and functions  for-loops.pptx
Day 4b iteration and functions for-loops.pptxAdrien Melquiond
 
Day 4a iteration and functions.pptx
Day 4a   iteration and functions.pptxDay 4a   iteration and functions.pptx
Day 4a iteration and functions.pptxAdrien Melquiond
 
The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212Mahmoud Samir Fayed
 
Data visualization using the grammar of graphics
Data visualization using the grammar of graphicsData visualization using the grammar of graphics
Data visualization using the grammar of graphicsRupak Roy
 
Python matplotlib cheat_sheet
Python matplotlib cheat_sheetPython matplotlib cheat_sheet
Python matplotlib cheat_sheetNishant Upadhyay
 

What's hot (20)

R-ggplot2 package Examples
R-ggplot2 package ExamplesR-ggplot2 package Examples
R-ggplot2 package Examples
 
NumPy Refresher
NumPy RefresherNumPy Refresher
NumPy Refresher
 
10. Getting Spatial
10. Getting Spatial10. Getting Spatial
10. Getting Spatial
 
Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.Data Visualization with R.ggplot2 and its extensions examples.
Data Visualization with R.ggplot2 and its extensions examples.
 
ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.ggplot2 extensions-ggtree.
ggplot2 extensions-ggtree.
 
Numpy python cheat_sheet
Numpy python cheat_sheetNumpy python cheat_sheet
Numpy python cheat_sheet
 
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
CLIM Undergraduate Workshop: Tutorial on R Software - Huang Huang, Oct 23, 2017
 
Programs in array using SWIFT
Programs in array using SWIFTPrograms in array using SWIFT
Programs in array using SWIFT
 
Pandas pythonfordatascience
Pandas pythonfordatasciencePandas pythonfordatascience
Pandas pythonfordatascience
 
Statistics - ArgMax Equation
Statistics - ArgMax EquationStatistics - ArgMax Equation
Statistics - ArgMax Equation
 
Scala.io
Scala.ioScala.io
Scala.io
 
K10692 control theory
K10692 control theoryK10692 control theory
K10692 control theory
 
Pandas Cheat Sheet
Pandas Cheat SheetPandas Cheat Sheet
Pandas Cheat Sheet
 
R
RR
R
 
Day 4b iteration and functions for-loops.pptx
Day 4b   iteration and functions  for-loops.pptxDay 4b   iteration and functions  for-loops.pptx
Day 4b iteration and functions for-loops.pptx
 
Day 4a iteration and functions.pptx
Day 4a   iteration and functions.pptxDay 4a   iteration and functions.pptx
Day 4a iteration and functions.pptx
 
The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212The Ring programming language version 1.10 book - Part 33 of 212
The Ring programming language version 1.10 book - Part 33 of 212
 
Seminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mmeSeminar PSU 10.10.2014 mme
Seminar PSU 10.10.2014 mme
 
Data visualization using the grammar of graphics
Data visualization using the grammar of graphicsData visualization using the grammar of graphics
Data visualization using the grammar of graphics
 
Python matplotlib cheat_sheet
Python matplotlib cheat_sheetPython matplotlib cheat_sheet
Python matplotlib cheat_sheet
 

Similar to CLUSTERGRAM

Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavVyacheslav Arbuzov
 
Артём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisАртём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisSpbDotNet Community
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Dr. Volkan OBAN
 
Basic R Data Manipulation
Basic R Data ManipulationBasic R Data Manipulation
Basic R Data ManipulationChu An
 
A quick introduction to R
A quick introduction to RA quick introduction to R
A quick introduction to RAngshuman Saha
 
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...Revolution Analytics
 
An overview of Python 2.7
An overview of Python 2.7An overview of Python 2.7
An overview of Python 2.7decoupled
 
BUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfBUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfkarthikaparthasarath
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryDatabricks
 
An example of R code for Data visualization
An example of R code for Data visualizationAn example of R code for Data visualization
An example of R code for Data visualizationLiang (Leon) Zhou
 

Similar to CLUSTERGRAM (20)

Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
 
Introduction to R
Introduction to RIntroduction to R
Introduction to R
 
Артём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data AnalysisАртём Акуляков - F# for Data Analysis
Артём Акуляков - F# for Data Analysis
 
Joclad 2010 d
Joclad 2010 dJoclad 2010 d
Joclad 2010 d
 
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
Optimization and Mathematical Programming in R and ROI - R Optimization Infra...
 
Oh Composable World!
Oh Composable World!Oh Composable World!
Oh Composable World!
 
R programming
R programmingR programming
R programming
 
Scala by Luc Duponcheel
Scala by Luc DuponcheelScala by Luc Duponcheel
Scala by Luc Duponcheel
 
Basic R Data Manipulation
Basic R Data ManipulationBasic R Data Manipulation
Basic R Data Manipulation
 
R Language Introduction
R Language IntroductionR Language Introduction
R Language Introduction
 
A quick introduction to R
A quick introduction to RA quick introduction to R
A quick introduction to R
 
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
R + Hadoop = Big Data Analytics. How Revolution Analytics' RHadoop Project Al...
 
An overview of Python 2.7
An overview of Python 2.7An overview of Python 2.7
An overview of Python 2.7
 
A tour of Python
A tour of PythonA tour of Python
A tour of Python
 
BUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdfBUilt in Functions and Simple programs in R.pdf
BUilt in Functions and Simple programs in R.pdf
 
RHadoop の紹介
RHadoop の紹介RHadoop の紹介
RHadoop の紹介
 
R programming language
R programming languageR programming language
R programming language
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
User Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love StoryUser Defined Aggregation in Apache Spark: A Love Story
User Defined Aggregation in Apache Spark: A Love Story
 
An example of R code for Data visualization
An example of R code for Data visualizationAn example of R code for Data visualization
An example of R code for Data visualization
 

More from Dr. Volkan OBAN

Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Dr. Volkan OBAN
 
Covid19py Python Package - Example
Covid19py  Python Package - ExampleCovid19py  Python Package - Example
Covid19py Python Package - ExampleDr. Volkan OBAN
 
Object detection with Python
Object detection with Python Object detection with Python
Object detection with Python Dr. Volkan OBAN
 
Python - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriPython - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriDr. Volkan OBAN
 
Linear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesLinear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesDr. Volkan OBAN
 
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ..."optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...Dr. Volkan OBAN
 
k-means Clustering in Python
k-means Clustering in Pythonk-means Clustering in Python
k-means Clustering in PythonDr. Volkan OBAN
 
Naive Bayes Example using R
Naive Bayes Example using  R Naive Bayes Example using  R
Naive Bayes Example using R Dr. Volkan OBAN
 
Data Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingData Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingDr. Volkan OBAN
 
Scikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonScikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonDr. Volkan OBAN
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Dr. Volkan OBAN
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetDr. Volkan OBAN
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleDr. Volkan OBAN
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleDr. Volkan OBAN
 
R Machine Learning packages( generally used)
R Machine Learning packages( generally used)R Machine Learning packages( generally used)
R Machine Learning packages( generally used)Dr. Volkan OBAN
 
treemap package in R and examples.
treemap package in R and examples.treemap package in R and examples.
treemap package in R and examples.Dr. Volkan OBAN
 
R-Data table Cheat Sheet
R-Data table Cheat SheetR-Data table Cheat Sheet
R-Data table Cheat SheetDr. Volkan OBAN
 
Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Dr. Volkan OBAN
 

More from Dr. Volkan OBAN (20)

Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
Conference Paper:IMAGE PROCESSING AND OBJECT DETECTION APPLICATION: INSURANCE...
 
Covid19py Python Package - Example
Covid19py  Python Package - ExampleCovid19py  Python Package - Example
Covid19py Python Package - Example
 
Object detection with Python
Object detection with Python Object detection with Python
Object detection with Python
 
Python - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) ParametreleriPython - Rastgele Orman(Random Forest) Parametreleri
Python - Rastgele Orman(Random Forest) Parametreleri
 
Linear Programming wi̇th R - Examples
Linear Programming wi̇th R - ExamplesLinear Programming wi̇th R - Examples
Linear Programming wi̇th R - Examples
 
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ..."optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
"optrees" package in R and examples.(optrees:finds optimal trees in weighted ...
 
k-means Clustering in Python
k-means Clustering in Pythonk-means Clustering in Python
k-means Clustering in Python
 
Naive Bayes Example using R
Naive Bayes Example using  R Naive Bayes Example using  R
Naive Bayes Example using R
 
R forecasting Example
R forecasting ExampleR forecasting Example
R forecasting Example
 
Data Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision MakingData Science and its Relationship to Big Data and Data-Driven Decision Making
Data Science and its Relationship to Big Data and Data-Driven Decision Making
 
Scikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-PythonScikit-learn Cheatsheet-Python
Scikit-learn Cheatsheet-Python
 
Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet Python Pandas for Data Science cheatsheet
Python Pandas for Data Science cheatsheet
 
Pandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheetPandas,scipy,numpy cheatsheet
Pandas,scipy,numpy cheatsheet
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an example
 
ReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an exampleReporteRs package in R. forming powerpoint documents-an example
ReporteRs package in R. forming powerpoint documents-an example
 
R Machine Learning packages( generally used)
R Machine Learning packages( generally used)R Machine Learning packages( generally used)
R Machine Learning packages( generally used)
 
treemap package in R and examples.
treemap package in R and examples.treemap package in R and examples.
treemap package in R and examples.
 
Mosaic plot in R.
Mosaic plot in R.Mosaic plot in R.
Mosaic plot in R.
 
R-Data table Cheat Sheet
R-Data table Cheat SheetR-Data table Cheat Sheet
R-Data table Cheat Sheet
 
Rcommands-for those who interested in R.
Rcommands-for those who interested in R.Rcommands-for those who interested in R.
Rcommands-for those who interested in R.
 

Recently uploaded

怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制
怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制
怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制vexqp
 
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...nirzagarg
 
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24  Building Real-Time Pipelines With FLaNKDATA SUMMIT 24  Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNKTimothy Spann
 
Digital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham WareDigital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham WareGraham Ware
 
Data Analyst Tasks to do the internship.pdf
Data Analyst Tasks to do the internship.pdfData Analyst Tasks to do the internship.pdf
Data Analyst Tasks to do the internship.pdftheeltifs
 
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...gajnagarg
 
Aspirational Block Program Block Syaldey District - Almora
Aspirational Block Program Block Syaldey District - AlmoraAspirational Block Program Block Syaldey District - Almora
Aspirational Block Program Block Syaldey District - AlmoraGovindSinghDasila
 
Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1ranjankumarbehera14
 
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...nirzagarg
 
Dubai Call Girls Peeing O525547819 Call Girls Dubai
Dubai Call Girls Peeing O525547819 Call Girls DubaiDubai Call Girls Peeing O525547819 Call Girls Dubai
Dubai Call Girls Peeing O525547819 Call Girls Dubaikojalkojal131
 
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...Klinik kandungan
 
PLE-statistics document for primary schs
PLE-statistics document for primary schsPLE-statistics document for primary schs
PLE-statistics document for primary schscnajjemba
 
7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.ppt7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.pptibrahimabdi22
 
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...nirzagarg
 
Discover Why Less is More in B2B Research
Discover Why Less is More in B2B ResearchDiscover Why Less is More in B2B Research
Discover Why Less is More in B2B Researchmichael115558
 
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格q6pzkpark
 
Jual Cytotec Asli Obat Aborsi No. 1 Paling Manjur
Jual Cytotec Asli Obat Aborsi No. 1 Paling ManjurJual Cytotec Asli Obat Aborsi No. 1 Paling Manjur
Jual Cytotec Asli Obat Aborsi No. 1 Paling Manjurptikerjasaptiker
 
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...gajnagarg
 

Recently uploaded (20)

怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制
怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制
怎样办理圣地亚哥州立大学毕业证(SDSU毕业证书)成绩单学校原版复制
 
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Satna [ 7014168258 ] Call Me For Genuine Models We ...
 
Cytotec in Jeddah+966572737505) get unwanted pregnancy kit Riyadh
Cytotec in Jeddah+966572737505) get unwanted pregnancy kit RiyadhCytotec in Jeddah+966572737505) get unwanted pregnancy kit Riyadh
Cytotec in Jeddah+966572737505) get unwanted pregnancy kit Riyadh
 
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24  Building Real-Time Pipelines With FLaNKDATA SUMMIT 24  Building Real-Time Pipelines With FLaNK
DATA SUMMIT 24 Building Real-Time Pipelines With FLaNK
 
Digital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham WareDigital Transformation Playbook by Graham Ware
Digital Transformation Playbook by Graham Ware
 
Data Analyst Tasks to do the internship.pdf
Data Analyst Tasks to do the internship.pdfData Analyst Tasks to do the internship.pdf
Data Analyst Tasks to do the internship.pdf
 
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...
Top profile Call Girls In Vadodara [ 7014168258 ] Call Me For Genuine Models ...
 
Aspirational Block Program Block Syaldey District - Almora
Aspirational Block Program Block Syaldey District - AlmoraAspirational Block Program Block Syaldey District - Almora
Aspirational Block Program Block Syaldey District - Almora
 
Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1Lecture_2_Deep_Learning_Overview-newone1
Lecture_2_Deep_Learning_Overview-newone1
 
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...
Top profile Call Girls In Hapur [ 7014168258 ] Call Me For Genuine Models We ...
 
Abortion pills in Doha Qatar (+966572737505 ! Get Cytotec
Abortion pills in Doha Qatar (+966572737505 ! Get CytotecAbortion pills in Doha Qatar (+966572737505 ! Get Cytotec
Abortion pills in Doha Qatar (+966572737505 ! Get Cytotec
 
Dubai Call Girls Peeing O525547819 Call Girls Dubai
Dubai Call Girls Peeing O525547819 Call Girls DubaiDubai Call Girls Peeing O525547819 Call Girls Dubai
Dubai Call Girls Peeing O525547819 Call Girls Dubai
 
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
Jual obat aborsi Bandung ( 085657271886 ) Cytote pil telat bulan penggugur ka...
 
PLE-statistics document for primary schs
PLE-statistics document for primary schsPLE-statistics document for primary schs
PLE-statistics document for primary schs
 
7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.ppt7. Epi of Chronic respiratory diseases.ppt
7. Epi of Chronic respiratory diseases.ppt
 
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...
Top profile Call Girls In Bihar Sharif [ 7014168258 ] Call Me For Genuine Mod...
 
Discover Why Less is More in B2B Research
Discover Why Less is More in B2B ResearchDiscover Why Less is More in B2B Research
Discover Why Less is More in B2B Research
 
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格
一比一原版(曼大毕业证书)曼尼托巴大学毕业证成绩单留信学历认证一手价格
 
Jual Cytotec Asli Obat Aborsi No. 1 Paling Manjur
Jual Cytotec Asli Obat Aborsi No. 1 Paling ManjurJual Cytotec Asli Obat Aborsi No. 1 Paling Manjur
Jual Cytotec Asli Obat Aborsi No. 1 Paling Manjur
 
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
Top profile Call Girls In Chandrapur [ 7014168258 ] Call Me For Genuine Model...
 

CLUSTERGRAM

  • 1. Prepared by Volkan OBAN CLUSTERGRAM SOURCE:  https://gist.github.com/hadley/439761 (hadley/clustergram-had.r)  http://www.r-statistics.com/tag/large-data/ CODES: > clustergram.kmeans <- function(Data, k, ...) + { + # this is the type of function that the clustergram + # function takes for the clustering. + # using similar structure will allow implementation of differe nt clustering algorithms + + # It returns a list with two elements: + # cluster = a vector of length of n (the number of subjects/items) + # indicating to which cluster each item belong s. + # centers = a k dimensional vector. Each element is 1 number that re present that cluster + # In our case, we are using the weighted mean of the cluster dimensions by + # Using the first component (loading) of the P CA of the Data. + + cl <- kmeans(Data, k,...) + + cluster <- cl$cluster + centers <- cl$centers %*% princomp(Data)$loadings[,1] # 1 number per center + # here we are using the weighted mean for each + + return(list( + cluster = cluster, + centers = centers + )) + } > > clustergram.plot.matlines <- function(X,Y, k.range, + x.range, y.range , COL, + add.center.points , centers.points) + { + plot(0,0, col = "white", xlim = x.range, ylim = y.range, + axes = F, + xlab = "Number of clusters (k)", ylab = "PCA weighted Mean of th e clusters", main = c("Clustergram of the PCA-weighted Mean of" ,"the clust ers k-mean clusters vs number of clusters (k)")) + axis(side =1, at = k.range) + axis(side =2)
  • 2. + abline(v = k.range, col = "grey") + + matlines(t(X), t(Y), pch = 19, col = COL, lty = 1, lwd = 1.5) + + if(add.center.points) + { + require(plyr) + + xx <- ldply(centers.points, rbind) + points(xx$y~xx$x, pch = 19, col = "red", cex = 1.3) + + # add points + # temp <- l_ply(centers.points, function(xx) { + # with(xx,points(y~x, pch = 19, col = "red", cex = 1.3)) + # points(xx$y~xx$x, pch = 19, col = "red", cex = 1.3) + # return(1) + # }) + # We assign the lapply to a variable (temp) only to suppress the lapply "NULL" output + } + } > > > > clustergram <- function(Data, k.range = 2:10 , + clustering.function = clustergram.kmeans, + clustergram.plot = clustergram.plot.matlines, + line.width = .004, add.center.points = T) + { + # Data - should be a scales matrix. Where each column belongs to a d ifferent dimension of the observations + # k.range - is a vector with the number of clusters to plot the clust ergram for + # clustering.function - this is not really used, but offers a bases t o later extend the function to other algorithms + # Although that would more work on the code + # line.width - is the amount to lift each line in the plot so they wo n't superimpose eachother + # add.center.points - just assures that we want to plot points of the cluster means + + n <- dim(Data)[1] + + PCA.1 <- Data %*% princomp(Data)$loadings[,1] # first principal comp onent of our data + + if(require(colorspace)) { + COL <- heat_hcl(n)[order(PCA.1)] # line colors + } else { + COL <- rainbow(n)[order(PCA.1)] # line colors + warning('Please consider installing the package "colorspace" for prittier colors') + } + + line.width <- rep(line.width, n) + + Y <- NULL # Y matrix + X <- NULL # X matrix + + centers.points <- list() + + for(k in k.range)
  • 3. + { + k.clusters <- clustering.function(Data, k) + + clusters.vec <- k.clusters$cluster + # the.centers <- apply(cl$centers,1, mean) + the.centers <- k.clusters$centers + + noise <- unlist(tapply(line.width, clusters.vec, cumsum))[order(s eq_along(clusters.vec)[order(clusters.vec)])] + # noise <- noise - mean(range(noise)) + y <- the.centers[clusters.vec] + noise + Y <- cbind(Y, y) + x <- rep(k, length(y)) + X <- cbind(X, x) + + centers.points[[k]] <- data.frame(y = the.centers , x = rep(k , k )) + # points(the.centers ~ rep(k , k), pch = 19, col = "red", cex = 1.5) + } + + + x.range <- range(k.range) + y.range <- range(PCA.1) + + clustergram.plot(X,Y, k.range, + x.range, y.range , COL, + add.center.points , centers.points) + + + } > > > > > if(F) { + + #Examples: + + png("d:clustergram_plots_%03d.png",650,650, pointsize = 15) + + data(iris) + set.seed(250) + par(cex.lab = 1.5, cex.main = 1.2) + Data <- scale(iris[,-5]) # notice I am scaling the vectors) + clustergram(Data, k.range = 2:8, line.width = 0.004) # notice how I a m using line.width. Play with it on your problem, according to the scale o f Y. + + set.seed(500) + Data <- scale(iris[,-5]) # notice I am scaling the vectors) + par(cex.lab = 1.2, cex.main = .7) + par(mfrow = c(3,2)) + for(i in 1:6) clustergram(Data, k.range = 2:8 , line.width = .004, ad d.center.points = T) + par(mfrow = c(1,1)) + + set.seed(250) + Data <- rbind( + cbind(rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)),
  • 4. + cbind(rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3)), + cbind(rnorm(100,2, sd = 0.3),rnorm(100,2, sd = 0.3),rnorm(100,2, sd = 0.3)) + ) + clustergram(Data, k.range = 2:5 , line.width = .004, add.center.point s = T) + + set.seed(250) + Data <- rbind( + cbind(rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)), + cbind(rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3)), + cbind(rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,1, sd = 0.3),rnorm(100,0, sd = 0.3)), + cbind(rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,0, sd = 0.3),rnorm(100,1, sd = 0.3)) + ) + clustergram(Data, k.range = 2:8 , line.width = .004, add.center.point s = T) + + dev.off() + } >source("http://www.r-statistics.com/wp-content/uploads/2012/01/source_http s.r.txt") # Making sure we can source code from github >source_https("https://raw.github.com/talgalili/R-code-snippets/master/clus tergram.r") > data(iris) > set.seed(250) > par(cex.lab = 1.5, cex.main = 1.2) > Data <- scale(iris[,-5]) # notice I am scaling the vectors) > clustergram(Data, k.range = 2:8, line.width = 0.004)
  • 5. source("http://www.r-statistics.com/wp- content/uploads/2012/01/source_https.r.txt") # Making sure we can source code from github source_https("https://raw.github.com/talgalili/ R-code-snippets/master/clustergram.r") set.seed(500) Data <- scale(iris[,-5]) # notice I am scaling the vectors) par(cex.lab = 1.2, cex.main = .7) par(mfrow = c(3,2)) for(i in 1:6) clustergram(Data, k.range = 2:8 , line.width = .004, add.center.points = T)