#!/usr/bin/Rscript data <- read.table("ex29_hierarchical_clustering.txt", header=T, row.names=1) data <- log2(data) good <- as.matrix(data[,1:8]) poor <- as.matrix(data[,9:16]) genes.count <- dim(data)[1] # top 50 variant genes genes.top50 <- names(tail(sort(apply(data, 1, var)),50)) data.top50 <- data[genes.top50,] # top 50 variant genes (good vs. poor) tmp <- sapply(1:genes.count, function(row) var(good[row,], poor[row,])) data.top50 <- as.matrix(data[which(order(tmp, decreasing=TRUE) <= 50),]) # euclid d.euclid <- dist(data.top50) plot(density(d.euclid)) # pearson d.pearson <- as.dist(1 - cor(t(data.top50))) plot(density(d.pearson)) # hierarchical clustering h.euclid <- hclust(d.euclid) h.pearson <- hclust(d.pearson) par(mfrow=c(2,3)) plot(hclust(d.euclid, "single")) plot(hclust(d.euclid, "average")) plot(hclust(d.euclid, "complete")) plot(hclust(d.pearson, "single")) plot(hclust(d.pearson, "average")) plot(hclust(d.pearson, "complete")) # use samples for clustering data.top50 <- t(data.top50) # euclid d.euclid <- dist(data.top50) plot(density(d.euclid)) # pearson d.pearson <- as.dist(1 - cor(t(data.top50))) plot(density(d.pearson)) # hierarchical clustering h.euclid <- hclust(d.euclid) h.pearson <- hclust(d.pearson) par(mfrow=c(2,3)) plot(hclust(d.euclid, "single")) plot(hclust(d.euclid, "average")) plot(hclust(d.euclid, "complete")) plot(hclust(d.pearson, "single")) plot(hclust(d.pearson, "average")) plot(hclust(d.pearson, "complete")) # kmeans clustering cl <- kmeans(t(data.top50), 2) plot(data.top50, col=cl$cluster) points(cl$centers, col = 1:2, pch = 8, cex=2)