2024 ADCs testing

bulk RNA-seq

library(dplyr)
library(ggplot2)
library(DT)

DEG analysis

Differentially expressed genes (DEGs) were identified from the following pairwise comparisons.

1. CND1 vs CTRL
# CND1 vs CTRL: DESeq2 differential expression on the selected count columns.
# Produces `res1`, a data frame of DESeq2 results with an added `DE` label.
library(DESeq2)

cont <- "CTRL"
tret <- "CND1"

# Column positions taken from `counts`: the first three are labelled `cont`,
# the last three `tret`.
# NOTE(review): assumes the columns of `counts` follow this order -- confirm.
cols <- c(1:3, 4:6)
cond <- c(rep(cont, 3), rep(tret, 3))
count.mtx <- counts[, cols]

# Generate info table
# `cond` is stored as a factor whose first (reference) level is `cont`.
# Left as a character column, DESeq2 converts it to a factor with
# alphabetically ordered levels; "CND1" sorts before "CTRL", so the treatment
# would become the reference and fold changes would be reported as
# CTRL over CND1, i.e. with inverted sign.
info <- data.frame(
  sample = colnames(count.mtx),
  cond = factor(cond, levels = c(cont, tret)),
  stringsAsFactors = FALSE
)

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
# Request the contrast explicitly (numerator `tret`, denominator `cont`) so a
# positive log2FoldChange means higher expression in the treatment.
res <- results(dds, contrast = c("cond", tret, cont))

res <- data.frame(res)
fc <- 1.5
pval <- 0.05

# Label each gene: "UP" / "DN" when the fold change passes `fc` in either
# direction and pvalue < pval, otherwise "no_sig". Genes whose pvalue is NA
# can end up with DE = NA.
# NOTE(review): the cut-off uses the raw `pvalue`, not `padj` -- confirm that
# this is intended.
res <- res %>%
  mutate(
    DE = ifelse(
      log2FoldChange >= log2(fc) & pvalue < pval, "UP",
      ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, "DN", "no_sig")
    )
  )
res$DE <- factor(res$DE, levels = c("UP", "DN", "no_sig"))

res1 <- res
2. CND2 vs CTRL
# CND2 vs CTRL: DESeq2 differential expression on the selected count columns.
# Produces `res2`, a data frame of DESeq2 results with an added `DE` label.
library(DESeq2)

cont <- "CTRL"
tret <- "CND2"

# Column positions taken from `counts`: the first three are labelled `cont`,
# the last three `tret`.
# NOTE(review): assumes the columns of `counts` follow this order -- confirm.
cols <- c(1:3, 7:9)
cond <- c(rep(cont, 3), rep(tret, 3))
count.mtx <- counts[, cols]

# Generate info table
# `cond` is stored as a factor whose first (reference) level is `cont`.
# Left as a character column, DESeq2 converts it to a factor with
# alphabetically ordered levels; "CND2" sorts before "CTRL", so the treatment
# would become the reference and fold changes would be reported as
# CTRL over CND2, i.e. with inverted sign.
info <- data.frame(
  sample = colnames(count.mtx),
  cond = factor(cond, levels = c(cont, tret)),
  stringsAsFactors = FALSE
)

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
# Request the contrast explicitly (numerator `tret`, denominator `cont`) so a
# positive log2FoldChange means higher expression in the treatment.
res <- results(dds, contrast = c("cond", tret, cont))

res <- data.frame(res)
fc <- 1.5
pval <- 0.05

# Label each gene: "UP" / "DN" when the fold change passes `fc` in either
# direction and pvalue < pval, otherwise "no_sig". Genes whose pvalue is NA
# can end up with DE = NA.
# NOTE(review): the cut-off uses the raw `pvalue`, not `padj` -- confirm that
# this is intended.
res <- res %>%
  mutate(
    DE = ifelse(
      log2FoldChange >= log2(fc) & pvalue < pval, "UP",
      ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, "DN", "no_sig")
    )
  )
res$DE <- factor(res$DE, levels = c("UP", "DN", "no_sig"))

res2 <- res
3. CND3 vs CTRL
# CND3 vs CTRL: DESeq2 differential expression on the selected count columns.
# Produces `res3`, a data frame of DESeq2 results with an added `DE` label.
library(DESeq2)

cont <- "CTRL"
tret <- "CND3"

# Column positions taken from `counts`: the first three are labelled `cont`,
# the last three `tret`.
# NOTE(review): assumes the columns of `counts` follow this order -- confirm.
cols <- c(1:3, 10:12)
cond <- c(rep(cont, 3), rep(tret, 3))
count.mtx <- counts[, cols]

# Generate info table
# `cond` is stored as a factor whose first (reference) level is `cont`.
# Left as a character column, DESeq2 converts it to a factor with
# alphabetically ordered levels; "CND3" sorts before "CTRL", so the treatment
# would become the reference and fold changes would be reported as
# CTRL over CND3, i.e. with inverted sign.
info <- data.frame(
  sample = colnames(count.mtx),
  cond = factor(cond, levels = c(cont, tret)),
  stringsAsFactors = FALSE
)

# DESeq
dds <- DESeqDataSetFromMatrix(count.mtx, info, ~ cond)
dds <- DESeq(dds)
# Request the contrast explicitly (numerator `tret`, denominator `cont`) so a
# positive log2FoldChange means higher expression in the treatment.
res <- results(dds, contrast = c("cond", tret, cont))

res <- data.frame(res)
fc <- 1.5
pval <- 0.05

# Label each gene: "UP" / "DN" when the fold change passes `fc` in either
# direction and pvalue < pval, otherwise "no_sig". Genes whose pvalue is NA
# can end up with DE = NA.
# NOTE(review): the cut-off uses the raw `pvalue`, not `padj` -- confirm that
# this is intended.
res <- res %>%
  mutate(
    DE = ifelse(
      log2FoldChange >= log2(fc) & pvalue < pval, "UP",
      ifelse(log2FoldChange <= -log2(fc) & pvalue < pval, "DN", "no_sig")
    )
  )
res$DE <- factor(res$DE, levels = c("UP", "DN", "no_sig"))

res3 <- res
# Gene names labelled UP or DN in each comparison (rows whose DE is NA or
# "no_sig" are left out).
deg1 <- rownames(subset(res1, DE != "no_sig"))
deg2 <- rownames(subset(res2, DE != "no_sig"))
deg3 <- rownames(subset(res3, DE != "no_sig"))

# Pool the three lists, keeping each gene once in order of first appearance.
degs <- unique(c(deg1, deg2, deg3))

cat("The number of all DEGs is ", length(degs), sep = "")
## The number of all DEGs is 2226

K-means clustering to identify variable features

## Prepare input data 
# TPM values of the pooled DEGs, coerced to a matrix for the row-wise scaling.
input.data <- as.matrix(tpms[degs, , drop = FALSE])
## Scaled
# Row-wise z-score: centre each row on its mean and divide by its SD.
input.data <- t(apply(input.data, 1, function(x) (x - mean(x)) / sd(x)))

# A row with constant or missing values has SD 0 / NA, so its z-scores are
# NaN / NA. kmeans() refuses non-finite input, so such rows are dropped here
# (and reported) instead of failing later in the clustering step.
finite_rows <- rowSums(!is.finite(input.data)) == 0
if (!all(finite_rows)) {
  message(
    sum(!finite_rows),
    " gene(s) with non-finite z-scores removed before clustering"
  )
  input.data <- input.data[finite_rows, , drop = FALSE]
}
## Define function
#' Cluster the rows of a matrix with k-means and draw a cluster-ordered heatmap
#'
#' Runs `kmeans()` on `input.data`, orders the rows by cluster id, and draws
#' them with `pheatmap::pheatmap()` with row and column clustering switched
#' off and the cluster id shown as a row annotation.
#'
#' Calls `set.seed(seed)`, so the global RNG state is reset on every call.
#'
#' @param k Number of clusters, passed to `kmeans()` as `centers`.
#' @param input.data Numeric matrix with row names; its rows are clustered.
#' @param title Heatmap title.
#' @param gaps_col Column indices after which a gap is drawn in the heatmap,
#'   or `NULL` for none. The default `c(3, 6, 9)` is the value that used to be
#'   hard-coded.
#' @param nstart Number of random starts for `kmeans()`. Defaults to `k`.
#' @param seed Seed given to `set.seed()` before clustering.
#' @return A list with `plot` (the object returned by `pheatmap()`) and
#'   `annotation` (a data frame with one factor column `cluster`, rows ordered
#'   by cluster and named after the rows of `input.data`).
kmeans.k <- function(k, input.data, title,
                     gaps_col = c(3, 6, 9), nstart = k, seed = 1234) {
  # Rows are looked up by name below, so unnamed input cannot be handled.
  if (is.null(rownames(input.data))) {
    stop("`input.data` must have row names", call. = FALSE)
  }

  # Fixed seed so repeated calls give the same partition
  set.seed(seed)
  fit <- kmeans(input.data, centers = k, nstart = nstart)

  # One row per input row, holding its cluster id
  df.anno <- data.frame(
    cluster = fit$cluster,
    row.names = rownames(input.data)
  )

  # Sort by cluster id; order() keeps ties in their original order.
  df.anno <- df.anno[order(df.anno$cluster), , drop = FALSE]

  # pheatmap treats a factor annotation as discrete categories
  df.anno$cluster <- factor(df.anno$cluster)

  # Heatmap of the rows in cluster order; no dendrograms are computed.
  p <- pheatmap::pheatmap(
    input.data[rownames(df.anno), , drop = FALSE],
    cluster_cols = FALSE,
    cluster_rows = FALSE,
    show_rownames = FALSE,
    annotation_row = df.anno,
    color = colorRampPalette(c("navy", "white", "red"))(1000),
    fontsize_row = 6,
    fontsize_col = 10,
    main = title,
    gaps_col = gaps_col
  )

  list(plot = p, annotation = df.anno)
}
## Perform kmeans clustering
# Fit and plot k = 2 through 7 on the same scaled matrix, keeping each result
# under its own name.
kmeans.out1 <- kmeans.k(2, input.data, "clusters : 2")

kmeans.out2 <- kmeans.k(3, input.data, "clusters : 3")

kmeans.out3 <- kmeans.k(4, input.data, "clusters : 4")

kmeans.out4 <- kmeans.k(5, input.data, "clusters : 5")

kmeans.out5 <- kmeans.k(6, input.data, "clusters : 6")

kmeans.out6 <- kmeans.k(7, input.data, "clusters : 7")

Cluster number : 2

# Cluster with k = 2 and display the resulting heatmap
k <- 2
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "CYP26B1"  "PRSS22"   "YBX2"     "CX3CL1"   "CACNA1G"  "USH1C"   
##  [7] "CEACAM21" "NOS2"     "CD79B"    "NFIX"     "TENM1"    "NRXN3"   
## [13] "HSD17B6"  "CD44"     "APBA2"    "TRIO"     "MYO16"    "RIPOR3"  
## [19] "ANO2"     "ARHGAP6"  "ROS1"     "HDAC9"    "LMO3"     "COL9A2"  
## [25] "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "COL23A1"  "FSTL4"   
## [31] "CCDC85A"  "PHF21B"   "DCBLD2"   "CDK14"    "CDK17"    "LZTS1"   
## [37] "LIMCH1"   "CHI3L2"   "NHERF2"   "ME1"      "CTNNA2"   "TRAM1"   
## [43] "SYT1"     "RASGRP2"  "CAMK2A"   "RPS6KA2"  "SEMA3C"   "RASAL2"  
## [49] "FNDC3B"   "CACNG4"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "TMEM132A"   "DCN"        "MTMR11"     "CLCA4"      "C8B"       
##  [6] "BIRC3"      "MYOC"       "SLC18A1"    "TNC"        "TG"        
## [11] "KITLG"      "PTGER3"     "SERPINB3"   "PRDM1"      "CREB3L3"   
## [16] "PIGV"       "CA11"       "CCN5"       "DMRT3"      "COL17A1"   
## [21] "NAV3"       "PITX1"      "HES2"       "ST6GALNAC2" "LMCD1"     
## [26] "CDH19"      "ABCB11"     "CA12"       "PLD1"       "ATP12A"    
## [31] "FBLN1"      "AMPH"       "DUSP13B"    "PKP1"       "COL19A1"   
## [36] "FOLH1"      "FAT2"       "CETP"       "AAMDC"      "TMEM40"    
## [41] "TGM1"       "GABRP"      "TPSD1"      "SEC14L3"    "SLC35E4"   
## [46] "SOX10"      "TTC28"      "TTLL1"      "NEFH"       "CBX7"

Cluster number : 3

# Cluster with k = 3 and display the resulting heatmap
k <- 3
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "CYP26B1"  "PRSS22"   "YBX2"     "CX3CL1"   "CEACAM21" "NOS2"    
##  [7] "CD79B"    "NFIX"     "TENM1"    "NRXN3"    "CD44"     "APBA2"   
## [13] "TRIO"     "MYO16"    "ANO2"     "ARHGAP6"  "ROS1"     "LMO3"    
## [19] "COL9A2"   "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "COL23A1" 
## [25] "CCDC85A"  "PHF21B"   "DCBLD2"   "CDK14"    "CDK17"    "LZTS1"   
## [31] "NHERF2"   "ME1"      "CTNNA2"   "TRAM1"    "SYT1"     "RASGRP2" 
## [37] "RPS6KA2"  "SEMA3C"   "CACNG4"   "ARHGAP15" "MAP2"     "SLC4A4"  
## [43] "COL5A3"   "CXCL2"    "CADPS2"   "ACHE"     "PHACTR3"  "TPX2"    
## [49] "DOCK3"    "BIRC5"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "TMEM132A"   "DCN"        "MTMR11"     "CLCA4"      "C8B"       
##  [6] "BIRC3"      "MYOC"       "SLC18A1"    "TNC"        "TG"        
## [11] "KITLG"      "SERPINB3"   "PRDM1"      "CREB3L3"    "PIGV"      
## [16] "CA11"       "CCN5"       "DMRT3"      "COL17A1"    "PITX1"     
## [21] "HES2"       "ST6GALNAC2" "CDH19"      "ABCB11"     "CA12"      
## [26] "PLD1"       "ATP12A"     "FBLN1"      "DUSP13B"    "PKP1"      
## [31] "COL19A1"    "FOLH1"      "FAT2"       "CETP"       "TMEM40"    
## [36] "TGM1"       "GABRP"      "TPSD1"      "SEC14L3"    "SLC35E4"   
## [41] "SOX10"      "TTC28"      "TTLL1"      "CBX7"       "PROCR"     
## [46] "PTK6"       "SIRPB1"     "TNNC2"      "GUCY2F"     "ACOD1"
# Up to 50 genes assigned to cluster 3, in annotation row order
cat("Cluster3 genes 50 selection : ")
## Cluster3 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 3)), 50)
##  [1] "CACNA1G"    "USH1C"      "HSD17B6"    "RIPOR3"     "HDAC9"     
##  [6] "PTGER3"     "FSTL4"      "LIMCH1"     "CHI3L2"     "NAV3"      
## [11] "CAMK2A"     "LMCD1"      "RASAL2"     "FNDC3B"     "AMPH"      
## [16] "BRINP1"     "PTPRH"      "C1QTNF3"    "AAMDC"      "HEPH"      
## [21] "ICAM1"      "NRCAM"      "TGFB2"      "CDC45"      "CDC6"      
## [26] "NEFH"       "SALL4"      "SLC17A9"    "NALCN"      "MEOX2"     
## [31] "SORCS1"     "CSF3"       "ABCC3"      "ACSS3"      "FOXM1"     
## [36] "GNB3"       "TPD52L1"    "BACH2"      "GMDS"       "TBX18"     
## [41] "C7"         "PDE1A"      "GRB14"      "REG1A"      "ST6GALNAC5"
## [46] "PPP4R4"     "POPDC2"     "CSMD2"      "CD244"      "PHF24"

Cluster number : 4

# Cluster with k = 4 and display the resulting heatmap
k <- 4
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "CACNA1G"    "USH1C"      "HSD17B6"    "RIPOR3"     "HDAC9"     
##  [6] "FSTL4"      "LIMCH1"     "CHI3L2"     "NAV3"       "CAMK2A"    
## [11] "LMCD1"      "RASAL2"     "FNDC3B"     "BRINP1"     "PTPRH"     
## [16] "C1QTNF3"    "HEPH"       "ICAM1"      "NRCAM"      "TGFB2"     
## [21] "CDC45"      "CDC6"       "SALL4"      "SLC17A9"    "MEOX2"     
## [26] "SORCS1"     "CSF3"       "ABCC3"      "ACSS3"      "FOXM1"     
## [31] "GNB3"       "TPD52L1"    "GMDS"       "TBX18"      "C7"        
## [36] "PDE1A"      "GRB14"      "REG1A"      "ST6GALNAC5" "PPP4R4"    
## [41] "POPDC2"     "CSMD2"      "PHF24"      "IRF1"       "MASP1"     
## [46] "PKMYT1"     "ADM2"       "MCHR1"      "LIF"        "ISLR"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "CYP26B1"  "PRSS22"   "YBX2"     "CX3CL1"   "CEACAM21" "NOS2"    
##  [7] "CD79B"    "NFIX"     "TENM1"    "NRXN3"    "CD44"     "APBA2"   
## [13] "TRIO"     "MYO16"    "ANO2"     "ARHGAP6"  "ROS1"     "LMO3"    
## [19] "COL9A2"   "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "COL23A1" 
## [25] "CCDC85A"  "PHF21B"   "DCBLD2"   "CDK14"    "CDK17"    "LZTS1"   
## [31] "NHERF2"   "ME1"      "CTNNA2"   "TRAM1"    "SYT1"     "RASGRP2" 
## [37] "RPS6KA2"  "SEMA3C"   "CACNG4"   "ARHGAP15" "MAP2"     "SLC4A4"  
## [43] "COL5A3"   "CXCL2"    "CADPS2"   "ACHE"     "PHACTR3"  "TPX2"    
## [49] "DOCK3"    "BIRC5"
# Up to 50 genes assigned to cluster 3, in annotation row order
cat("Cluster3 genes 50 selection : ")
## Cluster3 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 3)), 50)
##  [1] "COL19A1"   "ACSBG1"    "RP1"       "HOXA5"     "PACRG"     "CLDN16"   
##  [7] "PFKFB4"    "HHLA2"     "GDA"       "VPREB3"    "RGN"       "TNS4"     
## [13] "CPLANE2"   "SAA2"      "BLK"       "STRA6"     "TTC6"      "WDR93"    
## [19] "SCN1A"     "MAL2"      "CACNA1C"   "ANK3"      "CCDC74B"   "KIF5A"    
## [25] "SNX22"     "SLC34A2"   "LRRC43"    "ESYT3"     "ADGRG5"    "CARMIL2"  
## [31] "FCRL1"     "GRM2"      "HHIP"      "DEUP1"     "PEX11A"    "PHYHIP"   
## [37] "RASA4B"    "AQP4"      "TNFRSF10C" "CRYBG2"    "NAALADL2"  "BRICD5"   
## [43] "ROBO2"     "FAF1"      "KIAA0825"  "PRAME"     "SLC38A3"   "SBSN"     
## [49] "PTPRT"     "RANBP17"
# Up to 50 genes assigned to cluster 4, in annotation row order
cat("Cluster4 genes 50 selection : ")
## Cluster4 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 4)), 50)
##  [1] "TMEM132A"   "DCN"        "MTMR11"     "CLCA4"      "C8B"       
##  [6] "BIRC3"      "MYOC"       "SLC18A1"    "TNC"        "TG"        
## [11] "KITLG"      "PTGER3"     "SERPINB3"   "PRDM1"      "CREB3L3"   
## [16] "PIGV"       "CA11"       "CCN5"       "DMRT3"      "COL17A1"   
## [21] "PITX1"      "HES2"       "ST6GALNAC2" "CDH19"      "ABCB11"    
## [26] "CA12"       "PLD1"       "ATP12A"     "FBLN1"      "AMPH"      
## [31] "DUSP13B"    "PKP1"       "FOLH1"      "FAT2"       "CETP"      
## [36] "AAMDC"      "TMEM40"     "TGM1"       "GABRP"      "TPSD1"     
## [41] "SEC14L3"    "SLC35E4"    "SOX10"      "TTC28"      "TTLL1"     
## [46] "NEFH"       "CBX7"       "PROCR"      "PTK6"       "SIRPB1"

Cluster number : 5

# Cluster with k = 5 and display the resulting heatmap
k <- 5
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "COL19A1"   "ACSBG1"    "RP1"       "HOXA5"     "TFR2"      "PACRG"    
##  [7] "CLDN16"    "PFKFB4"    "GDA"       "VPREB3"    "RGN"       "TNS4"     
## [13] "CPLANE2"   "SAA2"      "BLK"       "STRA6"     "TTC6"      "WDR93"    
## [19] "SCN1A"     "CACNA1C"   "ANK3"      "CCDC74B"   "KIF5A"     "SNX22"    
## [25] "SLC34A2"   "LRRC43"    "ESYT3"     "ADGRG5"    "CARMIL2"   "FCRL1"    
## [31] "HHIP"      "DEUP1"     "PEX11A"    "PHYHIP"    "RASA4B"    "AQP4"     
## [37] "TNFRSF10C" "CRYBG2"    "NAALADL2"  "BRICD5"    "FAF1"      "KIAA0825" 
## [43] "PRAME"     "SBSN"      "PTPRT"     "RANBP17"   "NT5M"      "IGLV4-69" 
## [49] "IGLV6-57"  "IGLV1-51"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "HDAC9"   "NAV3"    "CAMK2A"  "RASAL2"  "FNDC3B"  "AMPH"    "HEPH"   
##  [8] "ICAM1"   "TGFB2"   "SALL4"   "SLC17A9" "NALCN"   "ACOD1"   "MEOX2"  
## [15] "CDH23"   "SORCS1"  "CSF3"    "ACSS3"   "FOXM1"   "TPD52L1" "BACH2"  
## [22] "C7"      "HHLA2"   "PDE1A"   "GRB14"   "PPP4R4"  "CD244"   "H2BC11" 
## [29] "CDKN1A"  "MASP1"   "PKMYT1"  "ADM2"    "DOCK4"   "ISLR"    "KIF1A"  
## [36] "UNC13A"  "HRC"     "COL5A1"  "IDO1"    "TBC1D5"  "SLC6A11" "KANK4"  
## [43] "CHRM3"   "TRPC4"   "APLNR"   "AMHR2"   "CYP19A1" "PCDH10"  "RBP5"   
## [50] "RTN1"
# Up to 50 genes assigned to cluster 3, in annotation row order
cat("Cluster3 genes 50 selection : ")
## Cluster3 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 3)), 50)
##  [1] "CACNA1G"    "USH1C"      "HSD17B6"    "RIPOR3"     "PTGER3"    
##  [6] "FSTL4"      "PHF21B"     "LIMCH1"     "CHI3L2"     "LMCD1"     
## [11] "BRINP1"     "PTPRH"      "C1QTNF3"    "NRCAM"      "CDC45"     
## [16] "CDC6"       "EEF1A2"     "KLC3"       "ABCC3"      "MAPK10"    
## [21] "KIAA1549L"  "GNB3"       "GMDS"       "TBX18"      "REG1A"     
## [26] "ST6GALNAC5" "POPDC2"     "CSMD2"      "PHF24"      "IRF1"      
## [31] "SLC2A4RG"   "LRFN1"      "LIF"        "ADCY4"      "PRRG3"     
## [36] "FIBCD1"     "MICAL2"     "MRO"        "DTNA"       "ITGA7"     
## [41] "PCNX2"      "POU2F3"     "CTRL"       "LHX9"       "GALNT13"   
## [46] "ACKR2"      "CDCA5"      "PARD3"      "MKI67"      "PLA2R1"
# Up to 50 genes assigned to cluster 4, in annotation row order
cat("Cluster4 genes 50 selection : ")
## Cluster4 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 4)), 50)
##  [1] "TMEM132A"   "DCN"        "MTMR11"     "CLCA4"      "C8B"       
##  [6] "BIRC3"      "MYOC"       "SLC18A1"    "TNC"        "TG"        
## [11] "KITLG"      "SERPINB3"   "PRDM1"      "CREB3L3"    "PIGV"      
## [16] "CA11"       "CCN5"       "DMRT3"      "COL17A1"    "PITX1"     
## [21] "HES2"       "ST6GALNAC2" "CDH19"      "ABCB11"     "CA12"      
## [26] "PLD1"       "ATP12A"     "FBLN1"      "DUSP13B"    "PKP1"      
## [31] "FOLH1"      "FAT2"       "CETP"       "AAMDC"      "TMEM40"    
## [36] "TGM1"       "GABRP"      "TPSD1"      "SEC14L3"    "SLC35E4"   
## [41] "SOX10"      "TTC28"      "TTLL1"      "NEFH"       "CBX7"      
## [46] "PROCR"      "PTK6"       "SIRPB1"     "TNNC2"      "GUCY2F"
# Up to 50 genes assigned to cluster 5, in annotation row order
cat("Cluster5 genes 50 selection : ")
## Cluster5 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 5)), 50)
##  [1] "CYP26B1"  "PRSS22"   "YBX2"     "CX3CL1"   "CEACAM21" "NOS2"    
##  [7] "CD79B"    "NFIX"     "TENM1"    "NRXN3"    "CD44"     "APBA2"   
## [13] "TRIO"     "MYO16"    "ANO2"     "ARHGAP6"  "ROS1"     "LMO3"    
## [19] "COL9A2"   "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "COL23A1" 
## [25] "CCDC85A"  "DCBLD2"   "CDK14"    "CDK17"    "LZTS1"    "NHERF2"  
## [31] "ME1"      "CTNNA2"   "TRAM1"    "SYT1"     "RASGRP2"  "RPS6KA2" 
## [37] "SEMA3C"   "CACNG4"   "ARHGAP15" "MAP2"     "SLC4A4"   "COL5A3"  
## [43] "CXCL2"    "CADPS2"   "ACHE"     "PHACTR3"  "TPX2"     "DOCK3"   
## [49] "BIRC5"    "PITPNM3"

Cluster number : 6

# Cluster with k = 6 and display the resulting heatmap
k <- 6
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "CLCA4"     "COL19A1"   "ACSBG1"    "HOXA5"     "TFR2"      "EFNB3"    
##  [7] "CLDN16"    "CFAP92"    "GDA"       "CFP"       "PNCK"      "TNS4"     
## [13] "CPLANE2"   "WDR93"     "CCDC74B"   "SLC34A2"   "LRRC43"    "ADGRG5"   
## [19] "CARMIL2"   "FCRL1"     "EFHB"      "DEUP1"     "PEX11A"    "RASA4B"   
## [25] "TNFRSF10C" "CRYBG2"    "PRAME"     "SLC38A3"   "MB"        "NT5M"     
## [31] "ETS2-AS1"  "IGHV1-46"  "CYTOR"     "SOX21-AS1" "PLCXD2"    "GPR162"   
## [37] "LINC01415" "LINC01127" "CD38"      "ZMYND10"   "DNAH9"     "DLEC1"    
## [43] "FUZ"       "LRRC23"    "PLEKHB1"   "CLXN"      "C6"        "ATP2C2"   
## [49] "BCAS1"     "ZMYND12"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "CYP26B1"  "CX3CL1"   "CEACAM21" "NOS2"     "TENM1"    "CD44"    
##  [7] "TRIO"     "MYO16"    "ANO2"     "ARHGAP6"  "ROS1"     "LMO3"    
## [13] "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "CCDC85A"  "DCBLD2"  
## [19] "CDK14"    "LZTS1"    "NHERF2"   "ME1"      "TRAM1"    "RASGRP2" 
## [25] "RPS6KA2"  "SEMA3C"   "CACNG4"   "ARHGAP15" "MAP2"     "SLC4A4"  
## [31] "COL5A3"   "CADPS2"   "TPX2"     "DOCK3"    "BIRC5"    "PITPNM3" 
## [37] "ZFHX4"    "IL11"     "SUSD2"    "RASL10A"  "ACR"      "NFKBIA"  
## [43] "GINS1"    "MYBL2"    "EEF1A2"   "SYNDIG1"  "ZC3H12B"  "CCL22"   
## [49] "SLC7A5"   "CEMIP"
# Up to 50 genes assigned to cluster 3, in annotation row order
cat("Cluster3 genes 50 selection : ")
## Cluster3 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 3)), 50)
##  [1] "HDAC9"   "NAV3"    "CAMK2A"  "RASAL2"  "FNDC3B"  "AMPH"    "HEPH"   
##  [8] "ICAM1"   "TGFB2"   "SALL4"   "SLC17A9" "NALCN"   "ACOD1"   "MEOX2"  
## [15] "CDH23"   "SORCS1"  "CSF3"    "ACSS3"   "FOXM1"   "TPD52L1" "BACH2"  
## [22] "C7"      "HHLA2"   "PDE1A"   "GRB14"   "PPP4R4"  "CD244"   "H2BC11" 
## [29] "CDKN1A"  "MASP1"   "PKMYT1"  "ADM2"    "DOCK4"   "ISLR"    "KIF1A"  
## [36] "UNC13A"  "HRC"     "COL5A1"  "IDO1"    "TBC1D5"  "SLC6A11" "KANK4"  
## [43] "CHRM3"   "TRPC4"   "APLNR"   "AMHR2"   "CYP19A1" "PCDH10"  "RBP5"   
## [50] "RTN1"
# Up to 50 genes assigned to cluster 4, in annotation row order
cat("Cluster4 genes 50 selection : ")
## Cluster4 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 4)), 50)
##  [1] "CACNA1G"    "USH1C"      "HSD17B6"    "RIPOR3"     "PTGER3"    
##  [6] "FSTL4"      "PHF21B"     "LIMCH1"     "CHI3L2"     "LMCD1"     
## [11] "BRINP1"     "PTPRH"      "C1QTNF3"    "NRCAM"      "CDC45"     
## [16] "CDC6"       "KLC3"       "ABCC3"      "MAPK10"     "KIAA1549L" 
## [21] "GNB3"       "GMDS"       "TBX18"      "REG1A"      "ST6GALNAC5"
## [26] "POPDC2"     "CSMD2"      "PHF24"      "IRF1"       "SLC2A4RG"  
## [31] "LRFN1"      "LIF"        "ADCY4"      "PRRG3"      "FIBCD1"    
## [36] "MICAL2"     "MRO"        "DTNA"       "ITGA7"      "PCNX2"     
## [41] "POU2F3"     "CTRL"       "LHX9"       "GALNT13"    "ACKR2"     
## [46] "PARD3"      "MKI67"      "PLA2R1"     "TMPRSS11D"  "HS3ST3A1"
# Up to 50 genes assigned to cluster 5, in annotation row order
cat("Cluster5 genes 50 selection : ")
## Cluster5 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 5)), 50)
##  [1] "TMEM132A"   "DCN"        "MTMR11"     "C8B"        "BIRC3"     
##  [6] "MYOC"       "SLC18A1"    "TNC"        "TG"         "KITLG"     
## [11] "SERPINB3"   "PRDM1"      "CREB3L3"    "PIGV"       "CA11"      
## [16] "CCN5"       "DMRT3"      "COL17A1"    "PITX1"      "HES2"      
## [21] "ST6GALNAC2" "CDH19"      "ABCB11"     "CA12"       "PLD1"      
## [26] "ATP12A"     "FBLN1"      "DUSP13B"    "PKP1"       "FOLH1"     
## [31] "FAT2"       "CETP"       "AAMDC"      "TMEM40"     "TGM1"      
## [36] "GABRP"      "TPSD1"      "SEC14L3"    "SLC35E4"    "SOX10"     
## [41] "TTC28"      "TTLL1"      "NEFH"       "CBX7"       "PROCR"     
## [46] "PTK6"       "SIRPB1"     "TNNC2"      "GUCY2F"     "DHRS12"
# Up to 50 genes assigned to cluster 6, in annotation row order
cat("Cluster6 genes 50 selection : ")
## Cluster6 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 6)), 50)
##  [1] "PRSS22"   "YBX2"     "CD79B"    "NFIX"     "NRXN3"    "APBA2"   
##  [7] "COL9A2"   "COL23A1"  "CDK17"    "CTNNA2"   "SYT1"     "CXCL2"   
## [13] "ACHE"     "PHACTR3"  "CRISPLD2" "RP1"      "HGFAC"    "POU2AF1" 
## [19] "PACRG"    "PFKFB4"   "CR2"      "CRHR1"    "SOHLH2"   "SPINK4"  
## [25] "C4BPA"    "IL17C"    "VPREB3"   "ELL3"     "RGN"      "F12"     
## [31] "KHDRBS3"  "LGR6"     "DCLK1"    "MACROD1"  "CD180"    "SAA2"    
## [37] "ECHDC3"   "IGF2BP3"  "BLK"      "STRA6"    "SLCO1C1"  "TMEM132B"
## [43] "RDH16"    "TTC6"     "CDH11"    "CDH13"    "IMPA2"    "FCRL5"   
## [49] "DTL"      "SCN1A"

Cluster number : 7

# Cluster with k = 7 and display the resulting heatmap
k <- 7
kmeans.out <- kmeans.k(
  k = k,
  input.data = input.data,
  title = paste0("clusters : ", k)
)
kmeans.out$plot

# Up to 50 genes assigned to cluster 1, in annotation row order
cat("Cluster1 genes 50 selection : ")
## Cluster1 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 1)), 50)
##  [1] "PRSS22"   "YBX2"     "CD79B"    "NFIX"     "NRXN3"    "APBA2"   
##  [7] "COL9A2"   "COL23A1"  "CDK17"    "CTNNA2"   "SYT1"     "CXCL2"   
## [13] "ACHE"     "PHACTR3"  "CRISPLD2" "RP1"      "HGFAC"    "POU2AF1" 
## [19] "PACRG"    "PFKFB4"   "CR2"      "CRHR1"    "SOHLH2"   "SPINK4"  
## [25] "C4BPA"    "IL17C"    "VPREB3"   "ELL3"     "RGN"      "F12"     
## [31] "KHDRBS3"  "LGR6"     "DCLK1"    "MACROD1"  "CD180"    "SAA2"    
## [37] "ECHDC3"   "IGF2BP3"  "BLK"      "STRA6"    "SLCO1C1"  "TMEM132B"
## [43] "RDH16"    "TTC6"     "CDH11"    "CDH13"    "IMPA2"    "FCRL5"   
## [49] "DTL"      "SCN1A"
# Up to 50 genes assigned to cluster 2, in annotation row order
cat("Cluster2 genes 50 selection : ")
## Cluster2 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 2)), 50)
##  [1] "CYP26B1"  "CX3CL1"   "CEACAM21" "NOS2"     "TENM1"    "CD44"    
##  [7] "TRIO"     "MYO16"    "ANO2"     "ARHGAP6"  "ROS1"     "LMO3"    
## [13] "LTBP1"    "NEDD4L"   "FOXP3"    "LAMC3"    "CCDC85A"  "DCBLD2"  
## [19] "CDK14"    "LZTS1"    "NHERF2"   "ME1"      "TRAM1"    "RASGRP2" 
## [25] "RPS6KA2"  "SEMA3C"   "CACNG4"   "ARHGAP15" "MAP2"     "SLC4A4"  
## [31] "COL5A3"   "CADPS2"   "TPX2"     "DOCK3"    "BIRC5"    "PITPNM3" 
## [37] "ZFHX4"    "IL11"     "SUSD2"    "RASL10A"  "ACR"      "NFKBIA"  
## [43] "GINS1"    "MYBL2"    "EEF1A2"   "SYNDIG1"  "ZC3H12B"  "CCL22"   
## [49] "SLC7A5"   "CEMIP"
# Up to 50 genes assigned to cluster 3, in annotation row order
cat("Cluster3 genes 50 selection : ")
## Cluster3 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 3)), 50)
##  [1] "EFNB3"     "CLDN16"    "GDA"       "CFP"       "PNCK"      "WDR93"    
##  [7] "CCDC74B"   "LRRC43"    "FCRL1"     "EFHB"      "DEUP1"     "PEX11A"   
## [13] "TNFRSF10C" "CRYBG2"    "MB"        "ETS2-AS1"  "IGHV1-46"  "PLCXD2"   
## [19] "GPR162"    "LINC01415" "CD38"      "ZMYND10"   "DNAH9"     "DLEC1"    
## [25] "FUZ"       "LRRC23"    "PLEKHB1"   "CLXN"      "C6"        "ATP2C2"   
## [31] "BCAS1"     "ZMYND12"   "TP63"      "SPAG6"     "TP73"      "DNAAF6"   
## [37] "PTHLH"     "IL5RA"     "TEKT2"     "FMO2"      "RSPH14"    "RAB36"    
## [43] "IFT27"     "SAMD15"    "SPEF1"     "CCDC113"   "SMPD3"     "EYA1"     
## [49] "IFT56"     "B9D1"
# Up to 50 genes assigned to cluster 4, in annotation row order
cat("Cluster4 genes 50 selection : ")
## Cluster4 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 4)), 50)
##  [1] "MTMR11"    "C8B"       "BIRC3"     "MYOC"      "SLC18A1"   "RIPOR3"   
##  [7] "TG"        "KITLG"     "CDH19"     "COL19A1"   "AAMDC"     "GUCY2F"   
## [13] "ACSBG1"    "ZFR2"      "TFR2"      "RASAL1"    "PDGFRB"    "CFAP92"   
## [19] "PRRX1"     "RGS4"      "TGM3"      "MCHR1"     "ADCY4"     "ALDH3B2"  
## [25] "CPLANE2"   "GSTM5"     "MYCN"      "FAIM2"     "LIMD2"     "IL1RN"    
## [31] "SERPINA10" "IL19"      "PPFIA4"    "AFF3"      "ALDH1L1"   "MKI67"    
## [37] "CCDC102B"  "SPHKAP"    "NCAM2"     "EPHB1"     "SLC24A2"   "DHRS1"    
## [43] "SLC34A2"   "TMSB15B"   "PTMS"      "ADGRG5"    "DRC7"      "FCRL3"    
## [49] "SHANK2"    "CFAP47"
# Up to 50 genes assigned to cluster 5, in annotation row order
cat("Cluster5 genes 50 selection : ")
## Cluster5 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 5)), 50)
##  [1] "HDAC9"   "NAV3"    "CAMK2A"  "RASAL2"  "FNDC3B"  "AMPH"    "HEPH"   
##  [8] "ICAM1"   "TGFB2"   "SALL4"   "SLC17A9" "NALCN"   "ACOD1"   "MEOX2"  
## [15] "CDH23"   "SORCS1"  "CSF3"    "ACSS3"   "FOXM1"   "TPD52L1" "BACH2"  
## [22] "C7"      "HHLA2"   "PDE1A"   "GRB14"   "PPP4R4"  "CD244"   "H2BC11" 
## [29] "CDKN1A"  "MASP1"   "PKMYT1"  "ADM2"    "DOCK4"   "ISLR"    "KIF1A"  
## [36] "UNC13A"  "HRC"     "COL5A1"  "IDO1"    "TBC1D5"  "SLC6A11" "KANK4"  
## [43] "CHRM3"   "TRPC4"   "APLNR"   "AMHR2"   "CYP19A1" "PCDH10"  "RBP5"   
## [50] "RTN1"
# Up to 50 genes assigned to cluster 6, in annotation row order
cat("Cluster6 genes 50 selection : ")
## Cluster6 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 6)), 50)
##  [1] "TMEM132A"   "DCN"        "CLCA4"      "TNC"        "SERPINB3"  
##  [6] "PRDM1"      "CREB3L3"    "PIGV"       "CA11"       "CCN5"      
## [11] "DMRT3"      "COL17A1"    "PITX1"      "HES2"       "ST6GALNAC2"
## [16] "ABCB11"     "CA12"       "PLD1"       "ATP12A"     "FBLN1"     
## [21] "DUSP13B"    "PKP1"       "FOLH1"      "FAT2"       "CETP"      
## [26] "TMEM40"     "TGM1"       "GABRP"      "TPSD1"      "SEC14L3"   
## [31] "SLC35E4"    "SOX10"      "TTC28"      "TTLL1"      "NEFH"      
## [36] "CBX7"       "PROCR"      "PTK6"       "SIRPB1"     "TNNC2"     
## [41] "DHRS12"     "NDRG4"      "RHOV"       "CA2"        "TUBB4A"    
## [46] "SLC17A7"    "TLE6"       "DYRK1B"     "EBI3"       "CLIP3"
# Up to 50 genes assigned to cluster 7, in annotation row order
cat("Cluster7 genes 50 selection : ")
## Cluster7 genes 50 selection :
head(rownames(subset(as.data.frame(kmeans.out$annotation), cluster == 7)), 50)
##  [1] "CACNA1G"    "USH1C"      "HSD17B6"    "PTGER3"     "FSTL4"     
##  [6] "PHF21B"     "LIMCH1"     "CHI3L2"     "LMCD1"      "BRINP1"    
## [11] "PTPRH"      "C1QTNF3"    "NRCAM"      "CDC45"      "CDC6"      
## [16] "KLC3"       "ABCC3"      "MAPK10"     "KIAA1549L"  "GNB3"      
## [21] "GMDS"       "TBX18"      "REG1A"      "ST6GALNAC5" "POPDC2"    
## [26] "CSMD2"      "PHF24"      "IRF1"       "SLC2A4RG"   "LRFN1"     
## [31] "LIF"        "PRRG3"      "FIBCD1"     "MICAL2"     "MRO"       
## [36] "DTNA"       "ITGA7"      "PCNX2"      "POU2F3"     "CTRL"      
## [41] "LHX9"       "GALNT13"    "ACKR2"      "PARD3"      "PLA2R1"    
## [46] "TMPRSS11D"  "HS3ST3A1"   "THY1"       "GRIP1"      "SH3RF2"

Interpretation

So far, the best k for the k-means clustering is 6. At k = 7, the clustering starts to generate an outlier group from CND2.