Differentially Expressed Genes (DEG) analysis

Import count data

# Load the saved TCGA-BRCA object and take the `treatments` element of its
# `meta` component.
TCGA_BRCA_countsMeta <- readRDS(paste0(dir, "rds/TCGA_BRCA_countsMeta.rds"))
# NOTE(review): `treament` looks like a typo for "treatment"; the name is kept
# unchanged because code outside this section may refer to it.
treament <- TCGA_BRCA_countsMeta$meta$treatments

DEG strategy

Differentially Expressed Genes (DEG) between Tumor tissue and Normal tissue

Tumor tissue/Normal tissue

n = 1224
Tumor n = 1111
Normal n = 113




Create DEG object

Don't run (shown for reference only; the saved result is loaded below).

# Build the DESeq2 fit and results table from the raw count matrix.
# NOTE(review): `dds$sample_type` is read before `dds` is created in this
# chunk, so it only works when a `dds` object already exists in the session.
# Confirm where the sample types are supposed to come from.

# Sample sheet: one row per column of the count matrix, plus its condition.
info <- data.frame(sample = colnames(count.mtx), stringsAsFactors = FALSE)
info$cond <- factor(
  dds$sample_type,
  levels = c("Solid Tissue Normal", "Primary Tumor") # reference level first
)

# Fit the model with condition as the only design term.
dds <- DESeq(DESeqDataSetFromMatrix(count.mtx, info, ~ cond))
# saveRDS(dds, paste0(dir, "TCGA_BRCA_countsMeta.dds.rds"))

# Default results table, converted to a plain data frame.
res <- data.frame(results(dds))
# Load the previously saved DESeq2 object instead of refitting (saves run
# time), then extract its results as a plain data frame.
dds <- readRDS(paste0(dir, "rds/TCGA_BRCA_countsMeta.dds.rds"))
res <- data.frame(results(dds))

Add DEG information

# Classification thresholds: minimum fold change and adjusted p-value cutoff.
fc <- 2
pval <- 0.05

# Label each gene as UP, DN or no_sig. "UP" is tested first, so it wins if
# both conditions could hold.
res <- res %>%
  mutate(
    DE = case_when(
      padj < pval & log2FoldChange >= log2(fc) ~ "UP",
      padj < pval & log2FoldChange <= -log2(fc) ~ "DN",
      TRUE ~ "no_sig"
    )
  )

# Drop every row that still contains an NA in any column.
res <- na.omit(res)

DEG table

# Interactive results table with horizontal scrolling and CSV/Excel export
# buttons.
DT::datatable(
  res,
  width = "700px",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    scrollX = TRUE,
    buttons = c("csv", "excel")
  )
)

Volcano plot

# Volcano plot: log2 fold change against -log10 adjusted p-value, coloured by
# DE class, with guide lines at the fold-change and p-value thresholds.
res$DE <- factor(res$DE, levels = c("UP", "DN", "no_sig"))
res %>%
  ggplot(aes(log2FoldChange, -log10(padj), color = DE)) +
  geom_point(size = 1, alpha = 0.5) +
  # Named values tie each colour to its class even when a class has no genes.
  # The legend is kept (the guides() call below enlarges its keys), so the
  # deprecated `guide = FALSE`, which guides() overrode anyway, is dropped.
  scale_color_manual(
    values = c(UP = "red3", DN = "royalblue3", no_sig = "grey")
  ) +
  theme_classic() +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), color = "grey") +
  # Use the same cutoff as the DE classification instead of a literal 0.05.
  geom_hline(yintercept = -log10(pval), color = "grey") +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  ggtitle(paste0(levels(dds$cond)[2], " / ", levels(dds$cond)[1])) +
  ggeasy::easy_center_title() # centre the title

Volcano plot with number of DEGs

# Volcano plot annotated with the number of UP and DN genes in the top
# corners.
# NOTE(review): `t` shadows the name of base::t(); the name is kept because
# later code may refer to it.
t <- paste0(levels(dds$cond)[2], " / ", levels(dds$cond)[1])

# Count by logical sum so that NA entries in DE are not counted as rows.
up <- sum(res$DE == "UP", na.rm = TRUE)
dn <- sum(res$DE == "DN", na.rm = TRUE)

res %>%
  ggplot(aes(log2FoldChange, -log10(padj), color = DE)) +
  geom_point(size = 0.5, shape = 19, alpha = 0.7) +
  geom_vline(xintercept = c(-log2(fc), log2(fc)), size = 0.1, color = "grey") +
  # Use the same cutoff as the DE classification instead of a literal 0.05.
  geom_hline(yintercept = -log10(pval), size = 0.1, color = "grey") +
  # Named values tie each colour to its class; "none" hides the legend
  # (replaces the deprecated `guide = FALSE`).
  scale_color_manual(
    values = c(UP = "red3", DN = "royalblue3", no_sig = "grey"),
    guide = "none"
  ) +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up),
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn),
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() +
  ggtitle(t)

Number of DEGs

UP : Genes upregulated in Tumor compared to Normal Tissue
DN : Genes downregulated in Tumor compared to Normal Tissue

# Bar chart of the number of UP and DN genes, with the count printed above
# each bar.
res %>%
  filter(DE != "no_sig") %>%
  ggplot(aes(DE, fill = DE)) +
  geom_bar(color = "black", size = 0.2) +
  # Colour the labels through a mapped aesthetic rather than a fixed length-2
  # vector, so the plot still works when only one class is present and the
  # colours do not depend on level order.
  geom_text(
    aes(label = after_stat(count), color = DE),
    stat = "count", vjust = -0.1, size = 4, show.legend = FALSE
  ) +
  scale_fill_manual(
    values = c(UP = "salmon", DN = "royalblue"), guide = "none"
  ) +
  scale_color_manual(
    values = c(UP = "salmon", DN = "royalblue"), guide = "none"
  ) +
  theme_bw()

# Write the annotated results table to an RDS file under `dir`.
saveRDS(res, paste0(dir, "TCGA_BRCA_RNAseq_TN_DEGs.rds"))