# Sample : Control
# CellRanger count for Transcriptome
cellranger count --id=<SampleID_GEX> \
--transcriptome=<Path_to_Mouse_GEX_Reference> \
--fastqs=<Path_to_GEX_Raw_Data> \
--sample=<Sample_Name_GEX> \
--expect-cells=10000
# CellRanger vdj for VDJ information
cellranger vdj --id=<SampleID_VDJ> \
--reference=<Path_to_Mouse_VDJ_Reference> \
--fastqs=<Path_to_VDJ_Raw_Data> \
--sample=<Sample_Name_VDJ>
# Sample : Treated
# CellRanger count for Transcriptome
cellranger count --id=<SampleID_GEX_2> \
--transcriptome=<Path_to_Mouse_GEX_Reference> \
--fastqs=<Path_to_GEX_Raw_Data_2> \
--sample=<Sample_Name_GEX_2> \
--expect-cells=10000
# CellRanger vdj for VDJ information
cellranger vdj --id=<SampleID_VDJ_2> \
--reference=<Path_to_Mouse_VDJ_Reference> \
--fastqs=<Path_to_VDJ_Raw_Data_2> \
--sample=<Sample_Name_VDJ_2>
# CellRanger aggr for combining all samples
cellranger aggr --id=<Aggregation_ID> \
--csv=<Path_to_Aggregation_CSV>
Cell Ranger 5’ VDJ Algorithm Overview link
# Import cellranger aggr output
vdj <- read.csv('path_to_filtered_contig_annotations.csv')
clono <- read.csv('path_to_clonotypes.csv')
# Removal of duplicates and merging by clonotype_id
vdj <- vdj %>% distinct(barcode, .keep_all = TRUE)
rownames(vdj) <- vdj$barcode
vdj.clono <- inner_join(vdj, clono, by=c('raw_clonotype_id'='clonotype_id'))
# Counting TRA and TRB from cdr3s_aa
vdj.clono$TRBcount <- stringr::str_count(vdj.clono$cdr3s_aa, "TRB")
vdj.clono$TRAcount <- stringr::str_count(vdj.clono$cdr3s_aa, "TRA")
vdj.clono <- vdj.clono %>% arrange(desc(frequency))
rownames(vdj.clono) <- vdj.clono$barcode
# filtering 1 alpha chain and 1 beta chain clonotype
vdj.clono <- vdj.clono %>% filter(TRBcount == 1 & TRAcount == 1)
# save vdj information
vdj.clono %>% write.csv('path_to_save_vdj_clono.csv')
Immune repertoire analysis
Analysis using R packages (scRepertoire, immunarch)
1. Diversity Index
2. Repertoire analysis
3. Expansion study
Merging vdj information with scRNA-seq (if available)
Suggested analyses below
Target cell selection
Correlation of clonotype frequency with related data
Comparative Analysis
Statistical Modeling
# data_melted : Clonotype frequency information
# Draw plot
ggplot(data_melted, aes(x = reorder(Clonotype, -value), y = value,
fill = variable)) +
geom_col(color = "grey3", size=0.2) +
scale_fill_manual(values = c("salmon", "darkorange"),
labels = c("Sample A", "Sample B")) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=6),
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5, size=20)) +
labs(x = "Top 100 Clonotype", y = "Total number of clonotype",
title = "Clonotype distribution by sample")
# Bubble Plot
# input data format
# column1 : name of the factor
# column2 : frequency of the factor
bubble_plot = function(input.data, title="Abundant Clonotypes"){
colnames(input.data)[2] = "Freq"
packing <- circleProgressiveLayout(input.data$Freq, sizetype='area')
input.data <- cbind(input.data, packing)
colnames(input.data)[1] ='clonotype'
dat.gg <- circleLayoutVertices(packing, npoints=50)
ggplot() +
# Make the bubbles
geom_polygon(data = dat.gg, aes(x, y, group = id,
fill=as.factor(id)),
colour = "black", alpha = 0.6) +
theme_void() +
theme(legend.position="none",
plot.title = element_text(hjust = 0.5, size=30)) +
coord_equal() +
# Add text in the center of each bubble + control its size
geom_text(data = input.data, aes(x, y, size=Freq, label = clonotype)) +
scale_size_continuous(range = c(0.5,4)) +
ggtitle(title) +
scale_fill_viridis_d(option = "D", begin = 0.0, end = 1.0, direction = 1)
}
bubble_plot(input.data = data[,c(1,2)], title = "Abundant Clonotypes in A")
bubble_plot(input.data = data[,c(1,3)], title = "Abundant Clonotypes in B")
# Draw heatmap
expression_matrix[rownames(cell_metadata),] %>%
pheatmap::pheatmap(show_rownames = F,
annotation_row = cell_metadata,
cluster_rows = T)
More example plots will be updated.