This package provides a streamlined resource for uniformly processed publicly available TCR-seq data from SRA with attention to known library preparation method and normalization by read number. The goal of reTCR is to provide users with a wealth of easy to access, end-to-end, uniformly processed data for exploratory analysis.
The raw sequencing data were processed with MiXCR using a specifc library preset for each study and further downsampled by read number allowing for comparison between samples. Normalization is crucial for separating biological signal from technical noise. Next, output from MiXCR is reformatted and input into pyTCR for downstream analysis. Users can browse attributes such as study metadata, clonotype count per sample, and several diveristy metrics.
[!WARNING] The package work is in progress.
1. Basic metrics
# summary data
print(proj@basic@summary_data)
# reads count
print(proj@basic@reads_count)
# clonotype count
print(proj@basic@clonotype_count)
# mean frequency
print(proj@basic@mean_freq)
# geometric mean of clonotype frequency
print(proj@basic@geomean_freq)
# mean length of CDR3 nucleotide sequence
print(proj@basic@mean_cdr3nt_len)
# convergence
print(proj@basic@convergence)
# spectratype
print(proj@basic@spectratype)
1.2 Statistical analysis
D’Agostino normality test
Shapiro-Wilk normality test
library(stats)
# using clonotype count
shapiro.test(proj@basic@summary_data$clonotype_count)
2. Diversity metrics
# shannon index values
print(proj@diversity@shannon)
# simpson index values
print(proj@diversity@simpson)
# D50 index
print(proj@diversity@d50)
# Chao1 estimate and standard deviation
print(proj@diversity@chao1)
# Gini coefficient
print(proj@diversity@gini_coeff)
2.2 Diversity visualization
reTCR::plot_diversity_index(proj@diversity@shannon, "shannon_index", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@shannon, "shannon_wiener_index", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@simpson, "simpson_index", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@simpson, "inverse_simpson_index", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@d50, "d50_index", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@chao1, "chao1", "cmv_status")
reTCR::plot_diversity_index(proj@diversity@gini_coeff, "gini_coeff", "cmv_status")
3. Clonality metrics
# most frequent clonotypes
print(proj@clonality@most_clonotype)
# least frequent clonotypes
print(proj@clonality@least_clonotype)
# 1-Pielou index
print(proj@clonality@pielou)
# clonal proportion
print(proj@clonality@clonal_prop)
# relative abundance (in all clonotypes)
print(proj@clonality@abundance)
# relative abundance (in top 100 clonotypes)
print(proj@clonality@abundance_top)
# relative abundance (in rare 100 clonotypes)
print(proj@clonality@abundance_rare)
3.2 Clonality visualization
# let's store clonal proportion in `clonal_data`
clonal_data <- proj@clonality@clonal_prop
# plot clonal proportion per sample
reTCR::plot_clonal_prop_per_sample(clonal_data, "cmv_status")
# plot clonal proportion per group
reTCR::plot_clonal_prop_per_group(clonal_data, "cmv_status")
# plot relative abundance in all clonotypes
reTCR::plot_clonotype_abundance(proj@clonality@abundance)
# plot reads group abbundance in top 100 clonotypes
reTCR::plot_reads_group_abundance(proj@clonality@abundance_top)
# plot reads group abbundance in rare 100 clonotypes
reTCR::plot_reads_group_abundance(proj@clonality@abundance_rare)
4. Motif metrics
# Amino acid spectratype
print(proj@motif@aa_spectra)
# Amino acid max spectratype
print(proj@motif@aa_max_spectra)
# Amino acid motif count table
print(proj@motif@aa_motif_count)
# Most abundant amino acid motif per sample
print(proj@motif@aa_most_motif)
4.2 Motif visualization
# plot amino acid motif counts
reTCR::plot_motifs(proj@motif@aa_motif_count, "cmv_status", 20)
5. Segment usage metrics
# Top n highest clonotypes
reTCR::get_top_n_clonotypes(proj@data, 15, "cmv_status")
# Bottom n lowest clonotypes
reTCR::get_bottom_n_clonotypes(proj@data, 20, "cmv_status")
6. Hill numbers
# get hill numbers
print(proj@hill)