library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.xcr)

Colour palettes

# Palette returned by select_palette() when called with "patient".
# NOTE(review): pal_patient is not referenced in the visible part of this
# file -- confirm it is still needed.
pal_patient <- select_palette("patient")

Parameters

# Path of the SQLite database, read from the `db` entry of the Snakemake
# params; passed to src_sqlite() and read_clonotypes() further down.
db_path <- snakemake@params$db

# Path of the clonotype table, read from the `xcr_table` entry of the
# Snakemake input; passed to read_clonotypes() further down.
xcr_table_path <- snakemake@input$xcr_table

Metadata

# Open the SQLite database at db_path; create = FALSE requires the file to
# exist already instead of creating a new one.
db <- src_sqlite(db_path, create = FALSE)

# Pull the two metadata tables fully into memory.
samples <- db %>% tbl("samples") %>% collect()
duplicates <- db %>% tbl("duplicates") %>% collect()

# Load the clonotype table. `duplicates = FALSE` is an argument of
# read_clonotypes(), not the `duplicates` table collected above.
xcr_table <- read_clonotypes(
  xcr_table_path,
  duplicates = FALSE,
  db_path,
  verbose = 1
)

Read 36.1% of 304822 rows
Read 88.6% of 304822 rows
Read 304822 rows and 18 (of 18) columns from 0.070 GB file in 00:00:04
# Value of the `type` column used to select the TCR clonotypes.
tcr_segment_type <- "TRB"
# Value of the `type` column used to select the BCR clonotypes.
bcr_segment_type <- "IGH"

# NOTE(review): id_type is not read in the visible part of this file; the
# plots facet on the literal column name condensed_id -- confirm usage.
id_type <- "condensed_id"

Number of unique VOAs: 95, number of unique condensed_ids: 95.

Species abundance distributions

Species abundance distributions (SADs) are an important part of checking TCR/BCR-seq data.

TCR

# Keep only the rows of xcr_table whose `type` equals tcr_segment_type.
tcr_clonotypes <- subset(xcr_table, type == tcr_segment_type)

# Histogram (50 bins) of log10-transformed `freq`, one facet per condensed_id.
# The x-axis label names the quantity actually plotted, log10(freq), so the
# axis text agrees with the aesthetic and states the log base.
# scales = "free_y" gives every facet its own y-axis range.
ggplot(tcr_clonotypes, aes(x = log10(freq))) +
  theme_bw() +
  theme_Publication() +
  xlab("log10(freq)") +
  ylab("Frequency") +
  facet_wrap(~condensed_id, nrow = 16, scales = "free_y") +
  geom_histogram(bins = 50)

BCR

# Keep only the rows of xcr_table whose `type` equals bcr_segment_type.
bcr_clonotypes <- subset(xcr_table, type == bcr_segment_type)

# Histogram (50 bins) of log10-transformed `freq`, one facet per condensed_id.
# The x-axis label names the quantity actually plotted, log10(freq), so the
# axis text agrees with the aesthetic and states the log base.
# scales = "free_y" gives every facet its own y-axis range.
ggplot(bcr_clonotypes, aes(x = log10(freq))) +
  theme_bw() +
  theme_Publication() +
  xlab("log10(freq)") +
  ylab("Frequency") +
  facet_wrap(~condensed_id, nrow = 16, scales = "free_y") +
  geom_histogram(bins = 50)

