library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.clones)
library(ithi.seq)

Colour palettes

# Colour values for the "patient" palette; passed as `values` to
# scale_color_manual() in the plots that colour points by patient_id.
# NOTE(review): select_palette() is not defined in this file — presumably it
# comes from one of the ithi.* packages; confirm what structure it returns.
pal_patient <- select_palette("patient")

Parameters

# Path to the database, taken from the Snakemake rule's params.
db_path <- snakemake@params$db

# Input files supplied by the Snakemake rule.
ith_stats_file <- snakemake@input$ith_stats
# proportion_subclonality_file <- snakemake@input$subclonality
tumour_purity_file <- snakemake@input$tumour_purity

# Results table for the old clonal predictions.
# NOTE(review): this is a hard-coded absolute path, unlike the inputs above
# that come from the snakemake object — the script will only run where this
# path exists; consider passing it in as a Snakemake input.
old_result_file <- "/shahlab/alzhang/projects/ITH_Immune/data/ith/old_ith_comparison.tsv"

Metadata

# Open the existing SQLite database (create = FALSE: do not create a new file
# if db_path is missing) and pull the whole "samples" table into memory.
# NOTE(review): src_sqlite() is deprecated in recent dplyr releases in favour
# of calling tbl() on a DBI connection — confirm against the dplyr version in
# use before upgrading.
db <- src_sqlite(db_path, create = FALSE)
samples <- collect(tbl(db, "samples"))

Description

The new clonal predictions are still under active development, so we need to validate their quality.

Old clonal predictions

Here’s what things looked like under the old set of predictions:

# Read the results table for the old clonal predictions. patient_id is
# converted to a factor; it is mapped to point colour in the plot below.
old_comparison <- fread(old_result_file)
old_comparison$patient_id <- factor(old_comparison$patient_id)

# Reshape to long format: the ITH measures are stacked into a single `ith`
# column labelled by `ithtype`; every other column is kept as an identifier.
ith_vars <- c("divergence", "entropy", "composite_ith")
other_vars <- setdiff(colnames(old_comparison), ith_vars)
old_comparison_melted <- melt(
  old_comparison,
  id.vars = other_vars,
  measure.vars = ith_vars,
  variable.name = "ithtype",
  value.name = "ith"
)

# For each ITH measure, run a Spearman correlation test of the measure against
# proportion_subclonal and keep the p-value (3 significant digits) as a
# plotmath label string for geom_text(parse = TRUE).
pvals <- ddply(old_comparison_melted, .(ithtype), function(piece) {
  spearman <- cor.test(
    piece[["proportion_subclonal"]],
    piece[["ith"]],
    method = "spearman"
  )
  label <- bquote(italic(P) == .(format(spearman$p.value, digits = 3)))
  as.character(as.expression(label))
})
names(pvals) <- c("ithtype", "p.value")

# One scatter panel per ITH measure with free axis scales; the p-value label
# is anchored at the top-right corner of each panel (x = Inf, y = Inf).
ggplot(old_comparison_melted, aes(x = proportion_subclonal, y = ith)) +
  geom_point(aes(colour = patient_id)) +
  theme_bw() +
  theme_Publication() +
  facet_wrap(~ithtype, scales = "free") +
  scale_color_manual(values = pal_patient) +
  geom_text(
    data = pvals,
    aes(x = Inf, y = Inf, label = p.value),
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE
  )

This is what we expect to see: a reasonably good correlation between naive measures of ITH and more “rigorous” measures.

New clonal predictions

# Read the ITH statistics for the new clonal predictions.
ith_stats <- read_ith_stats(ith_stats_file, db_path, duplicates = FALSE)
# subclonality <- fread(proportion_subclonality_file)

# new_comparison <- merge(ith_stats, subclonality, by=c('condensed_id',
# 'patient_id'))
# The merge with the subclonality table is disabled above, so the comparison
# table is the ITH statistics table as-is.
new_comparison <- ith_stats

# Reshape to long format: the ITH measures are stacked into a single `ith`
# column labelled by `ithtype`; every other column is kept as an identifier.
ith_vars <- c(
  "divergence",
  "entropy",
  "postprocessed_divergence",
  "combined_ith_raw",
  "combined_ith_normalized"
)
other_vars <- setdiff(colnames(new_comparison), ith_vars)
new_comparison_melted <- melt(
  new_comparison,
  id.vars = other_vars,
  measure.vars = ith_vars,
  variable.name = "ithtype",
  value.name = "ith"
)

# For each ITH measure, run a Spearman correlation test of the measure against
# proportion_subclonal and keep the p-value (3 significant digits) as a
# plotmath label string for geom_text(parse = TRUE).
pvals <- ddply(new_comparison_melted, .(ithtype), function(piece) {
  spearman <- cor.test(
    piece[["proportion_subclonal"]],
    piece[["ith"]],
    method = "spearman"
  )
  label <- bquote(italic(P) == .(format(spearman$p.value, digits = 3)))
  as.character(as.expression(label))
})
names(pvals) <- c("ithtype", "p.value")

# One scatter panel per ITH measure with free axis scales; the p-value label
# is anchored at the top-right corner of each panel (x = Inf, y = Inf).
ggplot(new_comparison_melted, aes(x = proportion_subclonal, y = ith)) +
  geom_point(aes(colour = patient_id)) +
  theme_bw() +
  theme_Publication() +
  facet_wrap(~ithtype, scales = "free") +
  scale_color_manual(values = pal_patient) +
  geom_text(
    data = pvals,
    aes(x = Inf, y = Inf, label = p.value),
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE
  )

As of now, these correlations are poor, suggesting that the clonal predictions still need further work. Alternatively, we may need to modify our ITH measures.

Tumour purity vs. ITH

A potential issue for inferring clonal diversity values is tumour purity. Low tumour purities reduce the effective read count contributed by malignant cells and hence may reduce the power of clonal inference.

# Load the tumour purity table; it is joined onto the melted ITH tables below
# and supplies the tumour_content column used in the purity plots.
# NOTE(review): read_tumour_purity() is not defined in this file — presumably
# it uses db_path to attach sample identifiers; confirm which columns it
# returns, since the later plyr::join() matches on all shared column names.
tumour_purity <- read_tumour_purity(tumour_purity_file, db_path)

Old estimates

# Reshape the old-prediction table to long format again, this time treating
# proportion_subclonal as one of the measures to compare against purity.
ith_vars <- c("divergence", "entropy", "composite_ith", "proportion_subclonal")
other_vars <- setdiff(colnames(old_comparison), ith_vars)
old_comparison_melted2 <- melt(
  old_comparison,
  id.vars = other_vars,
  measure.vars = ith_vars,
  variable.name = "ithtype",
  value.name = "ith"
)

# Attach tumour purity; with no `by` given, plyr::join() matches on every
# column name the two tables share.
old_comparison_melted2 <- plyr::join(old_comparison_melted2, tumour_purity)

# For each measure, run a Spearman correlation test of the measure against
# tumour_content and keep the p-value (3 significant digits) as a plotmath
# label string for geom_text(parse = TRUE).
pvals <- ddply(old_comparison_melted2, .(ithtype), function(piece) {
  spearman <- cor.test(
    piece[["tumour_content"]],
    piece[["ith"]],
    method = "spearman"
  )
  label <- bquote(italic(P) == .(format(spearman$p.value, digits = 3)))
  as.character(as.expression(label))
})
names(pvals) <- c("ithtype", "p.value")

# One scatter panel per measure with free axis scales; the p-value label is
# anchored at the top-right corner of each panel (x = Inf, y = Inf).
ggplot(old_comparison_melted2, aes(x = tumour_content, y = ith)) +
  geom_point(aes(colour = patient_id)) +
  theme_bw() +
  theme_Publication() +
  facet_wrap(~ithtype, scales = "free") +
  scale_color_manual(values = pal_patient) +
  geom_text(
    data = pvals,
    aes(x = Inf, y = Inf, label = p.value),
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE
  )

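Oddly enough, tumour purity has a slight negative correlation with ITH. So, more pure samples have less ITH. That’s the opposite of what we’d expect naively: if low purity reduced the power of clonal inference, less pure samples should appear to have less ITH, not more.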

New estimates

# Reshape the new-prediction table to long format again, this time treating
# proportion_subclonal as one of the measures to compare against purity.
ith_vars <- c(
  "divergence",
  "entropy",
  "postprocessed_divergence",
  "proportion_subclonal",
  "combined_ith_raw",
  "combined_ith_normalized"
)
other_vars <- setdiff(colnames(new_comparison), ith_vars)
new_comparison_melted2 <- melt(
  new_comparison,
  id.vars = other_vars,
  measure.vars = ith_vars,
  variable.name = "ithtype",
  value.name = "ith"
)

# Attach tumour purity; with no `by` given, plyr::join() matches on every
# column name the two tables share.
new_comparison_melted2 <- plyr::join(new_comparison_melted2, tumour_purity)

# For each measure, run a Spearman correlation test of the measure against
# tumour_content and keep the p-value (3 significant digits) as a plotmath
# label string for geom_text(parse = TRUE).
pvals <- ddply(new_comparison_melted2, .(ithtype), function(piece) {
  spearman <- cor.test(
    piece[["tumour_content"]],
    piece[["ith"]],
    method = "spearman"
  )
  label <- bquote(italic(P) == .(format(spearman$p.value, digits = 3)))
  as.character(as.expression(label))
})
names(pvals) <- c("ithtype", "p.value")

# One scatter panel per measure with free axis scales; the p-value label is
# anchored at the top-right corner of each panel (x = Inf, y = Inf).
ggplot(new_comparison_melted2, aes(x = tumour_content, y = ith)) +
  geom_point(aes(colour = patient_id)) +
  theme_bw() +
  theme_Publication() +
  facet_wrap(~ithtype, scales = "free") +
  scale_color_manual(values = pal_patient) +
  geom_text(
    data = pvals,
    aes(x = Inf, y = Inf, label = p.value),
    hjust = 1.1, vjust = 1.5, size = 3, parse = TRUE
  )

