library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.ihc)
library(ithi.xcr)
library(ithi.clones)

Colour palettes

pal_patient <- select_palette("patient")

Parameters

db_path <- snakemake@params$db
ihc_table_path <- snakemake@input$ihc_table
ihc_table_slide_path <- snakemake@input$ihc_table_slide

spatial_results_file <- snakemake@input$spatial_results_file

tcr_diversity_file <- snakemake@input$tcr_diversity
bcr_diversity_file <- snakemake@input$bcr_diversity

nanostring_data_path <- snakemake@input$nanostring_data
nanostring_annotations_path <- snakemake@input$nanostring_annotations

clonal_measures_file <- snakemake@input$ith_stats

Metadata

db <- src_sqlite(db_path, create = FALSE)
samples <- collect(tbl(db, "samples"))

id_cols <- c("voa", "patient_id", "condensed_id", "field", "tissue")
id_cols_sample <- c("voa", "patient_id", "condensed_id", "tissue")

Analysis

rename_field <- function(field) {
    voa_portion <- str_replace(field, "_HP_IM3.*$", "")
    voa_portion <- str_replace(voa_portion, "_", " ")
    field_portion <- str_extract(field, "_HP_IM3.*$")
    field_portion <- str_replace(field_portion, "__", "_[")
    field_portion <- paste0(field_portion, "]", sep = "")
    field <- paste(voa_portion, field_portion, sep = "")
    return(field)
}

fudge_cols <- function(ihc_slide_subset) {
    count_cols <- colnames(ihc_slide_subset)[str_detect(colnames(ihc_slide_subset), 
        "count")]
    ## JOIN WITH UNDERSCORES, THEN USE RESHAPE new_cols <-
    ## lapply(strsplit(str_replace_all(count_cols, '_count', ''), '_'),
    ## function(x) paste(rev(x), collapse='_')) colnames(ihc_slide_subset) <-
    ## mapvalues(colnames(ihc_slide_subset), count_cols, unlist(new_cols))
    df_melted <- melt(ihc_slide_subset, id.vars = c("voa", "field", "condensed_id"), 
        measure.vars = run1_cols, variable.name = "type", value.name = "count")
    df_melted$tissue <- mapvalues(str_extract(df_melted$type, "^(E|S)"), from = c("E", 
        "S"), to = c("epithelial", "stroma"))
    df_melted$type <- tolower(str_replace_all(df_melted$type, "(^(E|S)_|_count$)", 
        ""))
    df_cast <- dcast(df_melted, voa + field + condensed_id + tissue ~ type, 
        fun.aggregate = sum, value.var = "count")
    df_fudge <- df_cast %>% mutate(cd8_cd8 = cd8 * cd8, cd4_cd8 = cd4 * cd8, 
        cd4_cd4 = cd4 * cd4, cd20_cd4 = cd20 * cd4, cd20_cd8 = cd20 * cd8, cd20_cd20 = cd20 * 
            cd20)
    return(df_fudge)
}
spatial_data <- fread(spatial_results_file)

Read 0.0% of 1650000 rows
Read 12.1% of 1650000 rows
Read 22.4% of 1650000 rows
Read 34.5% of 1650000 rows
Read 46.7% of 1650000 rows
Read 58.2% of 1650000 rows
Read 69.7% of 1650000 rows
Read 80.6% of 1650000 rows
Read 91.5% of 1650000 rows
Read 1650000 rows and 28 (of 28) columns from 0.532 GB file in 00:00:15
if ("tissue.1" %in% colnames(spatial_data)) {
    spatial_data <- subset(spatial_data, select = -c(tissue.1))
}
spatial_data$field <- rename_field(spatial_data$field)

ihc_table_slide <- fread(ihc_table_slide_path)
ihc_table_slide$field <- str_replace(ihc_table_slide$slide, "\\.im3$", "")
ihc_table_slide <- subset(ihc_table_slide, select = -c(slide))

run1_tils <- c("CD8_count", "CD4_count", "CD20_count")
run1_cols <- c(paste0("E_", run1_tils), paste0("S_", run1_tils))
ihc_slide_subset <- subset(ihc_table_slide, select = c("voa", "field", "condensed_id", 
    run1_cols))
ihc_slide_fudge <- fudge_cols(ihc_slide_subset)

INTERACTION_VARS <- c("cd8_cd8", "cd4_cd8", "cd4_cd4", "cd20_cd4", "cd20_cd8", 
    "cd20_cd20")

ihc_slide_fudge_melted <- melt(ihc_slide_fudge, id.vars = c("voa", "field", 
    "condensed_id", "tissue"), measure.vars = INTERACTION_VARS, variable.name = "type", 
    value.name = "count")
spatial_data_melted <- melt(spatial_data, id.vars = c("voa", "field", "tissue", 
    "weight", "condensed_id", "patient_id"), measure.vars = INTERACTION_VARS, 
    variable.name = "type", value.name = "gamma")
spatial_data_merge <- merge(spatial_data_melted, ihc_slide_fudge_melted, by = c("voa", 
    "field", "tissue", "condensed_id", "type"))

THRESHOLD <- 10

spatial_data_merge <- subset(spatial_data_merge, count >= THRESHOLD)

spatial_summary_melted <- spatial_data_merge %>% group_by_(.dots = c(id_cols, 
    "type")) %>% summarise(gamma = weighted.mean(gamma, weight))

Remove super-outliers

spatial_summary_melted_filtered <- spatial_summary_melted %>% group_by(type, 
    tissue) %>% filter(!(abs(gamma - median(gamma, na.rm = TRUE)) > 10 * IQR(gamma, 
    na.rm = TRUE)))

Posterior distributions

ggplot(spatial_summary_melted_filtered, aes(x = gamma)) + geom_histogram(aes(fill = tissue), 
    bins = 30) + theme_bw() + theme_Publication() + facet_wrap(~type, scales = "free")

Correlations with TIL densities

Slide level

spatial_summary_filtered <- dcast(spatial_summary_melted_filtered, field + tissue ~ 
    type, value.var = "gamma")

spatial_ihc <- merge(ihc_table_slide, spatial_summary_filtered, by = c("field"))

IHC_VARS <- c("T_CD8_density", "T_CD4_density", "T_CD20_density", "T_Plasma_density", 
    "E_CD8_density", "E_CD4_density", "E_CD20_density", "E_Plasma_density", 
    "S_CD8_density", "S_CD4_density", "S_CD20_density", "S_Plasma_density", 
    "RUN1_E_area_pct", "RUN1_S_area_pct")
plot_correlation_heatmap <- function(dat, id_cols, VARS) {
    spatial_ihc_subset <- subset(dat, select = c(id_cols, VARS))
    spatial_ihc_subset_mat <- subset(spatial_ihc_subset, select = c(VARS))
    
    corres <- corr.test(spatial_ihc_subset_mat, adjust = "fdr", method = "spearman")
    corres.r <- corres$r
    corres.p <- corres$p
    hc <- hclust(dist(corres.r), method = "ward.D")
    
    diag(corres.r) <- NA
    diag(corres.p) <- NA
    
    corres.labels <- format(signif(corres.p, 2), 2)
    
    cols <- colorRampPalette(c("#95cbee", "#ffd73e", "#ce472e"))(100)
    pheatmap(corres.r[hc$order, hc$order], display_numbers = corres.labels, 
        color = cols, number_color = "black", cluster_rows = TRUE, cluster_cols = TRUE)
}
## Will fail due to filtering plot_correlation_heatmap(subset(spatial_ihc,
## tissue == 'epithelial'), id_cols, c(IHC_VARS, INTERACTION_VARS))
# plot_correlation_heatmap(subset(spatial_ihc, tissue == 'stroma'), id_cols,
# c(IHC_VARS, INTERACTION_VARS))

Sample level

ihc_table <- fread(ihc_table_path)

N_THRESHOLD <- 0

spatial_summary_melted_filtered_sample <- spatial_summary_melted_filtered %>% 
    group_by(voa, patient_id, condensed_id, tissue, type) %>% summarise(gamma = mean(gamma, 
    na.rm = TRUE), n = n())
spatial_summary_melted_filtered_sample <- subset(spatial_summary_melted_filtered_sample, 
    n >= N_THRESHOLD)

spatial_summary_filtered_sample <- dcast(spatial_summary_melted_filtered_sample, 
    voa + condensed_id + tissue ~ type, value.var = "gamma")

spatial_ihc_sample <- merge(ihc_table, spatial_summary_filtered_sample, by = c("voa", 
    "condensed_id"))
plot_correlation_heatmap(subset(spatial_ihc_sample, tissue == "epithelial"), 
    id_cols_sample, c(IHC_VARS, INTERACTION_VARS))

plot_correlation_heatmap(subset(spatial_ihc_sample, tissue == "stroma"), id_cols_sample, 
    c(IHC_VARS, INTERACTION_VARS))

Correlations with ITH

clonal_measures <- read_ith_stats(clonal_measures_file, db_path, duplicates = FALSE)

ITH_VARS <- c("entropy", "divergence", "postprocessed_divergence", "combined_ith_raw", 
    "combined_ith_normalized", "proportion_subclonal")
spatial_ith <- merge(clonal_measures, spatial_summary_filtered_sample, by = c("condensed_id"))
plot_correlation_heatmap(subset(spatial_ith, tissue == "epithelial"), id_cols_sample, 
    c(ITH_VARS, INTERACTION_VARS))

plot_correlation_heatmap(subset(spatial_ith, tissue == "stroma"), id_cols_sample, 
    c(ITH_VARS, INTERACTION_VARS))

spatial_ith_melted <- melt(spatial_ith, id.vars = colnames(spatial_ith)[!colnames(spatial_ith) %in% 
    ITH_VARS], measure.vars = ITH_VARS, variable.name = "ithtype", value.name = "ith")

spatial_ith_melted$patient_id <- factor(spatial_ith_melted$patient_id)

pvals <- setNames(ddply(spatial_ith_melted, .(tissue, ithtype), function(x) {
    df <- x
    corres <- with(df, cor.test(cd8_cd8, ith, method = "spearman"))
    
    pval <- corres$p.value
    eq <- substitute(italic(P) == p, list(p = format(pval, digits = 3)))
    return(as.character(as.expression(eq)))
}), c("tissue", "ithtype", "p.value"))

ggplot(spatial_ith_melted, aes(x = cd8_cd8, y = ith)) + geom_point(aes(colour = patient_id)) + 
    scale_colour_manual(values = pal_patient) + theme_bw() + theme_Publication() + 
    facet_wrap(tissue ~ ithtype, scales = "free") + geom_text(data = pvals, 
    aes(x = Inf, y = Inf, label = p.value), hjust = 1.1, vjust = 1.5, size = 3, 
    parse = TRUE)

---
title: "Spatial analysis"
output: 
  html_notebook:
    toc: true
    toc_depth: 5
    toc_float: true
params:
  rmd: "spatial_analysis.Rmd"
---
                        ```{r, echo=FALSE, message=FALSE, warning=FALSE}

######## Snakemake header ########
library(methods)
Snakemake <- setClass(
    "Snakemake",
    slots = c(
        input = "list",
        output = "list",
        params = "list",
        wildcards = "list",
        threads = "numeric",
        log = "list",
        resources = "list",
        config = "list",
        rule = "character"
    )
)
snakemake <- Snakemake(
    input = list('/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/TRB/postfilter_diversity_stats/diversity.strict.resampled.txt', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ith_statistics.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table_slide.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/spatial_table.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table.tsv', '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/IGH/postfilter_diversity_stats/diversity.strict.resampled.txt', 'Rmd/spatial_analysis.Rmd', "ith_stats" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ith_statistics.tsv', "ihc_table_slide" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table_slide.tsv', "spatial_results_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/spatial_table.tsv', "ihc_table" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2/ihc_table.tsv', "site_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', "nanostring_annotations" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', "tcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/TRB/postfilter_diversity_stats/diversity.strict.resampled.txt', "bcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/IGH/postfilter_diversity_stats/diversity.strict.resampled.txt', "nanostring_data" = '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', "notebook" = 'Rmd/spatial_analysis.Rmd'),
    output = list('/shahlab/alzhang/projects/ITH_Immune/paper/results/web/spatial_analysis.nb.html'),
    params = list('/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', 'ithi-analysis-spatial-correlations', "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "name" = 'ithi-analysis-spatial-correlations'),
    wildcards = list(),
    threads = 1,
    log = list('/shahlab/alzhang/clusttmp/paperanalysis2/spatial_correlations.log'),
    resources = list(),
    config = list("classifier_type" = 'knn', "example_msa_plot" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run13/old/alignment_plots/msa/ith2_2/clust9/indel_reversed.html', "ith_statistics_notebook" = 'Rmd/ith_statistics.Rmd', "tcga_ov_annotations" = '/shahlab/alzhang/data/TCGA/tcga_ov_annotation_sup13.txt', "tcga_expr_matrix" = '/shahlab/alzhang/data/TCGA/expr_matrix_normalize_standardize_noduplicates.tsv', "benchmarkdir" = '/shahlab/alzhang/benchmarks/paperanalysis2', "tils_for_cluster" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "example_annotations" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run13/final_partitions/ith2_2/clust9/annotations_flagged.tsv', "library_sizes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/library_sizes.tsv', "tcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/TRB/postfilter_diversity_stats/diversity.strict.resampled.txt', "mmctm_sample_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-sample/output', "variability_type" = 'stabilize', "site_file" = '/shahlab/alzhang/projects/ITH_Immune/paper/analysis/Rmd/_site.yml', "xcr_qc_notebook" = 'Rmd/replicates.Rmd', "driver_map" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/drivers/data/gene_list_mapped.bed', "neoediting_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/neoediting/run4', "mvclust_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "intermediate_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/intermediates/run2', "xcr_mapping_notebook" = 'Rmd/xcr_mapping.Rmd', "icgc_expr_melted" = '/shahlab/alzhang/data/ICGC/OVAU_expr_melted.tsv', "bcr_diversity" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/postprocess/IGH/postfilter_diversity_stats/diversity.strict.resampled.txt', "mmctm_ancestral_descendant_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-ancestry-sample/output', "known_subtypes_merged" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/known_subtypes_merged.tsv', "mmctm_final_patient_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient_with-ov', "immune_variability_notebook" = 'Rmd/immune_variability.Rmd', "xcr_distance_method" = 'horn', "driver_analysis_notebook" = 'Rmd/driver_analysis.Rmd', "mmctm_sample_ad_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-ancestry-sample/plots/ith-by-ancestral-sample_snv-sv_sigs_multipanel.pdf', "mmctm_ov_combined_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/combined_ov_mmctm/output', "default_sampler" = 'HMC', "tcr_clonotypes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/clonotypes/TRB_clonotypes_filtered.txt', "ith_til_notebook" = 'Rmd/ith_til_densities.Rmd', "mutation_signature_notebook" = 'Rmd/mutation_signatures.Rmd', "mmctm_ov_combined_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/combined_ov_mmctm/plots/ov_snv-sv_sigs_multipanel.pdf', "molsubtype_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "icgc_specimen_file" = '/shahlab/alzhang/data/ICGC/specimen.tsv', "spatial_result_dirs" = list("epithelial" = '/shahlab/alzhang/pipeline_outputs/ith_immune/spatsim/ith3/abc', "stromal" = '/shahlab/alzhang/pipeline_outputs/ith_immune/spatsim/ith5/abc'), "PNG_QUALITY" = 300, "xcrmapscape_notebook" = 'Rmd/xcrmapscape.Rmd', "mmctm_patient_ad_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient-ancestry/plots/ith-by-patient-ancestry_snv-sv_sigs_multipanel.pdf', "multiviewclustering_notebook" = 'Rmd/multiviewclustering.Rmd', "mmctm_sample_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-sample/plots/ith-by-sample_snv-sv_sigs_multipanel.pdf', "mapscape_notebook" = 'Rmd/mapscape.Rmd', "mmctm_final_patient_sigplot" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient_with-ov/plots/ith-by-patient_snv-sv_sigs_multipanel.pdf', "icgc_clinical" = '/shahlab/alzhang/data/ICGC/donor.OV-AU.tsv', "mvclust_nclust" = 3, "ihc_xcr_stats_notebook" = 'Rmd/ihc_xcr_stats.Rmd', "sad_notebook" = 'Rmd/species_abundance_distributions.Rmd', "patients_for_clonal" = c(1, 2, 3, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17), "subtype_marker_file" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/subtype_markers.tsv', "table_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/tables/run2', "db" = '/shahlab/alzhang/projects/ITH_Immune/metadata/db/immune_project.sqlite3', "clone_tree_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/tree_data.tsv', "ihc_run2" = '/shahlab/alzhang/projects/ITH_Immune/data/ihc/cd79cd138cd68/validated_stats_weighted.rdata', "nclusts" = 3, "spatial_notebook" = 'Rmd/spatial_analysis.Rmd', "bcrphylo_correlations_notebook" = 'Rmd/bcr_phylo_correlations.Rmd', "clone_branch_length_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/branch_data.tsv', "mmctm_patient_ancestral_descendant_result_dir" = '/shahlab/alzhang/projects/ITH_Immune/results/mmctm_results/ith_by-patient-ancestry/output', "neoantigen_editing_notebook" = 'Rmd/immunoediting.Rmd', "icgc_molecular_subtypes" = '/shahlab/alzhang/data/ICGC/icgc_primary_tumour_subtypes.tsv', "master_breakpoint_file" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_breakpoints.tsv', "prevalence_threshold" = 0.01, "igpartition_outdir" = '/shahlab/alzhang/pipeline_outputs/ith_immune/igpartition/run22', "tcga_clinical" = '/shahlab/alzhang/data/TCGA/synapse_clinAll_data.tsv', "nanostring_data" = '/shahlab/alzhang/projects/ITH_Immune/results/nanostring_results/ith_full/qc/limma_quantile/normalized_expression_voa_labels_filtered.tsv', "mutsig_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "ihc_run1" = '/shahlab/alzhang/projects/ITH_Immune/data/ihc/cd8cd3cd20/validated_stats_weighted_new.rdata', "nanostring_signature_notebook" = 'Rmd/nanostring_signatures.Rmd', "notebook_dir" = '/shahlab/alzhang/projects/ITH_Immune/paper/results/web', "immtyper_lengths" = '11 12 13 14 15 16 17 18', "clonal_samplers" = c('HMC', 'NUTS'), "ihc_xcr_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "known_subtype_file" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/array/subtypes/known_subtypes.tsv', "bcrphylo_tiltypes" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density', 'T_CD8_density', 'T_CD4_density', 'T_CD20_density', 'T_Plasma_density'), "nanostring_annotations" = '/shahlab/alzhang/projects/ITH_Immune/data/expression/nanostring/pancancer_annotations.tsv', "ith_stats_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/clonal_measures.tsv', "proportion_subclonal_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/old_proportion_subclonal.tsv', "PNG_DENSITY" = 300, "til_classifier_notebook" = 'Rmd/til_classifier.Rmd', "index_notebook" = 'Rmd/index.Rmd', "ith_stat_types" = c('entropy', 'postprocessed_divergence', 'combined_ith_normalized', 'proportion_subclonal'), "j_dictionary" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/immtyper/metadata/imgt/Homo_sapiens_TRBJ.fasta', "tils_for_variability" = c('E_CD8_density', 'E_CD4_density', 'E_CD20_density', 'E_Plasma_density', 'S_CD8_density', 'S_CD4_density', 'S_CD20_density', 'S_Plasma_density'), "molsubtype_notebook" = 'Rmd/molecular_subtypes.Rmd', "bcrphylo_examples_notebook" = 'Rmd/bcr_phylo_examples.Rmd', "bcr_clonotypes" = '/shahlab/alzhang/pipeline_outputs/ith_immune/mixcr/mixcr_runs/ith_1_2_3/mixcr5/clonotypes/IGH_clonotypes_filtered.txt', "master_variant_file" = '/shahlab/amcpherson/projects/ith3/ith3/notebooks/bespoke/ith_snvs.tsv', "v_dictionary" = '/shahlab/alzhang/projects/ITH_Immune/subprojects/immtyper/metadata/imgt/Homo_sapiens_TRBV.fasta', "phenotype_threshold" = 0.85, "clone_prevalence_file" = '/shahlab/alzhang/projects/ITH_Immune/data/ith/complete/clone_data.tsv', "ith_project_results" = '/shahlab/alzhang/projects/ith3/data/results', "xcr_clones_notebook" = 'Rmd/xcr_clones_analysis.Rmd', "figure_gallery_notebook" = 'Rmd/figures.Rmd', "logdir" = '/shahlab/alzhang/clusttmp/paperanalysis2', "wang_fbi_status" = '/shahlab/alzhang/data/ICGC/ng.3849-S12.txt', "immtyper_models" = '/shahlab/alzhang/projects/ITH_Immune/results/immtyper_results/klarenbeek/aa_vj/gradboost', "icgc_normalized_reads_matrix" = '/shahlab/alzhang/data/ICGC/OVAU_expr_matrix.tsv'),
    rule = 'spatial_correlations'
)
######## Original script #########

                        ```



```{r global_chunk_options, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, tidy=TRUE, warning=FALSE, message=FALSE)
```


```{r}
library(ithi.utils)
load_base_libs()

library(ithi.meta)
library(ithi.ihc)
library(ithi.xcr)
library(ithi.clones)
```

## Colour palettes

```{r}
pal_patient <- select_palette("patient")
```

## Parameters

```{r}
db_path <- snakemake@params$db
ihc_table_path <- snakemake@input$ihc_table
ihc_table_slide_path <- snakemake@input$ihc_table_slide

spatial_results_file <- snakemake@input$spatial_results_file

tcr_diversity_file <- snakemake@input$tcr_diversity
bcr_diversity_file <- snakemake@input$bcr_diversity

nanostring_data_path <- snakemake@input$nanostring_data
nanostring_annotations_path <- snakemake@input$nanostring_annotations

clonal_measures_file <- snakemake@input$ith_stats
```

## Metadata

```{r}
db <- src_sqlite(db_path, create=FALSE)
samples <- collect(tbl(db, "samples"))

id_cols <- c("voa", "patient_id", "condensed_id", "field", "tissue")
id_cols_sample <- c("voa", "patient_id", "condensed_id", "tissue")
```

## Analysis

```{r}
rename_field <- function(field) {
  voa_portion <- str_replace(field, "_HP_IM3.*$", "")
  voa_portion <- str_replace(voa_portion, "_", " ")
  field_portion <- str_extract(field, "_HP_IM3.*$")
  field_portion <- str_replace(field_portion, "__", "_[")
  field_portion <- paste0(field_portion, "]", sep="")
  field <- paste(voa_portion, field_portion, sep="")
  return(field)
}

fudge_cols <- function(ihc_slide_subset) {
  count_cols <- colnames(ihc_slide_subset)[str_detect(colnames(ihc_slide_subset), "count")]
  ## JOIN WITH UNDERSCORES, THEN USE RESHAPE
  #new_cols <- lapply(strsplit(str_replace_all(count_cols, "_count", ""), "_"), function(x) paste(rev(x), collapse="_"))
  #colnames(ihc_slide_subset) <- mapvalues(colnames(ihc_slide_subset), count_cols, unlist(new_cols))
  df_melted <- melt(ihc_slide_subset,
                    id.vars = c("voa", "field", "condensed_id"),
                    measure.vars = run1_cols, variable.name = "type", value.name = "count")
  df_melted$tissue <- mapvalues(str_extract(df_melted$type, "^(E|S)"),
                                from = c("E", "S"),
                                to = c("epithelial", "stroma"))
  df_melted$type <- tolower(str_replace_all(df_melted$type, "(^(E|S)_|_count$)", ""))
  df_cast <- dcast(df_melted, voa+field+condensed_id+tissue ~ type, fun.aggregate = sum, value.var = "count")
  df_fudge <- df_cast %>% mutate(
    cd8_cd8=cd8*cd8,
    cd4_cd8=cd4*cd8,
    cd4_cd4=cd4*cd4,
    cd20_cd4=cd20*cd4,
    cd20_cd8=cd20*cd8,
    cd20_cd20=cd20*cd20
  )
  return(df_fudge)
}
```

```{r}
spatial_data <- fread(spatial_results_file)
if ("tissue.1" %in% colnames(spatial_data)) {
  spatial_data <- subset(spatial_data, select=-c(tissue.1))
}
spatial_data$field <- rename_field(spatial_data$field)

ihc_table_slide <- fread(ihc_table_slide_path)
ihc_table_slide$field <- str_replace(ihc_table_slide$slide, "\\.im3$", "")
ihc_table_slide <- subset(ihc_table_slide, select=-c(slide))

run1_tils <- c("CD8_count", "CD4_count", "CD20_count")
run1_cols <- c(paste0("E_", run1_tils), paste0("S_", run1_tils))
ihc_slide_subset <- subset(ihc_table_slide, select=c("voa", "field", "condensed_id", run1_cols))
```

```{r}
ihc_slide_fudge <- fudge_cols(ihc_slide_subset)

INTERACTION_VARS <- c("cd8_cd8", "cd4_cd8", "cd4_cd4", "cd20_cd4", "cd20_cd8", "cd20_cd20")

ihc_slide_fudge_melted <- melt(ihc_slide_fudge, id.vars = c("voa", "field", "condensed_id", "tissue"), 
     measure.vars = INTERACTION_VARS,
     variable.name = "type", value.name = "count")
spatial_data_melted <- melt(spatial_data, id.vars = c("voa", "field", "tissue", "weight", "condensed_id", "patient_id"), measure.vars = INTERACTION_VARS, variable.name = "type",
                            value.name = "gamma")
```

```{r}
spatial_data_merge <- merge(spatial_data_melted, ihc_slide_fudge_melted, by=c("voa", "field", "tissue", "condensed_id", "type"))

THRESHOLD <- 10

spatial_data_merge <- subset(spatial_data_merge, count >= THRESHOLD)

spatial_summary_melted <- spatial_data_merge %>% group_by_(.dots = c(id_cols, "type")) %>% summarise(gamma=weighted.mean(gamma, weight))
```

### Remove super-outliers

```{r}
spatial_summary_melted_filtered <- spatial_summary_melted %>% group_by(type, tissue) %>% filter(!(abs(gamma-median(gamma,na.rm=TRUE)) > 10*IQR(gamma,na.rm=TRUE)))
```

### Posterior distributions

```{r, fig.width=12, fig.height=12}
ggplot(spatial_summary_melted_filtered, aes(x=gamma)) + geom_histogram(aes(fill=tissue), bins=30) + theme_bw() + theme_Publication() + facet_wrap(~ type, scales="free")
```

### Correlations with TIL densities

#### Slide level

```{r}
spatial_summary_filtered <- dcast(spatial_summary_melted_filtered, field+tissue ~ type,value.var = "gamma")

spatial_ihc <- merge(ihc_table_slide, spatial_summary_filtered, by=c("field"))

IHC_VARS <- c("T_CD8_density", "T_CD4_density", "T_CD20_density", "T_Plasma_density",
              "E_CD8_density", "E_CD4_density", "E_CD20_density", "E_Plasma_density",
              "S_CD8_density", "S_CD4_density", "S_CD20_density", "S_Plasma_density",
              "RUN1_E_area_pct", "RUN1_S_area_pct")
```

```{r}
plot_correlation_heatmap <- function(dat, id_cols, VARS) {
  spatial_ihc_subset <- subset(dat, select=c(id_cols, VARS))
  spatial_ihc_subset_mat <- subset(spatial_ihc_subset, select=c(VARS))
  
  corres <- corr.test(spatial_ihc_subset_mat, adjust="fdr", method="spearman")
  corres.r <- corres$r
  corres.p <- corres$p
  hc <- hclust(dist(corres.r), method="ward.D")
  
  diag(corres.r) <- NA
  diag(corres.p) <- NA
  
  corres.labels <- format(signif(corres.p, 2), 2)
  
  cols <- colorRampPalette(c("#95cbee", "#ffd73e","#ce472e"))(100)
  pheatmap(corres.r[hc$order,hc$order], display_numbers = corres.labels, 
           color = cols, number_color = "black", 
           cluster_rows = TRUE, cluster_cols=TRUE)
}
```

```{r fig.width=10, fig.height=10}
## Will fail due to filtering
#plot_correlation_heatmap(subset(spatial_ihc, tissue == "epithelial"), id_cols, c(IHC_VARS, INTERACTION_VARS))
```

```{r fig.width=10, fig.height=10}
#plot_correlation_heatmap(subset(spatial_ihc, tissue == "stroma"), id_cols, c(IHC_VARS, INTERACTION_VARS))
```

#### Sample level

```{r}
ihc_table <- fread(ihc_table_path)

N_THRESHOLD <- 0

spatial_summary_melted_filtered_sample <- spatial_summary_melted_filtered %>% group_by(voa, patient_id, condensed_id, tissue, type) %>% summarise(gamma=mean(gamma, na.rm=TRUE), n=n())
spatial_summary_melted_filtered_sample <- subset(spatial_summary_melted_filtered_sample, n >= N_THRESHOLD)

spatial_summary_filtered_sample <- dcast(spatial_summary_melted_filtered_sample, voa+condensed_id+tissue ~ type,value.var = "gamma")

spatial_ihc_sample <- merge(ihc_table, spatial_summary_filtered_sample, by=c("voa", "condensed_id"))
```


```{r fig.width=10, fig.height=10}
plot_correlation_heatmap(subset(spatial_ihc_sample, tissue == "epithelial"), id_cols_sample, c(IHC_VARS, INTERACTION_VARS))
```

```{r fig.width=10, fig.height=10}
plot_correlation_heatmap(subset(spatial_ihc_sample, tissue == "stroma"), id_cols_sample, c(IHC_VARS, INTERACTION_VARS))
```


### Correlations with ITH

```{r}
clonal_measures <- read_ith_stats(clonal_measures_file, db_path, duplicates=FALSE)

ITH_VARS <- c("entropy", "divergence", "postprocessed_divergence", "combined_ith_raw", "combined_ith_normalized", "proportion_subclonal")
```

```{r}
spatial_ith <- merge(clonal_measures, spatial_summary_filtered_sample, by=c("condensed_id"))
```

```{r}
plot_correlation_heatmap(subset(spatial_ith, tissue == "epithelial"), id_cols_sample, c(ITH_VARS, INTERACTION_VARS))
```

```{r}
plot_correlation_heatmap(subset(spatial_ith, tissue == "stroma"), id_cols_sample, c(ITH_VARS, INTERACTION_VARS))
```

```{r}
spatial_ith_melted <- melt(spatial_ith, id.vars = colnames(spatial_ith)[!colnames(spatial_ith) %in% ITH_VARS], measure.vars = ITH_VARS, variable.name = "ithtype", value.name = "ith")

spatial_ith_melted$patient_id <- factor(spatial_ith_melted$patient_id)

pvals <- setNames(ddply(spatial_ith_melted, .(tissue,ithtype), function(x) {
  df <- x
  corres <- with(df, cor.test(cd8_cd8, ith, method="spearman"))
  
  pval <- corres$p.value
  eq <- substitute(italic(P)==p, list(p=format(pval, digits=3)))
  return(as.character(as.expression(eq)))
}), c("tissue", "ithtype", "p.value"))

ggplot(spatial_ith_melted, aes(x=cd8_cd8, y=ith)) + geom_point(aes(colour=patient_id)) + scale_colour_manual(values = pal_patient) + theme_bw() + theme_Publication() + facet_wrap(tissue ~ ithtype, scales="free") + geom_text(data=pvals, aes(x=Inf, y=Inf, label=p.value), hjust=1.1, vjust=1.5,size=3,parse=TRUE) 
```


