Code
library(Seurat)
library(glmmTMB)
library(qs2)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)Visualises differential expression results from the glmmTMB model fitted in scripts/glmmTMB_no_chr21.qmd (chromosome 21 genes excluded prior to model fitting). Produces an interaction logFC scatter plot, per-gene violin and pseudobulk boxplots, eCDF plots, and ISG/top-gene summaries.
Inputs: output/run1/objects/no_chr21/glmmTMB_with_interactions_corrected.qs2, output/run1/objects/prelabelled_integrated_rpca.qs2 Outputs: PDF figures written to output/run1/graphs/no_chr21/
library(Seurat)
library(glmmTMB)
library(qs2)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)run_num <- "run1"
out_dir <- file.path("output", run_num)
graphs_dir <- file.path(out_dir, "graphs", "no_chr21")
objects_dir <- file.path(out_dir, "objects")
no_chr21_dir <- file.path(objects_dir, "no_chr21")
dir.create(graphs_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(no_chr21_dir, recursive = TRUE, showWarnings = FALSE)required_inputs <- c(
file.path(no_chr21_dir, "glmmTMB_with_interactions_corrected.qs2"),
file.path(objects_dir, "prelabelled_integrated_rpca.qs2")
)
missing <- required_inputs[!file.exists(required_inputs)]
if (length(missing) > 0) {
message("Required inputs not found:\n", paste(" -", missing, collapse = "\n"),
"\nRun scripts/glmmTMB_no_chr21.qmd and seurat_trisomy_v1/v2.qmd first.")
knitr::knit_exit()
}
final_df <- qs_read(required_inputs[1])
seurat_obj <- qs_read(required_inputs[2])plot_data <- final_df %>%
# 1. Keep only the rows where we are looking at the Ploidy effect (A vs B)
# and ensure Background is one of our two target groups.
filter(contrast == "T - D", !is.na(Background)) %>%
# 2. Select only the necessary columns.
# Important: Interaction_FDR must be the same for both rows of a gene
# or you'll get duplicates again.
select(Gene, Background, estimate, Interaction_FDR) %>%
# 3. Pivot: this should now have 1 row per Gene
pivot_wider(names_from = Background, values_from = estimate) %>%
# 4. Create labeling logic
mutate(
Significance = ifelse(Interaction_FDR < 0.05, "Significant Interaction", "Not Significant"),
Label = ifelse(Interaction_FDR < 0.05, Gene, NA)
)p <- ggplot(plot_data, aes(x = NLGF, y = hAPP)) +
# Draw the non-significant genes first so they fall in the background
geom_point(data = filter(plot_data, Significance == "Not Significant"),
color = "grey80", alpha = 0.5, size = 1.5) +
# Draw your 23 significant genes on top in a bold color
geom_point(data = filter(plot_data, Significance == "Significant Interaction"),
color = "red", alpha = 0.9, size = 2.5) +
# Add the "Zero" crosshairs to divide it into quadrants
geom_hline(yintercept = 0, linetype = "dotted", color = "black") +
geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
# Add the diagonal "Line of No Interaction" (Slope = 1, Intercept = 0)
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "darkblue", linewidth = 0.8) +
# Add the repelling text labels for the significant genes
geom_text_repel(aes(label = Label),
color = "black",
size = 4,
fontface = "bold",
box.padding = 0.5,
max.overlaps = Inf) +
# Aesthetics
theme_minimal() +
labs(
title = "Interaction Landscape: Trisomy Effect by Plaque Background (chr21 removed)",
x = "Log2 Fold Change (Trisomic vs Disomic in NLGF)",
y = "Log2 Fold Change (Trisomic vs Disomic in hAPP)"
) +
theme(
plot.title = element_text(face = "bold", size = 14),
axis.title = element_text(face = "bold")
)
print(p)Warning: Removed 14601 rows containing missing values or values outside the scale range
(`geom_text_repel()`).

ggsave(
file.path(graphs_dir, "interaction_logFC_scatterplot.pdf"),
plot = p,
width = 12,
height = 6,
units = "in"
)Warning: Removed 14601 rows containing missing values or values outside the scale range
(`geom_text_repel()`).
#Extract signficant interaction genes from plot data
sig_genes <- plot_data %>%
filter(Significance == "Significant Interaction") %>%
pull(Gene)
sig_genes [1] "ADCY9" "ASTN1" "BAHCC1" "BIRC7"
[5] "BRINP2" "C5orf64" "CALM3" "CFAP70"
[9] "CHCHD2" "CLIC1" "CTSD" "DHX40"
[13] "DPH5-DT" "DYNC2H1" "ENSG00000237356" "ENSG00000253288"
[17] "ENSG00000270130" "ENSG00000271254" "ENSG00000287024" "FERMT3"
[21] "HS3ST4" "INCA1" "JOSD2" "LINC00680"
[25] "LINC00888" "LINC01852" "LINC02352" "LINC03011"
[29] "MTHFS" "MYL12A" "NEIL1" "PPIL1"
[33] "SH3PXD2B" "SHQ1" "TDP2" "TRIQK"
[37] "TTC8" "USP54" "ZNF248" "ZNF354C"
[41] "ZNF493" "ZNF664" "ZNF790-AS1"
#Pull raw counts for these genes
expr_matrix <- GetAssayData(seurat_obj, assay = "SCT", layer = "data")[sig_genes, ]
expr_df <- as.data.frame(t(as.matrix(expr_matrix)))
expr_df$CellID <- rownames(expr_df)
expr_df$Ploidy <- factor(seurat_obj@meta.data$microglia, levels = c("D", "T"))
expr_df$Background <- factor(seurat_obj@meta.data$app_genotype, levels = c("NLGF", "hAPP"))
expr_df$Mouse <- seurat_obj@meta.data$orig.ident
expr_df$Condition <- factor(seurat_obj@meta.data$group_id)
# Pivot to "Long" format for ggplot2
expr_long <- expr_df %>%
pivot_longer(cols = any_of(sig_genes), names_to = "Gene", values_to = "Expression")
pdf_file <- file.path(graphs_dir, "Interaction_Genes_Raw_Violins.pdf")
pdf(pdf_file, width = 8, height = 6)
for (gene in sig_genes) {
# Filter data for the current gene
gene_data <- expr_long %>% filter(Gene == gene)
# Create the plot
p <- ggplot(gene_data, aes(x = Condition, y = Expression, fill = Background)) +
# scale="width" ensures all violins are the same width regardless of cell count
geom_violin(scale = "width", alpha = 0.7, color = "black") +
# Add a point for the mean expression to help guide the eye
stat_summary(fun = mean, geom = "point", shape = 23, size = 3, fill = "white") +
scale_fill_manual(values = c("NLGF" = "#4C72B0", "hAPP" = "#C44E52")) +
theme_minimal(base_size = 14) +
labs(
title = paste("Expression of", gene),
subtitle = "Significant Ploidy * Background Interaction (chr21 removed)",
x = "Condition",
y = "Raw Counts"
) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(face = "bold"),
legend.position = "none"
)
print(p)
}
dev.off()png
2
cat("Saved violin plots to:", pdf_file, "\n")Saved violin plots to: output/run1/graphs/no_chr21/Interaction_Genes_Raw_Violins.pdf
p_dot <- DotPlot(seurat_obj,
features = sig_genes,
group.by = "group_id",
assay = "SCT") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold.italic"),
axis.text.y = element_text(face = "bold", size = 12),
plot.title = element_text(face = "bold", hjust = 0.5)) +
scale_color_gradient(low = "lightgrey", high = "#C44E52") +
labs(title = "Significant Ploidy * Background Interaction Genes (chr21 removed)",
x = "Gene",
y = "Condition")Warning: Scaling data with a low number of groups may produce misleading
results
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
dp_file <- file.path(graphs_dir, "Interaction_Genes_DotPlot.pdf")
pdf(dp_file, width = 12, height = 5)
print(p_dot)
dev.off()png
2
cat("DotPlot saved to:", dp_file, "\n")DotPlot saved to: output/run1/graphs/no_chr21/Interaction_Genes_DotPlot.pdf
# Collapse the single cells down to one average value per mouse
pb_long <- expr_long %>%
group_by(Mouse, Condition, Background, Gene) %>%
summarise(Pseudobulk_Expression = mean(Expression), .groups = "drop")
pdf_file <- file.path(graphs_dir, "Interaction_Genes_Pseudobulk.pdf")
pdf(pdf_file, width = 8, height = 6)
for (gene in sig_genes) {
gene_pb <- pb_long %>% filter(Gene == gene)
p <- ggplot(gene_pb, aes(x = Condition, y = Pseudobulk_Expression, fill = Background)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
geom_jitter(width = 0.2, size = 3, shape = 21, color = "black", stroke = 1) +
scale_fill_manual(values = c("NLGF" = "#4C72B0", "hAPP" = "#C44E52")) +
theme_minimal(base_size = 14) +
labs(
title = paste(gene, "- Pseudobulk Expression"),
subtitle = "Each dot represents the average normalized expression of one mouse",
x = "Condition",
y = "Mean SCT Normalized Expression"
) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
legend.position = "none"
)
print(p)
}
dev.off()png
2
cat("Saved pseudobulked boxplots to:", pdf_file, "\n")Saved pseudobulked boxplots to: output/run1/graphs/no_chr21/Interaction_Genes_Pseudobulk.pdf
ecdf_pdf_file <- file.path(graphs_dir, "Interaction_Genes_NonZero_eCDF_with_Proportions.pdf")
pdf(ecdf_pdf_file, width = 10, height = 7)
for (gene in sig_genes) {
gene_data_all <- expr_long %>% filter(Gene == gene)
stats_summary <- gene_data_all %>%
group_by(Condition) %>%
summarise(
total_cells = n(),
non_zero_cells = sum(Expression > 0),
prop_expressing = (non_zero_cells / total_cells) * 100,
.groups = "drop"
) %>%
mutate(label = paste0(Condition, ": ", round(prop_expressing, 1), "%"))
prop_text <- paste("Expressing Cells % -", paste(stats_summary$label, collapse = " | "))
gene_data_nonzero <- gene_data_all %>% filter(Expression > 0)
if(nrow(gene_data_nonzero) < 5) {
message("Skipping ", gene, " - too few expressing cells.")
next
}
p_ecdf <- ggplot(gene_data_nonzero, aes(x = Expression, color = Condition)) +
stat_ecdf(geom = "step", linewidth = 1.2) +
theme_minimal(base_size = 12) +
labs(
title = paste(gene, "- Non-Zero Distribution"),
subtitle = prop_text,
x = "SCT Normalized Expression (Non-Zero Only)",
y = "Cumulative Fraction of Expressing Cells",
color = "Condition"
) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 9, color = "blue4"),
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(p_ecdf)
}
dev.off()png
2
cat("Saved annotated eCDF plots to:", ecdf_pdf_file, "\n")Saved annotated eCDF plots to: output/run1/graphs/no_chr21/Interaction_Genes_NonZero_eCDF_with_Proportions.pdf
ecdf_pdf_file <- file.path(graphs_dir, "Interaction_Genes_AllCells_eCDF_with_Proportions.pdf")
pdf(ecdf_pdf_file, width = 10, height = 7)
for (gene in sig_genes) {
gene_data_all <- expr_long %>% filter(Gene == gene)
stats_summary <- gene_data_all %>%
group_by(Condition) %>%
summarise(
total_cells = n(),
non_zero_cells = sum(Expression > 0),
prop_expressing = (non_zero_cells / total_cells) * 100,
.groups = "drop"
) %>%
mutate(label = paste0(Condition, ": ", round(prop_expressing, 1), "%"))
prop_text <- paste("Expressing Cells % -", paste(stats_summary$label, collapse = " | "))
if(nrow(gene_data_all) < 5) {
message("Skipping ", gene, " - too few total cells.")
next
}
p_ecdf <- ggplot(gene_data_all, aes(x = Expression, color = Condition)) +
stat_ecdf(geom = "step", linewidth = 1.2) +
theme_minimal(base_size = 12) +
labs(
title = paste(gene, "- All Cells Distribution"),
subtitle = prop_text,
x = "SCT Normalized Expression (All Cells)",
y = "Cumulative Fraction of Total Cells",
color = "Condition"
) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 9, color = "blue4"),
legend.position = "bottom",
panel.grid.minor = element_blank()
)
print(p_ecdf)
}
dev.off()png
2
cat("Saved annotated eCDF plots to:", ecdf_pdf_file, "\n")Saved annotated eCDF plots to: output/run1/graphs/no_chr21/Interaction_Genes_AllCells_eCDF_with_Proportions.pdf
isg_genes <- c("IFI27", "IFI44L", "IFIT1", "IFIT2", "IFIT3",
"MX1", "MX2", "OAS1", "ISG15", "USP18",
"RSAD2", "LY6E", "DDX60", "TRIM25", "STAT1", "IRF7")
isg_nlgf <- final_df %>%
filter(Background == "NLGF" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(Gene %in% isg_genes) %>%
slice_head(n = 10)
isg_happ <- final_df %>%
filter(Background == "hAPP" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(Gene %in% isg_genes) %>%
slice_head(n = 10)
expr_matrix <- GetAssayData(seurat_obj, assay = "SCT", layer = "data")[isg_genes, ]
expr_df <- as.data.frame(t(as.matrix(expr_matrix)))
expr_df$CellID <- rownames(expr_df)
expr_df$Ploidy <- factor(seurat_obj@meta.data$microglia, levels = c("D", "T"))
expr_df$Background <- factor(seurat_obj@meta.data$app_genotype, levels = c("NLGF", "hAPP"))
expr_df$Mouse <- seurat_obj@meta.data$orig.ident
expr_df$Condition <- factor(seurat_obj@meta.data$group_id)
expr_long <- expr_df %>%
pivot_longer(cols = any_of(isg_genes), names_to = "Gene", values_to = "Expression")
# Collapse the single cells down to one average value per mouse
pb_long <- expr_long %>%
group_by(Mouse, Condition, Background, Gene) %>%
summarise(Pseudobulk_Expression = mean(Expression), .groups = "drop")
pdf_file <- file.path(graphs_dir, "ISG_genes_Pseudobulk_glmmtmb.pdf")
pdf(pdf_file, width = 8, height = 6)
for (gene in isg_genes) {
gene_pb <- pb_long %>% filter(Gene == gene)
nlgf_fdr_raw <- final_df %>% filter(Gene == gene & Background == "NLGF" & contrast == "T - D") %>% pull(FDR)
happ_fdr_raw <- final_df %>% filter(Gene == gene & Background == "hAPP" & contrast == "T - D") %>% pull(FDR)
fmt_nlgf <- ifelse(length(nlgf_fdr_raw) > 0, signif(nlgf_fdr_raw, digits = 3), "N/A")
fmt_happ <- ifelse(length(happ_fdr_raw) > 0, signif(happ_fdr_raw, digits = 3), "N/A")
p <- ggplot(gene_pb, aes(x = Condition, y = Pseudobulk_Expression, fill = Background)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
geom_jitter(width = 0.2, size = 3, shape = 21, color = "black", stroke = 1) +
scale_fill_manual(values = c("NLGF" = "#4C72B0", "hAPP" = "#C44E52")) +
theme_minimal(base_size = 14) +
labs(
title = paste(gene, "- Pseudobulk Expression"),
subtitle = paste0("FDR (NLGF): ", fmt_nlgf, " | FDR (hAPP): ", fmt_happ, "\nEach dot = average expression of one mouse"),
x = "Condition",
y = "Mean SCT Normalized Expression"
) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
legend.position = "none"
)
print(p)
}
dev.off()png
2
cat("Saved pseudobulked boxplots to:", pdf_file, "\n")Saved pseudobulked boxplots to: output/run1/graphs/no_chr21/ISG_genes_Pseudobulk_glmmtmb.pdf
pb_long <- expr_long %>%
group_by(Mouse, Condition, Background, Gene) %>%
summarise(Pseudobulk_Expression = mean(Expression), .groups = "drop")
top10_sig <- final_df %>%
filter(Background == "NLGF" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(estimate > 0) %>%
slice_head(n = 10)
bot10_sig <- final_df %>%
filter(Background == "NLGF" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(estimate < 0) %>%
slice_head(n = 10)
top_bottom_res <- bind_rows(top10_sig, bot10_sig)
target_genes <- top_bottom_res$Gene
pdf_file <- file.path(graphs_dir, "ISG_genes_top_bottom_nlgf_a_vs_b.pdf")
pdf(pdf_file, width = 8, height = 6)
for (gene in target_genes) {
gene_pb <- pb_long %>% filter(Gene == gene)
gene_fdr <- top_bottom_res %>% filter(Gene == gene) %>% pull(FDR)
formatted_fdr <- signif(gene_fdr, digits = 3)
p <- ggplot(gene_pb, aes(x = Condition, y = Pseudobulk_Expression, fill = Background)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
geom_jitter(width = 0.2, size = 3, shape = 21, color = "black", stroke = 1) +
scale_fill_manual(values = c("NLGF" = "#4C72B0", "hAPP" = "#C44E52")) +
theme_minimal(base_size = 14) +
labs(
title = paste(gene, "- Pseudobulk Expression"),
subtitle = paste0("glmmTMB FDR: ", formatted_fdr, " | Each dot = average expression of one mouse"),
x = "Condition",
y = "Mean SCT Normalized Expression"
) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
legend.position = "none"
)
print(p)
}Warning: No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
dev.off()png
2
cat("Saved pseudobulked boxplots to:", pdf_file, "\n")Saved pseudobulked boxplots to: output/run1/graphs/no_chr21/ISG_genes_top_bottom_nlgf_a_vs_b.pdf
pb_long <- expr_long %>%
group_by(Mouse, Condition, Background, Gene) %>%
summarise(Pseudobulk_Expression = mean(Expression), .groups = "drop")
top10_sig <- final_df %>%
filter(Background == "hAPP" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(estimate > 0) %>%
slice_head(n = 10)
bot10_sig <- final_df %>%
filter(Background == "hAPP" & contrast == "T - D") %>%
arrange(FDR) %>%
filter(estimate < 0) %>%
slice_head(n = 10)
top_bottom_res <- bind_rows(top10_sig, bot10_sig)
target_genes <- top_bottom_res$Gene
pdf_file <- file.path(graphs_dir, "top10_bottom_happ_a_vs_b.pdf")
pdf(pdf_file, width = 8, height = 6)
for (gene in target_genes) {
gene_pb <- pb_long %>% filter(Gene == gene)
gene_fdr <- top_bottom_res %>% filter(Gene == gene) %>% pull(FDR)
formatted_fdr <- signif(gene_fdr, digits = 3)
p <- ggplot(gene_pb, aes(x = Condition, y = Pseudobulk_Expression, fill = Background)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
geom_jitter(width = 0.2, size = 3, shape = 21, color = "black", stroke = 1) +
scale_fill_manual(values = c("NLGF" = "#4C72B0", "hAPP" = "#C44E52")) +
theme_minimal(base_size = 14) +
labs(
title = paste(gene, "- Pseudobulk Expression"),
subtitle = paste0("glmmTMB FDR: ", formatted_fdr, " | Each dot = average expression of one mouse"),
x = "Condition",
y = "Mean SCT Normalized Expression"
) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
legend.position = "none"
)
print(p)
}Warning: No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
No shared levels found between `names(values)` of the manual scale and the
data's fill values.
dev.off()png
2
cat("Saved pseudobulked boxplots to:", pdf_file, "\n")Saved pseudobulked boxplots to: output/run1/graphs/no_chr21/top10_bottom_happ_a_vs_b.pdf