Boundary cut-offs of 0.05 FDR and +/- 0.58 Log2FC (1.5x).
Code
# Define coloursvolcano_cols <-c("Upregulated"="firebrick3", "Downregulated"="navy", "Not Significant"="grey80")generate_swanky_volcano <-function(msstats_results, dict, comparison_label, logfc_cutoff =0.5, graphs_dir ="results") {# --- Data Processing --- volcano_data <- msstats_results %>%# Isolate the specific comparison and drop missing valuesfilter(Label == comparison_label, !is.na(adj.pvalue), is.finite(log2FC)) %>%# Map the Gene names from the dictionaryleft_join(dict, by ="Protein") %>%mutate(# Fallback to Protein ID if Gene name is missingGene =ifelse(is.na(Gene) | Gene =="", Protein, Gene),# Define significance categoriesStatus =case_when( adj.pvalue <0.05& log2FC > logfc_cutoff ~"Upregulated", adj.pvalue <0.05& log2FC <-logfc_cutoff ~"Downregulated",TRUE~"Not Significant" ) ) %>%# Group by Status to rank Upregulated and Downregulated separatelygroup_by(Status) %>%# Rank by FDR (smallest adj.pvalue = 1)mutate(GroupRank =rank(adj.pvalue, ties.method ="first")) %>%ungroup() %>%mutate(# Label top 20 for each significant categoryPlotLabel =case_when( Status !="Not Significant"& GroupRank <=20~ Gene, TRUE~NA_character_ ) )# --- Plotting --- p <-ggplot(volcano_data, aes(x = log2FC, y =-log10(adj.pvalue), fill = Status, color = Status)) +geom_point(aes(size = Status, alpha = Status), shape =21, stroke =0.2) +scale_fill_manual(values = volcano_cols) +scale_color_manual(values =c("Upregulated"="black", "Downregulated"="black", "Not Significant"="grey90")) +scale_size_manual(values =c("Upregulated"=3, "Downregulated"=3, "Not Significant"=1.5)) +scale_alpha_manual(values =c("Upregulated"=1, "Downregulated"=1, "Not Significant"=0.3)) +geom_hline(yintercept =-log10(0.05), linetype ="dashed", color ="grey30", linewidth =0.8) +geom_vline(xintercept =c(-logfc_cutoff, logfc_cutoff), linetype ="dashed", color ="grey30", linewidth =0.8) +geom_label_repel(aes(label = PlotLabel),fill ="white", color ="black", fontface ="bold", size =3.5,box.padding =0.5, point.padding =0.5,min.segment.length =0, segment.color ="grey50", max.overlaps =50 ) +labs(title =paste0("Differential Abundance: ", gsub("_", " ", comparison_label)),subtitle =paste0("Top 20 Genes Labeled | FDR < 0.05 | |log2FC| > ", logfc_cutoff),x =bquote(bold("Log"[2] ~"Fold Change")),y =bquote(bold("-Log"[10] ~"FDR")) ) +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", size =18, hjust =0),plot.subtitle =element_text(size =12, color ="grey40", margin =margin(b =10)),axis.title =element_text(face ="bold", size =14),axis.text =element_text(face ="bold", color ="black"),axis.line =element_line(color ="black", linewidth =1),axis.ticks =element_line(color ="black", linewidth =1),legend.position ="top",legend.title =element_blank(),legend.text =element_text(size =12, face ="bold"),panel.grid.minor =element_blank(),panel.grid.major =element_line(color ="grey95") )# --- Output Generation ---# Ensure directory existsdir.create(graphs_dir, showWarnings =FALSE, recursive =TRUE) safe_name <-gsub(" ", "_", comparison_label)# Save Plotggsave(filename =file.path(graphs_dir, paste0("Volcano_", safe_name, "_Swanky.pdf")), plot = p, width =10, height =7, dpi =300,device = cairo_pdf )# Save Data Table# Select only the most useful columns and sort by significance for easy reading export_data <- volcano_data %>% dplyr::select(Protein, Gene, Description, log2FC, pvalue, adj.pvalue, Status) %>%arrange(adj.pvalue)fwrite(export_data, file =file.path(graphs_dir, paste0("Volcano_Data_", safe_name, ".csv")))return(p)}
Iterate through comparisons to generate plots and useful DE tables
Code
all_comparisons <-unique(res_df$Label)# 2. Loop through each comparison, generate the plot/data, and store the plots in a listplot_list <-list()for (comp in all_comparisons) { p <-generate_swanky_volcano(msstats_results = res_df, dict = protein_dictionary, comparison_label = comp, logfc_cutoff =0.5, graphs_dir = results_dir )# Store the plot in our list plot_list[[comp]] <- p# Print the plot so it appears in HTML outputprint(p)}
Warning: Removed 5533 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Removed 5533 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Warning: Removed 5527 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Warning: Removed 5527 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Warning: Removed 5543 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Warning: Removed 5543 rows containing missing values or values outside the scale range
(`geom_label_repel()`).
Interrogation of floor genes from Near vs Far plaque volcano plots
Code
res_df
Code
floor_check <- res_df %>%filter(Label =="PlaqueNear_vs_Control", adj.pvalue >0.99) %>% dplyr::select(Protein, log2FC, pvalue, adj.pvalue, SE, DF) %>%arrange(desc(abs(log2FC)))# Look at the top resultshead(floor_check, 10)
Code
#filtered for top 4 logFC proteins with >0.99 FDRtarget_proteins <- res_df %>%filter(Label =="PlaqueNear_vs_PlaqueFar", adj.pvalue >0.99) %>%arrange(desc(abs(log2FC))) %>%slice_head(n =4) %>%pull(Protein)#read in msstats dataprocessed_data <-qs_read(file.path(objects_dir, "processed_msstats_data.qs2"))# extract their summarized abundances from the processed MSstats objectabundance_data <- processed_data$ProteinLevelData %>%filter(Protein %in% target_proteins) %>%# Filter to just the two conditions of interest for clarityfilter(GROUP %in%c("plaque_near", "plaque_far")) %>%group_by(Protein, GROUP, SUBJECT) %>%summarise(LogIntensities =mean(LogIntensities, na.rm =TRUE), .groups ="drop")#plot abundances#each dot represents the summarised abundance for a biological replicate p_abundance <-ggplot(abundance_data, aes(x = GROUP, y = LogIntensities, fill = GROUP)) +geom_boxplot(alpha =0.5, outlier.shape =NA) +geom_jitter(width =0.2, size =2, alpha =0.8) +facet_wrap(~ Protein, scales ="free_y") +theme_minimal(base_size =14) +scale_fill_manual(values =c("plaque_near"="#d95f02", "plaque_far"="#7570b3")) +labs(title ="Abundance Profile of High-FC 'Floor' Proteins",subtitle ="FDR = 1.0 despite high Log2FC",x ="Condition",y ="Log2 Intensity" ) +theme(legend.position ="none")# Print the plotprint(p_abundance)