Code
library(data.table)
library(MSstats)
library(MSstatsPTM)
library(MSstatsConvert)
library(qs2)
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)Differential phospho-site abundance, adjusted for protein-level abundance changes from the bulk proteome pipeline. Uses MSstatsPTM, which fits paired models on PTM and protein datasets and reports both unadjusted (raw phospho) and adjusted (phospho minus protein) log2 fold changes per contrast.
Same contrast set as the bulk pipeline: 4 pairwise + APP × Microglia interaction.
Site-level localization filtering is applied to the phospho report at LOC_PROB_CUTOFF = 0.99 before the MSstatsPTM converter — the bulk protein report is left untouched. The search was confirmed to be directDIA (EG.Library = "directDIA™" in the Spectronaut export); Bekker-Jensen et al. 2020 (Mol Cell Proteomics) recommend a 0.99 probability threshold for directDIA searches rather than the 0.75 Class I default used for library-based searches.
project_root <- "/nemo/lab/destrooperb/home/shared/zanettc/giulia_proteomics"
phospho_base <- file.path(project_root, "phosphoproteomics")
bulk_base <- file.path(project_root, "bulk_proteomics")
run_num <- "run3"
phospho_results_dir <- file.path(phospho_base, "results", run_num)
phospho_objects_dir <- file.path(phospho_base, "data", "processed", run_num)
phospho_input_dir <- file.path(phospho_base, "input")
bulk_input_dir <- file.path(bulk_base, "input")
dir.create(phospho_results_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(phospho_objects_dir, recursive = TRUE, showWarnings = FALSE)
FASTA_PATH <- file.path(phospho_input_dir, "UP000000589_10090.fasta")
PTM_LABEL <- "Phospho (STY)" # how Spectronaut writes the modification
stopifnot("FASTA missing — see Setup notes" = file.exists(FASTA_PATH))The bulk report serves as the “global protein” reference for adjustment. We read the same column subset that the bulk MSstats notebook uses.
msstats_cols <- c(
"R.Condition", "R.FileName", "R.Replicate",
"PG.ProteinGroups", "PG.ProteinAccessions",
"PEP.StrippedSequence",
"EG.ModifiedSequence", "EG.PrecursorId", "EG.Qvalue",
"EG.PTMAssayProbability",
"FG.Charge", "FG.Id",
"F.FrgIon", "F.Charge", "F.FrgLossType", "F.FrgNum", "F.FrgType",
"F.NormalizedPeakArea", "F.PeakArea", "F.ExcludedFromQuantification"
)
# QC notebook 01 flagged that EG.PTMAssayProbability is sometimes missing from
# Spectronaut exports — probe the header and fall back to
# EG.PTMLocalizationProbabilities if needed.
phospho_path <- file.path(phospho_input_dir, "20260504_125530_PCF000612_phospho_GA_MSStats_updated.csv")
phospho_header <- names(fread(phospho_path, nrows = 0))
if ("EG.PTMAssayProbability" %in% phospho_header) {
loc_prob_col <- "EG.PTMAssayProbability"
} else if ("EG.PTMLocalizationProbabilities" %in% phospho_header) {
loc_prob_col <- "EG.PTMLocalizationProbabilities"
msstats_cols <- sub("EG.PTMAssayProbability", loc_prob_col, msstats_cols, fixed = TRUE)
} else {
stop(paste0(
"FATAL: no localization probability column found in the phospho report.\n",
"Expected EG.PTMAssayProbability or EG.PTMLocalizationProbabilities.\n",
"Re-export from Spectronaut with PTM localization enabled before proceeding.\n",
"Do NOT run the pipeline without this column — site-level filtering would be skipped\n",
"and downstream results would be unreliable."
))
}
cat("Localization probability column:", loc_prob_col, "\n")Localization probability column: EG.PTMAssayProbability
Reading phospho report...
Phospho rows: 19232048
Reading bulk proteome report (for protein-level adjustment)...
Protein rows: 24057952
# directDIA confirmed via EG.Library = "directDIA™" in the Spectronaut export.
# Bekker-Jensen et al. 2020 (Mol Cell Proteomics) recommend 0.99 for directDIA
# rather than the 0.75 Class I threshold used for library-based DIA searches.
LOC_PROB_CUTOFF <- 0.99
n_before <- nrow(phospho_raw)
sites_before <- uniqueN(phospho_raw$EG.ModifiedSequence)
proteins_before <- uniqueN(phospho_raw$PG.ProteinGroups)
phospho_raw <- phospho_raw[get(loc_prob_col) >= LOC_PROB_CUTOFF]
n_after <- nrow(phospho_raw)
sites_after <- uniqueN(phospho_raw$EG.ModifiedSequence)
proteins_after <- uniqueN(phospho_raw$PG.ProteinGroups)
pct <- round(100 * n_after / n_before, 2)
cat(sprintf("Localization filter: %s >= %.2f\n", loc_prob_col, LOC_PROB_CUTOFF))Localization filter: EG.PTMAssayProbability >= 0.99
Rows: 19232048 → 5003615 (26.02% retained)
Sites: 175523 → 58826
Proteins: 8199 → 7308
The phospho report writes sample IDs as GA_P_1..GA_P_16 while the bulk proteome and the annotation use GA_1..GA_16. Strip the GA_P_ prefix on the phospho data so both reports key against the same annotation. R.Condition is also “Not Defined” in both raw files — overwrite it from the annotation.
SpectronauttoMSstatsPTMFormat() returns a list of two data frames: PTM and PROTEIN. Both must be present for protein-level adjustment. The same annotation table is used for both — MSstatsPTM matches by Run.
ptm_input <- suppressWarnings(SpectronauttoMSstatsPTMFormat(
input = phospho_raw,
annotation = msstats_annotation,
fasta_path = FASTA_PATH,
protein_input = protein_raw,
use_unmod_peptides = FALSE, # site-level: drop unmodified peptides
mod_id = "\\[Phospho \\(STY\\)\\]",
filter_with_Qvalue = TRUE,
qvalue_cutoff = 0.01,
removeProtein_with1Feature = FALSE
))[1] "FASTA file missing 275 Proteins. These will be removed. This may be due to non-unique identifications."
INFO [2026-05-05 18:59:03] ** Raw data from Spectronaut imported successfully.
INFO [2026-05-05 18:59:03] ** Raw data from Spectronaut cleaned successfully.
INFO [2026-05-05 18:59:03] ** Using provided annotation.
INFO [2026-05-05 18:59:03] ** Run labels were standardized to remove symbols such as '.' or '%'.
INFO [2026-05-05 18:59:03] ** The following options are used:
- Features will be defined by the columns: PeptideSequence, PrecursorCharge, FragmentIon, ProductCharge
- Shared peptides will be removed.
- Proteins with single feature will not be removed.
- Features with less than 3 measurements across runs will be removed.
INFO [2026-05-05 18:59:03] ** Intensities with values of FExcludedFromQuantification equal to TRUE are replaced with NA
WARN [2026-05-05 18:59:03] ** PGQvalue not found in input columns.
INFO [2026-05-05 18:59:03] ** Intensities with values not smaller than 0.01 in EGQvalue are replaced with NA
INFO [2026-05-05 18:59:03] ** Features with all missing measurements across runs are removed.
INFO [2026-05-05 18:59:04] ** Shared peptides are removed.
INFO [2026-05-05 18:59:05] ** Multiple measurements in a feature and a run are summarized by summaryforMultipleRows: max
INFO [2026-05-05 18:59:05] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 18:59:06] ** Run annotation merged with quantification data.
INFO [2026-05-05 18:59:06] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 18:59:06] ** Fractionation handled.
INFO [2026-05-05 18:59:06] ** Updated quantification data to make balanced design. Missing values are marked by NA
INFO [2026-05-05 18:59:06] ** Finished preprocessing. The dataset is ready to be processed by the dataProcess function.
INFO [2026-05-05 18:59:09] ** Raw data from Spectronaut imported successfully.
INFO [2026-05-05 18:59:14] ** Raw data from Spectronaut cleaned successfully.
INFO [2026-05-05 18:59:16] ** Using annotation extracted from quantification data.
INFO [2026-05-05 18:59:16] ** Run labels were standardized to remove symbols such as '.' or '%'.
INFO [2026-05-05 18:59:16] ** The following options are used:
- Features will be defined by the columns: PeptideSequence, PrecursorCharge, FragmentIon, ProductCharge
- Shared peptides will be removed.
- Proteins with single feature will not be removed.
- Features with less than 3 measurements across runs will be removed.
INFO [2026-05-05 18:59:21] ** Intensities with values of FExcludedFromQuantification equal to TRUE are replaced with NA
WARN [2026-05-05 18:59:22] ** PGQvalue not found in input columns.
INFO [2026-05-05 18:59:23] ** Intensities with values not smaller than 0.01 in EGQvalue are replaced with NA
INFO [2026-05-05 18:59:27] ** Features with all missing measurements across runs are removed.
INFO [2026-05-05 18:59:31] ** Shared peptides are removed.
INFO [2026-05-05 19:00:30] ** Multiple measurements in a feature and a run are summarized by summaryforMultipleRows: max
INFO [2026-05-05 19:00:32] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 19:00:35] ** Run annotation merged with quantification data.
INFO [2026-05-05 19:00:45] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 19:00:45] ** Fractionation handled.
INFO [2026-05-05 19:01:08] ** Updated quantification data to make balanced design. Missing values are marked by NA
INFO [2026-05-05 19:01:10] ** Finished preprocessing. The dataset is ready to be processed by the dataProcess function.
PTM features: 540080
Protein features: 11946400
dataSummarizationPTM() runs dataProcess on PTM and PROTEIN separately and returns both. TMP summarisation, median normalisation, MB-imputation — same defaults as the bulk pipeline.
INFO [2026-05-05 19:01:15] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 19:01:15] ** Fractionation handled.
INFO [2026-05-05 19:01:16] ** Updated quantification data to make balanced design. Missing values are marked by NA
INFO [2026-05-05 19:01:17] ** Log2 intensities under cutoff = -0.34817 were considered as censored missing values.
INFO [2026-05-05 19:01:17] ** Log2 intensities = NA were considered as censored missing values.
INFO [2026-05-05 19:01:17] ** Use all features that the dataset originally has.
INFO [2026-05-05 19:01:17]
# proteins: 8883
# peptides per protein: 1-11
# features per peptide: 1-6
INFO [2026-05-05 19:01:17] Some proteins have only one feature:
A2AJI0_S282,
A2AP18_S676_S686,
A2AR02_S411_S413,
A2RSJ4_S21,
A2RSJ4_S952 ...
INFO [2026-05-05 19:01:17]
hApp hAppNLGF hAppNLGF_FIRE hApp_FIRE
# runs 4 4 4 4
# bioreplicates 4 4 4 4
# tech. replicates 1 1 1 1
INFO [2026-05-05 19:01:17] Some features are completely missing in at least one condition:
AAAAALSGS[Phospho (STY)]PPQTEKPTHYR_3_b5_1,
AAAAALSGS[Phospho (STY)]PPQTEKPTHYR_3_y11_2,
AAAAALSGS[Phospho (STY)]PPQTEKPTHYR_3_y15_2,
AAADDGEEPKS[Phospho (STY)]EPETK_3_b4_1,
AAADDGEEPKS[Phospho (STY)]EPETK_3_y13_2 ...
INFO [2026-05-05 19:01:17] == Start the summarization per subplot...
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
INFO [2026-05-05 19:03:25] == Summarization is done.
INFO [2026-05-05 19:03:45] ** Features with one or two measurements across runs are removed.
INFO [2026-05-05 19:03:45] ** Fractionation handled.
INFO [2026-05-05 19:04:02] ** Updated quantification data to make balanced design. Missing values are marked by NA
INFO [2026-05-05 19:04:29] ** Log2 intensities under cutoff = -2.162 were considered as censored missing values.
INFO [2026-05-05 19:04:29] ** Log2 intensities = NA were considered as censored missing values.
INFO [2026-05-05 20:46:10] ** Flag uninformative feature and outliers by feature selection algorithm.
INFO [2026-05-05 20:46:13]
# proteins: 8918
# peptides per protein: 1-631
# features per peptide: 1-6
INFO [2026-05-05 20:46:13] Some proteins have only one feature:
A0A0G2JET4,
A6X935,
C0HLV9,
D3Z7A4;K7N6U2;K7N6Y8;K7N754;L7N2A7,
P35991 ...
INFO [2026-05-05 20:46:13]
hApp hAppNLGF hAppNLGF_FIRE hApp_FIRE
# runs 4 4 4 4
# bioreplicates 4 4 4 4
# tech. replicates 1 1 1 1
INFO [2026-05-05 20:46:15] Some features are completely missing in at least one condition:
AAAAAAAAQM[Oxidation (M)]HTK_2_b4_1,
AAAAAAAAQM[Oxidation (M)]HTK_2_b5_1,
AAAAAAAAQM[Oxidation (M)]HTK_2_y8_1,
AAAADGEPLHNEEER_2_y10_1,
AAAAVASAASSC[Carbamidomethyl (C)]R_3_y7_1 ...
INFO [2026-05-05 20:46:15] == Start the summarization per subplot...
INFO [2026-05-05 20:46:16] ** Filtered out uninformative features and outliers.
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
INFO [2026-05-05 21:05:26] == Summarization is done.
Same 4 pairwise + interaction as the bulk pipeline.
Interaction expands to: (hAppNLGF − hApp) − (hAppNLGF_FIRE − hApp_FIRE) Disease effects with microglia vs disease effects without.
Condition levels (alphabetical):
[1] "hApp" "hApp_FIRE" "hAppNLGF" "hAppNLGF_FIRE"
make_contrast <- function(numerator, denominator, levels) {
v <- setNames(rep(0L, length(levels)), levels)
v[numerator] <- 1L
v[denominator] <- -1L
v
}
interaction_weights <- c(
hApp = -1,
hAppNLGF = 1,
hApp_FIRE = 1,
hAppNLGF_FIRE = -1
)
make_linear_contrast <- function(weights, levels) {
v <- setNames(rep(0, length(levels)), levels)
v[names(weights)] <- weights
stopifnot(abs(sum(v)) < 1e-9)
v
}
comparison_matrix <- rbind(
make_contrast("hAppNLGF", "hApp", condition_levels),
make_contrast("hApp_FIRE", "hApp", condition_levels),
make_contrast("hAppNLGF_FIRE", "hAppNLGF", condition_levels),
make_contrast("hAppNLGF_FIRE", "hApp_FIRE", condition_levels),
make_linear_contrast(interaction_weights, condition_levels)
)
rownames(comparison_matrix) <- c(
"hAppNLGF_vs_hApp",
"hApp_FIRE_vs_hApp",
"hAppNLGF_FIRE_vs_hAppNLGF",
"hAppNLGF_FIRE_vs_hApp_FIRE",
"Interaction_APP_x_Microglia"
)
print(comparison_matrix) hApp hApp_FIRE hAppNLGF hAppNLGF_FIRE
hAppNLGF_vs_hApp -1 0 1 0
hApp_FIRE_vs_hApp -1 1 0 0
hAppNLGF_FIRE_vs_hAppNLGF 0 0 -1 1
hAppNLGF_FIRE_vs_hApp_FIRE 0 -1 0 1
Interaction_APP_x_Microglia -1 1 1 -1
groupComparisonPTM() returns three result tables:
PTM.Model — raw PTM contrast results (unadjusted phospho changes)PROTEIN.Model — protein-level contrast results (from the bulk reference)ADJUSTED.Model — phospho minus protein per contrast → “phospho-specific” changesThe adjusted table is what most phospho papers cite — it isolates regulation that isn’t simply explained by total protein abundance.
INFO [2026-05-05 21:05:50] == Start to test and get inference in whole plot ...
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
INFO [2026-05-05 21:07:48] == Comparisons for all proteins are done.
INFO [2026-05-05 21:07:50] == Start to test and get inference in whole plot ...
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
INFO [2026-05-05 21:09:41] == Comparisons for all proteins are done.
Wrote PTM_unadjusted_GroupComparison.csv : 44415 rows
Wrote PROTEIN_GroupComparison.csv : 44590 rows
Wrote PTM_adjusted_GroupComparison.csv : 44415 rows
---
title: "2. MSstatsPTM — Phospho DE with protein-level adjustment"
---
## Overview
Differential phospho-site abundance, adjusted for protein-level abundance changes from the bulk proteome pipeline. Uses MSstatsPTM, which fits paired models on PTM and protein datasets and reports both unadjusted (raw phospho) and adjusted (phospho minus protein) log2 fold changes per contrast.
Same contrast set as the bulk pipeline: 4 pairwise + APP × Microglia interaction.
Site-level localization filtering is applied to the phospho report at `LOC_PROB_CUTOFF = 0.99` before the MSstatsPTM converter — the bulk protein report is left untouched. The search was confirmed to be directDIA (`EG.Library = "directDIA™"` in the Spectronaut export); Bekker-Jensen et al. 2020 (*Mol Cell Proteomics*) recommend a 0.99 probability threshold for directDIA searches rather than the 0.75 Class I default used for library-based searches.
## Libraries
```{r setup, include=TRUE, message=FALSE}
library(data.table)
library(MSstats)
library(MSstatsPTM)
library(MSstatsConvert)
library(qs2)
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
```
## Directories and parameters
```{r}
project_root <- "/nemo/lab/destrooperb/home/shared/zanettc/giulia_proteomics"
phospho_base <- file.path(project_root, "phosphoproteomics")
bulk_base <- file.path(project_root, "bulk_proteomics")
run_num <- "run3"
phospho_results_dir <- file.path(phospho_base, "results", run_num)
phospho_objects_dir <- file.path(phospho_base, "data", "processed", run_num)
phospho_input_dir <- file.path(phospho_base, "input")
bulk_input_dir <- file.path(bulk_base, "input")
dir.create(phospho_results_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(phospho_objects_dir, recursive = TRUE, showWarnings = FALSE)
FASTA_PATH <- file.path(phospho_input_dir, "UP000000589_10090.fasta")
PTM_LABEL <- "Phospho (STY)" # how Spectronaut writes the modification
stopifnot("FASTA missing — see Setup notes" = file.exists(FASTA_PATH))
```
## Build annotation (same 16-sample design)
```{r}
msstats_annotation <- data.table(
Run = paste0("GA_", 1:16),
Condition = rep(c("hApp", "hAppNLGF", "hAppNLGF_FIRE", "hApp_FIRE"), each = 4),
BioReplicate = 1:16
)
msstats_annotation
fwrite(msstats_annotation, file.path(phospho_objects_dir, "msstats_annotation.csv"))
```
## Load both Spectronaut reports (PTM and bulk)
The bulk report serves as the "global protein" reference for adjustment. We read the same column subset that the bulk MSstats notebook uses.
```{r load}
msstats_cols <- c(
"R.Condition", "R.FileName", "R.Replicate",
"PG.ProteinGroups", "PG.ProteinAccessions",
"PEP.StrippedSequence",
"EG.ModifiedSequence", "EG.PrecursorId", "EG.Qvalue",
"EG.PTMAssayProbability",
"FG.Charge", "FG.Id",
"F.FrgIon", "F.Charge", "F.FrgLossType", "F.FrgNum", "F.FrgType",
"F.NormalizedPeakArea", "F.PeakArea", "F.ExcludedFromQuantification"
)
# QC notebook 01 flagged that EG.PTMAssayProbability is sometimes missing from
# Spectronaut exports — probe the header and fall back to
# EG.PTMLocalizationProbabilities if needed.
phospho_path <- file.path(phospho_input_dir, "20260504_125530_PCF000612_phospho_GA_MSStats_updated.csv")
phospho_header <- names(fread(phospho_path, nrows = 0))
if ("EG.PTMAssayProbability" %in% phospho_header) {
loc_prob_col <- "EG.PTMAssayProbability"
} else if ("EG.PTMLocalizationProbabilities" %in% phospho_header) {
loc_prob_col <- "EG.PTMLocalizationProbabilities"
msstats_cols <- sub("EG.PTMAssayProbability", loc_prob_col, msstats_cols, fixed = TRUE)
} else {
stop(paste0(
"FATAL: no localization probability column found in the phospho report.\n",
"Expected EG.PTMAssayProbability or EG.PTMLocalizationProbabilities.\n",
"Re-export from Spectronaut with PTM localization enabled before proceeding.\n",
"Do NOT run the pipeline without this column — site-level filtering would be skipped\n",
"and downstream results would be unreliable."
))
}
cat("Localization probability column:", loc_prob_col, "\n")
cat("Reading phospho report...\n")
phospho_raw <- fread(
phospho_path,
select = msstats_cols
)
cat("Phospho rows:", nrow(phospho_raw), "\n")
cat("Reading bulk proteome report (for protein-level adjustment)...\n")
protein_raw <- fread(
file.path(bulk_input_dir, "20260423_154110_PCF000612_GA_MSStats_Report-Proteome.csv"),
select = setdiff(msstats_cols,
c("EG.PTMAssayProbability", "EG.PTMLocalizationProbabilities"))
)
cat("Protein rows:", nrow(protein_raw), "\n")
```
## Filter phospho by site localization probability
```{r localization_filter}
# directDIA confirmed via EG.Library = "directDIA™" in the Spectronaut export.
# Bekker-Jensen et al. 2020 (Mol Cell Proteomics) recommend 0.99 for directDIA
# rather than the 0.75 Class I threshold used for library-based DIA searches.
LOC_PROB_CUTOFF <- 0.99
n_before <- nrow(phospho_raw)
sites_before <- uniqueN(phospho_raw$EG.ModifiedSequence)
proteins_before <- uniqueN(phospho_raw$PG.ProteinGroups)
phospho_raw <- phospho_raw[get(loc_prob_col) >= LOC_PROB_CUTOFF]
n_after <- nrow(phospho_raw)
sites_after <- uniqueN(phospho_raw$EG.ModifiedSequence)
proteins_after <- uniqueN(phospho_raw$PG.ProteinGroups)
pct <- round(100 * n_after / n_before, 2)
cat(sprintf("Localization filter: %s >= %.2f\n", loc_prob_col, LOC_PROB_CUTOFF))
cat(sprintf(" Rows: %d → %d (%.2f%% retained)\n", n_before, n_after, pct))
cat(sprintf(" Sites: %d → %d\n", sites_before, sites_after))
cat(sprintf(" Proteins: %d → %d\n", proteins_before, proteins_after))
# protein_raw is intentionally NOT filtered — protein-level adjustment uses
# whole-protein abundance and does not require site localization.
```
## Normalise phospho R.FileName + patch R.Condition / R.Replicate
The phospho report writes sample IDs as `GA_P_1..GA_P_16` while the bulk proteome and the annotation use `GA_1..GA_16`. Strip the `GA_P_` prefix on the phospho data so both reports key against the same annotation. R.Condition is also "Not Defined" in both raw files — overwrite it from the annotation.
```{r patch_conditions}
phospho_raw[, R.FileName := stringr::str_replace(R.FileName, "^GA_P_", "GA_")]
patch <- function(dt) {
dt[msstats_annotation, on = c("R.FileName" = "Run"),
`:=`(R.Condition = i.Condition, R.Replicate = i.BioReplicate)]
dt
}
phospho_raw <- patch(phospho_raw)
protein_raw <- patch(protein_raw)
```
## Convert to MSstatsPTM input format
`SpectronauttoMSstatsPTMFormat()` returns a list of two data frames: `PTM` and `PROTEIN`. Both must be present for protein-level adjustment. The same `annotation` table is used for both — MSstatsPTM matches by `Run`.
```{r convert}
ptm_input <- suppressWarnings(SpectronauttoMSstatsPTMFormat(
input = phospho_raw,
annotation = msstats_annotation,
fasta_path = FASTA_PATH,
protein_input = protein_raw,
use_unmod_peptides = FALSE, # site-level: drop unmodified peptides
mod_id = "\\[Phospho \\(STY\\)\\]",
filter_with_Qvalue = TRUE,
qvalue_cutoff = 0.01,
removeProtein_with1Feature = FALSE
))
cat("PTM features: ", nrow(ptm_input$PTM), "\n")
cat("Protein features:", nrow(ptm_input$PROTEIN), "\n")
qs_save(ptm_input, file.path(phospho_objects_dir, "ptm_input.qs2"))
```
## Reload checkpoint (skip the read+convert on re-runs)
```{r reload, eval=FALSE}
ptm_input <- qs_read(file.path(phospho_objects_dir, "ptm_input.qs2"))
```
## Data summarisation (per dataset)
`dataSummarizationPTM()` runs `dataProcess` on PTM and PROTEIN separately and returns both. TMP summarisation, median normalisation, MB-imputation — same defaults as the bulk pipeline.
```{r summarise}
summarised <- suppressWarnings(dataSummarizationPTM(
data = ptm_input,
normalization = "equalizeMedians",
summaryMethod = "TMP",
censoredInt = "NA",
MBimpute = TRUE,
featureSubset = "highQuality",
remove_uninformative_feature_outlier = TRUE
))
qs_save(summarised, file.path(phospho_objects_dir, "summarised_ptm.qs2"))
```
## Verify condition levels and build contrast matrix
Same 4 pairwise + interaction as the bulk pipeline.
Interaction expands to: (hAppNLGF − hApp) − (hAppNLGF_FIRE − hApp_FIRE)
Disease effects with microglia vs disease effects without.
```{r contrasts}
summarised <- qs_read(file.path(phospho_objects_dir, "summarised_ptm.qs2"))
condition_levels <- levels(summarised$PTM$ProteinLevelData$GROUP)
cat("Condition levels (alphabetical):\n")
print(condition_levels)
make_contrast <- function(numerator, denominator, levels) {
v <- setNames(rep(0L, length(levels)), levels)
v[numerator] <- 1L
v[denominator] <- -1L
v
}
interaction_weights <- c(
hApp = -1,
hAppNLGF = 1,
hApp_FIRE = 1,
hAppNLGF_FIRE = -1
)
make_linear_contrast <- function(weights, levels) {
v <- setNames(rep(0, length(levels)), levels)
v[names(weights)] <- weights
stopifnot(abs(sum(v)) < 1e-9)
v
}
comparison_matrix <- rbind(
make_contrast("hAppNLGF", "hApp", condition_levels),
make_contrast("hApp_FIRE", "hApp", condition_levels),
make_contrast("hAppNLGF_FIRE", "hAppNLGF", condition_levels),
make_contrast("hAppNLGF_FIRE", "hApp_FIRE", condition_levels),
make_linear_contrast(interaction_weights, condition_levels)
)
rownames(comparison_matrix) <- c(
"hAppNLGF_vs_hApp",
"hApp_FIRE_vs_hApp",
"hAppNLGF_FIRE_vs_hAppNLGF",
"hAppNLGF_FIRE_vs_hApp_FIRE",
"Interaction_APP_x_Microglia"
)
print(comparison_matrix)
```
## Group comparison (adjusted for protein abundance)
`groupComparisonPTM()` returns three result tables:
- `PTM.Model` — raw PTM contrast results (unadjusted phospho changes)
- `PROTEIN.Model` — protein-level contrast results (from the bulk reference)
- `ADJUSTED.Model` — phospho minus protein per contrast → "phospho-specific" changes
The adjusted table is what most phospho papers cite — it isolates regulation that isn't simply explained by total protein abundance.
```{r group_comparison}
ptm_results <- suppressWarnings(groupComparisonPTM(
data = summarised,
contrast.matrix = comparison_matrix,
data.type = "LabelFree"
))
#qs_save(ptm_results, file.path(phospho_objects_dir, "ptm_results.qs2"))
```
## Export results CSVs
```{r export}
write_table <- function(df, filename) {
fwrite(as.data.table(df), file.path(phospho_results_dir, filename))
cat("Wrote", filename, ":", nrow(df), "rows\n")
}
write_table(ptm_results$PTM.Model, "PTM_unadjusted_GroupComparison.csv")
write_table(ptm_results$PROTEIN.Model, "PROTEIN_GroupComparison.csv")
write_table(ptm_results$ADJUSTED.Model, "PTM_adjusted_GroupComparison.csv")
```
## Quick summary of adjusted results
```{r summary_table}
adj <- as.data.table(ptm_results$ADJUSTED.Model)
adj[!is.na(adj.pvalue),
.(n_tested = .N,
n_sig = sum(adj.pvalue < 0.05),
n_up = sum(adj.pvalue < 0.05 & log2FC > 0),
n_down = sum(adj.pvalue < 0.05 & log2FC < 0)),
by = Label][order(Label)]
```