4. Kinase activity inference

Overview

Inferred kinase activity per contrast from the adjusted phospho contrast results. Translates “site X of protein Y went up” into “kinase K is more active.” Uses decoupleR with the OmniPath kinase–substrate network (mouse). Two methods are run for robustness:

  • WMEAN — weighted mean of substrate t-statistics (fast, robust to network noise)
  • ULM — univariate linear model per kinase (calibrated p-values)

Both produce per-kinase activity scores and FDR-adjusted significance per contrast.

Setup notes

  • The OmniPath query needs internet on first run; results can be cached and reloaded.

Libraries

Directories

Code
base_dir <- "/nemo/lab/destrooperb/home/shared/zanettc/giulia_proteomics/phosphoproteomics"
run_num  <- "run3"

results_dir <- file.path(base_dir, "results", run_num)
objects_dir <- file.path(base_dir, "data", "processed", run_num)
kinase_dir  <- file.path(results_dir, "Kinase_activity")
dir.create(kinase_dir, recursive = TRUE, showWarnings = FALSE)

Load adjusted phospho results

Code
res_df <- fread(file.path(results_dir, "PTM_adjusted_GroupComparison.csv"))

# Parse site identifier into accession + position(s) (same logic as notebook 03)
res_df[, Accession   := str_extract(Protein, "^[^_;]+")]
res_df[, Sites_in_id := str_extract(Protein, "(?<=_)[STY][0-9]+(?:_[STY][0-9]+)*")]
res_df[, Gene := mapIds(org.Mm.eg.db, keys = Accession,
                        column = "SYMBOL", keytype = "UNIPROT",
                        multiVals = "first")]
res_df[, Gene := ifelse(is.na(Gene) | Gene == "", Accession, Gene)]

# decoupleR needs site IDs of form Gene_S123. Expand multi-site rows.
sites_long <- res_df[!is.na(Sites_in_id),
  .(Site = unlist(strsplit(Sites_in_id, "_"))),
  by = .(Label, Gene, log2FC, pvalue, adj.pvalue, Tvalue)
][, ID := paste0(Gene, "_", Site)]
sites_long <- sites_long[!is.na(Gene)]
cat("Site rows after expansion:", nrow(sites_long), "\n")
Site rows after expansion: 49255 

Pull mouse kinase–substrate network from OmniPath

Code
ks_net <- OmnipathR::enzyme_substrate(
  organism = 10090
) %>%
  filter(modification == "phosphorylation") %>%
  mutate(
    target = paste0(substrate_genesymbol, "_", residue_type, residue_offset),
    source = enzyme_genesymbol,
    mor    = 1     # all phospho-events are activating substrate phosphorylation
  ) %>%
  dplyr::select(source, target, mor) %>%
  distinct()

cat("Kinase–substrate edges:", nrow(ks_net), "\n")
Kinase–substrate edges: 29446 
Code
cat("Unique kinases:        ", length(unique(ks_net$source)), "\n")
Unique kinases:         1402 
Code
qs_save(ks_net, file.path(objects_dir, "omnipath_kinase_substrate.qs2"))

Score per contrast

decoupleR expects a numeric matrix of statistics with sites as rows and one column per “sample” (here: per contrast). We use the t-statistic from MSstatsPTM (Tvalue) so direction and magnitude are encoded.

Code
mat <- sites_long %>%
  dplyr::filter(!is.na(Tvalue)) %>%   # drop NA Tvalue rows — max() returns -Inf on all-NA groups
  group_by(ID, Label) %>%
  summarise(stat = max(Tvalue, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(id_cols = ID, names_from = Label, values_from = stat,
              values_fill = 0) %>%
  as.data.frame()
rownames(mat) <- mat$ID
mat$ID <- NULL
mat <- as.matrix(mat)

# Filter to sites in the OmniPath network — others contribute no signal anyway
mat <- mat[rownames(mat) %in% ks_net$target, , drop = FALSE]
mat <- mat[apply(mat, 1, function(x) all(is.finite(x))), , drop = FALSE]
cat("Sites mapped to OmniPath network:", nrow(mat), "\n")
Sites mapped to OmniPath network: 938 
Code
set.seed(42)
acts_wmean <- decoupleR::run_wmean(mat = mat, network = ks_net,
                                   .source = "source", .target = "target",
                                   .mor = "mor", times = 1000, minsize = 5)
acts_ulm   <- decoupleR::run_ulm(mat = mat, network = ks_net,
                                 .source = "source", .target = "target",
                                 .mor = "mor", minsize = 5)

qs_save(list(wmean = acts_wmean, ulm = acts_ulm),
        file.path(objects_dir, "kinase_activities.qs2"))

Tidy + export per-contrast tables

Code
#Per contrast comparison
tidy <- function(df, method_label) {
  df %>%
    dplyr::filter(statistic == method_label) %>%
    dplyr::group_by(condition) %>%                          # <-- add this
    dplyr::mutate(padj = p.adjust(p_value, method = "BH")) %>%
    dplyr::ungroup() %>%
    dplyr::rename(Kinase = source, Contrast = condition,
                  Activity = score) %>%
    dplyr::select(Contrast, Kinase, Activity, p_value, padj)
}

wmean_tidy <- tidy(acts_wmean, "norm_wmean")
ulm_tidy   <- tidy(acts_ulm,   "ulm")

fwrite(wmean_tidy, file.path(kinase_dir, "Kinase_activity_wmean.csv"))
fwrite(ulm_tidy,   file.path(kinase_dir, "Kinase_activity_ulm.csv"))

sig_kinases <- ulm_tidy %>%
  dplyr::filter(padj < 0.05) %>%
  dplyr::arrange(Contrast, padj)

#Prints padj values in brackets
sig_kinases %>%
  dplyr::arrange(Contrast, padj) %>%
  dplyr::group_by(Contrast) %>%
  dplyr::summarise(
    n      = dplyr::n(),
    kinases = paste0(Kinase, " (", signif(padj, 2), ")", collapse = ", "),
    .groups = "drop"
  ) %>%
  print()
# A tibble: 2 × 3
  Contrast                      n kinases       
  <chr>                     <int> <chr>         
1 hAppNLGF_FIRE_vs_hAppNLGF     1 Cdk7 (1.3e-05)
2 hApp_FIRE_vs_hApp             1 Mapk3 (0.041) 

Top kinase barplots per contrast (ULM)

Just top 15 activity scores, with no statistical threshold,

Code
top_kinases <- ulm_tidy %>%
  group_by(Contrast) %>%
  arrange(desc(abs(Activity))) %>%
  slice_head(n = 15) %>%
  ungroup() %>%
  mutate(Direction = ifelse(Activity > 0, "Activated", "Inhibited"))


p_kin <- ggplot(top_kinases,
                aes(x = reorder_within(Kinase, Activity, Contrast),
                    y = Activity,
                    fill = Direction)) +
  geom_col() +
  scale_x_reordered() +
  scale_fill_manual(values = c("Activated" = "firebrick3",
                               "Inhibited" = "navy")) +
  facet_wrap(~ Contrast, ncol = 3, scales = "free_y") +
  coord_flip() +
  labs(title = "Top 15 kinases by inferred activity per contrast (ULM)",
       x = NULL, y = "Activity score") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "top",
        strip.text = element_text(face = "bold"),
        panel.grid.major.y = element_blank())

print(p_kin)

Code
ggsave(file.path(kinase_dir, "Top15_kinases_per_contrast.pdf"),
       p_kin, width = 11, height = 6, device = cairo_pdf)

Substrates per contrast (p < 0.05)

For every contrast, the table below lists all phosphorylated substrates with nominal p-value < 0.05 in the MSstatsPTM output that are annotated targets of a kinase in the OmniPath network, sorted by substrate p-value. The ULM kinase activity score and adjusted p-value are joined in as columns so the kinase-level context is filterable alongside the substrate stats.

Code
contrasts_ordered <- sort(unique(as.data.table(sites_long)$Label))

kinase_stats <- as.data.table(ulm_tidy)[, .(Contrast, Kinase,
                                            KinaseActivity = Activity,
                                            KinasePvalue   = p_value,
                                            KinasePadj     = padj)]
ks_dt <- as.data.table(ks_net)[, .(Kinase = source, ID = target)]

build_section <- function(contrast) {

  substrates_dt <- as.data.table(sites_long)[
    Label == contrast & pvalue < 0.05
  ]

  body <- if (nrow(substrates_dt) == 0) {
    htmltools::p(htmltools::em("No substrates with p < 0.05."))
  } else {
    joined <- merge(substrates_dt, ks_dt, by = "ID", allow.cartesian = TRUE)
    if (nrow(joined) == 0) {
      htmltools::p(htmltools::em(
        "No substrates with p < 0.05 mapped to OmniPath kinases."))
    } else {
      joined <- merge(joined,
                      kinase_stats[Contrast == contrast,
                                   .(Kinase, KinaseActivity,
                                     KinasePvalue, KinasePadj)],
                      by = "Kinase", all.x = TRUE)

      tbl <- joined %>%
        as.data.frame() %>%
        dplyr::arrange(pvalue) %>%
        dplyr::mutate(
          log2FC         = round(log2FC,         3),
          Tvalue         = round(Tvalue,         3),
          pvalue         = signif(pvalue,         3),
          adj.pvalue     = signif(adj.pvalue,     3),
          KinaseActivity = round(KinaseActivity,  3),
          KinasePvalue   = signif(KinasePvalue,   3),
          KinasePadj     = signif(KinasePadj,     3)
        ) %>%
        dplyr::select(
          Kinase,
          `Kinase activity` = KinaseActivity,
          `Kinase p-value`  = KinasePvalue,
          `Kinase padj`     = KinasePadj,
          `Substrate site`  = ID,
          Gene,
          Site,
          log2FC,
          `T-value`         = Tvalue,
          `p-value`         = pvalue,
          `adj. p-value`    = adj.pvalue
        )

      DT::datatable(
        tbl,
        rownames = FALSE,
        filter   = "top",
        options  = list(pageLength = 10, scrollX = TRUE)
      )
    }
  }

  htmltools::tagList(htmltools::h3(contrast), body)
}

htmltools::tagList(lapply(contrasts_ordered, build_section))

hApp_FIRE_vs_hApp

hAppNLGF_FIRE_vs_hApp_FIRE

hAppNLGF_FIRE_vs_hAppNLGF

hAppNLGF_vs_hApp

Interaction_APP_x_Microglia