6. PPI Network Analysis

Overview

Protein–protein interaction networks for the significant proteins of each MSstats contrast, using STRINGdb (mouse, physical interactions, score ≥ 700). For each contrast we build separate networks for upregulated and downregulated proteins, identify hub proteins by degree centrality, and export per-contrast hub tables.

Adapted from millie_proteomics/notebooks/08_PPI.qmd, with this project’s contrast set (four pairwise + the APP × Microglia interaction).

Libraries

Code
library(data.table)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:data.table':

    between, first, last
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Code
library(tidyr)
library(ggplot2)
library(ggrepel)
library(stringr)
library(STRINGdb)
library(igraph)

Attaching package: 'igraph'
The following object is masked from 'package:tidyr':

    crossing
The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union
The following objects are masked from 'package:stats':

    decompose, spectrum
The following object is masked from 'package:base':

    union
Code
library(qs2)
qs2 0.1.6
Code
library(scales)

Directories and parameters

Code
base_dir    <- "/nemo/lab/destrooperb/home/shared/zanettc/giulia_proteomics/bulk_proteomics"
run_num     <- "run4"
results_dir <- file.path(base_dir, "results", run_num)
objects_dir <- file.path(base_dir, "data", "processed", run_num)
ppi_dir     <- file.path(results_dir, "PPI_networks")
dir.create(ppi_dir, recursive = TRUE, showWarnings = FALSE)

FDR_CUTOFF   <- 0.05
LOGFC_CUTOFF <- 0.5
STRING_SCORE <- 700
TOP_N        <- 30   # nodes shown in the top-N subnetwork plot

Load and prepare DE results

Code
res_df             <- fread(file.path(results_dir, "Full_GroupComparison_Results.csv"))
protein_dictionary <- fread(file.path(objects_dir, "protein_dictionary.csv"))

msstats_ready <- res_df %>%
  filter(!is.na(adj.pvalue), is.finite(log2FC)) %>%
  left_join(protein_dictionary, by = "Protein") %>%
  mutate(
    Gene = ifelse(is.na(Gene) | Gene == "", Protein, Gene),
    Status = case_when(
      adj.pvalue < FDR_CUTOFF & log2FC >  LOGFC_CUTOFF ~ "Upregulated",
      adj.pvalue < FDR_CUTOFF & log2FC < -LOGFC_CUTOFF ~ "Downregulated",
      TRUE ~ "Not Significant"
    )
  )

Initialise STRINGdb (mouse, physical, high-confidence)

Code
# Crick cluster blocks HTTPS with SSL; wget --no-check-certificate bypasses it
options(download.file.method = "wget",
        download.file.extra  = "--no-check-certificate")

stringdb_cache <- "/nemo/lab/destrooperb/home/shared/zanettc/giulia_proteomics/bulk_proteomics/data/stringdb_cache"
dir.create(stringdb_cache, recursive = TRUE, showWarnings = FALSE)

string_db <- STRINGdb$new(
  version         = "12.0",
  species         = 10090,
  score_threshold = STRING_SCORE,
  network_type    = "physical",
  input_directory = stringdb_cache
)

Build networks per contrast and direction

Code
contrasts <- c(
  "hAppNLGF_vs_hApp",
  "hApp_FIRE_vs_hApp",
  "hAppNLGF_FIRE_vs_hAppNLGF",
  "hAppNLGF_FIRE_vs_hApp_FIRE",
  "Interaction_APP_x_Microglia"
)

build_network <- function(genes_df) {
  if (nrow(genes_df) < 5) return(NULL)
  mapped <- string_db$map(genes_df, "Gene", removeUnmappedRows = TRUE)
  if (nrow(mapped) < 5) return(NULL)
  inter <- string_db$get_interactions(mapped$STRING_id)
  if (nrow(inter) == 0)
    return(list(proteins = mapped, interactions = inter, graph = NULL))
  g <- graph_from_data_frame(
    inter %>% dplyr::select(from, to, combined_score),
    directed = FALSE,
    vertices = mapped %>% dplyr::select(STRING_id, Gene, log2FC)
  ) %>% simplify()
  V(g)$degree <- degree(g)
  list(proteins = mapped, interactions = inter, graph = g)
}

network_results <- list()

for (comp in contrasts) {
  cat("\n============================\n", comp, "\n============================\n")
  for (dir_lab in c("Upregulated", "Downregulated")) {
    sig <- msstats_ready %>%
      filter(Label == comp, Status == dir_lab) %>%
      dplyr::select(Gene, log2FC, adj.pvalue) %>%
      distinct(Gene, .keep_all = TRUE) %>%
      as.data.frame()
    cat(sprintf("  %-13s n = %d\n", dir_lab, nrow(sig)))
    nr <- build_network(sig)
    if (!is.null(nr) && !is.null(nr$graph))
      cat(sprintf("    -> nodes %d | edges %d\n",
                  vcount(nr$graph), ecount(nr$graph)))
    network_results[[paste(comp, dir_lab, sep = "__")]] <- nr
  }
}

============================
 hAppNLGF_vs_hApp 
============================
  Upregulated   n = 4
  Downregulated n = 13

============================
 hApp_FIRE_vs_hApp 
============================
  Upregulated   n = 8
    -> nodes 8 | edges 1
  Downregulated n = 66
    -> nodes 66 | edges 19

============================
 hAppNLGF_FIRE_vs_hAppNLGF 
============================
  Upregulated   n = 16
  Downregulated n = 42
    -> nodes 42 | edges 9

============================
 hAppNLGF_FIRE_vs_hApp_FIRE 
============================
  Upregulated   n = 3
  Downregulated n = 2

============================
 Interaction_APP_x_Microglia 
============================
  Upregulated   n = 1
  Downregulated n = 4

Visualise full networks (one PDF per contrast/direction)

Code
plot_full_network <- function(nr, label) {
  if (is.null(nr) || is.null(nr$graph) || ecount(nr$graph) == 0) return(invisible())
  g_plot <- delete_vertices(nr$graph, V(nr$graph)[degree(nr$graph) == 0])
  if (vcount(g_plot) < 3) return(invisible())

  V(g_plot)$label       <- V(g_plot)$Gene
  V(g_plot)$size        <- rescale(degree(g_plot), to = c(5, 20))
  V(g_plot)$label.cex   <- rescale(degree(g_plot), to = c(0.6, 1.3))
  V(g_plot)$label.font  <- 2
  V(g_plot)$color       <- "steelblue"
  V(g_plot)$frame.color <- "grey30"
  E(g_plot)$color       <- "grey75"
  E(g_plot)$width       <- 0.8

  top_hubs <- names(sort(degree(g_plot), decreasing = TRUE))[1:min(10, vcount(g_plot))]
  V(g_plot)$color[V(g_plot)$name %in% top_hubs] <- "firebrick3"

  pdf(file.path(ppi_dir, paste0("network_full_", label, ".pdf")), width = 14, height = 14)
  set.seed(42)
  plot(g_plot,
       layout = layout_with_fr(g_plot),
       vertex.label.color = "black",
       main = paste0(label, "\nSTRING physical >= ", STRING_SCORE,
                     " | Nodes: ", vcount(g_plot),
                     " | Edges: ", ecount(g_plot),
                     " | Red = top 10 hubs"))
  dev.off()

  set.seed(42)
  plot(g_plot,
       layout = layout_with_fr(g_plot),
       vertex.label.color = "black",
       main = paste0(label, "\nNodes: ", vcount(g_plot),
                     " | Edges: ", ecount(g_plot)))
}

for (key in names(network_results))
  plot_full_network(network_results[[key]], key)

Top-N subnetwork plots — focused view of strongest hubs

Code
plot_top_network <- function(nr, label, top_n = TOP_N) {
  if (is.null(nr) || is.null(nr$graph) || ecount(nr$graph) == 0) return(invisible())
  g <- nr$graph
  top_nodes <- names(sort(degree(g), decreasing = TRUE))[1:min(top_n, vcount(g))]
  g_top <- induced_subgraph(g, top_nodes) %>%
    (function(x) delete_vertices(x, V(x)[degree(x) == 0]))()
  if (vcount(g_top) < 3) return(invisible())

  V(g_top)$label       <- V(g_top)$Gene
  V(g_top)$size        <- rescale(degree(g_top), to = c(8, 22))
  V(g_top)$label.cex   <- 1.0
  V(g_top)$label.font  <- 2
  V(g_top)$frame.color <- "grey30"
  E(g_top)$color       <- "grey70"
  E(g_top)$width       <- 1.2

  fc <- V(g_top)$log2FC
  pal <- colorRampPalette(c("navy", "white", "firebrick3"))(100)
  fc_idx <- findInterval(fc, seq(min(fc, na.rm = TRUE),
                                 max(fc, na.rm = TRUE),
                                 length.out = 100))
  fc_idx[fc_idx == 0] <- 1
  V(g_top)$color <- pal[fc_idx]

  pdf(file.path(ppi_dir, paste0("network_top", top_n, "_", label, ".pdf")),
      width = 12, height = 12)
  set.seed(42)
  plot(g_top, layout = layout_with_fr(g_top),
       vertex.label.color = "black",
       main = paste0(label, " — top ", vcount(g_top), " by degree\n",
                     "Coloured by log2FC | Edges: ", ecount(g_top)))
  legend_vals   <- seq(min(fc, na.rm = TRUE), max(fc, na.rm = TRUE), length.out = 5)
  legend_colors <- colorRampPalette(c("navy", "white", "firebrick3"))(5)
  legend("bottomright", legend = round(legend_vals, 2),
         fill = legend_colors, title = "log2FC", bty = "n", cex = 0.8)
  dev.off()

  set.seed(42)
  plot(g_top, layout = layout_with_fr(g_top),
       vertex.label.color = "black",
       main = paste0(label, " — top ", vcount(g_top), " by degree"))
  legend("bottomright", legend = round(legend_vals, 2),
         fill = legend_colors, title = "log2FC", bty = "n", cex = 0.8)
}

for (key in names(network_results))
  plot_top_network(network_results[[key]], key)

Hub protein summary tables

Code
hub_list <- list()

for (key in names(network_results)) {
  nr <- network_results[[key]]
  if (is.null(nr) || is.null(nr$graph)) next
  g <- nr$graph
  hub_df <- data.frame(
    Gene       = V(g)$Gene,
    log2FC     = V(g)$log2FC,
    Degree     = degree(g),
    Comparison = key
  ) %>% filter(Degree > 0) %>% arrange(desc(Degree))
  hub_list[[key]] <- hub_df
  fwrite(hub_df, file.path(ppi_dir, paste0("hub_proteins_", key, ".csv")))

  cat("\n--- Top 20 hubs:", key, "---\n")
  print(head(hub_df, 20))
}

--- Top 20 hubs: hApp_FIRE_vs_hApp__Upregulated ---
                         Gene    log2FC Degree                     Comparison
10090.ENSMUSP00000017488  Vtn 1.8811277      1 hApp_FIRE_vs_hApp__Upregulated
10090.ENSMUSP00000022616  Clu 0.5575064      1 hApp_FIRE_vs_hApp__Upregulated

--- Top 20 hubs: hApp_FIRE_vs_hApp__Downregulated ---
                            Gene     log2FC Degree
10090.ENSMUSP00000038838     Lyn -1.4974361      4
10090.ENSMUSP00000113852     Syk -2.2769085      4
10090.ENSMUSP00000000299   Itgb2 -3.6921908      3
10090.ENSMUSP00000023531   Hcls1 -3.6457404      3
10090.ENSMUSP00000081263   Wasf2 -1.3852725      3
10090.ENSMUSP00000040246    C1qb -5.7305583      2
10090.ENSMUSP00000078875  Fcer1g -3.3277773      2
10090.ENSMUSP00000004377   Ptpn6 -2.6064623      2
10090.ENSMUSP00000048836    C1qa -5.7201826      2
10090.ENSMUSP00000036747    C1qc -5.3031844      2
10090.ENSMUSP00000061893    Abi3 -3.2594921      2
10090.ENSMUSP00000035400 Nckap1l -2.4335139      2
10090.ENSMUSP00000014290 Apbb1ip -2.1724247      2
10090.ENSMUSP00000068468  P05555 -2.7276422      1
10090.ENSMUSP00000113071     Msn -0.5312957      1
10090.ENSMUSP00000032561    Vasp -0.8751803      1
10090.ENSMUSP00000106013  Ripor2 -1.0608928      1
10090.ENSMUSP00000037858  Fermt3 -3.3957648      1
                                               Comparison
10090.ENSMUSP00000038838 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000113852 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000000299 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000023531 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000081263 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000040246 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000078875 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000004377 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000048836 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000036747 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000061893 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000035400 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000014290 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000068468 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000113071 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000032561 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000106013 hApp_FIRE_vs_hApp__Downregulated
10090.ENSMUSP00000037858 hApp_FIRE_vs_hApp__Downregulated

--- Top 20 hubs: hAppNLGF_FIRE_vs_hAppNLGF__Downregulated ---
                            Gene    log2FC Degree
10090.ENSMUSP00000000299   Itgb2 -3.390713      3
10090.ENSMUSP00000068468  P05555 -3.032191      2
10090.ENSMUSP00000083587   Icam1 -1.888835      2
10090.ENSMUSP00000040246    C1qb -6.055242      2
10090.ENSMUSP00000048836    C1qa -5.669790      2
10090.ENSMUSP00000036747    C1qc -6.145870      2
10090.ENSMUSP00000004377   Ptpn6 -2.323901      1
10090.ENSMUSP00000061893    Abi3 -3.218126      1
10090.ENSMUSP00000037858  Fermt3 -3.090226      1
10090.ENSMUSP00000035400 Nckap1l -2.504796      1
10090.ENSMUSP00000014290 Apbb1ip -1.923074      1
                                                       Comparison
10090.ENSMUSP00000000299 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000068468 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000083587 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000040246 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000048836 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000036747 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000004377 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000061893 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000037858 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000035400 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
10090.ENSMUSP00000014290 hAppNLGF_FIRE_vs_hAppNLGF__Downregulated
Code
if (length(hub_list) > 0) {
  hub_combined <- bind_rows(hub_list)
  fwrite(hub_combined, file.path(ppi_dir, "hub_proteins_all.csv"))
}

Network summary statistics

Code
summary_table <- data.frame(
  Comparison       = character(),
  Direction        = character(),
  Sig_proteins     = integer(),
  Mapped_to_STRING = integer(),
  Nodes_with_edges = integer(),
  Edges            = integer(),
  stringsAsFactors = FALSE
)

for (key in names(network_results)) {
  parts <- strsplit(key, "__", fixed = TRUE)[[1]]
  comp <- parts[1]; dir_lab <- parts[2]
  nr <- network_results[[key]]
  g  <- if (!is.null(nr)) nr$graph else NULL
  n_with_edges <- if (!is.null(g)) sum(degree(g) > 0) else 0
  n_edges      <- if (!is.null(g)) ecount(g) else 0
  sig_n <- nrow(msstats_ready %>%
                  filter(Label == comp, Status == dir_lab) %>%
                  distinct(Gene))
  summary_table <- rbind(summary_table, data.frame(
    Comparison       = comp,
    Direction        = dir_lab,
    Sig_proteins     = sig_n,
    Mapped_to_STRING = if (!is.null(nr)) nrow(nr$proteins) else 0,
    Nodes_with_edges = n_with_edges,
    Edges            = n_edges
  ))
}

print(summary_table)
                 Comparison     Direction Sig_proteins Mapped_to_STRING
1          hAppNLGF_vs_hApp Downregulated           13               13
2         hApp_FIRE_vs_hApp   Upregulated            8                8
3         hApp_FIRE_vs_hApp Downregulated           66               66
4 hAppNLGF_FIRE_vs_hAppNLGF   Upregulated           16               16
5 hAppNLGF_FIRE_vs_hAppNLGF Downregulated           42               42
  Nodes_with_edges Edges
1                0     0
2                2     1
3               18    19
4                0     0
5               11     9
Code
fwrite(summary_table, file.path(ppi_dir, "network_summary.csv"))

Session info

Code
sessionInfo()
R version 4.5.1 (2025-06-13)
Platform: x86_64-pc-linux-gnu
Running under: Rocky Linux 8.7 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: FlexiBLAS OPENBLAS;  LAPACK version 3.12.0

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/London
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] scales_1.4.0        qs2_0.1.6           igraph_2.2.2       
 [4] STRINGdb_2.22.0     stringr_1.6.0       ggrepel_0.9.6      
 [7] ggplot2_4.0.2       tidyr_1.3.2         dplyr_1.2.0        
[10] data.table_1.18.2.1

loaded via a namespace (and not attached):
 [1] generics_0.1.4        gplots_3.3.0          bitops_1.0-9         
 [4] KernSmooth_2.23-26    gtools_3.9.5          RSQLite_2.4.5        
 [7] stringi_1.8.7         digest_0.6.39         magrittr_2.0.4       
[10] caTools_1.18.3        evaluate_1.0.5        grid_4.5.1           
[13] RColorBrewer_1.1-3    blob_1.2.4            fastmap_1.2.0        
[16] plyr_1.8.9            jsonlite_2.0.0        DBI_1.2.3            
[19] httr_1.4.8            purrr_1.2.1           cli_3.6.5            
[22] rlang_1.1.7           bit64_4.6.0-1         plotrix_3.8-14       
[25] gsubfn_0.7            cachem_1.1.0          withr_3.0.2          
[28] yaml_2.3.12           otel_0.2.0            proto_1.0.0          
[31] tools_4.5.1           memoise_2.0.1         hash_2.2.6.4         
[34] vctrs_0.7.1           R6_2.6.1              png_0.1-8            
[37] lifecycle_1.0.5       stringfish_0.17.0     bit_4.6.0            
[40] htmlwidgets_1.6.4     pkgconfig_2.0.3       RcppParallel_5.1.11-1
[43] pillar_1.11.1         gtable_0.3.6          glue_1.8.0           
[46] Rcpp_1.1.1            xfun_0.56             tibble_3.3.1         
[49] tidyselect_1.2.1      knitr_1.51            farver_2.1.2         
[52] sqldf_0.4-12          htmltools_0.5.9       rmarkdown_2.30       
[55] compiler_4.5.1        S7_0.2.1              chron_2.3-62