Differential Gene Expression - Answer Key

Author

Noor Sohail

Published

April 29, 2026

Exercise 1

  1. Pick a different cell type and run FindMarkers(). Name the population you chose and print the top 6 genes in the results.
# Vector of unique cell types
cell_types <- seurat_rctd$first_type %>%
  unique() %>% sort()

# Store DGE results
list_dge <- list()

for (ct in cell_types) {
  # Subset to cell type
  seurat_sub <- subset(seurat_rctd,
                        first_type == ct)
  Idents(seurat_sub) <- "orig.ident"

  # Calculate DGEs with wilcox test
  dge <- FindMarkers(seurat_sub,
                    ident.1 = "P5CRC",
                    ident.2 = "P5NAT")
  dge$gene <- rownames(dge)
  # Store full DGE results
  list_dge[[ct]] <- dge

  # Filter significant genes
  dge_sig <- dge %>% subset(p_val_adj < 0.05)
  # Get the gene names and get the first 6 values
  genes <- dge_sig %>%
    pull(gene) %>%
    head(6)

  # Print top 6 genes
  print(ct)
  print(genes)
}
[1] "B cells"
[1] "IGHA1"   "DUSP1"   "FOS"     "TSC22D3" "RGS1"    "IGHM"   
[1] "Endothelial"
[1] "COL4A1"  "PDK4"    "COL15A1" "IGHA1"   "SPARC"   "LCN2"   
[1] "Fibroblast"
[1] "DUSP1"   "FOS"     "PDK4"    "TSC22D3" "CD74"    "JUN"    
[1] "Intestinal Epithelial"
[1] "MUC2"  "CLCA1" "ZG16"  "TFF3"  "FCGBP" "FABP1"
[1] "Myeloid"
[1] "SRGN"   "SOD2"   "CXCL8"  "S100A9" "B2M"    "S100A8"
[1] "Neuronal"
[1] "B2M"   "CD74"  "DUSP1" "IGHA1" "IGHM"  "CLCA1"
[1] "Smooth Muscle"
[1] "CCN1"  "DUSP1" "CCN2"  "RGS5"  "PDK4"  "RHOB" 
[1] "T cells"
[1] "CXCL13" "TXNIP"  "CXCL14" "VIM"    "CLU"    "DUSP1" 
[1] "Tumor"
[1] "JCHAIN" "CHGA"   "LCN2"   "TXNIP"  "PDK4"   "SMOC2" 
[1] "unassigned"
[1] "MT-CO3"  "MT-ATP6" "MT-CO2"  "MT-ND4"  "MT-CYB"  "PIGR"   

Exercise 2

  1. Run an ORA analysis on the cell type that you calculated DE results for in the previous exercise. What is the top pathway that appears?
for (ct in names(list_dge)) {
  dge <- list_dge[[ct]]

  # Background genes
  all_genes <- as.character(dge$gene)

  # Significant up-regulated genes
  sigUp <- dplyr::filter(dge,
                         p_val_adj < 0.05,
                         avg_log2FC > 0)
  sigUp_genes <- as.character(sigUp$gene)

  # GO enrichment
  egoUp <- enrichGO(gene = sigUp_genes,
                    universe = all_genes,
                    keyType = "SYMBOL",
                    OrgDb = org.Hs.eg.db,
                    ont = "BP",
                    pAdjustMethod = "BH",
                    qvalueCutoff = 0.05,
                    readable = TRUE)

  cluster_summaryUp <- data.frame(egoUp)
  print(paste(ct, ":", egoUp$Description[1]))
}
[1] "B cells : immune effector process"
[1] "Endothelial : positive regulation of cell migration"
[1] "Fibroblast : inflammatory response"
[1] "Intestinal Epithelial : humoral immune response"
[1] "Myeloid : inflammatory response"
[1] "Neuronal : antibacterial humoral response"
[1] "Smooth Muscle : complement activation"
[1] "T cells : cell killing"
[1] "Tumor : viral life cycle"
[1] "unassigned : proton transmembrane transport"

Exercise 3

  1. Run GSEA on the cell type that you calculated DE results for in the first exercise. What is the top pathway that appears?
# Use a specific collection; C5 GO signatures
m_t2g <- msigdbr(species = "Homo sapiens",
                 collection = "C5") %>%
  dplyr::select(gs_name, gene_symbol)

for (ct in names(list_dge)) {
  dge <- list_dge[[ct]]
  
  # Filter significant genes
  dge_sig <- subset(dge, p_val_adj < 0.05)
  
  # Extract fold changes
  foldchanges <- dge_sig$avg_log2FC
  
  # Name each fold change with the corresponding gene symbol
  names(foldchanges) <- dge_sig$gene
  
  # Sort fold changes in decreasing order
  foldchanges <- sort(foldchanges, decreasing = TRUE)
  
  # Run GSEA
  msig_GSEA <- GSEA(foldchanges,
                    TERM2GENE = m_t2g,
                    verbose   = FALSE)
  
  # Extract the GSEA results
  msigGSEA_results <- msig_GSEA@result
  
  # Order by NES and pull first row's ID
  top_id <- msigGSEA_results %>%
    arrange(desc(abs(NES))) %>%
    head(1) %>%
    pull(ID)
  
  print(paste(ct, ":", top_id))
}
[1] "B cells : GOCC_SECRETORY_GRANULE"
[1] "Endothelial : GOBP_ANTIMICROBIAL_HUMORAL_RESPONSE"
[1] "Fibroblast : GOBP_RESPONSE_TO_MECHANICAL_STIMULUS"
[1] "Intestinal Epithelial : GOBP_NUCLEAR_CHROMOSOME_SEGREGATION"
[1] "Myeloid : HP_RESPIRATORY_INSUFFICIENCY"
[1] "Neuronal : "
[1] "Smooth Muscle : "
[1] "T cells : GOBP_ANTIMICROBIAL_HUMORAL_IMMUNE_RESPONSE_MEDIATED_BY_ANTIMICROBIAL_PEPTIDE"
[1] "Tumor : GOBP_NEURON_PROJECTION_GUIDANCE"
[1] "unassigned : GOCC_EXTERNAL_ENCAPSULATING_STRUCTURE"

Back to Lesson >>

Back to Schedule

Reuse

CC-BY-4.0