# Extract significant results for down-regulated genes
# (adjusted p-value below 0.05 and a negative log2 fold change)
sigDown <- dplyr::filter(dge_deseq2_noNAs, padj < 0.05, log2FoldChange < 0)
sigDown_genes <- as.character(sigDown$gene)

# Run GO enrichment analysis
# `all_genes` is the background gene set; it is defined outside this chunk.
egoDown <- enrichGO(
  gene = sigDown_genes,
  universe = all_genes,
  keyType = "SYMBOL",
  OrgDb = org.Mm.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)

# Output results from GO analysis to a table
cluster_summaryDown <- data.frame(egoDown)

# Save results
write.csv(
  cluster_summaryDown,
  "results/clusterProfiler_VSM_TNvsCold7_downregulated.csv"
)
Set-up DESeq2 analysis - Answer Key
Exercise 1
Using the code above as a template, run the over-representation analysis on the significantly down-regulated genes from the pseudobulk analysis.
- How many significant terms do you find?
# Number of significant GO terms enriched among the down-regulated genes
nrow(cluster_summaryDown)
[1] 108
- What are some of the prominent biological processes that are observed?
# Print the first few enriched term descriptions for the down-regulated genes
head(cluster_summaryDown$Description)
[1] "cytoplasmic translation"
[2] "translation at presynapse"
[3] "translation at synapse"
[4] "translation at postsynapse"
[5] "regulation of mRNA splicing, via spliceosome"
[6] "regulation of RNA splicing"
Exercise 2
Now that we have run through functional analysis with the results from Pseudobulk DE, let’s see what results we derive from the DGE lists from our FindMarkers DE analysis.
Create a significant DE genes data frame from the FindMarkers results with an added fold change criterion to reduce the gene list size. You can do this by running the code below:
# Create significant DE genes from FindMarkers()
# Keep genes with adjusted p-value < 0.05 and |avg_log2FC| > 1
sig_fc_dge <- dge_vsm %>%
  dplyr::filter(p_val_adj < 0.05, abs(avg_log2FC) > 1)
- Use this gene list to run an over-representation analysis. Be sure to separate genes into up- and down-regulated first. Also keep in mind that the background gene set differs from the one used for the DESeq2 analysis.
# Create background dataset for hypergeometric testing using all genes
# tested for significance in the FindMarkers() results
all_genes_fm <- dge_vsm$gene
# Extract significant results for up- and down-regulated genes
# Up-regulated: positive average log2 fold change
sigUp_fm <- dplyr::filter(sig_fc_dge, avg_log2FC > 0)
sigUp_fm_genes <- sigUp_fm$gene

# Down-regulated: negative average log2 fold change
sigDown_fm <- dplyr::filter(sig_fc_dge, avg_log2FC < 0)
sigDown_fm_genes <- sigDown_fm$gene
# Run GO enrichment analysis
# Up-regulated genes against the FindMarkers() background gene set
egoUp_fm <- enrichGO(
  gene = sigUp_fm_genes,
  universe = all_genes_fm,
  keyType = "SYMBOL",
  OrgDb = org.Mm.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)

# Down-regulated genes against the same background gene set
egoDown_fm <- enrichGO(
  gene = sigDown_fm_genes,
  universe = all_genes_fm,
  keyType = "SYMBOL",
  OrgDb = org.Mm.eg.db,
  ont = "BP",
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05,
  readable = TRUE
)
# Output results from GO analysis to a table
# One data frame per direction (up- and down-regulated)
cluster_summaryUp_fm <- data.frame(egoUp_fm)
cluster_summaryDown_fm <- data.frame(egoDown_fm)
What are the top terms enriched among up-regulated genes?
# Print the top GO terms enriched among up-regulated genes from FindMarkers()
head(cluster_summaryUp_fm$Description)
[1] "extracellular matrix organization"
[2] "extracellular structure organization"
[3] "external encapsulating structure organization"
[4] "regulation of cell adhesion"
[5] "blood vessel development"
[6] "cell-substrate adhesion"
What are the top terms enriched among down-regulated genes?
# Print the top GO terms enriched among down-regulated genes from FindMarkers()
head(cluster_summaryDown_fm$Description)
[1] "muscle tissue development" "fat cell differentiation"
[3] "brown fat cell differentiation" "striated muscle tissue development"
- How do these results compare with what we observed from the Pseudobulk DE functional analysis?
In the pseudobulk results, we also saw terms related to the extracellular matrix and cell adhesion upregulated. However, the downregulated results are different: for FindMarkers we see muscle and fat development, whereas for pseudobulk we saw RNA splicing and translation downregulated.