library(AnnotationHub)
library(tidyverse)
Fetching Annotations
R Programming
This lesson introduces participants to retrieving gene-level annotations using the AnnotationHub package. Participants will learn how to connect to AnnotationHub, query Ensembl databases, download the appropriate annotation resources, and extract essential gene information for downstream analysis.
Keywords
R, AnnotationHub, Ensembl, Gene annotations
# Open a connection to the AnnotationHub resource
ah <- AnnotationHub()

# Search the hub for records matching both "Homo sapiens" and "EnsDb"
# (case-insensitive), i.e. the human Ensembl annotation databases
ahDb <- query(ah, pattern = c("Homo sapiens", "EnsDb"), ignore.case = TRUE)
# Take the ID of the last record listed in the query results
# (presumably the most recent Ensembl release -- verify the record ordering)
id <- tail(rownames(mcols(ahDb)), n = 1)

# Fetch the EnsDb database for that record ID from the hub
edb <- ah[[id]]
# Pull gene-level records from the database as a data frame, then keep only
# the annotation columns of interest
annotations <- genes(edb, return.type = "data.frame") %>%
  dplyr::select(gene_id, gene_name, seq_name, gene_biotype, description)
head(annotations)
gene_id gene_name seq_name gene_biotype
1 ENSG00000290825 DDX11L16 1 lncRNA
3 ENSG00000223972 DDX11L1 1 transcribed_unprocessed_pseudogene
4 ENSG00000310526 WASH7P 1 lncRNA
5 ENSG00000227232 WASH7P 1 transcribed_unprocessed_pseudogene
6 ENSG00000278267 MIR6859-1 1 miRNA
7 ENSG00000243485 MIR1302-2HG 1 lncRNA
description
1 DEAD/H-box helicase 11 like 16 (pseudogene) [Source:NCBI gene (formerly Entrezgene);Acc:727856]
3 DEAD/H-box helicase 11 like 1 (pseudogene) [Source:HGNC Symbol;Acc:HGNC:37102]
4 WASP family homolog 7, pseudogene [Source:HGNC Symbol;Acc:HGNC:38034]
5 WASP family homolog 7, pseudogene [Source:HGNC Symbol;Acc:HGNC:38034]
6 microRNA 6859-1 [Source:HGNC Symbol;Acc:HGNC:50039]
7 MIR1302-2 host gene [Source:HGNC Symbol;Acc:HGNC:52482]
Reuse
CC-BY-4.0