# Set sample name; it is used further down to build the output folder name
# ("data/<sample_name>_subsetted/...") and as the Seurat slice name
sample_name <- "P5NAT"
# Set minimums and maximums for x and y. These define the bounding box of the
# subset: a bin is kept only when x_min < x < x_max and y_min < y < y_max
# (strict comparisons) on the coordinates returned by GetTissueCoordinates().
# NOTE(review): the units are presumably those of the image coordinate space
# used by GetTissueCoordinates() - confirm before reusing on another sample
x_min <- 54000
x_max <- 61991
y_min <- 29000
y_max <- 39987

Subsetting a Slide - Answer Key
Exercise 1
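- The binned output for the non-cancerous colon sample (the slide that does not contain colorectal cancer bins) can be downloaded from the 10x Genomics URL given below. Using a loop and the provided parameters (sample name and x/y bounds), subset the non-cancerous slide for the 8μm and 16μm bin sizes.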
First, we can download the data and uncompress it.
# Set URL for where to download the data from
URL <- "https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Normal_P5/Visium_HD_Human_Colon_Normal_P5_binned_outputs.tar.gz"
# Create directory to hold full dataset.
# recursive = TRUE also creates the parent "data/" folder when it is missing
# (without it dir.create() fails and the download below errors), and
# showWarnings = FALSE keeps re-runs quiet when the folder already exists.
dir.create("data/P5NAT_full/", recursive = TRUE, showWarnings = FALSE)
# Set destination file: the archive keeps its original file name
dest <- file.path("data/P5NAT_full", basename(URL))
# Download data (the archive is written in binary mode)
curl_download(
  url = URL,
  destfile = dest,
  mode = "wb"
)
# Uncompress tar.gz
untar(tarfile = dest,
exdir = "data/P5NAT_full")
Next, we will use the same loop as before for subsetting:
# Bin sizes to subset, zero-padded the same way as the "square_<bin>um" folders
bins <- c("008", "016")
# Make the output folder for the subsetted data, creating any missing parents
dir.create(
  path = paste0("data/", sample_name, "_subsetted/binned_outputs/"),
  recursive = TRUE
)
# For each bin size
for (bin in bins) {
# Body of the per-bin loop: load the full dataset at this bin size, keep only
# the bins inside the x/y bounding box, and write the subset (counts plus the
# spatial files needed to reload it) under data/<sample_name>_subsetted/.

# Folder holding the full (unsubsetted) dataset
full_dir <- "data/P5NAT_full"
# "spatial" folder of the full dataset for this bin size
spatial_src <- file.path(
  full_dir, "binned_outputs", paste0("square_", bin, "um"), "spatial"
)
# Set path to write data to
path_bin <- paste0("data/", sample_name, "_subsetted/binned_outputs/square_", bin, "um")
path_spatial <- file.path(path_bin, "spatial")
# recursive = TRUE creates path_bin as well, so a single call is enough;
# showWarnings = FALSE keeps re-runs quiet when the folders already exist
dir.create(path = path_spatial, recursive = TRUE, showWarnings = FALSE)

# Load object ("008" -> 8, "016" -> 16)
seurat_obj <- Load10X_Spatial(
  data.dir = full_dir,
  bin.size = as.integer(bin),
  slice = sample_name
)

# Obtain coordinates for our spatial data
coordinates <- GetTissueCoordinates(seurat_obj)
# Extract bin barcodes strictly inside the defined x and y bounds.
# which() drops NA comparisons, matching how subset() treats them.
in_bounds <- coordinates$x > x_min & coordinates$x < x_max &
  coordinates$y > y_min & coordinates$y < y_max
cells <- row.names(coordinates)[which(in_bounds)]
# Fail early with a clear message instead of an opaque subsetting error
if (length(cells) == 0) {
  stop("No ", bin, "um bins fall inside the requested x/y bounds",
       call. = FALSE)
}
# Retain only the selected bins
filtered_seurat_obj <- subset(seurat_obj, cells = cells)

# Copy the JSON, Parquet and PNG files from the original spatial folder
files_spatial <- file.path(
  spatial_src,
  c("scalefactors_json.json", "tissue_positions.parquet", "tissue_lowres_image.png")
)
# overwrite = TRUE mirrors the h5 write below, so a re-run refreshes every file
copied <- file.copy(from = files_spatial, to = path_spatial, overwrite = TRUE)
# file.copy() reports failure only through its return value, so check it
if (!all(copied)) {
  stop("Could not copy spatial file(s): ",
       paste(files_spatial[!copied], collapse = ", "),
       call. = FALSE)
}

# Write counts to h5 file; the counts layer is requested explicitly rather
# than relying on whichever layer is currently the default
write10xCounts(
  path = file.path(path_bin, "filtered_feature_bc_matrix.h5"),
  x = LayerData(filtered_seurat_obj, layer = "counts"),
  barcodes = colnames(filtered_seurat_obj),
  gene.id = rownames(filtered_seurat_obj),
  version = "3",
  type = "HDF5",
  overwrite = TRUE
)

# Remove both objects before the next (large) bin size is loaded
rm(seurat_obj, filtered_seurat_obj)
}
After running this loop, the directory structure should now look like:
data/P5NAT_subsetted/binned_outputs/
├── square_008um
│   ├── filtered_feature_bc_matrix.h5
│   └── spatial
│       ├── scalefactors_json.json
│       ├── tissue_lowres_image.png
│       └── tissue_positions.parquet
└── square_016um
    ├── filtered_feature_bc_matrix.h5
    └── spatial
        ├── scalefactors_json.json
        ├── tissue_lowres_image.png
        └── tissue_positions.parquet
We can also double check that the bins in the subsetted dataset from the loop match the provided cropped dataset:
# Load subsetted data (16um bins written by the loop)
subsetted_seurat_obj <- Load10X_Spatial(
  data.dir = "data/P5NAT_subsetted/",
  bin.size = 16,
  slice = "P5NAT"
)
# Load provided data (the pre-cropped dataset)
provided_seurat_object <- Load10X_Spatial(
  data.dir = "data/P5NAT_cropped/",
  bin.size = 16,
  slice = "P5NAT"
)
# Check to ensure that both data sets have the same bins, in the same order.
# identical() compares the two barcode vectors as a whole, so a difference in
# length returns FALSE instead of being masked by recycling in `==`.
identical(colnames(subsetted_seurat_obj), colnames(provided_seurat_object))
# Expected output:
# [1] TRUE