Subsetting a Slide - Answer Key

Authors

Noor Sohail

Will Gammerdinger

Published

September 5, 2025

Exercise 1

  1. The binned output that does not contain colorectal cancer bins comes from here. Using a loop and the parameters below, subset the non-cancerous slide for the 8 μm and 16 μm bin sizes.
# Set sample name (used to build the output paths and as the slice name
# when the data are loaded)
sample_name <- "P5NAT"

# Set minimums and maximums for x and y; these bounds define the rectangular
# region to keep and are exclusive (bins exactly on a bound are dropped)
x_min <- 54000
x_max <- 61991
y_min <- 29000
y_max <- 39987

First, we can download the data and uncompress it.

# Set URL for where to download the data from
URL <- "https://cf.10xgenomics.com/samples/spatial-exp/3.0.0/Visium_HD_Human_Colon_Normal_P5/Visium_HD_Human_Colon_Normal_P5_binned_outputs.tar.gz"

# Set directory to hold full dataset
dir_full <- "data/P5NAT_full"

# Create the directory; recursive = TRUE also creates the parent "data" folder
# when it is missing, and showWarnings = FALSE avoids a warning when the
# directory already exists (e.g. when this chunk is re-run)
dir.create(dir_full, recursive = TRUE, showWarnings = FALSE)

# Set destination file (archive keeps the file name used in the URL)
dest <- file.path(dir_full, basename(URL))

# Download data; mode = "wb" writes the archive as binary
curl_download(
  url = URL,
  destfile = dest,
  mode = "wb")

# Uncompress tar.gz into the same directory
untar(tarfile = dest,
      exdir = dir_full)

Next, we will use the same loop as before for subsetting:

# Selecting the bin sizes to use
bins <- c("008", "016")

# Set the folders holding the full (input) and subsetted (output) data; both
# are built from sample_name so the input and output paths stay consistent
dir_in <- paste0("data/", sample_name, "_full")
dir_out <- paste0("data/", sample_name, "_subsetted")

# For each bin size
for (bin in bins) {
  # Set paths to read data from and write data to for this bin size
  bin_folder <- paste0("square_", bin, "um")
  path_in <- file.path(dir_in, "binned_outputs", bin_folder)
  path_bin <- file.path(dir_out, "binned_outputs", bin_folder)
  path_spatial <- file.path(path_bin, "spatial")

  # Create the output folder for this bin size and its spatial subfolder;
  # recursive = TRUE creates every missing parent folder in one call and
  # showWarnings = FALSE keeps re-runs quiet when the folders already exist
  dir.create(path = path_spatial,
             recursive = TRUE,
             showWarnings = FALSE)

  # Load object
  seurat_obj <- Load10X_Spatial(
    data.dir = dir_in,
    bin.size = as.integer(bin),
    slice = sample_name)

  # Obtain coordinates for our spatial data
  coordinates <- GetTissueCoordinates(seurat_obj)

  # Extract barcodes of the bins strictly inside the defined x and y bounds
  cells <- coordinates %>%
    subset(x > x_min & x < x_max & y > y_min & y < y_max) %>%
    row.names()

  # Stop with a clear message if the bounds do not contain any bins
  if (length(cells) == 0) {
    stop("No bins found within the x and y bounds for bin size ", bin, "um",
         call. = FALSE)
  }

  # Label a column called filter: FALSE for the bins to keep (those in cells)
  # and TRUE for the bins to remove
  seurat_obj$filter <- !(colnames(seurat_obj) %in% cells)

  # Retain the bins which have FALSE for filter in the meta data
  filtered_seurat_obj <- subset(seurat_obj, filter == FALSE)

  # Set paths for the JSON, Parquet and PNG files in the original folder
  files_spatial <- file.path(
    path_in, "spatial",
    c("scalefactors_json.json",
      "tissue_positions.parquet",
      "tissue_lowres_image.png"))

  # Copy JSON, Parquet and PNG files into our spatial folder; overwrite = TRUE
  # refreshes the files on a re-run, matching the overwrite of the h5 file
  copied <- file.copy(from = files_spatial,
                      to = path_spatial,
                      overwrite = TRUE)

  # file.copy() reports failures only through its return value, so check it
  if (!all(copied)) {
    stop("Could not copy: ", paste(files_spatial[!copied], collapse = ", "),
         call. = FALSE)
  }

  # Write counts to h5 file (the counts layer is requested explicitly)
  write10xCounts(path = file.path(path_bin, "filtered_feature_bc_matrix.h5"),
                 x = LayerData(filtered_seurat_obj, layer = "counts"),
                 barcodes = colnames(filtered_seurat_obj),
                 gene.id = rownames(filtered_seurat_obj),
                 version = "3",
                 type = "HDF5",
                 overwrite = TRUE)

  # Remove seurat_obj and filtered_seurat_obj before loading the next bin size
  rm(seurat_obj, filtered_seurat_obj)
}

After running this loop, the directory structure should now look like:

data/P5NAT_subsetted/binned_outputs/
├── square_008um
│   ├── filtered_feature_bc_matrix.h5
│   └── spatial
│       ├── scalefactors_json.json
│       ├── tissue_lowres_image.png
│       └── tissue_positions.parquet
└── square_016um
    ├── filtered_feature_bc_matrix.h5
    └── spatial
        ├── scalefactors_json.json
        ├── tissue_lowres_image.png
        └── tissue_positions.parquet

We can also double-check that the bins in the subsetted dataset produced by the loop match those in the provided cropped dataset:

# Load subsetted data (the 16um output written by the loop)
subsetted_seurat_obj <- Load10X_Spatial(
  data.dir = "data/P5NAT_subsetted/",
  bin.size = 16,
  slice = "P5NAT")

# Load provided data
provided_seurat_object <- Load10X_Spatial(
  data.dir = "data/P5NAT_cropped/",
  bin.size = 16,
  slice = "P5NAT")

# Check to ensure that both data sets have the same bins, in the same order;
# identical() returns a single TRUE/FALSE and, unlike all(a == b), does not
# recycle values when the two objects hold a different number of bins
identical(colnames(subsetted_seurat_obj), colnames(provided_seurat_object))
[1] TRUE

Back to Lesson >>

Back to Schedule