Slide-seq_MOB

[1]:

import warnings
warnings.filterwarnings("ignore")
import MENDER
import scanpy as sc
import pandas as pd
import numpy as np
from sklearn.metrics import *
import time

[2]:

# Fetch the Slide-seqV2 mouse olfactory bulb puck from SODB through pysodb.
# pysodb has to be installed beforehand: https://pysodb.readthedocs.io/en/latest/
import pysodb

sodb = pysodb.SODB()
adata_raw = sodb.load_experiment(
    'stickels2020highly',
    'stickels2021highly_SlideSeqV2_Mouse_Olfactory_bulb_Puck_200127_15',
)

download experiment[stickels2021highly_SlideSeqV2_Mouse_Olfactory_bulb_Puck_200127_15] in dataset[stickels2020highly]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 114M/114M [00:08<00:00, 14.0MB/s]

load experiment[stickels2021highly_SlideSeqV2_Mouse_Olfactory_bulb_Puck_200127_15] in dataset[stickels2020highly]

[12]:

# Display the dimensions of adata_raw.
# NOTE(review): this cell carries execution count 12, i.e. it was run after the
# cells below it, so the recorded shape presumably describes the barcode-subset,
# filtered object rather than the freshly downloaded one — confirm before quoting it.
adata_raw.shape

[12]:

(20139, 15149)

[3]:

# Barcode whitelist, downloaded from
# https://drive.google.com/drive/folders/10lhz5VY7YfvHrtV40MwaqLmWz56U9eBP?usp=sharing
# The file has no header row; the column labelled 0 holds the barcodes.
used_barcode = pd.read_csv(
    'data/used_barcodes.txt',
    sep='\t',
    header=None,
)[0]

[4]:

# Keep only the observations listed in used_barcode.
# Indexing an AnnData returns a view; take an explicit copy so the in-place
# scanpy filters and layer assignment that follow work on a standalone object
# instead of relying on implicit view materialisation (whose warning is
# silenced by the filterwarnings call at the top of the notebook).
adata_raw = adata_raw[used_barcode, :].copy()

[5]:

# Basic quality filtering, applied in place on adata_raw.
# Cells are filtered first (min_counts=10), then genes (min_cells=10), so the
# gene filter is evaluated against the already-reduced set of cells.
sc.pp.filter_cells(adata_raw, min_counts=10)
sc.pp.filter_genes(adata_raw, min_cells=10)

[6]:

# Stash a copy of the current expression matrix in layers["counts"] before X is
# normalised and log-transformed below.
adata_raw.layers["counts"] = adata_raw.X.copy()
# NOTE(review): scanpy documents flavor="seurat_v3" as expecting raw count data,
# yet this cell's recorded output warns that adata.X "seems to be already
# log-transformed" (presumably emitted by sc.pp.log1p). Confirm what the
# SODB-provided X contains before trusting this HVG selection, the second
# normalise/log1p pass, or the name of the "counts" layer.
sc.pp.highly_variable_genes(adata_raw, flavor="seurat_v3", n_top_genes=4000)
sc.pp.normalize_total(adata_raw, inplace=True)
sc.pp.log1p(adata_raw)
# Dimensionality reduction and graph construction with scanpy defaults:
# PCA -> neighbour graph -> UMAP.
sc.pp.pca(adata_raw)
sc.pp.neighbors(adata_raw)
sc.tl.umap(adata_raw)
# Leiden clustering at resolution 2; the MENDER cell further down selects these
# labels as the cell state via ct_obs='leiden'.
sc.tl.leiden(adata_raw,resolution=2)

WARNING: adata.X seems to be already log-transformed.

[7]:

# Hand MENDER its own copy (adata); adata_raw itself is not passed on.
adata = adata_raw.copy()

[8]:

# MENDER input parameter: the number of scales, passed on later as n_scales.
scale = 4

# Main MENDER object. ct_obs names the cell-state annotation to use; here it is
# the cell state obtained by Leiden clustering.
msm = MENDER.MENDER_single(adata, ct_obs='leiden')

estimated radius: 15.93267083699364

[9]:

# Configure MENDER.
#   n_scales : number of scales (the default is 6; this notebook uses `scale`)
#   nn_mode  : 'radius' is the setting used for single-cell data
#   nn_para  : the neighbourhood radius, 15 um here (see the manuscript for why);
#              MENDER also provides a function 'estimate_radius' for estimating it
msm.set_MENDER_para(n_scales=scale, nn_mode='radius', nn_para=15)

# Construct the context representation.
# No process-count argument is supplied to this call.
msm.run_representation()

# Spatial clustering parameter:
#   positive value -> expected number of domains
#   negative value -> clustering resolution
msm.run_clustering_normal(-0.5)

scale 0, median #cells per radius (r=15): 1.0
scale 1, median #cells per radius (r=15): 3.0
scale 2, median #cells per radius (r=15): 5.0
scale 3, median #cells per radius (r=15): 7.0

[10]:

# Output the clustering result under the name 'MENDER'.
# NOTE(review): presumably this renders the spatial domains stored under the
# 'MENDER' annotation — confirm against the MENDER documentation.
msm.output_cluster('MENDER')

[11]:

# Persist the MENDER result object to disk.
# The target directory is created first so the write does not fail on a fresh
# checkout where 'dump/' does not exist yet.
import os

out_file = 'dump/Slide-seq_MOB.h5ad'
os.makedirs(os.path.dirname(out_file), exist_ok=True)
msm.adata_MENDER.write_h5ad(out_file)

[ ]: