{ "cells": [ { "cell_type": "markdown", "id": "6b8a162a", "metadata": {}, "source": [ "# DLPFC Gene Cost by SOMDE" ] }, { "cell_type": "markdown", "id": "220e4abc", "metadata": {}, "source": [ "In this file, the embedding from DLPFC data will be generated. SOMDE will be used to select the highly variable genes. If you don't have the data, Please run the 0_somde_robust_scanpy.ipynb to generate the data. " ] }, { "cell_type": "code", "execution_count": 1, "id": "6e0552e4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mode: fast\n", "Using device: mps\n", "OMP_NUM_THREADS: None\n", "MKL_NUM_THREADS: None\n", "torch num threads: 12\n", "torch interop threads: 12\n", "\n", "=== Computing gene cost: 151508 vs 151509 ===\n", "[load_DLPFC] Loaded section 151508\n", "[load_DLPFC] Loaded section 151509\n", "[0000] DGI loss = 2.8241\n", "[0500] DGI loss = 0.0010\n", "[1000] DGI loss = 0.0003\n", "[1500] DGI loss = 0.0002\n", "[2000] DGI loss = 0.0001\n", "[2500] DGI loss = 0.0000\n", "[3000] DGI loss = 0.0000\n", "[3499] DGI loss = 0.0001\n", "\n", "All gene cost matrices computed successfully.\n" ] } ], "source": [ "import os\n", "import random\n", "import numpy as np\n", "import torch\n", "import scanpy as sc\n", "import pandas as pd\n", "\n", "from raftup import _gene_cost_somde\n", "\n", "\n", "# =========================================================\n", "# User options\n", "# =========================================================\n", "mode = \"fast\" # choose from: \"reproducible\", \"fast\"\n", "seed = 0\n", "\n", "ROOT_DIR = \"./data\"\n", "SOMDE_DIR = \"./results/somde\"\n", "OUT_DIR = \"./results/gene_cost_matrix\"\n", "N_TOP_SV_GENES = 3000\n", "\n", "SECTION_PAIRS = [\n", " (\"151508\", \"151509\")\n", "]\n", "\n", "\n", "# =========================================================\n", "# Setup\n", "# =========================================================\n", "def setup_environment(mode=\"reproducible\", seed=0):\n", " \"\"\"\n", " Configure random seeds and device.\n", "\n", " mode = \"reproducible\"\n", " - force CPU\n", " - enable deterministic algorithms\n", " - optionally limit thread-level nondeterminism\n", "\n", " mode = \"fast\"\n", " - prefer CUDA, then MPS, then CPU\n", " - allow nondeterministic behavior for speed\n", " \"\"\"\n", " if mode not in {\"reproducible\", \"fast\"}:\n", " raise ValueError(\"mode must be 'reproducible' or 'fast'\")\n", "\n", " # ----- reproducible mode -----\n", " if mode == \"reproducible\":\n", " # Optional: reduce CPU nondeterminism from parallel reductions\n", " os.environ[\"OMP_NUM_THREADS\"] = \"1\"\n", " os.environ[\"MKL_NUM_THREADS\"] = \"1\"\n", "\n", " random.seed(seed)\n", " np.random.seed(seed)\n", " torch.manual_seed(seed)\n", "\n", " torch.set_num_threads(1)\n", " torch.use_deterministic_algorithms(True)\n", "\n", " device = torch.device(\"cpu\")\n", "\n", " # ----- fast mode -----\n", " else:\n", " random.seed(seed)\n", " np.random.seed(seed)\n", " torch.manual_seed(seed)\n", "\n", " if torch.cuda.is_available():\n", " torch.cuda.manual_seed(seed)\n", " torch.cuda.manual_seed_all(seed)\n", " device = torch.device(\"cuda\")\n", " elif torch.backends.mps.is_available():\n", " device = torch.device(\"mps\")\n", " else:\n", " device = torch.device(\"cpu\")\n", "\n", " # allow faster execution\n", " torch.use_deterministic_algorithms(False)\n", "\n", " if device.type == \"cuda\":\n", " torch.backends.cudnn.benchmark = True\n", " torch.backends.cudnn.deterministic = False\n", "\n", " print(f\"Mode: {mode}\")\n", " print(f\"Using device: {device}\")\n", " print(\"OMP_NUM_THREADS:\", os.environ.get(\"OMP_NUM_THREADS\"))\n", " print(\"MKL_NUM_THREADS:\", os.environ.get(\"MKL_NUM_THREADS\"))\n", " print(\"torch num threads:\", torch.get_num_threads())\n", " print(\"torch interop threads:\", torch.get_num_interop_threads())\n", "\n", " return device\n", "\n", "\n", "def load_DLPFC(root_dir: str, section_id: str):\n", " \"\"\"\n", " Load one DLPFC Visium slice with ground-truth layer labels.\n", " \"\"\"\n", " adata = sc.read_visium(\n", " path=os.path.join(root_dir, section_id),\n", " count_file=f\"{section_id}_filtered_feature_bc_matrix.h5\",\n", " )\n", " adata.var_names_make_unique()\n", "\n", " gt_dir = os.path.join(root_dir, section_id, \"gt\")\n", " gt_df = pd.read_csv(\n", " os.path.join(gt_dir, \"tissue_positions_list_GTs.txt\"),\n", " sep=\",\",\n", " header=None,\n", " index_col=0,\n", " )\n", "\n", " adata.obs[\"original_clusters\"] = gt_df.loc[:, 6]\n", " keep_bcs = adata.obs.dropna().index\n", " adata = adata[keep_bcs].copy()\n", " adata.obs[\"original_clusters\"] = (\n", " adata.obs[\"original_clusters\"].astype(int).astype(str)\n", " )\n", "\n", " print(f\"[load_DLPFC] Loaded section {section_id}\")\n", " return adata\n", "\n", "\n", "# =========================================================\n", "# Main\n", "# =========================================================\n", "device = setup_environment(mode=mode, seed=seed)\n", "os.makedirs(OUT_DIR, exist_ok=True)\n", "\n", "for sid_A, sid_B in SECTION_PAIRS:\n", " print(f\"\\n=== Computing gene cost: {sid_A} vs {sid_B} ===\")\n", "\n", " sliceA = load_DLPFC(ROOT_DIR, sid_A)\n", " sliceB = load_DLPFC(ROOT_DIR, sid_B)\n", "\n", " for sl in (sliceA, sliceB):\n", " sc.pp.normalize_total(sl)\n", " sc.pp.log1p(sl)\n", "\n", " df_somde_A = pd.read_csv(f\"{SOMDE_DIR}/somde_{sid_A}.csv\")\n", " df_somde_B = pd.read_csv(f\"{SOMDE_DIR}/somde_{sid_B}.csv\")\n", "\n", " sv_genes_A = df_somde_A[\"g\"].values[:N_TOP_SV_GENES]\n", " sv_genes_B = df_somde_B[\"g\"].values[:N_TOP_SV_GENES]\n", "\n", " sliceA = sliceA[:, sv_genes_A].copy()\n", " sliceB = sliceB[:, sv_genes_B].copy()\n", "\n", " for sl in (sliceA, sliceB):\n", " sc.pp.scale(sl)\n", "\n", " _gene_cost_somde.compute_gene_cost(\n", " sliceA=sliceA,\n", " sliceB=sliceB,\n", " section_id_A=sid_A,\n", " section_id_B=sid_B,\n", " n_h=100,\n", " n_epoch=3500,\n", " lr=2e-4,\n", " print_step=500,\n", " seed=seed,\n", " device=device,\n", " output_dir=OUT_DIR,\n", " )\n", "\n", "print(\"\\nAll gene cost matrices computed successfully.\")" ] } ], "metadata": { "kernelspec": { "display_name": "scanit-env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.19" } }, "nbformat": 4, "nbformat_minor": 5 }