Source code for neuralib.atlas.cellatlas.core

import dataclasses
import io
from pathlib import Path

import pandas as pd
import polars as pl
from neuralib.atlas.data import load_structure_tree
from neuralib.io.core import ATLAS_CACHE_DIRECTORY
from neuralib.util.verbose import fprint

__all__ = ['CellAtlas']

from typing_extensions import Self


@dataclasses.dataclass
class CellAtlas:
    """Per-brain-region table loaded from the cellatlas csv.

    The csv is either supplied by the caller or generated from the sheet
    ``Densities BBCAv1`` of the supplementary file downloaded in :meth:`_request`.
    """

    dataframe: pl.DataFrame
    """
    Example::

        ┌────────────────────────────────┬────────────────┬───────────┐
        │ Brain region                   ┆ Volumes [mm^3] ┆ n_neurons │
        │ ---                            ┆ ---            ┆ ---       │
        │ str                            ┆ f64            ┆ i64       │
        ╞════════════════════════════════╪════════════════╪═══════════╡
        │ Abducens nucleus               ┆ 0.015281       ┆ 1324      │
        │ Accessory facial motor nucleus ┆ 0.013453       ┆ 497       │
        │ Accessory olfactory bulb       ┆ 0.6880625      ┆ 189608    │
        │ …                              ┆ …              ┆ …         │
        │ Zona incerta                   ┆ 2.157641       ┆ 136765    │
        │ posteromedial visual area      ┆ 1.2225625      ┆ 197643    │
        └────────────────────────────────┴────────────────┴───────────┘

    """

    @classmethod
    def load_from_csv(cls,
                      file: Path | None = None,
                      ignore_cell_types_info: bool = True,
                      ignore_detail_info: bool = True) -> Self:
        """
        Load/Download the csv file

        :param file: filepath. If None, use ``cellatlas.csv`` in ``ATLAS_CACHE_DIRECTORY``
            and download it from the source paper when it does not exist yet
        :param ignore_cell_types_info: ignore cell types information, only keep the volume and
            the total neuron number (``n_neurons`` = density * volume) for each area
        :param ignore_detail_info: ignore information in brain subregion, i.e., drop every row
            whose ``Brain region`` contains ``,``, ``/`` or ``(``
        :return: instance holding the dataframe sorted by ``Brain region``
        """
        if file is None:
            # mkdir with exist_ok already covers the "directory exists" case
            ATLAS_CACHE_DIRECTORY.mkdir(exist_ok=True, parents=True)

            file = ATLAS_CACHE_DIRECTORY / 'cellatlas.csv'
            if not file.exists():
                cls._request(file)

        df = pl.read_csv(file)

        if ignore_cell_types_info:
            df = df.select('Brain region', 'Neuron [mm^-3]', 'Volumes [mm^3]')

            # total neurons = density * volume; the Int64 cast discards the fractional part
            n_neurons = (pl.col('Neuron [mm^-3]') * pl.col('Volumes [mm^3]')).alias('n_neurons').cast(pl.Int64)
            df = df.with_columns(n_neurons).drop('Neuron [mm^-3]')

        if ignore_detail_info:
            # one character class instead of three separate regex passes (',', '/', '(')
            df = df.filter(~pl.col('Brain region').str.contains(r'[,/(]'))

        # use cls (not the hard-coded class) so that subclasses get their own type back
        return cls(df.sort('Brain region'))

    @classmethod
    def _request(cls, output: Path) -> pl.DataFrame:
        """
        Download the table from the paper source and save it as csv

        :param output: filepath of the csv to be written
        :return: downloaded table as polars dataframe
        :raises RuntimeError: if the server does not answer with HTTP 200
        """
        import requests

        url = 'https://journals.plos.org/ploscompbiol/article/file?type=supplementary&id=10.1371/' \
              'journal.pcbi.1010739.s011'

        # (connect, read) timeout, otherwise an unresponsive server blocks forever
        resp = requests.get(url, timeout=(10, 60))
        if resp.status_code != 200:
            raise RuntimeError(f'download cellatlas FAIL (HTTP {resp.status_code})')

        # convert once, so the returned object matches the annotated polars type
        df = pl.from_pandas(pd.read_excel(io.BytesIO(resp.content), sheet_name='Densities BBCAv1'))
        df.write_csv(output)
        fprint(f'Download successfully cellatlas csv and save in {output}!', vtype='io')

        return df

    @property
    def brain_regions(self) -> list[str]:
        """list of unique brain regions (``Brain region`` column), not re-sorted here"""
        return self.dataframe['Brain region'].unique().to_list()

    @classmethod
    def load_sync_allen_structure_tree(cls, force_save: bool = True) -> pl.DataFrame:
        """
        TODO `ProS` not found

        Based on cellatlas dataframe, create a sync used `acronym` header in allen struct_tree (sorted by name)

        **fields**

        ``name``: `Brain region` found in cellatlas

        ``n_neurons``: `n_neurons` from cellatlas dataframe

        ``acronym``: `acronym` found in the structure_tree csv

        Only regions whose name is found in both tables are kept (join on ``name``).

        :param force_save: create sync file every time in root directory. If False, the
            existing ``cellatlas_allen_sync.csv`` is reused when present
        :return: sync_dataframe, read back from the saved csv

        example::

            ┌────────────────────────────────┬────────────────┬───────────┬─────────┐
            │ name                           ┆ Volumes [mm^3] ┆ n_neurons ┆ acronym │
            │ ---                            ┆ ---            ┆ ---       ┆ ---     │
            │ str                            ┆ f64            ┆ i64       ┆ str     │
            ╞════════════════════════════════╪════════════════╪═══════════╪═════════╡
            │ Abducens nucleus               ┆ 0.015281       ┆ 1324      ┆ VI      │
            │ Agranular insular area         ┆ 4.901734       ┆ 242362    ┆ AI      │
            │ …                              ┆ …              ┆ …         ┆ …       │
            │ Visual areas                   ┆ 12.957203      ┆ 1297194   ┆ VIS     │
            │ Zona incerta                   ┆ 2.157641       ┆ 136765    ┆ ZI      │
            │ posteromedial visual area      ┆ 1.2225625      ┆ 197643    ┆ VISpm   │
            └────────────────────────────────┴────────────────┴───────────┴─────────┘

        """
        out = ATLAS_CACHE_DIRECTORY / 'cellatlas_allen_sync.csv'

        if force_save or not out.exists():
            # load_from_csv() also creates the cache directory that `out` lives in
            ctlas = cls.load_from_csv()
            allen = load_structure_tree().select('name', 'acronym').sort('name')

            df = (
                ctlas.dataframe
                .rename({'Brain region': 'name'})
                .join(allen, on='name')
            )

            df.write_csv(out)
            fprint(f'SAVE cellatlas sync csv to {out}', vtype='io')

        return pl.read_csv(out)