Source code for ClearMap.Scripts.generate_jsonl_file_ontology
import pandas as pd
import numpy as np
import json
from ClearMap.Settings import atlas_folder
import os
[docs]
def get_flattened_structure(structure):
    """
    Flatten a nested structure into the list of all its descendants.

    structure : dict
        Node whose optional 'children' entry holds a list of nodes of the
        same shape.

    Returns
    -------
    list
        Every descendant of `structure` in depth-first pre-order (each child
        is immediately followed by its own descendants). `structure` itself
        is not included. A missing, None or empty 'children' entry yields an
        empty list.
    """
    flattened = []
    # dict.get returns None when the key is absent (or null in the JSON);
    # treat that like a leaf instead of failing on `for ... in None`.
    for child in structure.get('children') or []:
        flattened.append(child)
        flattened.extend(get_flattened_structure(child))  # recursion
    return flattened
[docs]
def get_direct_children_structures_ids(children):
    """
    Collect the ids of the direct children only (no deeper descendants).

    children : iterable of dict
        Child nodes; the 'id' entry of each one is read with ``.get``, so a
        child without an 'id' contributes None.

    Returns the ids as a list, in the same order as `children`.
    """
    direct_ids = []
    for node in children:
        direct_ids.append(node.get("id"))
    return direct_ids
[docs]
def get_all_children_structures_ids(children):
    """
    Collect the ids of the direct children and of every deeper descendant.

    children : list of dict
        Direct child nodes of a structure.

    Returns a list of ids: first the direct children in their given order,
    then the flattened descendants of each child, child by child.
    """
    # Work on a copy so the caller's list is left untouched.
    every_node = children.copy()
    every_node += [
        descendant
        for node in children
        for descendant in get_flattened_structure(node)
    ]
    return [node.get("id") for node in every_node]
[docs]
def get_parent_id(structure_id, ontology_df=None):
    """
    Return the id of the direct parent of a structure.

    structure_id :
        Value looked up in the 'id' column.
    ontology_df : pandas.DataFrame, optional
        Table with 'id' and 'parent_structure_id' columns. Defaults to the
        module-level ``df``.

    Returns
    -------
    int or None
        The parent id of the first matching row, or None when no row matches
        `structure_id` or when the stored parent is missing (NaN / None).
    """
    table = df if ontology_df is None else ontology_df
    parent_values = table.loc[table['id'] == structure_id, "parent_structure_id"].values
    if parent_values.size == 0:
        return None
    parent = parent_values[0]
    # pd.isna copes with both NaN (float column) and None (object column),
    # whereas np.isnan raises TypeError on None.
    if pd.isna(parent):
        return None
    return int(parent)
[docs]
def get_structure_path(structure_id):
    """
    Build the chain of ids leading from the top-most ancestor to a structure.

    structure_id :
        Id of the structure whose ancestry is wanted.

    Returns a list of ids ordered from the furthest ancestor down to
    ``int(structure_id)`` (always the last element). Ancestors are found by
    calling `get_parent_id` repeatedly until it returns a falsy value.
    """
    # Gather the lineage child-first, then flip it once at the end.
    lineage = [int(structure_id)]
    current = structure_id
    while current:
        current = get_parent_id(current)
        if current:
            lineage.append(current)
    lineage.reverse()
    return lineage
[docs]
def make_structure_ids_path_string(path_structure):
    """
    Join a path of structure ids into a single '/'-separated string.

    path_structure : iterable
        Ids in path order; each one is converted with ``str``.

    Returns e.g. '997/8/567' for [997, 8, 567]; an empty path gives ''.
    There is no leading or trailing separator.
    """
    return '/'.join(str(i) for i in path_structure)
[docs]
def make_structure_acronyms_path_string(path_structure, id_to_acronym=None):
    """
    Join the acronyms of a path of structure ids into a ' > '-separated string.

    path_structure : iterable
        Ids in path order.
    id_to_acronym : mapping, optional
        Maps each id to its acronym. Defaults to the module-level
        ``dict_id_to_acronym``.

    Returns e.g. 'root > grey > CH'; an empty path gives ''.
    Raises KeyError when an id is not present in the mapping.
    """
    lookup = dict_id_to_acronym if id_to_acronym is None else id_to_acronym
    # str() keeps non-string acronyms formatted the way the f-string used to.
    return ' > '.join(str(lookup[i]) for i in path_structure)
# Script body: read the nested ontology JSON, flatten it into one row per
# structure, derive the children / path columns and write a JSON-lines file
# next to the input.
atlas_name = "ABA_annotation_last"
fpath = os.path.join(atlas_folder, f"{atlas_name}.json")
# Be explicit about the encoding so the read does not depend on the
# platform's locale default.
with open(fpath, "r", encoding="utf-8") as file:
    data = json.load(file)["msg"]

# Only data[0] is flattened below, so anything other than exactly one root
# would silently produce an incomplete table. Raise explicitly instead of
# using `assert`, which is stripped when Python runs with -O.
if len(data) != 1:
    raise ValueError(
        f"expected exactly one root structure in {fpath}, found {len(data)}"
    )
data.extend(get_flattened_structure(data[0]))

# `df` is read by get_parent_id, so it must exist before the paths are built.
df = pd.DataFrame(data)
df["direct_children_structures_ids"] = df.children.map(get_direct_children_structures_ids)
df['all_children_structures_ids'] = df.children.map(get_all_children_structures_ids)
df['structure_path'] = df['id'].map(get_structure_path)

# `dict_id_to_acronym` is read by make_structure_acronyms_path_string.
dict_id_to_acronym = dict(zip(df['id'], df['acronym']))
df['structure_ids_path'] = df['structure_path'].map(make_structure_ids_path_string)
df['structure_acronyms_path'] = df['structure_path'].map(make_structure_acronyms_path_string)

# Columns written to the output, in this order.
cols_of_interest = [
    'id',
    'acronym',
    'name',
    'color_hex_triplet',
    'st_level',
    'parent_structure_id',
    'direct_children_structures_ids',
    'all_children_structures_ids',
    'structure_path',
    'structure_ids_path',
    'structure_acronyms_path',
]
df_to_save = df[cols_of_interest].copy()

# One JSON record per line.
fpath = os.path.join(atlas_folder, f"{atlas_name}.jsonl")
print(fpath)
df_to_save.to_json(fpath, orient="records", lines=True)