from pathlib import Path
import logging
from typing import Callable, List
from bids import BIDSLayout
from .preprocessing import pre_proc
from memori.pathman import get_prefix
[docs]def bids_proc(
bids_path: str,
output_path: str,
participant_label: List[str],
function_to_run: Callable = pre_proc,
combine_sessions: bool = False,
skip_validation: bool = False,
database: str = None,
reset_database: bool = False,
skip_database: bool = False,
dryrun: bool = False,
single_rest_run: bool = False,
**kwargs,
) -> None:
"""BIDS processor.
Parameters
----------
bids_path : str
Path to BIDS dataset.
output_path : str
Path to output files.
participant_label : List[str]
List of participant labels to process.
function_to_run: Callable
Function to run. Must take the following arguments: output_path, t1, t2, func, metadata, kwargs
skip_validation : bool
Skip BIDS validator.
database : str
Path to alternative location for BIDS sql database.
reset_database : bool
Delete the current BIDS sql database.
skip_database : bool
Skip reading/writing from BIDS data sql database.
dryrun : bool
Run bids parsing, but don't do any processing.
single_rest_run : bool
If True, only process the first rest run (For paper).
"""
# keep bids path in kwargs just in-case we need it again
kwargs["bids_path"] = bids_path
# create a layout
layout = BIDSLayout(
bids_path,
validate=not skip_validation,
database_path=None if skip_database else database,
reset_database=reset_database,
)
# if participant label not defined get all subjects
if not participant_label:
participant_label = layout.get_subjects()
logging.info("Subjects in dataset: %s", participant_label)
# create dictionary for loop through files
bids_dict = dict()
# loop over all subjects
for subject in participant_label:
# get all sessions for subject
sessions = layout.get_sessions(subject=subject)
sessions.sort()
# sessions not empty
if sessions:
logging.info("Found these sessions for subject %s: %s", subject, sessions)
# add subject in to dictionary
bids_dict[subject] = dict()
# loop over sessions
for session in sessions:
# add session to dict
bids_dict[subject][session] = dict()
# subject and session
sub_ses = {"subject": subject, "session": session, "extension": "nii.gz"}
# get t1/t2/func for subject/session
t1 = layout.get(datatype="anat", suffix="T1w", **sub_ses)
t2 = layout.get(datatype="anat", suffix="T2w", **sub_ses)
func = layout.get(datatype="func", suffix="bold", **sub_ses)
# sort lists
t1.sort(key=lambda f: f.filename)
t2.sort(key=lambda f: f.filename)
func.sort(key=lambda f: f.filename)
# add t1/t2/func to the dict
bids_dict[subject][session]["t1"] = t1
bids_dict[subject][session]["t2"] = t2
bids_dict[subject][session]["func"] = func
# sessions is empty
# check data at subject level
else:
# subject
sub = {"subject": subject, "extension": "nii.gz"}
# get t1/t2/func for subject/session
t1 = layout.get(datatype="anat", suffix="T1w", **sub)
t2 = layout.get(datatype="anat", suffix="T2w", **sub)
func = layout.get(datatype="func", suffix="bold", **sub)
# skip if any empty
if not t1 or not t2 or not func:
continue
logging.info("Found these runs for subject %s: %s", subject, func)
# sort lists
t1.sort(key=lambda f: f.filename)
t2.sort(key=lambda f: f.filename)
func.sort(key=lambda f: f.filename)
# set session to None
session = None
# add keys to bids dict
bids_dict[subject] = {}
bids_dict[subject][session] = {}
# add t1/t2/func to the dict
bids_dict[subject][session]["t1"] = t1
bids_dict[subject][session]["t2"] = t2
bids_dict[subject][session]["func"] = func
# if combine_sessions enabled, try to fill sessions that are missing an
# anatomical with another session's anatomical per subject
if combine_sessions:
for subject in bids_dict:
# set default t1/t2
t1 = list()
t2 = list()
# get the first available session with a t1/t2
for session in bids_dict[subject]:
if bids_dict[subject][session]["t1"] and bids_dict[subject][session]["t2"]:
t1 = bids_dict[subject][session]["t1"]
t2 = bids_dict[subject][session]["t2"]
break
# loop over subject sessions, and fill in missing BOTH anatomicals
for session in bids_dict[subject]:
if not bids_dict[subject][session]["t1"] and not bids_dict[subject][session]["t2"]:
bids_dict[subject][session]["t1"] = t1
bids_dict[subject][session]["t2"] = t2
# make a list of files to run in the form of a tuple (T1, T2, func)
run_list = list()
# add subject/session/runs to run_list
for subject in bids_dict:
for session in bids_dict[subject]:
for func in bids_dict[subject][session]["func"]:
try:
if single_rest_run:
# paper only does single rest run
if func.entities["task"] == "rest" or func.entities["task"] == "EMOTION": # for HCP
run_list.append(
(bids_dict[subject][session]["t1"][0], bids_dict[subject][session]["t2"][0], func)
)
break
else:
run_list.append(
(bids_dict[subject][session]["t1"][0], bids_dict[subject][session]["t2"][0], func)
)
except IndexError:
logging.info("Subject %s, session %s, run %s is missing an anatomical", subject, session, func)
logging.info(
"If you want to combine anatomicals from another session, use the --combine_sessions flag"
)
# for each item in run_list, run the pipeline.
for t1, t2, func in run_list:
# for t1/t2, get subject and session
t1_subject = t1.entities["subject"]
t2_subject = t2.entities["subject"]
assert t1_subject == t2_subject
try:
t1_session = t1.entities["session"]
t2_session = t2.entities["session"]
assert t1_session == t2_session
except KeyError:
t1_session = None
t2_session = None
# get name of t1 without _T1w.nii.gz suffix
t1_base = get_prefix(t1.filename).split("_T1w")[0]
# construct path for anat outputs
kwargs["anat_path"] = str(
Path("sub-%s" % t1_subject) / Path("ses-%s" % t1_session) / Path("anat") / Path(t1_base)
)
# for functional, get subject and session
func_subject = func.entities["subject"]
try:
func_session = func.entities["session"]
except KeyError:
func_session = None
kwargs["subject"] = func_subject
kwargs["session"] = func_session
# get name of func without .nii.gz suffix
func_base = get_prefix(func.filename)
# construct paths for func/epi outputs
kwargs["func_path"] = str(
Path("sub-%s" % func_subject) / Path("ses-%s" % func_session) / Path("func") / Path(func_base)
)
kwargs["epi_path"] = kwargs["func_path"]
# Display files being run.
logging.info("anat_path: %s", kwargs["anat_path"])
logging.info("func_path: %s", kwargs["func_path"])
logging.info("epi_path: %s", kwargs["epi_path"])
# call function_to_run
if not dryrun:
function_to_run(output_path, t1=t1.path, t2=t2.path, func=func.path, metadata=func.get_metadata(), **kwargs)