python source code of bids

#!/usr/bin/env python
"""
Module with helper functions

Some functions are derived from dac2bids.py from Daniel Gomez 29.08.2016
https://github.com/dangom/dac2bids/blob/master/dac2bids.py

@author: Marcel Zwiers
"""

import copy
import inspect
import ast
import re
import logging
import coloredlogs
import subprocess
import pydicom
import nibabel
import tempfile
import tarfile
import zipfile
import fnmatch
try:
    from bidscoin import dicomsort
except ImportError:
    import dicomsort  # This should work if bidscoin was not pip-installed
from distutils.dir_util import copy_tree
from typing import Union, List, Tuple
from pathlib import Path
from importlib import util
from ruamel.yaml import YAML
# Module-wide ruamel.yaml instance, used by load_bidsmap() and save_bidsmap() to read and write the bidsmap files
yaml = YAML()

# Module-wide logger. Its level and handlers are configured by setup_logging()
logger = logging.getLogger('bidscoin')

# The modality names that are used as keys in bidsmap[dataformat] (see e.g. dir_bidsmap())
bidsmodalities  = ('fmap', 'anat', 'func', 'dwi', 'beh', 'pet')                                         # NB: get_matching_run() uses this order to search for a match
# Two additional modality keys that dir_bidsmap() scans after the regular bidsmodalities.
# NOTE(review): presumably 'leave_out' holds runs that are ignored and 'extra_data' holds unrecognised runs -- confirm against the callers
ignoremodality  = 'leave_out'
unknownmodality = 'extra_data'
bidslabels      = ('task', 'acq', 'ce', 'rec', 'dir', 'run', 'mod', 'echo', 'suffix', 'IntendedFor')    # This is not really something from BIDS, but these are the BIDS-labels used in the bidsmap

# The 'heuristics' folder is located one level above the folder that contains this module
heuristics_folder = Path(__file__).parents[1]/'heuristics'
# The template bidsmap that load_bidsmap() falls back on if no 'bidsmap.yaml' file is found
bidsmap_template  = heuristics_folder/'bidsmap_template.yaml'


def bidsversion() -> str:
    """
    Reads the BIDS version number from the 'bidsversion.txt' file, which is stored one folder above this module

    :return:    The (whitespace-stripped) BIDS version number
    """

    versionfile = Path(__file__).parent.parent/'bidsversion.txt'
    with versionfile.open('r') as fid:
        content = fid.read()

    return str(content.strip())


def version() -> str:
    """
    Reads the BIDSCOIN version number from the 'version.txt' file, which is stored one folder above this module

    :return:    The (whitespace-stripped) BIDSCOIN version number
    """

    versionfile = Path(__file__).parent.parent/'version.txt'
    with versionfile.open('r') as fid:
        content = fid.read()

    return str(content.strip())


def setup_logging(log_file: Path=Path(), debug: bool=False) -> logging.Logger:
    """
    Setup the logging: a colored terminal log and, if log_file is given, a logfile with all (DEBUG and
    higher) messages plus a '.errors' file with only the warnings and errors

    :param log_file:    Name of the logfile. If empty (the default) then only the terminal logging is set up
    :param debug:       Set log level to DEBUG if debug==True, otherwise to INFO
    :return:            Logger object (i.e. the module-wide 'bidscoin' logger)
     """

    # Set the format and logging level
    fmt       = '%(asctime)s - %(name)s - %(levelname)s %(message)s'
    datefmt   = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt=fmt, datefmt=datefmt)
    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Set & add the streamhandler and add some color to those boring terminal logs! :-)
    coloredlogs.install(level=logger.level, fmt=fmt, datefmt=datefmt)

    # No logfile was requested, so we are done. NB: Return the logger (and not None), as promised by the signature
    if not log_file.name:
        return logger

    # Set & add the log filehandler
    log_file.parent.mkdir(parents=True, exist_ok=True)      # Create the log dir if it does not exist
    loghandler = logging.FileHandler(log_file)
    loghandler.setLevel(logging.DEBUG)
    loghandler.setFormatter(formatter)
    loghandler.set_name('loghandler')                       # reporterrors() looks up this handler by name
    logger.addHandler(loghandler)

    # Set & add the error / warnings handler
    error_file = log_file.with_suffix('.errors')            # Derive the name of the error logfile from the normal log_file
    errorhandler = logging.FileHandler(error_file, mode='w')    # mode='w' -> the error logfile is started anew
    errorhandler.setLevel(logging.WARNING)
    errorhandler.setFormatter(formatter)
    errorhandler.set_name('errorhandler')                   # reporterrors() looks up this handler by name
    logger.addHandler(errorhandler)

    return logger


def reporterrors() -> None:
    """
    Logs a summary of the warnings and errors that were collected in the error logfile (i.e. the file of
    the 'errorhandler') and logs where the complete logfile (i.e. the file of the 'loghandler') can be found

    :return:
    """

    logfile = None
    for handler in logger.handlers:

        # Remember the location of the normal logfile, it is reported at the end
        if handler.name == 'loghandler':
            logfile = Path(handler.baseFilename)
            continue

        if handler.name != 'errorhandler':
            continue

        # Report the content of the error logfile (if any)
        errorfile = Path(handler.baseFilename)
        if not errorfile.stat().st_size:
            logger.info(f'No BIDScoin errors or warnings were reported')
            logger.info('')
        else:
            with errorfile.open('r') as fid:
                errors = fid.read()
            logger.info(f"The following BIDScoin errors and warnings were reported:\n\n{40*'>'}\n{errors}{40*'<'}\n")

    if logfile is not None:
        logger.info(f"For the complete log see: {logfile}")
        logger.info(f"NB: {logfile.parent} may contain privacy sensitive information, e.g. pathnames in logfiles and provenance data samples")


def run_command(command: str) -> bool:
    """
    Runs a command in a shell using subprocess.run(command, ..) and logs its output

    :param command: the command that is executed
    :return:        True if there were no errors (i.e. nothing was written to stderr and the returncode was 0), False otherwise
    """

    logger.info(f"Running: {command}")
    process = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)          # TODO: investigate shell=False and capture_output=True for python 3.7
    stdout  = process.stdout.decode('utf-8')
    logger.info(f"Output:\n{stdout}")

    # Anything on stderr or a non-zero returncode is treated as a failure
    stderr    = process.stderr.decode('utf-8')
    succeeded = not stderr and process.returncode == 0
    if not succeeded:
        logger.error(f"Failed to run:\n{command}\nErrorcode {process.returncode}:\n{stderr}")
        logger.debug(f"{stdout}")

    return succeeded


def import_plugin(plugin: Path) -> util.module_from_spec:
    """
    Imports the plugin as a python module and reports which of the plugin functions it does (not) contain

    :param plugin:  Name of the plugin. A bare filename (i.e. without a folder) is searched for in the 'plugins' folder next to this module
    :return:        plugin-module, or None if the plugin could not be found or imported
    """

    # Get the full path to the plugin-module
    plugin = Path(plugin)
    if len(plugin.parents) == 1:                            # I.e. plugin is a bare filename
        plugin = Path(__file__).parent/'plugins'/plugin

    # See if we can find the plug-in
    if not plugin.is_file():
        logger.error(f"Could not find plugin: '{plugin}'")
        return None

    # Load the plugin-module
    try:
        spec   = util.spec_from_file_location('bidscoin_plugin', plugin)
        module = util.module_from_spec(spec)
        spec.loader.exec_module(module)

        attributes = dir(module)
        has_mapper = 'bidsmapper_plugin' in attributes
        has_coiner = 'bidscoiner_plugin' in attributes

        # bidsmapper -> module.bidsmapper_plugin(runfolder, bidsmap_new, bidsmap_old)
        if not has_mapper:
            logger.info(f"Could not find bidsmapper_plugin() in {plugin}")

        # bidscoiner -> module.bidscoiner_plugin(session, bidsmap, bidsfolder, personals)
        if not has_coiner:
            logger.info(f"Could not find bidscoiner_plugin() in {plugin}")

        if not has_mapper and not has_coiner:
            logger.warning(f"{plugin} can (and will) not perform any operation")

        return module

    except Exception:
        # Executing arbitrary plugin code can fail in any way, so log the traceback and signal the failure with None
        logger.exception(f"Could not import '{plugin}'")

        return None


def test_tooloptions(tool: str, opts: dict) -> bool:
    """
    Tests the user tool parameters set in bidsmap['Options'] by running the tool with its help or version option

    :param tool:    Name of the tool that is being tested in bidsmap['Options']
    :param opts:    The editable options belonging to the tool
    :return:        True if the tool generated the expected result, False if there was a tool error, None if testing of the tool is not supported
    """

    scriptfolder = Path(__file__).parent

    if tool == 'dcm2niix':
        command = f"{opts['path']}dcm2niix -h"
    elif tool in ('bidsmapper', 'bidscoin', 'bidscoiner'):
        script  = 'bidsmapper.py' if tool == 'bidsmapper' else 'bidscoiner.py'
        command = f"{scriptfolder/script} -v"
    else:
        logger.warning(f"Testing of '{tool}' not supported")
        return None

    logger.info(f"Testing: '{tool}'")

    return run_command(command)


def test_plugins(plugin: Path) -> bool:
    """
    Tests a plug-in from bidsmap['PlugIns'] by importing it and logging its docstring and public attributes

    :param plugin:  The name of the plugin that is being tested (-> bidsmap['Plugins'])
    :return:        True if the plugin could be imported as a module, False otherwise
    """

    logger.info(f"Testing: '{plugin}' plugin")

    module = import_plugin(plugin)
    if not inspect.ismodule(module):
        return False

    # Report everything that is not private
    methods = [name for name in dir(module) if not name.startswith('_')]
    logger.info(f"Result:\n{module.__doc__}\n{plugin} attributes and methods:\n{methods}\n")

    return True


def lsdirs(folder: Path, wildcard: str='*') -> List[Path]:
    """
    Lists the sub-directories of a folder that match the wildcard (files are ignored)

    :param folder:      The full pathname of the folder
    :param wildcard:    Shell-style wildcard pattern that is passed to folder.glob()
    :return:            A sorted list with all matching directories in the folder
    """

    directories = []
    for item in sorted(folder.glob(wildcard)):
        if item.is_dir():
            directories.append(item)

    return directories


def is_dicomfile(file: Path) -> bool:
    """
    Checks whether a file is a DICOM-file. The first check is whether the string DICM is present at offset 0x80.
    If not, the file is parsed with pydicom and accepted if it contains a 'Modality' field

    :param file:    The full pathname of the file
    :return:        Returns true if a file is a DICOM-file
    """

    if not file.is_file():
        return False

    if file.stem.startswith('.'):
        logger.warning(f'File is hidden: {file}')

    with file.open('rb') as dcmfile:

        # The quick test: read the 4 bytes after the 128 byte preamble
        dcmfile.seek(0x80, 1)
        magic = dcmfile.read(4)
        if magic == b'DICM':
            return True

        # The slow test
        dicomdict = pydicom.dcmread(str(file), force=True)       # The DICM tag may be missing for anonymized DICOM files
        return 'Modality' in dicomdict


def is_dicomfile_siemens(file: Path) -> bool:
    """
    Checks whether a file is a *SIEMENS* DICOM-file. All Siemens Dicoms contain a dump of the
    MrProt structure. The dump is marked with a header starting with 'ASCCONV BEGIN'. Though
    this check is not foolproof, it is very unlikely to fail.

    :param file:    The full pathname of the file
    :return:        Returns true if a file is a Siemens DICOM-file
    """

    # Use a context manager, such that the file is always closed again (also if reading fails)
    with file.open('rb') as fid:
        return b'ASCCONV BEGIN' in fid.read()


def is_parfile(file: Path) -> bool:
    """
    Rudimentary check whether a file is a Philips PAR file: the file must exist and have a PAR or XML file extension

    :param file:    The full pathname of the file
    :return:        Returns true if a file is a Philips PAR-file
    """

    # TODO: Implement a proper check, e.g. using nibabel
    par_extensions = ('.PAR', '.par', '.XML', '.xml')

    return file.is_file() and file.suffix in par_extensions


def is_p7file(file: Path) -> bool:
    """
    Checks whether a file is a GE P*.7 file

    NB: This is work in progress, the check has not been implemented yet

    :param file:    The full pathname of the file
    :return:        Should return true if a file is a GE P7-file, but currently always returns None
    """

    # TODO: Returns true if filetype is P7.
    return None


def is_niftifile(file: Path) -> bool:
    """
    Rudimentary check (on file extensions and whether it exists) whether a file is a nifti file

    :param file:    The full pathname of the file
    :return:        Returns true if a file is a nifti-file, i.e. if it exists and has a '.nii', '.nii.gz', '.img' or '.hdr' extension
    """

    # TODO: Implement a proper check, e.g. using nibabel
    if not file.is_file():
        return False

    # NB: file.suffix only holds the last extension (i.e. '.gz'), hence '.nii.gz' must be tested using file.suffixes
    return file.suffix in ('.nii', '.img', '.hdr') or file.suffixes[-2:] == ['.nii', '.gz']


def unpack(sourcefolder: Path, subprefix: str='sub-', sesprefix: str='ses-', wildcard: str='*', workfolder: Path='') -> (Path, bool):
    """
    Copies the sourcefolder to a (temporary) sub/ses workfolder and unpacks and sorts the DICOM files there, but
    only if the sourcefolder contains a DICOMDIR file or .tar, .tar.?z, .tar.bz2 or .zip files

    :param sourcefolder:    The full pathname of the folder with the source data
    :param subprefix:       The optional subprefix (e.g. 'sub-'). Used to parse the subid
    :param sesprefix:       The optional sesprefix (e.g. 'ses-'). Used to parse the sesid
    :param wildcard:        A glob search pattern to select the tarballed/zipped files
    :param workfolder:      A root folder for temporary data. If empty, a new temporary folder is created
    :return:                A tuple with the full pathname of the source or workfolder and a workdir-path or False when the data is not unpacked in a temporary folder
    """

    # Search for zipped/tarballed files
    packedfiles = [packedfile for extension in ('.tar', '.tar.?z', '.tar.bz2', '.zip')
                              for packedfile in sourcefolder.glob(f"{wildcard}{extension}")]

    # Nothing to unpack or sort: just hand back the sourcefolder
    if not packedfiles and not (sourcefolder/'DICOMDIR').is_file():
        return sourcefolder, False

    # Create a (temporary) sub/ses workfolder for unpacking the data
    workfolder   = Path(workfolder) if workfolder else Path(tempfile.mkdtemp())
    subid, sesid = get_subid_sesid(sourcefolder/'dum.my', subprefix=subprefix, sesprefix=sesprefix)
    subid        = subid.replace('sub-', subprefix)
    sesid        = sesid.replace('ses-', sesprefix)
    worksubses   = workfolder/subid/sesid
    worksubses.mkdir(parents=True, exist_ok=True)

    # Copy everything over to the workfolder
    logger.info(f"Making temporary copy: {sourcefolder} -> {worksubses}")
    copy_tree(str(sourcefolder), str(worksubses))     # Older python versions don't support PathLib

    # Unpack the copies of the zip/tarballed files in the temporary folder
    for sourcefile in packedfiles:
        packedcopy = worksubses/sourcefile.name
        logger.info(f"Unpacking: {packedcopy.name} -> {worksubses}")
        extensions = packedcopy.suffixes
        if extensions[-1] == '.zip':
            with zipfile.ZipFile(packedcopy, 'r') as zip_fid:
                zip_fid.extractall(worksubses)
        elif '.tar' in extensions:
            with tarfile.open(packedcopy, 'r') as tar_fid:
                tar_fid.extractall(worksubses)

        # Sort the DICOM files immediately (to avoid name collisions)
        dicomsort.sortsessions(worksubses)

    # Sort the DICOM files if not sorted yet (e.g. DICOMDIR)
    dicomsort.sortsessions(worksubses)

    return worksubses, workfolder


def get_dicomfile(folder: Path, index: int=0) -> Path:
    """
    Gets a dicom-file from the folder. If the folder contains a DICOMDIR file, then the files that are
    referenced there are searched, otherwise the (sorted) content of the folder. Hidden files are ignored

    :param folder:  The full pathname of the folder
    :param index:   The index number of the dicom file
    :return:        The filename of the index-th dicom-file in the folder, or an empty Path() if there is no such file
    """

    dicomdirfile = folder/'DICOMDIR'
    if dicomdirfile.is_file():
        # Walk the patient -> study -> series -> image records of the DICOMDIR
        dicomdir = pydicom.filereader.read_dicomdir(str(dicomdirfile))
        files    = []
        for patient in dicomdir.patient_records:
            for study in patient.children:
                for series in study.children:
                    for image in series.children:
                        files.append(folder.joinpath(*image.ReferencedFileID))
    else:
        files = sorted(folder.iterdir())

    nrfound = 0                                     # The number of DICOM files that have been passed over so far
    for file in files:
        if file.stem.startswith('.'):
            logger.warning(f'Ignoring hidden file: {file}')
            continue
        if not is_dicomfile(file):
            continue
        if nrfound == index:
            return file
        nrfound += 1

    return Path()


def get_parfiles(folder: Path) -> List[Path]:
    """
    Gets the Philips PAR-files from the folder

    :param folder:  The full pathname of the folder
    :return:        The (sorted) filenames of all the PAR-files in the folder
    """

    return [file for file in sorted(folder.iterdir()) if is_parfile(file)]


def get_p7file(folder: Path) -> Path:
    """
    Gets a GE P*.7-file from the folder

    NB: The search has not been implemented yet

    :param folder:  The full pathname of the folder
    :return:        Should return the filename of the first P7-file in the folder, but currently always returns an empty Path()
    """

    nofile = Path()

    return nofile


def get_niftifile(folder: Path) -> Path:
    """
    Gets a nifti-file from the folder

    NB: The search has not been implemented yet

    :param folder:  The full pathname of the folder
    :return:        Should return the filename of the first nifti-file in the folder, but currently always returns an empty Path()
    """

    nofile = Path()

    return nofile


def load_bidsmap(yamlfile: Path, folder: Path=Path(), report: bool=True) -> Tuple[dict, Path]:
    """
    Read the mapping heuristics from the bidsmap yaml-file. If yamlfile is not a fullpath, then 'folder' is searched first, before
    the default 'heuristics' folder. If yamlfile is empty, then 'bidsmap.yaml' is searched for first, then 'bidsmap_template.yaml'.
    So a fullpath has precedence over folder and bidsmap.yaml has precedence over bidsmap_template.yaml

    :param yamlfile:    The full pathname or basename of the bidsmap yaml-file. If empty, 'bidsmap.yaml' in folder or else the default bidsmap_template.yaml file in the heuristics folder is used
    :param folder:      Only used when yamlfile=basename or empty: yamlfile is then first searched for in folder and then falls back to the ./heuristics folder (useful for centrally managed template yaml-files)
    :param report:      Report log.info when reading a file
    :return:            Tuple with (1) ruamel.yaml dict structure, with all options, BIDS mapping heuristics, labels and attributes, etc (or an empty dict if the file was not found) and (2) the fullpath yaml-file
    """

    # Input checking
    folder = folder if folder.name else heuristics_folder
    if not yamlfile.name:
        candidate = folder/'bidsmap.yaml'
        yamlfile  = candidate if candidate.is_file() else bidsmap_template

    # Add a standard file-extension if needed
    if not yamlfile.suffix:
        yamlfile = yamlfile.with_suffix('.yaml')

    # Get the full path to the bidsmap yaml-file (i.e. if yamlfile is a bare filename)
    if len(yamlfile.parents) == 1:
        yamlfile = folder/yamlfile if (folder/yamlfile).is_file() else heuristics_folder/yamlfile

    # Return an empty bidsmap if the file does not exist
    if not yamlfile.is_file():
        if report:
            logger.info(f"No existing bidsmap file found: {yamlfile}")
        return dict(), yamlfile
    if report:
        logger.info(f"Reading: {yamlfile}")

    # Read the heuristics from the bidsmap file
    with yamlfile.open('r') as stream:
        bidsmap = yaml.load(stream)

    # Issue a warning if the version in the bidsmap YAML-file is not the same as the bidscoin version
    options = bidsmap['Options']
    if 'bidscoin' in options and 'version' in options['bidscoin']:
        bidsmapversion = options['bidscoin']['version']
    elif 'version' in options:
        bidsmapversion = options['version']
    else:
        bidsmapversion = 'Unknown'

    bidscoinversion = version()
    if report and bidsmapversion != bidscoinversion:
        logger.warning(f'BIDScoiner version conflict: {yamlfile} was created using version {bidsmapversion}, but this is version {bidscoinversion}')

    # Make sure we get a proper list of plugins (i.e. without empty items)
    bidsmap['PlugIns'] = [plugin for plugin in (bidsmap['PlugIns'] or []) if plugin]

    return bidsmap, yamlfile


def save_bidsmap(filename: Path, bidsmap: dict) -> None:
    """
    Save the BIDSmap as a YAML text file and verify the result by reloading it

    :param filename:        Full pathname of the bidsmap yaml-file that is written. The parent folder is created if needed
    :param bidsmap:         Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :return:
    """

    logger.info(f"Writing bidsmap to: {filename}")
    filename.parent.mkdir(parents=True, exist_ok=True)
    with filename.open('w') as stream:
        yaml.dump(bidsmap, stream)

    # See if we can reload it, i.e. whether it is valid yaml...
    # NB: Catch Exception (and not everything), such that a KeyboardInterrupt or SystemExit is never swallowed
    try:
        load_bidsmap(filename, report=False)
    except Exception:
        # Just trying again seems to help? :-)
        with filename.open('w') as stream:
            yaml.dump(bidsmap, stream)
        try:
            load_bidsmap(filename, report=False)
        except Exception:
            logger.error(f'The saved output bidsmap does not seem to be valid YAML, please check {filename}, e.g. by way of an online yaml validator, such as https://yamlchecker.com/')


def parse_x_protocol(pattern: str, dicomfile: Path) -> str:
    """
    Siemens writes a protocol structure as text into each DICOM file.
    This structure is necessary to recreate a scanning protocol from a DICOM,
    since the DICOM information alone wouldn't be sufficient.

    :param pattern:     A regexp expression that is expanded to: '^' + pattern + '\\t = \\t(.*)\\n'
    :param dicomfile:   The full pathname of the dicom-file
    :return:            The string extracted value from the first line in the dicom-file that matches the expanded pattern, or None if there is no such line
    """

    if not is_dicomfile_siemens(dicomfile):
        logger.warning(f"Parsing {pattern} may fail because {dicomfile} does not seem to be a Siemens DICOM file")

    regexp = '^' + pattern + '\t = \t(.*)\n'
    regex  = re.compile(regexp.encode('utf-8'))

    # Scan the (binary) file line by line and stop at the first hit
    with dicomfile.open('rb') as openfile:
        matches = (regex.match(line) for line in openfile)
        found   = next((match for match in matches if match), None)
        if found:
            return found.group(1).decode('utf-8')

    logger.warning(f"Pattern: '{regexp.encode('unicode_escape').decode()}' not found in: {dicomfile}")
    return None


# Profiling shows this is currently the most expensive function, so therefore the (primitive but effective) _DICOMDICT_CACHE optimization
_DICOMDICT_CACHE = None         # The pydicom dataset of the DICOM file that was read last
_DICOMFILE_CACHE = None         # The pathname of the DICOM file that was read last
def get_dicomfield(tagname: str, dicomfile: Path) -> Union[str, int]:
    """
    Robustly extracts a DICOM field/tag from a dictionary or from vendor specific fields. The field is first looked
    up at the top-level of the dataset. If that gives no (non-empty) value, all (nested) elements are searched for
    an element with that name. If reading the dataset fails, the field is parsed from the Siemens protocol text

    :param tagname:     Name of the DICOM field
    :param dicomfile:   The full pathname of the dicom-file
    :return:            Extracted tag-values from the dicom-file (int values are returned as int, anything else as str). An
                        empty string is returned if dicomfile is empty or if the field could not be read
    """

    global _DICOMDICT_CACHE, _DICOMFILE_CACHE

    if not dicomfile.name:
        return ''

    value = None

    if not dicomfile.is_file():
        logger.warning(f"{dicomfile} not found")

    elif not is_dicomfile(dicomfile):
        logger.warning(f"{dicomfile} is not a DICOM file, cannot read {tagname}")

    else:
        try:
            # Only read the file if it is not the same file as in the previous call
            if dicomfile != _DICOMFILE_CACHE:
                dicomdict = pydicom.dcmread(str(dicomfile), force=True)      # The DICM tag may be missing for anonymized DICOM files
                if 'Modality' not in dicomdict:
                    raise ValueError(f'Cannot read {dicomfile}')
                _DICOMDICT_CACHE = dicomdict
                _DICOMFILE_CACHE = dicomfile
            else:
                dicomdict = _DICOMDICT_CACHE

            value = dicomdict.get(tagname)

            # Try a recursive search
            if not value:
                for elem in dicomdict.iterall():
                    if elem.name == tagname:
                        value = elem.value
                        break                       # Use the first match, there is no need to iterate over the remaining elements

        except IOError:
            logger.warning(f'Cannot read {tagname} from {dicomfile}')
            value = None

        except Exception:
            # Fall back on the vendor specific (Siemens) protocol text
            try:
                value = parse_x_protocol(tagname, dicomfile)

            except Exception:
                logger.warning(f'Could not parse {tagname} from {dicomfile}')
                value = None

    # Cast the dicom datatype to int or str (i.e. to something that yaml.dump can handle)
    if value is None:
        return ''

    if isinstance(value, int):
        return int(value)

    return str(value)                               # NB: This also flattens non-str types, such as MultiValue


# Profiling shows this is currently the most expensive function, so therefore the (primitive but effective) _PARDICT_CACHE optimization
_PARDICT_CACHE = None           # The parsed header of the PAR file that was read last
_PARFILE_CACHE = None           # The pathname of the PAR file that was read last
def get_parfield(tagname: str, parfile: Path) -> Union[str, int]:
    """
    Extracts the value from a PAR/XML field

    :param tagname: Name of the PAR/XML field
    :param parfile: The full pathname of the PAR/XML file
    :return:        Extracted tag-values from the PAR/XML file (int values are returned as int, anything else as str). An
                    empty string is returned if parfile is empty or if the field could not be read
    """

    global _PARDICT_CACHE, _PARFILE_CACHE

    if not parfile.name:
        return ''

    value = None

    if not parfile.is_file():
        logger.warning(f"{parfile} not found")

    elif not is_parfile(parfile):
        logger.warning(f"{parfile} is not a PAR/XML file, cannot read {tagname}")

    else:
        try:
            # Only read the file if it is not the same file as in the previous call
            if parfile != _PARFILE_CACHE:
                with parfile.open('r') as fid:      # Make sure the file is closed again after parsing the header
                    pardict = nibabel.parrec.parse_PAR_header(fid)
                if 'series_type' not in pardict[0]:
                    raise ValueError(f'Cannot read {parfile}')
                _PARDICT_CACHE = pardict
                _PARFILE_CACHE = parfile
            else:
                pardict = _PARDICT_CACHE
            value = pardict[0].get(tagname)

        except IOError:
            logger.warning(f'Cannot read {tagname} from {parfile}')
            value = None

        except Exception:
            logger.warning(f'Could not parse {tagname} from {parfile}')
            value = None

    # Cast the datatype to int or str (i.e. to something that yaml.dump can handle)
    if value is None:
        return ''

    if isinstance(value, int):
        return int(value)

    return str(value)                               # NB: This also flattens non-str types


def get_dataformat(source: Path) -> str:
    """
    Determines the dataformat of a session directory or of a source file

    TODO: replace sourcefile with a class as soon as Pathlib supports subclassing

    :param source:  The full pathname of a (e.g. DICOM or PAR/XML) session directory or of a source file
    :return:        'DICOM' if source is (or contains) a DICOM-file, 'PAR' when it is (or contains) a PAR/XML file, or '' if the dataformat cannot be determined
    """

    # If source is a session directory, see what kind of sourcefiles it contains
    if source.is_dir():

        # Try to see if we can find DICOM files in one of the sub-directories
        if any(get_dicomfile(sourcedir).name for sourcedir in lsdirs(source)):
            return 'DICOM'

        # Try to see if we can find PAR/XML files in the directory itself
        if get_parfiles(source):
            return 'PAR'

    # If we don't know the dataformat, just try
    if is_dicomfile(source):
        return 'DICOM'

    if is_parfile(source):
        return 'PAR'

    logger.warning(f"Cannot determine the dataformat of: {source}")
    return ''


def get_sourcefield(tagname: str, sourcefile: Path=Path(), dataformat: str='') -> Union[str, int]:
    """
    Wrapper around get_dicomfield and get_parfield that dispatches on the dataformat

    :param tagname:     Name of the field in the sourcefile
    :param sourcefile:  The full pathname of the (e.g. DICOM or PAR/XML) sourcefile
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'. If empty, it is determined from the sourcefile
    :return:            The value of the field, or None if the dataformat is neither 'DICOM' nor 'PAR'
    """

    dataformat = dataformat or get_dataformat(sourcefile)

    if dataformat == 'DICOM':
        return get_dicomfield(tagname, sourcefile)
    elif dataformat == 'PAR':
        return get_parfield(tagname, sourcefile)

    return None


def add_prefix(prefix: str, tag: str) -> str:
    """
    Puts the prefix in front of the tag, but only if the tag is not empty. This accounts for the optional BIDS tags in the bids file names

    :param prefix:  The prefix (e.g. '_sub-')
    :param tag:     The tag (e.g. 'control01')
    :return:        The tag with the leading prefix (e.g. '_sub-control01') or just the empty tag ''
    """

    return prefix + str(tag) if tag else ''


def strip_suffix(run: dict) -> dict:
    """
    Certain attributes such as SeriesDescriptions (but not ProtocolName!?) may get a suffix like '_SBRef' from the vendor,
    try to strip it off from the BIDS labels. The comparison is case-insensitive and the run is modified in place

    :param run: The run with potentially added suffixes that are the same as the BIDS suffixes
    :return:    The run with these suffixes removed
    """

    bids = run['bids']

    # Nothing to do if we don't have a suffix for this modality
    if 'suffix' not in bids or not bids['suffix']:
        return run
    suffix = bids['suffix'].lower()

    # Remove the suffix from all the other BIDS labels that end with it
    for key, value in bids.items():
        if key != 'suffix' and isinstance(value, str) and value.lower().endswith(suffix):
            bids[key] = value[:-len(suffix)]        # NB: This will leave the added '_' and '.' characters, but they will be taken out later (as they are not BIDS-valid)

    return run


def cleanup_value(label: str) -> str:
    """
    Converts a given label to a cleaned-up label that can be used as a BIDS label, i.e. all spaces, special BIDS
    characters ('_', '-' and '.') and anything else that is not an alphanumeric are removed. This will for
    example map "Joe's reward_task" to "Joesrewardtask"

    :param label:   The given label that potentially contains undesired characters
    :return:        The cleaned-up / BIDS-valid label (or None if the label is None)
    """

    if label is None:
        return label

    # First take out the special characters in one go...
    remove_specials = str.maketrans('', '', ' _-.')
    label           = label.strip().translate(remove_specials)

    # ...and then everything else that is not wanted
    return re.sub(r'(?u)[^-\w.]', '', label)


def dir_bidsmap(bidsmap: dict, dataformat: str) -> List[Path]:
    """
    Make a provenance list of all the runs in the bidsmap[dataformat]. Runs without provenance data are skipped (with a warning)

    :param bidsmap:     The bidsmap, with all the runs in it
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'
    :return:            Sorted list of all provenances
    """

    provenance = []
    for modality in bidsmodalities + (unknownmodality, ignoremodality):

        # Skip the modalities that are missing or that have no runs
        section = bidsmap[dataformat]
        if modality not in section or not section[modality]:
            continue

        for run in section[modality]:
            if run['provenance']:
                provenance.append(Path(run['provenance']))
            else:
                logger.warning(f'The bidsmap run {modality} run does not contain provenance data')

    return sorted(provenance)


def get_run(bidsmap: dict, dataformat: str, modality: str, suffix_idx: Union[int, str], sourcefile: Path='') -> dict:
    """
    Find the (first) run in bidsmap[dataformat][bidsmodality] with run['bids']['suffix_idx'] == suffix_idx

    :param bidsmap:     This could be a template bidsmap, with all options, BIDS labels and attributes, etc
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'. If empty, it is determined from the sourcefile
    :param modality:    The modality in which a matching run is searched for (e.g. 'anat')
    :param suffix_idx:  The name of the suffix that is searched for (e.g. 'bold') or the modality index number
    :param sourcefile:  The name of the sourcefile. If given, the bidsmap values are read from file and the
                        (resolved) name of the sourcefile is stored as provenance
    :return:            The clean (filled) run item in the bidsmap[dataformat][bidsmodality] with the matching suffix_idx, otherwise None
    """

    # NB: The default sourcefile is an empty string, so convert it to a Path before using .name or .resolve()
    sourcefile = Path(sourcefile) if sourcefile else Path()
    fromfile   = bool(sourcefile.name)

    if not dataformat:
        dataformat = get_dataformat(sourcefile)

    for index, run in enumerate(bidsmap[dataformat][modality]):
        if index == suffix_idx or run['bids']['suffix'] == suffix_idx:

            # Copy the values to a clean dict, i.e. the run in the bidsmap itself is left untouched
            run_ = dict(provenance={}, attributes={}, bids={})

            if fromfile:
                run_['provenance'] = str(sourcefile.resolve())

            for attrkey, attrvalue in run['attributes'].items():
                if fromfile:
                    run_['attributes'][attrkey] = get_sourcefield(attrkey, sourcefile, dataformat)
                else:
                    run_['attributes'][attrkey] = attrvalue

            for bidskey, bidsvalue in run['bids'].items():
                if fromfile:
                    run_['bids'][bidskey] = get_dynamic_value(bidsvalue, sourcefile)
                else:
                    run_['bids'][bidskey] = bidsvalue

            return run_

    logger.error(f"'{modality}' run with suffix_idx '{suffix_idx}' not found in bidsmap['{dataformat}']")

    return None


def delete_run(bidsmap: dict, dataformat: str, modality: str, provenance: Path) -> dict:
    """
    Delete the run(s) with the given provenance from the BIDS map

    :param bidsmap:     Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'. If empty then it is determined from the provenance
    :param modality:    The modality that is used, e.g. 'anat'
    :param provenance:  The unique provenance that is used to identify the run
    :return:            The new bidsmap (the input bidsmap is modified in place)
    """

    if not dataformat:
        dataformat = get_dataformat(provenance)

    # An empty modality section is stored as None in the bidsmap
    runs = bidsmap[dataformat][modality]
    if not runs:
        return bidsmap

    # Walk backwards so that deleting an item does not shift the items that still have to be checked
    target = str(provenance)
    for index in reversed(range(len(runs))):
        if runs[index]['provenance'] == target:
            del runs[index]

    return bidsmap


def append_run(bidsmap: dict, dataformat: str, modality: str, run: dict, clean: bool=True) -> dict:
    """
    Add a run at the end of the bidsmap[dataformat][modality] section

    :param bidsmap:     Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'. If empty then it is determined from the provenance
    :param modality:    The modality that is used, e.g. 'anat'
    :param run:         The run (listitem) that is appended to the modality
    :param clean:       If True, a plain-dict copy of the run is appended (with only the provenance, attributes and
                        bids items) instead of the run object itself
    :return:            The new bidsmap (the input bidsmap is modified in place)
    """

    dataformat = dataformat or get_dataformat(run['provenance'])

    # Rebuild the run from scratch to get rid of anything else the (commentedMap) run object carries
    if clean:
        run = {'provenance': run['provenance'],
               'attributes': {attrkey: attrvalue for attrkey, attrvalue in run['attributes'].items()},
               'bids':       {bidskey: bidsvalue for bidskey, bidsvalue in run['bids'].items()}}

    # An empty modality section is stored as None, so start a new list in that case
    runs = bidsmap[dataformat][modality]
    if runs is not None:
        runs.append(run)
    else:
        bidsmap[dataformat][modality] = [run]

    return bidsmap


def update_bidsmap(bidsmap: dict, source_modality: str, provenance: Path, target_modality: str, run: dict, dataformat: str, clean: bool=True) -> dict:
    """
    Put an (edited) run back into the BIDS map

    If the modality stays the same:
    1. The provenance is used to look-up the existing run in that modality
    2. That run is replaced with the new run

    If the modality changes:
    1. The source run is removed from the source modality section
    2. The (cleaned) target run is appended to the target modality section

    :param bidsmap:             Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :param source_modality:     The current modality name, e.g. 'anat'
    :param provenance:          The unique provenance that is used to identify the run
    :param target_modality:     The modality name what it should be, e.g. 'dwi'
    :param run:                 The run item that is being moved
    :param dataformat:          The name of the dataformat, e.g. 'DICOM'. If empty then it is determined from the run provenance
    :param clean:               A boolean that is passed to bids.append_run (telling it to clean-up commentedMap fields)
    :return:                    The updated bidsmap
    """

    dataformat = dataformat or get_dataformat(run['provenance'])

    # The total number of runs should not change, so remember it for the sanity check below
    runs_before = len(dir_bidsmap(bidsmap, dataformat))

    if source_modality == target_modality:

        # Same modality: overwrite the (first) run that has this provenance
        modality_runs = bidsmap[dataformat][target_modality]
        wanted        = str(provenance)
        for position, existing in enumerate(modality_runs):
            if existing['provenance'] == wanted:
                modality_runs[position] = run
                break

    else:

        # Other modality: warn the user if the target run already exists there
        if exist_run(bidsmap, dataformat, target_modality, run):
            logger.warning(f'That run from {source_modality} already exists in {target_modality}...')

        # Move the run, i.e. delete the source run and append the (cleaned-up) target run
        bidsmap = delete_run(bidsmap, dataformat, source_modality, provenance)
        bidsmap = append_run(bidsmap, dataformat, target_modality, run, clean)

    runs_after = len(dir_bidsmap(bidsmap, dataformat))
    if runs_after != runs_before:
        logger.error(f"Number of runs in bidsmap['{dataformat}'] changed unexpectedly: {runs_before} -> {runs_after}")

    return bidsmap


def match_attribute(longvalue, values) -> bool:
    """
    Compare the value items with / without *wildcard* with the longvalue string. If both longvalue
    and values are a list then they are directly compared as is

    Examples:
        match_attribute('my_pulse_sequence_name', 'name') -> False
        match_attribute('my_pulse_sequence_name', '*name*') -> True
        match_attribute('T1_MPRAGE', "['T1w', 'MPRAGE']") -> False
        match_attribute('T1_MPRAGE', "['T1w', 'T1_MPRAGE']") -> True
        match_attribute('T1_MPRAGE', "['*T1w*', '*MPRAGE*']") -> True

    :param longvalue:   The long string that is being searched in
    :param values:      Either a list with search items or a string that is matched one-to-one. A list can
                        also be passed as its string representation (e.g. "['a', 'b']")
    :return:            True if a match is found or both longvalue and values are identical or
                        empty / None. False otherwise
    """

    # Consider it a match if both longvalue and values are identical or empty / None
    if longvalue==values or (not longvalue and not values):
        return True

    # Only one of the two is empty -> no match
    if not longvalue or not values:
        return False

    # Make sure we start with string types
    longvalue = str(longvalue)
    values    = str(values)

    # Interpret attribute lists as lists
    def cast2list(string: str):
        if string.startswith('[') and string.endswith(']'):
            try:
                string = ast.literal_eval(string)
                if not isinstance(string, list):
                    logger.error(f"Attribute value '{string}' is not a list")
            # These are the exceptions that literal_eval raises on malformed input. Do not use a bare
            # except here, as that also swallows e.g. KeyboardInterrupt and SystemExit
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                logger.error(f"Could not interpret attribute value '{string}'")
        return string

    longvalue = cast2list(longvalue)
    values    = cast2list(values)

    # Account for lists in the template (to combine similar mappings)
    if not isinstance(values, list):
        values = [values]

    # If they are both lists, compare them as they are
    elif isinstance(longvalue, list):
        return str(longvalue)==str(values)

    # Compare the value items (with / without wildcard) with the longvalue string items
    if not isinstance(longvalue, list):
        longvalue = [longvalue]
    for value in values:
        if any(fnmatch.fnmatch(str(item), str(value)) for item in longvalue):
            return True

    return False


def exist_run(bidsmap: dict, dataformat: str, modality: str, run_item: dict, matchbidslabels: bool=False) -> bool:
    """
    Checks if there is already an entry in runlist with the same attributes and, optionally, bids values as in the input run

    :param bidsmap:         Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :param dataformat:      The information source in the bidsmap that is used, e.g. 'DICOM'. If empty then it is determined from the run_item provenance
    :param modality:        The modality in the source that is used, e.g. 'anat'. Empty values will search through all modalities
    :param run_item:        The run (listitem) that is searched for in the modality
    :param matchbidslabels: If True, also matches the BIDS-labels, otherwise only run['attributes']
    :return:                True if the run exists in runlist
    """

    if not dataformat:
        dataformat = get_dataformat(run_item['provenance'])

    # Search through all modalities. The result of that search is final, so return it here instead of
    # falling through and scanning the last modality once more
    if not modality:
        return any(exist_run(bidsmap, dataformat, modality_, run_item, matchbidslabels)
                   for modality_ in bidsmodalities + (unknownmodality, ignoremodality))

    if not bidsmap[dataformat] or not bidsmap[dataformat][modality]:
        return False

    # A run_item of which all attributes are empty never matches. This does not depend on the run it is compared with
    has_attributes = any(itemvalue is not None for itemvalue in run_item['attributes'].values())

    for run in bidsmap[dataformat][modality]:

        # Search for a case where all run_item attributes match with the run attributes (stop at the first mismatch).
        # Attributes which exist in the run_item but not in the run are compared with None
        match = has_attributes and all(match_attribute(itemvalue, run['attributes'].get(itemkey, None))
                                       for itemkey, itemvalue in run_item['attributes'].items())

        # See if the bidslabels also all match. This is probably not very useful, but maybe one day...
        if matchbidslabels and match:
            match = all(run['bids'].get(itemkey, None)==itemvalue for itemkey, itemvalue in run_item['bids'].items())

        # Stop searching if we found a matching run. TODO: maybe count how many instances, could perhaps be useful info
        if match:
            return True

    return False


def get_matching_run(sourcefile: Path, bidsmap: dict, dataformat: str, modalities: tuple = (ignoremodality,) + bidsmodalities + (unknownmodality,)) -> Tuple[dict, str, int]:
    """
    Search the bidsmap, modality by modality, for the first run of which all (non-empty) attributes match with
    the sourcefile. The returned run is a new dict that is filled with the attribute values and the (dynamic)
    bids values from the sourcefile

    :param sourcefile:  The full pathname of the source dicom-file or PAR/XML file
    :param bidsmap:     Full bidsmap data structure, with all options, BIDS labels and attributes, etc
    :param dataformat:  The information source in the bidsmap that is used, e.g. 'DICOM'. If empty then it is determined from the sourcefile
    :param modalities:  The modalities in which a matching run is searched for (in this order). Default = (ignoremodality,) + bidsmodalities + (unknownmodality,)
    :return:            (run, modality, index) The matching and filled-in run item, modality and list index as in run = bidsmap[dataformat][modality][index]
                        If there is no match then modality is the last item of modalities and index = None, the run
                        is then the filled-in version of the last run that was tested
    """

    dataformat = dataformat or get_dataformat(sourcefile)

    # This empty run is only returned if there are no runs at all in the searched modalities
    filled = dict(provenance={}, attributes={}, bids={})

    for modality in modalities:

        runs = bidsmap[dataformat][modality]
        if runs is None:
            continue

        for index, run in enumerate(runs):

            # Collect everything in a fresh plain dict (i.e. not in a CommentedMap)
            filled = {'provenance': {}, 'attributes': {}, 'bids': {}}

            # A run without any non-empty attribute can never be a match
            is_match = any(attrvalue is not None for attrvalue in run['attributes'].values())

            for attrkey, attrvalue in run['attributes'].items():

                # Always store the value from the sourcefile, also for attributes that are empty in the bidsmap
                sourcevalue = get_sourcefield(attrkey, sourcefile, dataformat)
                filled['attributes'][attrkey] = sourcevalue

                # Only the non-empty bidsmap attributes are tested (and only as long as there is no mismatch yet)
                if attrvalue and is_match:
                    is_match = match_attribute(sourcevalue, attrvalue)

            for bidskey, bidsvalue in run['bids'].items():

                # Fill-in the dynamic bids values
                filled['bids'][bidskey] = get_dynamic_value(bidsvalue, sourcefile)

                # SeriesDescriptions (and ProtocolName?) may get a suffix like '_SBRef' from the vendor, try to strip it off
                filled = strip_suffix(filled)

            # The first match wins. TODO: check if there are more matches (i.e. conflicts)
            if is_match:
                filled['provenance'] = str(sourcefile.resolve())
                return filled, modality, index

    # Nothing matched, so modality is now the *last* item of modalities (i.e. unknownmodality by default)
    logger.debug(f"Could not find a matching run in the bidsmap for {sourcefile} -> {modality}")
    filled['provenance'] = str(sourcefile.resolve())

    return filled, modality, None


def get_subid_sesid(sourcefile: Path, subid: str= '<<SourceFilePath>>', sesid: str= '<<SourceFilePath>>', subprefix: str= 'sub-', sesprefix: str= 'ses-') -> Tuple[str, str]:
    """
    Extract the cleaned-up subid and sesid from the pathname if subid/sesid == '<<SourceFilePath>>', or from the dicom header

    :param sourcefile: The full pathname of the file. If it is a DICOM file, the sub/ses values are read from the DICOM field
    :param subid:      The subject identifier, i.e. name of the subject folder (e.g. 'sub-001' or just '001') or DICOM field. Can be left empty
    :param sesid:      The optional session identifier, i.e. name of the session folder (e.g. 'ses-01' or just '01') or DICOM field
    :param subprefix:  The optional subprefix (e.g. 'sub-'). Used to parse the sub-value from the provenance as default subid
    :param sesprefix:  The optional sesprefix (e.g. 'ses-'). If it is found in the provenance then a default sesid will be set
    :return:           Updated (subid, sesid) tuple, including the BIDS-compliant sub-/ses-prefix. If the sub/ses-id
                       cannot be parsed from the pathname then a warning is logged and ('', '') is returned
    """

    # Input checking
    if subprefix not in str(sourcefile):
        logger.warning(f"Could not parse sub/ses-id information from '{sourcefile}': no '{subprefix}' label in its path")
        return '', ''

    folders = sourcefile.parent.parts

    # Add default value for subid and sesid (e.g. for the bidseditor)
    if subid=='<<SourceFilePath>>':
        subfolders = [part for part in folders if part.startswith(subprefix)]
        if not subfolders:
            # The subprefix is somewhere in the path (e.g. in the filename or halfway a foldername) but no folder starts with it
            logger.warning(f"Could not parse sub/ses-id information from '{sourcefile}': no folder that starts with '{subprefix}' in its path")
            return '', ''
        subid = subfolders[-1]
    else:
        subid = get_dynamic_value(subid, sourcefile)
    if sesid=='<<SourceFilePath>>':
        sesfolders = [part for part in folders if part.startswith(sesprefix)]
        sesid      = sesfolders[-1] if sesfolders else ''
    else:
        sesid = get_dynamic_value(sesid, sourcefile)

    # Add sub- and ses- prefixes if they are not there. The prefixes are literal text (as in startswith above),
    # so escape them to avoid that characters such as '.' or '(' are interpreted as regular expression syntax
    subid = 'sub-' + cleanup_value(re.sub(f'^{re.escape(subprefix)}', '', subid))
    if sesid:
        sesid = 'ses-' + cleanup_value(re.sub(f'^{re.escape(sesprefix)}', '', sesid))

    return subid, sesid


def get_bidsname(subid: str, sesid: str, modality: str, run: dict, runindex: str= '', subprefix: str= 'sub-', sesprefix: str= 'ses-') -> str:
    """
    Builds the BIDS filename for a run by concatenating, in the order that is prescribed for the modality,
    the sub/ses-ids, the key-value labels from run['bids'] and the suffix

    :param subid:       The subject identifier, i.e. name of the subject folder (e.g. 'sub-001' or just '001'). Can be left empty
    :param sesid:       The optional session identifier, i.e. name of the session folder (e.g. 'ses-01' or just '01'). Can be left empty
    :param modality:    The bidsmodality (choose from bids.bidsmodalities, bids.unknownmodality or bids.ignoremodality)
    :param run:         The run mapping with the BIDS labels (the run itself is not modified)
    :param runindex:    The optional runindex label (e.g. 'run-01'). If left '' then run['bids']['run'] is used
    :param subprefix:   The optional subprefix (e.g. 'sub-'), passed on to get_subid_sesid
    :param sesprefix:   The optional sesprefix (e.g. 'ses-'), passed on to get_subid_sesid
    :return:            The composed BIDS file-name (without file-extension)
    """
    assert modality in bidsmodalities + (unknownmodality, ignoremodality)

    # Try to update the sub/ses-ids
    subid, sesid = get_subid_sesid(Path(run['provenance']), subid, sesid, subprefix, sesprefix)

    # Work on a copy, such that the labels of the input run are left untouched
    run  = copy.deepcopy(run)
    bids = run['bids']

    # Make sure that every known label is present (to allow for dragging the run entries between the different
    # modality-sections): existing labels are filled-in and cleaned-up, missing labels are set to None
    for bidslabel in bidslabels:
        bids[bidslabel] = cleanup_value(get_dynamic_value(bids[bidslabel], Path(run['provenance']))) if bidslabel in bids else None

    # Fall back on the cleaned-up runindex
    runindex = runindex or bids['run']

    def _ses() -> str:
        """The session part of the name. NB: add_prefix is presumably returning '' for an empty sesid"""
        return add_prefix('_', sesid)

    def _opt(key: str) -> str:
        """The optional '_<key>-<value>' part of the name (for 'run' the runindex is used as value)"""
        return add_prefix(f'_{key}-', runindex if key=='run' else bids[key])

    # Compose the BIDS filename for the modality
    if modality == 'anat':

        # bidsname: sub-<participant_label>[_ses-<session_label>][_acq-<label>][_ce-<label>][_rec-<label>][_run-<index>][_mod-<label>]_suffix
        bidsname = f"{subid}{_ses()}{_opt('acq')}{_opt('ce')}{_opt('rec')}{_opt('run')}{_opt('mod')}_{bids['suffix']}"

    elif modality == 'func':

        # bidsname: sub-<label>[_ses-<label>]_task-<label>[_acq-<label>][_ce-<label>][_dir-<label>][_rec-<label>][_run-<index>][_echo-<index>]_<contrast_label>.nii[.gz]
        bidsname = f"{subid}{_ses()}_task-{bids['task']}{_opt('acq')}{_opt('ce')}{_opt('dir')}{_opt('rec')}{_opt('run')}{_opt('echo')}_{bids['suffix']}"

    elif modality == 'dwi':

        # bidsname: sub-<label>[_ses-<label>][_acq-<label>][_dir-<label>][_run-<index>]_dwi.nii[.gz]
        bidsname = f"{subid}{_ses()}{_opt('acq')}{_opt('dir')}{_opt('run')}_{bids['suffix']}"

    elif modality == 'fmap':

        # TODO: add more fieldmap logic?

        # bidsname: sub-<label>[_ses-<label>][_acq-<label>][_ce-<label>]_dir-<label>[_run-<index>]_epi.nii[.gz]
        bidsname = f"{subid}{_ses()}{_opt('acq')}{_opt('ce')}{_opt('dir')}{_opt('run')}_{bids['suffix']}"

    elif modality == 'beh':

        # bidsname: sub-<participant_label>[_ses-<session_label>]_task-<task_name>_suffix
        bidsname = f"{subid}{_ses()}_task-{bids['task']}_{bids['suffix']}"

    elif modality == 'pet':

        # bidsname: sub-<participant_label>[_ses-<session_label>]_task-<task_label>[_acq-<label>][_rec-<label>][_run-<index>]_suffix
        bidsname = f"{subid}{_ses()}_task-{bids['task']}{_opt('acq')}{_opt('rec')}{_opt('run')}_{bids['suffix']}"

    elif modality in (unknownmodality, ignoremodality):

        # bidsname: sub-<participant_label>[_ses-<session_label>]_acq-<label>[..][_suffix]
        bidsname = (f"{subid}{_ses()}{_opt('task')}_acq-{bids['acq']}{_opt('ce')}{_opt('rec')}{_opt('dir')}{_opt('run')}"
                    f"{_opt('echo')}{_opt('mod')}{add_prefix('_', bids['suffix'])}")

    else:
        raise ValueError(f'Critical error: modality "{modality}" not implemented, please inform the developers about this error')

    return bidsname


def get_dynamic_value(bidsvalue: str, sourcefile: Path) -> str:
    """
    Fills-in a dynamic bidsvalue, i.e. a value of the form '<attribute>' is replaced with the cleaned-up value
    of that attribute in the sourcefile. Values of the form '<<attribute>>' are left as they are

    :param bidsvalue:   The value from the BIDS key-value pair
    :param sourcefile:  The source (e.g. DICOM or PAR/XML) file from which the attribute is read
    :return:            Updated bidsvalue (if possible, otherwise the original bidsvalue is returned)
    """

    # Empty values and non-string values are never dynamic
    if not (bidsvalue and isinstance(bidsvalue, str)):
        return bidsvalue

    # Intelligent filling of the <<value>> is done runtime by bidscoiner
    if bidsvalue.startswith('<<') and bidsvalue.endswith('>>'):
        return bidsvalue

    # Only <annotated> values can be filled, and only if there is a sourcefile to read from
    is_dynamic = bidsvalue.startswith('<') and bidsvalue.endswith('>')
    if not (is_dynamic and sourcefile.name):
        return bidsvalue

    # Keep the <annotated> value if the attribute is empty or cannot be read
    sourcevalue = get_sourcefield(bidsvalue[1:-1], sourcefile)

    return cleanup_value(str(sourcevalue)) if sourcevalue else bidsvalue


def get_bidsvalue(bidsfile: Union[str, Path], bidskey: str, newvalue: str= '') -> Union[Path, str]:
    """
    Sets the bidslabel, i.e. '*_bidskey-*_' is replaced with '*_bidskey-bidsvalue_'. If the key is not in the bidsname
    then the newvalue is appended to the acquisition label (which is inserted right after the sub/ses labels if it is
    not there). If newvalue is empty (= default), then the parsed existing bidsvalue is returned and nothing is set

    Examples:
        get_bidsvalue('sub-01_run-1_bold.nii.gz', 'run') -> '1'
        get_bidsvalue('sub-01_run-1_bold.nii.gz', 'run', '2') -> 'sub-01_run-2_bold.nii.gz'
        get_bidsvalue('sub-01_run-1_sbref.nii.gz', 'suffix', 'bold') -> 'sub-01_run-1_bold.nii.gz'
        get_bidsvalue('sub-01_run-1_bold.nii.gz', 'echo', '3') -> 'sub-01_acq-3_run-1_bold.nii.gz'

    :param bidsfile:    The bidsname (e.g. as returned from get_bidsname or fullpath)
    :param bidskey:     The name of the bidskey, e.g. 'echo' or 'suffix'
    :param newvalue:    The new bidsvalue. NB: remove non-BIDS compliant characters beforehand (e.g. using cleanup_value)
    :return:            The bidsname with the new bidsvalue (of the same type as bidsfile, i.e. str or Path) or, if
                        newvalue is empty, the existing bidsvalue ('' if the key is not in the bidsname)
    """

    # Split the filename in a bidsname and (at most two) extensions, e.g. '.nii.gz'
    bidspath = Path(bidsfile).parent
    bidsname = Path(bidsfile).with_suffix('').stem
    bidsext  = Path(bidsfile).name[len(bidsname):]

    # Parse the bidsname label by label: keys[i] is the key of labels[i] (None for labels without a '-', such as
    # the suffix) and values holds the value for each key (the last one wins if a key is duplicated)
    labels = bidsname.split('_')
    keys   = [label.split('-', 1)[0] if '-' in label else None for label in labels]
    values = {key: label.split('-', 1)[1] for key, label in zip(keys, labels) if key is not None}

    # Just return the parsed old bidsvalue
    if not newvalue:
        return labels[-1] if bidskey=='suffix' else values.get(bidskey, '')

    if bidskey=='suffix':

        # The suffix is the last label, it is not a key-value pair
        labels[-1] = f'{newvalue}'

    else:

        # Fallback: append the newvalue to the acquisition value if the key is not in the bidsname
        if bidskey not in values:
            if 'acq' not in values:         # Insert the 'acq' key right after the sub/ses key-value pairs
                position = 2 if values.get('ses') else 1
                labels.insert(position, 'acq-')
                keys.insert(position, 'acq')
            newvalue = f"{values.get('acq', '')}{newvalue}"
            bidskey  = 'acq'

        # Replace the value of the key. Only whole labels are compared, so other labels can never be changed accidentally
        labels = [f'{bidskey}-{newvalue}' if key==bidskey else label for key, label in zip(keys, labels)]

    # Return the updated bidsfile. NB: Do not use with_suffix() here as that truncates values that contain a '.'
    newbidsfile = bidspath/('_'.join(labels) + bidsext)

    return str(newbidsfile) if isinstance(bidsfile, str) else newbidsfile


def increment_runindex(bidsfolder: Path, bidsname: str, ext: str='.*') -> Union[Path, str]:
    """
    Checks if a file with the same the bidsname already exists in the folder and then increments the runindex (if any)
    until no such file is found

    :param bidsfolder:  The full pathname of the bidsfolder
    :param bidsname:    The bidsname with a provisional runindex
    :param ext:         The file extension for which the runindex is incremented (default = '.*')
    :return:            The bidsname with the incremented runindex. The bidsname is returned as it is if it has no
                        runindex, also if such a file already exists
    """

    # Keep incrementing as long as there is at least one file with this bidsname
    while any(bidsfolder.glob(bidsname + ext)):

        # Without a runindex the bidsname cannot change anymore, so stop here instead of looping forever
        runindex = get_bidsvalue(bidsname, 'run')
        if not runindex:
            break

        bidsname = get_bidsvalue(bidsname, 'run', str(int(runindex) + 1))

    return bidsname