Source code for est.io.utils.read
import logging
from typing import Optional, List
import h5py
import numpy
import silx.io.h5py_utils
import silx.io.utils
from silx.io.url import DataUrl
from silx.io.dictdump import h5todict
from est import settings
from est.units import ur
from est.core.types import Spectrum
from . import ascii
from .readers import fabio_reader
from .readers import silx_reader
from .ascii import split_ascii_url
_logger = logging.getLogger(__name__)
[docs]
@silx.io.h5py_utils.retry(retry_timeout=settings.DEFAULT_READ_TIMEOUT)
def get_data_from_url(url) -> numpy.ndarray:
    """Return numpy data read from a data URL.

    Examples:

    >>> # 1st frame from an EDF using silx.io.open
    >>> data = silx.io.get_data("silx:/users/foo/image.edf::/scan_0/instrument/detector_0/data[0]")
    >>> # 1st frame from an EDF using fabio
    >>> data = silx.io.get_data("fabio:/users/foo/image.edf::[0]")

    Only two schemes are supported:

    - ``silx``: the file is opened with :meth:`silx.io.open` and the data
      is usually reached through NeXus paths.
    - ``fabio``: the file is opened with :meth:`fabio.open` from the FabIO
      library. No data path needs to be given; individual frames are
      selected via data slicing.

    .. seealso:: :class:`silx.io.url.DataUrl`

    :param Union[str,silx.io.url.DataUrl]: A data URL
    :rtype: Union[numpy.ndarray, numpy.generic]
    :raises ImportError: If the library required to read the file is not
        available.
    :raises ValueError: If the URL is invalid or does not match the data.
    :raises IOError: If the file is not found, or on internal error of
        :meth:`fabio.open` / :meth:`silx.io.open` (more details are shown
        in debug mode).
    """
    # Normalize to a DataUrl instance before validating.
    if not isinstance(url, silx.io.url.DataUrl):
        url = silx.io.url.DataUrl(url)
    if not url.is_valid():
        raise ValueError("URL '%s' is not valid" % url.path())

    scheme = url.scheme()
    if scheme == "silx":
        return silx_reader.get_data(url)
    elif scheme == "fabio":
        return fabio_reader.get_data(url)
    raise ValueError("Scheme '%s' not supported" % url.scheme())
[docs]
def get_ascii_data(
    url: DataUrl,
    name: str,
    columns_names: Optional[dict] = None,
    energy_unit=ur.eV,
) -> numpy.ndarray:
    """Read spectrum data from an ASCII-style URL (spec, pymca, larch...).

    :param url: data URL; its scheme selects the ASCII reader variant.
    :param name: which array to return: ``"spectra"`` returns mu reshaped
        to ``(npoints, 1, 1)``; any other value returns the energy axis.
    :param columns_names: optional mapping that may provide the
        ``"energy"``, ``"mu"`` and ``"monitor"`` column names.
    :param energy_unit: unit of the energy column (default: eV).
    :raises ValueError: if the URL scheme is not an ASCII-like scheme.
    """
    scheme = url.scheme().lower()
    if scheme in ("ascii", "spec", "pymca", "pymca5", "larch", "xraylarch"):
        energy, mu = ascii.read_spectrum(
            url.file_path(),
            # Use .get() so a partially-filled mapping (e.g. only "mu"
            # provided) does not raise KeyError; missing names fall back
            # to the reader defaults, same as passing no mapping at all.
            energy_col_name=columns_names.get("energy") if columns_names else None,
            absorption_col_name=columns_names.get("mu") if columns_names else None,
            monitor_col_name=columns_names.get("monitor") if columns_names else None,
            scan_title=split_ascii_url(url)["scan_title"],
            energy_unit=energy_unit,
            scheme=scheme,
        )
        if name == "spectra":
            # Spectra are expected as a 3D stack: (npoints, 1, 1).
            mu = numpy.ascontiguousarray(mu[:])
            return mu.reshape(-1, 1, 1)
        return energy
    raise ValueError("Scheme '%s' not supported" % url.scheme())
[docs]
@silx.io.h5py_utils.retry(retry_timeout=settings.DEFAULT_READ_TIMEOUT)
def get_est_data(url) -> Optional[List[Spectrum]]:
    """Load all :class:`Spectrum` saved by est in an HDF5 file.

    Only entries (top-level HDF5 groups) containing an ``est_saving_pt``
    group are considered. If several such entries exist, only the first
    one is loaded (a warning is emitted).

    :param url: DataUrl pointing to the HDF5 file to read.
    :return: list of spectra ordered by their integer index, or ``None``
        when the file contains no est entry.
    """
    spectra = []
    with silx.io.h5py_utils.File(url.file_path(), "r") as hdf5:
        # Entries are top-level groups holding an "est_saving_pt" group.
        entries = [
            name
            for name in hdf5.keys()
            if isinstance(hdf5[name], h5py.Group)
            and "est_saving_pt" in hdf5[name].keys()
        ]
        if len(entries) == 0:
            # Lazy %-formatting: the original call passed the file path as
            # an extra positional argument without a placeholder, which
            # breaks message formatting inside the logging module.
            _logger.error("no spectra dataset found in the file %s", url.file_path())
            return None
        if len(entries) > 1:
            _logger.warning(
                "several entries detected, only one will be loaded: %s", entries[0]
            )
        spectra_path = "/".join((entries[0], "est_saving_pt", "spectra"))
        node_spectra = hdf5[spectra_path]
        # Spectrum groups are named by integer index; sort numerically so
        # "10" comes after "9", not after "1".
        spectrum_indexes = sorted(int(key) for key in node_spectra.keys())
        for index in spectrum_indexes:
            spectrum_path = "/".join((spectra_path, str(index)))
            dict_ = h5todict(h5file=url.file_path(), path=spectrum_path, asarray=False)
            spectra.append(Spectrum.from_dict(dict_))
    return spectra