Source code for est.io.utils.read
import logging
from typing import Optional, List
import h5py
import numpy
import silx.io.h5py_utils
import silx.io.utils
from silx.io.url import DataUrl
from silx.io.dictdump import h5todict
from est import settings
from est.units import ur
from est.core.types import Spectrum
from . import ascii
from .readers import fabio_reader
from .readers import silx_reader
from .ascii import split_ascii_url
_logger = logging.getLogger(__name__)
[docs]
@silx.io.h5py_utils.retry(retry_timeout=settings.DEFAULT_READ_TIMEOUT)
def get_data_from_url(url) -> numpy.ndarray:
    """Return numpy data read from a data URL.

    Examples:

    >>> # 1st frame from an EDF using silx.io.open
    >>> data = silx.io.get_data("silx:/users/foo/image.edf::/scan_0/instrument/detector_0/data[0]")
    >>> # 1st frame from an EDF using fabio
    >>> data = silx.io.get_data("fabio:/users/foo/image.edf::[0]")

    Only two schemes are supported:

    - ``silx``: the file is opened with :meth:`silx.io.open` and the data
      is usually reached through NeXus paths.
    - ``fabio``: the file is opened with :meth:`fabio.open` from the FabIO
      library. No data path needs to be given; individual frames are
      selected via data slicing.

    .. seealso:: :class:`silx.io.url.DataUrl`

    :param Union[str,silx.io.url.DataUrl]: A data URL
    :rtype: Union[numpy.ndarray, numpy.generic]
    :raises ImportError: If the library required to read the file is not
        available.
    :raises ValueError: If the URL is invalid or does not match the data.
    :raises IOError: If the file is not found, or on internal error of
        :meth:`fabio.open` / :meth:`silx.io.open` (more details are shown
        in debug mode).
    """
    # Normalize to a DataUrl instance before validating.
    if not isinstance(url, silx.io.url.DataUrl):
        url = silx.io.url.DataUrl(url)
    if not url.is_valid():
        raise ValueError("URL '%s' is not valid" % url.path())

    scheme = url.scheme()
    if scheme == "silx":
        return silx_reader.get_data(url)
    elif scheme == "fabio":
        return fabio_reader.get_data(url)
    raise ValueError("Scheme '%s' not supported" % url.scheme())
[docs]
def get_ascii_data(
    url: DataUrl,
    name: str,
    columns_names: Optional[dict] = None,
    energy_unit=ur.eV,
) -> numpy.ndarray:
    """Read spectrum data from an ASCII-style URL (spec, pymca, larch...).

    :param url: data URL; its scheme selects the ASCII reader variant.
    :param name: which array to return: ``"spectra"`` returns mu reshaped
        to ``(npoints, 1, 1)``; any other value returns the energy axis.
    :param columns_names: optional mapping that may provide the
        ``"energy"``, ``"mu"`` and ``"monitor"`` column names.
    :param energy_unit: unit of the energy column (default: eV).
    :raises ValueError: if the URL scheme is not an ASCII-like scheme.
    """
    scheme = url.scheme().lower()
    if scheme in ("ascii", "spec", "pymca", "pymca5", "larch", "xraylarch"):
        energy, mu = ascii.read_spectrum(
            url.file_path(),
            # Use .get() so a partially-filled mapping (e.g. only "mu"
            # provided) does not raise KeyError; missing names fall back
            # to the reader defaults, same as passing no mapping at all.
            energy_col_name=columns_names.get("energy") if columns_names else None,
            absorption_col_name=columns_names.get("mu") if columns_names else None,
            monitor_col_name=columns_names.get("monitor") if columns_names else None,
            scan_title=split_ascii_url(url)["scan_title"],
            energy_unit=energy_unit,
            scheme=scheme,
        )
        if name == "spectra":
            # Spectra are expected as a 3D stack: (npoints, 1, 1).
            mu = numpy.ascontiguousarray(mu[:])
            return mu.reshape(-1, 1, 1)
        return energy
    raise ValueError("Scheme '%s' not supported" % url.scheme())
[docs]
@silx.io.h5py_utils.retry(retry_timeout=settings.DEFAULT_READ_TIMEOUT)
def get_est_data(url) -> Optional[List[Spectrum]]:
    """Load all :class:`Spectrum` saved by est in an HDF5 file.

    Only entries (top-level HDF5 groups) containing an ``est_saving_pt``
    group are considered. If several such entries exist, only the first
    one is loaded (a warning is emitted).

    :param url: DataUrl pointing to the HDF5 file to read.
    :return: list of spectra ordered by their integer index, or ``None``
        when the file contains no est entry.
    """
    spectra = []
    with silx.io.h5py_utils.File(url.file_path(), "r") as hdf5:
        # Entries are top-level groups holding an "est_saving_pt" group.
        entries = [
            name
            for name in hdf5.keys()
            if isinstance(hdf5[name], h5py.Group)
            and "est_saving_pt" in hdf5[name].keys()
        ]
        if len(entries) == 0:
            # Lazy %-formatting: the original call passed the file path as
            # an extra positional argument without a placeholder, which
            # breaks message formatting inside the logging module.
            _logger.error("no spectra dataset found in the file %s", url.file_path())
            return None
        if len(entries) > 1:
            _logger.warning(
                "several entries detected, only one will be loaded: %s", entries[0]
            )
        spectra_path = "/".join((entries[0], "est_saving_pt", "spectra"))
        node_spectra = hdf5[spectra_path]
        # Spectrum groups are named by integer index; sort numerically so
        # "10" comes after "9", not after "1".
        spectrum_indexes = sorted(int(key) for key in node_spectra.keys())
        for index in spectrum_indexes:
            spectrum_path = "/".join((spectra_path, str(index)))
            dict_ = h5todict(h5file=url.file_path(), path=spectrum_path, asarray=False)
            spectra.append(Spectrum.from_dict(dict_))
    return spectra