# Source code for apyt.io.conv

"""
The APyT file format conversion module
======================================

APT data can exist in multiple file formats---often representing the same
measurement dataset, but stored differently (e.g., as raw binary or decoded
ASCII). This module provides easy-to-use
:ref:`functions<apyt.io.conv:List of functions>` to convert between various file
formats commonly encountered in atom probe tomography (APT) workflows. It
enables standardized preprocessing and ensures compatibility across software
tools within the APyT ecosystem.


Raw file format
---------------

The APT group at the University of Stuttgart uses a binary file format to record
APT measurements. Each file entry corresponds to a single evaporation event and
follows **Little Endian** byte ordering.

The binary format includes the following fields:

============  =========  =================================
Field         Data type  Description
============  =========  =================================
U_base        float32    base voltage (V)
U_pulse       float32    pulse voltage (V)
U_reflectron  float32    reflectron voltage (V)
x_det         float32    `x` detector position (mm)
y_det         float32    `y` detector position (mm)
tof           float32    time of flight (ns)
epoch         int32      epoch of evaporation event
pulse_num     uint32     pulse number of evaporation event
============  =========  =================================


List of functions
-----------------

The following functions are available for format conversion:

* :func:`epos_to_raw`: Convert an ePOS file to a RAW file.
* :func:`raw_to_ascii`: Convert a raw measurement file to a human-readable ASCII
  file.
* :func:`tapsim_to_raw`: Convert a |TAPSim| ASCII file to a raw file.


.. |TAPSim| raw:: html

    <a href="https://git.mp.imw.uni-stuttgart.de/cgit.cgi/tapsim.git"
    target="_blank">TAPSim</a>


.. sectionauthor:: Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
.. codeauthor::    Jianshu Zheng <zheng.jianshu@mp.imw.uni-stuttgart.de>
.. codeauthor::    Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
"""
#
#
#
#
__version__ = "0.1.0"
__all__ = ["epos_to_raw", "raw_to_ascii", "tapsim_to_raw"]
#
#
#
#
# import modules
import logging
import numpy as np
import warnings
#
# import some special functions
from apyt.io.config import _EPOS_FILE_DTYPE, _RAW_FILE_DTYPE
from datetime import datetime
from pathlib import Path
from struct import pack, unpack
#
#
#
#
# set up logger
logger = logging.getLogger(__name__)
#
#
#
#
################################################################################
#
# private module-level variables
#
################################################################################
# struct layout of one event record: little-endian ("<"), six float32 values
# ("f": U_base, U_pulse, U_reflectron, x_det, y_det, tof), one int32 ("i":
# epoch), and one uint32 ("I": pulse_num), i.e. 32 bytes per event
_bin_fmt = "<ffffffiI"
"""str : The format of the binary data per measured event."""
#
#
#
#
################################################################################
#
# public functions
#
################################################################################
def epos_to_raw(epos_file, raw_file = None):
    """
    Convert an ePOS file to a RAW file.

    The ePOS file is read as an array of ePOS records. A zero-initialized
    array of RAW records of the same length is created, and every field whose
    name occurs in both record types is copied over. Fields that only exist in
    the RAW record type keep their zero value. The result is written as a
    binary RAW file.

    Parameters
    ----------
    epos_file : str or Path
        Path to the input ePOS file.
    raw_file : str or Path, optional
        Path to the output RAW file. If omitted, the output path is derived
        from `epos_file` by replacing its extension with `.raw`.

    Returns
    -------
    Path or None
        Path to the written RAW file, or ``None`` if the input file does not
        exist.

    Warns
    -----
    UserWarning
        If the input ePOS file does not exist.
    """
    #
    #
    # normalize input path and bail out early if there is nothing to convert
    epos_file = Path(epos_file)
    if not epos_file.is_file():
        warnings.warn(
            f"Input ePOS file \"{epos_file}\" does not exist.", UserWarning
        )
        return None
    #
    #
    # read all ePOS records
    logger.info(f"Reading ePOS file \"{epos_file}\".")
    epos_records = np.fromfile(epos_file, dtype = _EPOS_FILE_DTYPE)
    #
    #
    # determine the field names shared by both record types
    shared_fields = [
        field for field in _RAW_FILE_DTYPE.names
        if field in _EPOS_FILE_DTYPE.names
    ]
    #
    # start from all-zero RAW records and fill in the shared fields
    raw_records = np.zeros(len(epos_records), dtype = _RAW_FILE_DTYPE)
    for field in shared_fields:
        raw_records[field] = epos_records[field]
    #
    #
    # resolve output path (derived from the input path unless given)
    raw_file = (
        epos_file.with_suffix(".raw") if raw_file is None else Path(raw_file)
    )
    #
    #
    # write RAW records and report the output location
    logger.info(f"Writing raw file \"{raw_file}\".")
    raw_records.tofile(raw_file)
    return raw_file
# # # #
def raw_to_ascii(raw_file, ascii_file):
    """Convert a raw measurement file to a human-readable ASCII file.

    The binary file is read in chunks of 32 bytes (each representing one
    evaporation event). Every chunk is decoded into the respective data types
    according to the module-level binary record format and written as one
    tab-separated line to an ASCII text file, preceded by a single header
    line naming the columns.

    Parameters
    ----------
    raw_file : str or Path
        The name of the raw file.
    ascii_file : str or Path
        The name of the ASCII file.
    """
    #
    #
    # get binary data from file, one 32-byte chunk per evaporation event
    # (f-string formatting works for both str and Path arguments, whereas the
    # "{0:s}" format specification is rejected by Path objects)
    print(f"Reading binary file \"{raw_file}\" ...")
    records = np.fromfile(raw_file, dtype = np.dtype('V32')).tolist()
    #
    #
    # set header line and per-event format string
    header = (
        "# U_base (V)\tU_pulse (V)\tU_reflectron (V)\t"
        "x_det (mm)\ty_det (mm)\ttof (ns)\tepoch\t\tpulse_num\n"
    )
    fmt = (
        "%9.3f\t%8.3f\t%7.1f\t\t\t%+11.6f\t%+11.6f\t%8.3f\t%d\t"
        "%10d\n"
    )
    #
    #
    # open file for output
    print(f"Writing ASCII file \"{ascii_file}\" ...")
    with open(ascii_file, 'w') as f:
        f.write(header)
        #
        # decode and write the events lazily; no throwaway list of return
        # values is built
        f.writelines(fmt % unpack(_bin_fmt, record) for record in records)
# # # #
def tapsim_to_raw(tapsim_file, raw_file, id_range_list):
    """Convert TAPSim ASCII file to raw file.

    This function enables the conversion from a |TAPSim| ASCII file to a raw
    file for further processing (e.g. reconstruction). A certain subset of
    columns is imported from the TAPSim file, manipulated accordingly to match
    the :ref:`raw file format<apyt.io.conv:Raw file format>`, and eventually
    written to a binary file. A constant base voltage is used for all events
    and the time of flight is arranged such that it is constant for one
    distinct species. The epoch is set to a constant time plus 1 event/s, the
    pulse number corresponds to the evaporation event.

    The conversion is illustrated in the following table:

    ============  =========  ====================  =============================
    Raw file      Data type  TAPSim file           Comment
    ============  =========  ====================  =============================
    U_base        float32    5000 V                constant
    U_pulse       float32    0                     zero
    U_reflectron  float32    0                     zero
    x_det         float32    col. 7                conversion from meter to
                                                   millimeter
    y_det         float32    col. 8                conversion from meter to
                                                   millimeter
    tof           float32    constant per species  constant for one species,
                                                   separation 50 ns
    epoch         int32      946681200 + event     (2000-01-01 00:00:00) +
                                                   1 event/s
    pulse_num     uint32     0, 1, 2, ...          corresponds to evaporation
                                                   event
    ============  =========  ====================  =============================

    Parameters
    ----------
    tapsim_file: str or Path
        The name of the TAPSim file.
    raw_file: str or Path
        The name of the raw file.
    id_range_list: list
        The list of id ranges used for mapping the atomic species, each of type
        `tuple` of length 2, specifying the respective minimum and maximum id
        (both inclusive). The position of a range in the list determines the
        species index; if ranges overlap, the last matching range wins.

    Raises
    ------
    ValueError
        If at least one atomic id is not covered by any of the id ranges.

    Warns
    -----
    UserWarning
        If events with invalid detector positions (nan) have been removed.
    """
    #
    #
    # load evaporation index, atomic id, and detector xy-position from TAPSim
    # file; the reshape keeps the array two-dimensional even if the file
    # contains only a single event (loadtxt returns a 1d array in that case)
    print(f"Reading TAPSim file \"{tapsim_file}\" ...")
    data = np.loadtxt(
        tapsim_file, skiprows = 46, usecols = (0, 1, 7, 8)
    ).reshape(-1, 4)
    #
    #
    # filter entries with nan values for detector position
    is_valid = ~np.isnan(data[:, 2:4]).any(axis = 1)
    num_invalid = len(data) - int(np.count_nonzero(is_valid))
    if num_invalid > 0:
        warnings.warn("{0:d} events with invalid detector positions (nan) have "
                      "been removed.".format(num_invalid))
        data = data[is_valid]
    #
    #
    # map atomic ids to species indices (-1 marks ids not covered by any range)
    atomic_id = data[:, 1]
    species = np.full(len(data), -1, dtype = int)
    for index, id_range in enumerate(id_range_list):
        in_range = (id_range[0] <= atomic_id) & (atomic_id <= id_range[-1])
        species[in_range] = index
    #
    # check whether all ids have been mapped
    if np.any(species == -1):
        raise ValueError(
            "Unspecified id detected. Please check your id ranges "
            "to cover all occurring ids ({0:d}, {1:d}).".format(
                int(atomic_id.min()), int(atomic_id.max())))
    #
    #
    # set arbitrary timestamp required in raw file; the documented constant is
    # used directly because the timestamp of a naive datetime object depends
    # on the local timezone of the executing machine
    epoch = 946681200
    #
    #
    # set data types for one event record (explicitly Little Endian, without
    # padding, 32 bytes per event as described in the raw file format)
    # NOTE(review): the layout is spelled out locally; it possibly matches the
    # imported _RAW_FILE_DTYPE, which could then be used instead -- confirm
    dt = np.dtype([
        ('U_base', '<f4'), ('U_pulse', '<f4'), ('U_reflectron', '<f4'),
        ('x_det', '<f4'), ('y_det', '<f4'), ('tof', '<f4'),
        ('epoch', '<i4'), ('pulse_num', '<u4')])
    #
    # create and fill structured array (pulse and reflectron voltage stay zero)
    events = np.zeros(len(data), dtype = dt)
    events['U_base']    = 5000.0                # constant base voltage
    events['x_det']     = data[:, 2] * 1000     # m to mm
    events['y_det']     = data[:, 3] * 1000     # m to mm
    events['tof']       = species * 50.0 + 50.0 # tof grouped by atomic id
    events['epoch']     = data[:, 0] + epoch    # event id plus time offset
    events['pulse_num'] = data[:, 0]            # event id
    #
    #
    # write all events to output file at once
    print(f"Writing binary file \"{raw_file}\" ...")
    with open(raw_file, 'wb') as f:
        f.write(events.tobytes())