"""
The APyT file format conversion module
======================================
APT data can exist in multiple file formats---often representing the same
measurement dataset, but stored differently (e.g., as raw binary or decoded
ASCII). This module provides easy-to-use
:ref:`functions<apyt.io.conv:List of functions>` to convert between various file
formats commonly encountered in atom probe tomography (APT) workflows. It
enables standardized preprocessing and ensures compatibility across software
tools within the APyT ecosystem.
Raw file format
---------------
The APT group at the University of Stuttgart uses a binary file format to record
APT measurements. Each file entry corresponds to a single evaporation event and
follows **Little Endian** byte ordering.
The binary format includes the following fields:
============ ========= =================================
Field Data type Description
============ ========= =================================
U_base float32 base voltage (V)
U_pulse float32 pulse voltage (V)
U_reflectron float32 reflectron voltage (V)
x_det float32 `x` detector position (mm)
y_det float32 `y` detector position (mm)
tof float32 time of flight (ns)
epoch int32 epoch of evaporation event
pulse_num uint32 pulse number of evaporation event
============ ========= =================================
List of functions
-----------------
The following functions are available for format conversion:
* :func:`epos_to_raw`: Convert an ePOS file to a RAW file.
* :func:`raw_to_ascii`: Convert a raw measurement file to a human-readable ASCII
file.
* :func:`tapsim_to_raw`: Convert |TAPSim| ASCII file to raw file.
.. |TAPSim| raw:: html
<a href="https://git.mp.imw.uni-stuttgart.de/cgit.cgi/tapsim.git"
target="_blank">TAPSim</a>
.. sectionauthor:: Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
.. codeauthor:: Jianshu Zheng <zheng.jianshu@mp.imw.uni-stuttgart.de>
.. codeauthor:: Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
"""
#
#
#
#
__version__ = "0.1.0"
__all__ = ["epos_to_raw", "raw_to_ascii", "tapsim_to_raw"]
#
#
#
#
# import modules
import logging
import numpy as np
import warnings
#
# import some special functions
from apyt.io.config import _EPOS_FILE_DTYPE, _RAW_FILE_DTYPE
from datetime import datetime
from pathlib import Path
from struct import pack, unpack
#
#
#
#
# set up logger
logger = logging.getLogger(__name__)
#
#
#
#
################################################################################
#
# private module-level variables
#
################################################################################
_bin_fmt = "<ffffffiI"
"""str : The format of the binary data per measured event."""
#
#
#
#
################################################################################
#
# public functions
#
################################################################################
[docs]def epos_to_raw(epos_file, raw_file = None):
"""
Convert an ePOS file to a RAW file.
This function reads an input ePOS file, maps overlapping fields between the
ePOS and RAW data types, and writes the converted data to a binary RAW file.
Fields that exist in both formats are copied directly.
Parameters
----------
epos_file : str or Path
Path to the input ePOS file.
raw_file : str or Path, optional
Path to the output RAW file. If not provided, it will be generated
automatically by replacing the extension of `epos_file` with `.raw`.
Returns
-------
Path or None
Path to the generated RAW file, or ``None`` if the input file does not
exist.
Warns
-----
UserWarning
If the input ePOS file does not exist.
"""
#
#
# check existence of input ePOS file
epos_file = Path(epos_file)
if not epos_file.is_file():
warnings.warn(
f"Input ePOS file \"{epos_file}\" does not exist.", UserWarning
)
return None
#
# load input ePOS file
logger.info(f"Reading ePOS file \"{epos_file}\".")
data_in = np.fromfile(epos_file, dtype = _EPOS_FILE_DTYPE)
#
#
# copy overlapping fields
data_out = np.zeros(len(data_in), dtype = _RAW_FILE_DTYPE)
for name in (set(_EPOS_FILE_DTYPE.names) & set(_RAW_FILE_DTYPE.names)):
data_out[name] = data_in[name]
#
#
# set raw file name if not provided
if raw_file is None:
raw_file = epos_file.with_suffix(".raw")
else:
raw_file = Path(raw_file)
#
#
# write raw output file
logger.info(f"Writing raw file \"{raw_file}\".")
data_out.tofile(raw_file)
#
#
# return path to output file
return raw_file
#
#
#
#
[docs]def raw_to_ascii(raw_file, ascii_file):
"""Convert a raw measurement file to a human-readable ASCII file.
This function enables the conversion from a raw measurement file to a
human-readable ASCII file. The binary file is read in chunks of 32 bytes,
(representing one evaporation event), decoded into the respective data
types, and written to an ASCII text file.
Parameters
----------
raw_file : str
The name of the raw file.
ascii_file : str
The name of the ASCII file.
"""
#
#
# get binary data from file
print("Reading binary file \"{0:s}\" ...".format(raw_file))
data = np.fromfile(raw_file, dtype = np.dtype('V32')).tolist()
#
#
# open file for output
print("Writing ASCII file \"{0:s}\" ...".format(ascii_file))
with open(ascii_file, 'w') as f:
# write header
f.write("# U_base (V)\tU_pulse (V)\tU_reflectron (V)\t"
"x_det (mm)\ty_det (mm)\ttof (ns)\tepoch\t\tpulse_num\n")
#
# set format string
fmt = "%9.3f\t%8.3f\t%7.1f\t\t\t%+11.6f\t%+11.6f\t%8.3f\t%d\t" \
"%10d\n"
#
# convert binary data and write to file
[f.write(fmt % unpack(_bin_fmt, i)) for i in data]
#
#
#
#
[docs]def tapsim_to_raw(tapsim_file, raw_file, id_range_list):
"""Convert TAPSim ASCII file to raw file.
This function enables the conversion from a |TAPSim| ASCII file to a raw
file for further processing (e.g. reconstruction). A certain subset of
columns is imported from the TAPSim file, manipulated accordingly to match
the :ref:`raw file format<apyt.io.conv:Raw file format>`, and eventually
written to a binary file. A constant base voltage is used for all events and
the time of flight is arranged such that it is constant for one distinct
species. The epoch is set to a constant time plus 1 event/s, the pulse
number corresponds to the evaporation event.
The conversion is illustrated in the following table:
============ ========= ==================== =============================
Raw file Data type TAPSim file Comment
============ ========= ==================== =============================
U_base float32 5000 V constant
U_pulse float32 0 zero
U_reflectron float32 0 zero
x_det float32 col. 7 conversion from meter to
millimeter
y_det float32 col. 8 conversion from meter to
millimeter
tof float32 constant per species constant for one species,
separation 50 ns
epoch int32 946681200 + event (2000-01-01 00:00:00) +
1 event/s
pulse_num uint32 0, 1, 2, ... corresponds to evaporation
event
============ ========= ==================== =============================
Parameters
----------
tapsim_file: str
The name of the TAPSim file.
raw_file: str
The name of the raw file.
id_range_list: list
The list of id ranges used for mapping the atomic species, each of type
`tuple` of length 2, specifying the respective minimum and maximum id.
"""
#
#
# load evaporation index, atomic id, and detector xy-position from TAPSim
# file
print("Reading TAPSim file \"{0:s}\" ...".format(tapsim_file))
data = np.loadtxt(tapsim_file, skiprows = 46, usecols = (0, 1, 7, 8))
#
#
# filter entries with nan values for detector position
length_init = len(data)
data = data[~(np.isnan(data[:, 2]) | np.isnan(data[:, 3]))]
if length_init != len(data):
warnings.warn("{0:d} events with invalid detector positions (nan) have "
"been removed.".format(length_init - len(data)))
#
#
# initialize empty array for mapped atomic ids
id = np.full(len(data), -1, dtype = int)
#
# loop through id ranges
for id_range in id_range_list:
# set index of current id range
i = id_range_list.index(id_range)
#
# map atomic id if in current range
id = np.where(
(id_range[0] <= data[:, 1]) & (data[:, 1] <= id_range[-1]), i, id)
#
# check whether all ids have been mapped
if np.count_nonzero(id == -1) > 0:
raise Exception("Unspecified id detected. Please check your id ranges "
"to cover all occurring ids ({0:d}, {1:d}).".format(
int(data[:, 1].min()), int(data[:, 1].max())))
#
#
# set arbitrary timestamp required in raw file
epoch = datetime(2000, 1, 1, 0, 0, 0).timestamp()
#
#
# set voltages
voltage = (5000.0, 0.0, 0.0)
#
#
# set data types for structured array
dt = np.dtype([
('x_det', np.float32), ('y_det', np.float32), ('tof', np.float32),
('epoch', np.int32), ('pulse_num', np.uint32)])
#
# create and fill structured array
data_str = np.empty((len(data)), dtype = dt)
data_str['x_det'] = data[:, 2] * 1000 # m to mm
data_str['y_det'] = data[:, 3] * 1000 # m to mm
data_str['tof'] = id * 50.0 + 50.0 # tof grouped by atomic id
data_str['epoch'] = data[:, 0] + epoch # event id plus time offset
data_str['pulse_num'] = data[:, 0] # event id
#
# convert structured array to list for faster iterator
data_l = data_str.tolist()
#
#
# open output file for writing
print("Writing binary file \"{0:s}\" ...".format(raw_file))
with open(raw_file, 'wb') as f:
# loop through events
[f.write(pack(_bin_fmt, *voltage, *i)) for i in data_l]