"""
The APyT local composition module
=================================
The `apyt.analysis.chemistry.locomp` module provides tools for evaluating local
chemical compositions within spherical sub-volumes of a three-dimensional atom
probe data set. This is particularly useful for analyzing spatial variations in
composition at the nanoscale.
The input data must contain 3D spatial coordinates along with atomic identity
information (i.e., element type IDs).
Local composition can be computed using one of two mutually exclusive methods:
1. **Fixed neighbor count**: Spheres are constructed to contain a constant
number of atoms (neighbors).
2. **Fixed radius/volume**: Spheres are defined with a constant radius.
These methods are controlled via a parameter dictionary with the following keys:
* ``type``: either ``"neighbor"`` or ``"volume"``
* ``param``: the number of neighbors (if type is ``"neighbor"``), or the radius
of the sphere (if type is ``"volume"``)
Spheres can be centered:
* Around every atom in the dataset, or
* On a regular 3D grid with user-defined minimum spacing to avoid overlapping
spheres. (See :func:`get_query_points` for grid generation details.)
Neighbor searches are performed using the
|SciPy_KDTree| class from the SciPy library, ensuring efficient spatial queries
even for large datasets.
List of functions
-----------------
This module provides the following functions for computing and analyzing local
composition histograms and related statistics:
* :func:`calc_stats`: Calculate histogram and statistics.
* :func:`check_periodic_box`: Check periodic boundary conditions.
* :func:`emulate_efficiency`: Emulate detector efficiency for simulated data.
* :func:`get_composition`: Get local compositions for query points.
* :func:`get_extended_cell_geometry`: Get simulation cell geometry in
*extended xyz* format.
* :func:`get_margin_filter`: Automatically filter margin region.
* :func:`get_query_points`: Get query points for neighbor search.
.. |SciPy_KDTree| raw:: html

   <a href="https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.KDTree.html" target="_blank">KDTree</a>
.. sectionauthor:: Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
.. codeauthor:: Sebastian M. Eich <Sebastian.Eich@imw.uni-stuttgart.de>
"""
#
#
#
#
__version__ = '0.1.0'
__all__ = [
'calc_stats',
'check_periodic_box',
'emulate_efficiency',
'get_composition',
'get_extended_cell_geometry',
'get_margin_filter',
'get_query_points'
]
#
#
#
#
# import modules
import multiprocessing
import numpy as np
import warnings
#
# import some special functions
from functools import partial
from psutil import virtual_memory
from scipy.spatial import cKDTree
from sys import getsizeof, stderr
#
#
#
#
################################################################################
#
# private module-level variables
#
################################################################################
_mem_threshold = 0.50
"""float : The approximate maximum amount of available memory to use."""
#
#
#
#
################################################################################
#
# public functions
#
################################################################################
def calc_stats(data, **kwargs):
    """Calculate histogram and evaluate histogram statistics.

    Parameters
    ----------
    data : ndarray, shape (n,)
        The data over which to calculate the histogram.

    Keyword Arguments
    -----------------
    bin_method : str
        The method to use to determine the bin width for the histogram. This
        argument will be passed through to the ``numpy.histogram()`` method.
        Default: ``None``, which evaluates to a bin width of ``1.0``.
    correlation : int
        The correlation factor of the samples. If the samples are correlated,
        the statistical error estimates will be underestimated, but can be
        corrected with this option. Default: ``1``, i.e. all samples are
        uncorrelated.

    Returns
    -------
    (hist, edges, centers, hist_norm) : tuple
        The tuple containing the respective histogram counts *hist*, the bin
        edges *edges*, the bin centers *centers*, and the normalized histogram
        counts *hist_norm*, each of type *ndarray, shape (m,)* or
        *shape (m+1,)*, respectively.
    (μ, Δμ, Var, ΔVar, Var/μ, Δ(Var/μ)) : tuple
        The statistical histogram data including error estimates, each of type
        *float*.

    Raises
    ------
    TypeError
        If ``bin_method`` is neither ``None`` nor a string.
    """
    #
    # get binning method
    bin_method = kwargs.get('bin_method', None)
    #
    # set data properties
    num = len(data)
    data_min = min(data)
    data_max = max(data)
    #
    # calculate histogram
    if bin_method is None:
        # use bin width of 1.0 centered on integer values; cast the number of
        # bin edges to a plain int (np.linspace requires an integral 'num')
        bins = np.linspace(
            data_min - 0.5, data_max + 0.5, int(data_max - data_min) + 2)
        bin_centers = np.arange(data_min, data_max + 1)
        counts, bin_edges = np.histogram(data, bins = bins)
    elif isinstance(bin_method, str):
        # pass through binning method
        counts, bin_edges = np.histogram(data, bins = bin_method)
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
    else:
        # previously, an invalid bin method fell through and triggered an
        # obscure NameError below; fail early with a clear message instead
        raise TypeError(
            "'bin_method' must be None or a string, got {0!r}.".format(
                bin_method))
    #
    # normalize histogram (counts per sample and unit bin width)
    counts_norm = counts / (num * (bin_edges[1:] - bin_edges[:-1]))
    #
    # calculate central moments (vectorized with np.sum instead of the
    # builtin sum)
    mu = np.sum(counts * bin_centers) / num
    mu_2 = np.sum(counts * (bin_centers - mu)**2) / num
    mu_4 = np.sum(counts * (bin_centers - mu)**4) / num
    #
    # set number of independent samples (may be lower than actual sample size
    # due to user-defined correlation);
    # using int type may result in integer overflow below; convert to float64
    n = np.float64(num / kwargs.get('correlation', 1))
    #
    # calculate central moment estimates (correction does not matter for
    # sufficiently large n)
    # https://mathworld.wolfram.com/h-Statistic.html
    h_2 = n * mu_2 / (n - 1)
    h_4 = (3 * (3 - 2 * n) * n**2 * mu_2**2 + \
           n**2 * (n**2 - 2 * n + 3) * mu_4) / \
          ((n - 3) * (n - 2) * (n - 1) * n)
    #
    # calculate estimate of relative variance
    h_2_r = h_2 / mu
    #
    # calculate statistical errors; use fabs to catch floating point errors
    # https://stats.stackexchange.com/a/157305
    Δmu = np.sqrt(h_2 / n)
    Δh_2 = np.sqrt(np.fabs((h_4 - (n - 3) / (n - 1) * h_2**2) / n))
    Δh_2_r = np.sqrt(Δh_2**2 + h_2_r**2 * Δmu**2) / mu
    #
    # return histogram data and statistics
    return (counts, bin_edges, bin_centers, counts_norm), \
           (mu, Δmu, h_2, Δh_2, h_2_r, Δh_2_r)
#
#
#
#
def check_periodic_box(comment):
    """Check periodic boundary conditions.

    This method checks an OVITO comment line for the presence of the periodic
    boundary flags. If found, this method returns the (periodic) box
    dimensions, otherwise ``None`` is returned.

    Parameters
    ----------
    comment : str
        A valid OVITO comment line from a file in *xyz* format.

    Returns
    -------
    box : ndarray, shape (3,) or None
        The box dimensions for periodic boundary conditions.
    """
    #
    # split comment line into single tokens
    tokens = comment.split()
    #
    # set default return value
    box = None
    #
    # check pbc flag; catch only the expected lookup errors here — the
    # previous bare 'except' also swallowed the SystemExit raised by exit()
    # below, so a missing box size returned a partially filled box instead of
    # aborting
    try:
        pbc_index = tokens.index('pbc')
        if tokens[pbc_index + 1:pbc_index + 4] == ['1', '1', '1']:
            #
            # get box dimensions (the i-th component of cell vector i, i.e.
            # the diagonal of the cell matrix)
            box = np.zeros(3)
            for i in range(0, 3):
                try:
                    # look for dimension
                    pos = tokens.index('cell_vec' + str(i + 1))
                    box[i] = tokens[pos + i + 1]
                except (ValueError, IndexError):
                    print('ERROR: Could not find box size for dimension {0:d}.'
                          .format(i + 1), file = stderr)
                    exit(1)
    except (ValueError, IndexError):
        # no 'pbc' entry present: keep default return value None
        pass
    return box
#
#
#
#
def emulate_efficiency(data, p, seed = None):
    """Emulate detector efficiency for simulated data.

    For data prior to evaporation and reconstruction, the limited detector
    efficiency can be emulated for a simulated data set by randomly choosing
    the particles with a certain probability which corresponds to the
    detection efficiency ``p``.

    Parameters
    ----------
    data : ndarray, shape (n, 4)
        The *n* types and three-dimensional coordinates of the atoms.
    p : float
        The detector efficiency to emulate.
    seed : int, optional
        The optional seed to initialize the random number generator. Defaults
        to ``None``.

    Returns
    -------
    data_r : ndarray, shape (m, 4)
        The *m* types and three-dimensional coordinates of the randomly
        selected atoms.
    """
    #
    # a particle counts as "detected" if its uniform random draw does not
    # exceed the detection efficiency
    rng = np.random.default_rng(seed)
    detected = rng.random(len(data)) <= p
    return data[detected]
#
#
#
#
def get_composition(data, query_points, query, **kwargs):
    """Get local compositions for query points.

    Depending on the value of ``type`` in the **query** dictionary argument,
    either the nearest neighbors (``"neighbor"``) or neighbors within a fixed
    distance/volume (``"volume"``) will be searched for the provided query
    points. The composition in terms of number of type 2 particles will be
    returned for each query point. In addition, either the sphere radii or
    the total number of atoms in the spheres will be returned.

    Parameters
    ----------
    data : ndarray, shape (n, 4)
        The *n* types and three-dimensional coordinates of the atoms.
    query_points : ndarray, shape (m, 3)
        The *m* three-dimensional query points for the search.
    query : dict
        The dictionary containing the query type (``"neighbor"`` or
        ``"volume"``) and the query parameter ``param`` (number of neighbors
        or neighbor search cutoff).

    Keyword Arguments
    -----------------
    box : ndarray, shape (3,) or None
        The periodic box dimensions (if present).
    memory : float
        The maximum amount of memory (GB) to use. If not provided, an
        internally specified fraction of the available system memory will be
        used at maximum.
    verbose : bool
        Whether to be verbose. Default: ``False``.
    workers : int
        The optional number of worker threads to use. If not provided, all
        system cores will be used.

    Returns
    -------
    r : ndarray, shape (m,)
        The *m* sphere radii, i.e. the distance to the furthest neighbor for
        the ``"neighbor"`` query type.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.

    or

    Returns
    -------
    n : ndarray, shape (m,)
        The *m* numbers of total atoms in the spheres for the ``"volume"``
        query type.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.

    Raises
    ------
    ValueError
        If ``query['type']`` is neither ``"neighbor"`` nor ``"volume"``.
    """
    #
    # set atomic types (first column of the data array)
    types = data[:, 0].astype(int)
    #
    # build tree (periodic if box dimensions are provided)
    if kwargs.get('verbose', False) == True:
        print('Building tree ...')
    tree = cKDTree(data[:, 1:4], boxsize = kwargs.get('box', None))
    #
    # we may require at least 2 GB of available memory
    mem_available = virtual_memory().available * 1e-9
    if mem_available < 2.0:
        print('ERROR: Found only {0:.2f} GB of available memory. This may not '
              'be sufficient for calculations.\nExiting...'
              .format(mem_available), file = stderr)
        exit(1)
    #
    # call respective wrapper for neighbor search
    if query['type'] == 'neighbor':
        return _query_nearest(tree, query_points, query, types, **kwargs)
    elif query['type'] == 'volume':
        return _query_volume(tree, query_points, query, types, **kwargs)
    else:
        # previously, an unknown query type silently returned None
        raise ValueError("Unknown query type {0!r}; expected 'neighbor' or "
                         "'volume'.".format(query['type']))
#
#
#
#
def get_extended_cell_geometry(comment):
    """Get simulation cell geometry in *extended xyz* format.

    This method checks an OVITO comment line in standard *xyz* format for the
    simulation cell geometry and, if present, returns the cell geometry in
    *extended xyz* format.

    Parameters
    ----------
    comment : str
        A valid OVITO comment line from a file in *xyz* format.

    Returns
    -------
    cell : str
        The simulation cell geometry in *extended xyz* format (empty if no
        cell geometry found).
    """
    #
    # split comment line into single tokens
    tokens = comment.split()
    #
    # get cell vectors; only ValueError (token not found) is expected here —
    # the previous bare 'except' clauses could mask unrelated errors
    cell = ""
    for i in range(1, 4):
        try:
            index = tokens.index('cell_vec{0:d}'.format(i)) + 1
            cell += " ".join(tokens[index:index + 3]) + " "
        except ValueError:
            warnings.warn("Could not find 'cell_vec{0:d}' in comment line. "
                          "Skipping …".format(i))
            return ""
    cell = "Lattice=\"" + cell[:-1] + "\" "
    #
    # get cell origin (optional; a missing origin only triggers a warning)
    try:
        index = tokens.index('cell_orig') + 1
        cell += "Origin=\"" + " ".join(tokens[index:index + 3]) + "\" "
    except ValueError:
        warnings.warn("Could not find 'cell_orig' in comment line. "
                      "Skipping …" )
    #
    # return cell geometry
    return cell
#
#
#
#
def get_margin_filter(query_points, r, box_l, box_u, threshold = 1.1):
    """Automatically filter margin region.

    Query points close to the surface in the margin region should not be
    included in the evaluation since the nearest neighbors will typically not
    be confined in a spherical volume, but rather in a truncated sphere. Only
    if the distance of the query point to the surface is sufficiently large,
    the maximum nearest neighbor distance becomes equal to the sphere radius.
    The critical condition is reached if the distance of the query point is
    identical to the maximum nearest neighbor distance. By default, an
    additional buffer of 10% is included, which can be controlled by the
    optional ``threshold`` argument.

    Parameters
    ----------
    query_points : ndarray, shape (n, 3)
        The *n* three-dimensional query points.
    r : ndarray, shape (n,)
        The *n* sphere radii (maximum nearest neighbor distances).
    box_l : ndarray, shape (3,)
        The lower box boundary.
    box_u : ndarray, shape (3,)
        The upper box boundary.
    threshold : float
        The optional threshold used for filtering. Defaults to 1.1.

    Returns
    -------
    mask : ndarray, shape (n,)
        The boolean mask indicating which query points do **not** belong to
        the margin region.
    """
    print("Calculating filter mask for query points in margin region ...")
    #
    # distance of every query point to the closest surface: first the
    # per-dimension distance to either boundary, then the minimum over all
    # dimensions
    dist_lower = query_points - box_l
    dist_upper = box_u - query_points
    surface_dist = np.minimum(dist_lower, dist_upper).min(axis = 1)
    #
    # keep only points whose surface distance is at least the sphere radius
    # scaled by the safety threshold
    mask = surface_dist >= threshold * r
    print("Margin region contains {0:d} query points."
          .format(np.count_nonzero(~mask)))
    return mask
#
#
#
#
def get_query_points(coords, **kwargs):
    """Get query points for neighbor search.

    This method can be used to specify the query points for the neighbor
    search. If no additional argument is provided, all atomic positions will
    be used as query points. The ``margin`` keyword argument can be used to
    exclude surface artifacts. With the ``distance`` keyword argument, a
    minimum separation between the query points is ensured, which is achieved
    by the construction of a regular three-dimensional grid. For periodic
    boxes, the box dimensions should be passed using the ``box`` keyword
    argument. By default, internal consistency checks for appropriate use of
    the ``margin`` keyword argument are performed, but these checks can be
    disabled with the ``no_margin_checks`` keyword argument.

    Parameters
    ----------
    coords : ndarray, shape (n, 3)
        The *n* three-dimensional coordinates.

    Keyword Arguments
    -----------------
    box : ndarray, shape (3,) or None
        The periodic box dimensions (if present).
    distance : float
        The (minimum) separation between the query points.
    margin : float
        The width of the margin region to exclude for the query points.
    no_margin_checks : bool
        Whether to disable internal margin checks.

    Returns
    -------
    query_points : ndarray, shape (m, 3)
        The *m* three-dimensional query points for the neighbor search.

    Raises
    ------
    ValueError
        If the keyword arguments are combined inconsistently or if no grid
        can be constructed for the requested separation.
    """
    #
    # get optional keyword arguments
    distance = kwargs.get('distance', None)
    is_periodic = (kwargs.get('box', None) is not None)
    margin = kwargs.get('margin', None)
    no_margin_checks = kwargs.get('no_margin_checks', False)
    #
    # do some error checking for non-reasonable combination of options;
    # raise the more specific ValueError instead of the generic Exception
    # (backward compatible: still caught by handlers for Exception)
    if not no_margin_checks:
        if is_periodic and margin is not None:
            raise ValueError("You cannot use the \"--margin\" option with "
                             "periodic boundary conditions.")
        if not is_periodic and margin is None:
            raise ValueError("You must use the \"--margin\" option to exclude "
                             "surface artifacts. (See \"--help\" for "
                             "details.)")
    #
    # filter margin region if requested
    if margin is not None:
        coords = _filter_margin(coords, margin)
    #
    # without a requested separation, simply use all (filtered) positions
    if distance is None:
        return coords
    if distance <= 0.0:
        raise ValueError("Distance ({0:.3f}) must be positive.".
                         format(distance))
    #
    # get minimum and maximum positions for each direction
    if is_periodic:
        min_pos = [0.0, 0.0, 0.0]
        max_pos = kwargs.get('box')
    else:
        min_pos = np.amin(coords, axis = 0)
        max_pos = np.amax(coords, axis = 0)
    #
    # construct 1d grid for each direction
    grid = []
    for i in range(0, 3):
        # number of grid points
        n_grid = int((max_pos[i] - min_pos[i]) / distance) + 1
        if n_grid <= 1:
            raise ValueError("Cannot construct grid. Separation too big?")
        #
        # actual separation between grid points
        delta = (max_pos[i] - min_pos[i]) / (n_grid - 1)
        #
        # construct grid
        if is_periodic:
            # exclude end point (identical to start point under pbc)
            grid.append(np.linspace(
                min_pos[i], max_pos[i] - delta, num = n_grid - 1))
        else:
            # include start and end point
            grid.append(np.linspace(min_pos[i], max_pos[i], num = n_grid))
    #
    # construct 3d grid out of 1d grids
    return np.vstack(
        np.meshgrid(grid[0], grid[1], grid[2], indexing = 'ij')
    ).reshape(3, -1).T
#
#
#
#
################################################################################
#
# private module-level functions
#
################################################################################
def _filter_margin(coords, w):
"""Filter coordinates which have a distance of lower than :math:`w` to the
boundaries.
Parameters
----------
coords : ndarray, shape (n, 3)
The *n* three-dimensional coordinates.
w : float
The width :math:`w` of the margin region to exclude.
Returns
-------
coords_filtered : ndarray, shape (m, 3)
The *m* three-dimensional filtered coordinates.
"""
#
#
if w <= 0.0:
print('ERROR: Margin ({0:.3f}) must be positive.'.format(w),
file = stderr)
exit(1)
#
# set minimum and maximum positions
min_pos = np.amin(coords, axis = 0)
max_pos = np.amax(coords, axis = 0)
#
# filter query points
return coords[
(coords[:, 0] > min_pos[0] + w) &
(coords[:, 1] > min_pos[1] + w) &
(coords[:, 2] > min_pos[2] + w) &
(coords[:, 0] < max_pos[0] - w) &
(coords[:, 1] < max_pos[1] - w) &
(coords[:, 2] < max_pos[2] - w)]
#
#
#
#
def _get_composition(indices, types):
"""Evaluate composition within a neighbor list.
Parameters
----------
indices: ndarray, shape (m,) or list of length m
The indices of the *m* neighbors, i.e. the neighbor list.
types : ndarray, shape(n,)
The *n* atomic types.
Returns
-------
n_2 : int
The number of type 2 atoms in the neighbor list.
"""
#
#
# count type 2 atoms
return np.count_nonzero(types[indices] == 2)
#
#
#
#
def _query(tree, query_points, query, types, **kwargs):
    """Query neighbors.

    Depending on the value of ``type`` in the **query** dictionary argument,
    either the nearest neighbors (``"neighbor"``) or neighbors within a fixed
    distance/volume (``"volume"``) will be searched.

    Parameters
    ----------
    tree : cKDTree
        The cKDTree object.
    query_points : ndarray, shape (m, 3)
        The *m* three-dimensional query points for the search.
    query : dict
        The dictionary containing the query type and the query parameter
        (number of neighbors or neighbor search cutoff).
    types : ndarray, shape (n,)
        The *n* atomic types.

    Keyword Arguments
    -----------------
    workers : int
        The number of worker threads to use. Defaults to ``-1``, i.e. use all
        system cores.

    Returns
    -------
    r : ndarray, shape (m,)
        The *m* sphere radii, i.e. the distance to the furthest neighbor for
        the ``"neighbor"`` query type.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.

    or

    Returns
    -------
    n : ndarray, shape (m,)
        The *m* numbers of total atoms in the spheres for the ``"volume"``
        query type.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.
    """
    #
    #
    # depending on the invocation mode, we need to search neighbors differently,
    # i.e. constant number or constant volume
    # (NOTE(review): any other query type leaves 'indices' undefined and fails
    # below with an UnboundLocalError — callers are expected to validate the
    # type beforehand)
    if query['type'] == 'neighbor':
        # query neighbors
        dists, indices = tree.query(
            query_points, k = query['param'],
            workers = kwargs.get('workers', -1))
        #
        #
        # distances are sorted, so maximum distance is last entry;
        # create copy of maximum distances to allow freeing of full distance
        # array (for k == 1, tree.query() returns a 1d array, hence the case
        # distinction below)
        if query['param'] == 1:
            r_sphere = np.copy(dists)
        else:
            r_sphere = np.copy(dists[:, -1])
        dists = None
    elif query['type'] == 'volume':
        # query neighbors (sort order of the neighbor lists is irrelevant for
        # counting, so skip sorting)
        indices = tree.query_ball_point(
            query_points, query['param'], workers = kwargs.get('workers', -1),
            return_sorted = False)
    #
    #
    #
    #
    # we need to call _get_composition with a second argument, so we create a
    # partial object
    get_composition_partial_obj = partial(_get_composition, types = types)
    #
    #
    # calculate compositions in parallel
    pool = multiprocessing.Pool()
    # (setting an explicit value for the chunk size in pool.map() may reduce
    # memory usage)
    n_2 = np.asarray(pool.map(get_composition_partial_obj, indices))
    #
    # in volume mode, we also need to evaluate the number of neighbors
    if query['type'] == 'volume':
        neighbor_counts = np.asarray(pool.map(len, indices))
    #
    pool.close()
    pool.join()
    #
    # free full index array (may be large)
    indices = None
    #
    #
    #
    #
    # return maximum neighbor counts and compositions
    if query['type'] == 'volume':
        return neighbor_counts, n_2
    # return maximum radii and compositions
    elif query['type'] == 'neighbor':
        return r_sphere, n_2
#
#
#
#
def _query_nearest(tree, query_points, query, types, **kwargs):
    """Query nearest neighbors.

    Before the neighbor search is performed, this method estimates the amount
    of memory needed for the search. If insufficient free memory is available,
    the neighbor search is split into smaller chunks so that approximately an
    internally specified fraction of the currently available memory is used at
    maximum (if not a lower amount is specified otherwise with the ``memory``
    keyword argument).

    Parameters
    ----------
    tree : cKDTree
        The cKDTree object.
    query_points : ndarray, shape (m, 3)
        The *m* three-dimensional query points for the search.
    query : dict
        The dictionary containing the query type (``"neighbor"``) and the
        query parameter (number of neighbors).
    types : ndarray, shape (n,)
        The *n* atomic types.

    Keyword Arguments
    -----------------
    verbose : bool
        Whether to be verbose. Default: ``False``.
    memory : float
        The optional maximum amount of memory (GB) to use.
    workers : int
        The optional number of worker threads to use.

    Returns
    -------
    r : ndarray, shape (m,)
        The *m* sphere radii, i.e. the distance to the furthest neighbor.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.
    """
    #
    #
    # set verbosity
    verbose = kwargs.get('verbose', False)
    #
    #
    # estimated memory in GB (factor two accounts for indices and distances;
    # the factor eight is presumably the size in bytes of one 64-bit entry —
    # TODO confirm)
    mem_estimated = len(query_points) * query['param'] * 2 * 8 * 1e-9
    #
    # get internal memory limit (fraction of the currently available memory)
    mem_limit = _mem_threshold * virtual_memory().available * 1e-9
    #
    #
    # check allowed memory (a user request above the internal limit is capped
    # with a warning)
    mem_req = mem_limit if kwargs.get('memory') is None \
        else kwargs.get('memory')
    if mem_req > mem_limit:
        print('WARNING: Requested amount of memory ({0:.1f} GB) exceeds '
              'internal limit ({1:.1f} GB). Using internal limit.'.
              format(mem_req, mem_limit))
        mem_req = mem_limit
    #
    #
    # test for sufficient available memory
    if mem_estimated > mem_req:
        if verbose == True:
            print('NOTE: Estimated memory usage ({0:.1f} GB) exceeds '
                  'requested/available memory ({1:.1f} GB).'.format(
                  mem_estimated, mem_req))
        #
        # set number of chunks
        chunks = int(np.ceil(mem_estimated / mem_req))
        if verbose == True:
            print(' Splitting problem into {0:d} chunks. (This may cause '
                  'overhead.)'.format(chunks))
        #
        #
        # initialize empty arrays (partial results are appended chunk by
        # chunk)
        dists = np.array([], dtype = float)
        compositions = np.array([], dtype = int)
        #
        # split query points into smaller chunks
        if verbose == True:
            print('Searching neighbors and evaluating compositions ',
                  end = '', flush = True)
        for query_points_partial in np.array_split(query_points, chunks):
            if verbose == True:
                # progress indicator: one dot per processed chunk
                print('.', end = '', flush = True)
            #
            # get partial results
            dists_partial, compositions_partial = _query(
                tree, query_points_partial, query, types, **kwargs)
            #
            # append partial results
            dists = np.append(dists, dists_partial)
            compositions = np.append(compositions, compositions_partial)
        if verbose == True:
            print('')
        #
        # return complete results
        return dists, compositions
    else:
        # search all neighbors at once
        if verbose == True:
            print('Searching neighbors and evaluating compositions ...')
        return _query(tree, query_points, query, types, **kwargs)
#
#
#
#
def _query_volume(tree, query_points, query, types, **kwargs):
    """Query neighbors within cutoff distance.

    Before the neighbor search is performed, this method estimates the amount
    of memory needed for the search. If insufficient free memory is available,
    the neighbor search is split into smaller chunks. However, the
    ``query_ball_point()`` method over-allocates memory in a weird way so that
    a precise estimate is difficult. Here, a rather conservative approach is
    used.

    Parameters
    ----------
    tree : cKDTree
        The cKDTree object.
    query_points : ndarray, shape (m, 3)
        The *m* three-dimensional query points for the search.
    query : dict
        The dictionary containing the query type (``"volume"``) and the query
        parameter (neighbor search cutoff).
    types : ndarray, shape (n,)
        The *n* atomic types.

    Keyword Arguments
    -----------------
    verbose : bool
        Whether to be verbose. Default: ``False``.
    memory : float
        The optional maximum amount of memory (GB) to use.
    workers : int
        The number of worker threads to use.

    Returns
    -------
    n : ndarray, shape (m,)
        The *m* numbers of total atoms in the spheres.
    n_2 : ndarray, shape (m,)
        The *m* numbers of type 2 atoms in the spheres.
    """
    #
    #
    # set verbosity
    verbose = kwargs.get('verbose', False)
    #
    #
    # in order to estimate memory usage, we have to obtain the approximate
    # number of neighbors first with a sample set (at most 1000 query points)
    samples = min(1000, len(query_points))
    neighbor_list = tree.query_ball_point(
        query_points[0:samples], query['param'])
    #
    # estimated memory in GB (factor eight accounts for weird memory
    # over-allocation); the sampled list sizes are extrapolated to all query
    # points — getsizeof() is shallow, hence the extra per-list sum
    mem_estimated = 8 * 1e-9 * len(query_points) / samples * \
        (getsizeof(neighbor_list) + sum(map(getsizeof, neighbor_list)))
    samples = None
    #
    #
    # get internal memory limit (fraction of the currently available memory)
    mem_limit = _mem_threshold * virtual_memory().available * 1e-9
    #
    #
    # check allowed memory (a user request above the internal limit is capped
    # with a warning)
    mem_req = mem_limit if kwargs.get('memory') is None \
        else kwargs.get('memory')
    if mem_req > mem_limit:
        print('WARNING: Requested amount of memory ({0:.1f} GB) exceeds '
              'internal limit ({1:.1f} GB). Using internal limit.'.
              format(mem_req, mem_limit))
        mem_req = mem_limit
    #
    #
    # test for sufficient available memory
    if mem_estimated > mem_req:
        if verbose == True:
            print('NOTE: Estimated memory usage ({0:.1f} GB) exceeds '
                  'requested/available memory ({1:.1f} GB).'.format(
                  mem_estimated, mem_req))
        #
        # set number of chunks
        chunks = int(np.ceil(mem_estimated / mem_req))
        if verbose == True:
            print(' Splitting problem into {0:d} chunks. (This may cause '
                  'overhead.)'.format(chunks))
        #
        #
        # initialize empty arrays (partial results are appended chunk by
        # chunk)
        neighbors = np.array([], dtype = int)
        compositions = np.array([], dtype = int)
        #
        # split query points into smaller chunks
        if verbose == True:
            print('Searching neighbors and evaluating compositions ',
                  end = '', flush = True)
        for query_points_partial in np.array_split(query_points, chunks):
            if verbose == True:
                # progress indicator: one dot per processed chunk
                print('.', end = '', flush = True)
            #
            # get partial results
            neighbors_partial, compositions_partial = _query(
                tree, query_points_partial, query, types, **kwargs)
            #
            # append partial results
            neighbors = np.append(neighbors, neighbors_partial)
            compositions = np.append(compositions, compositions_partial)
        if verbose == True:
            print('')
        #
        # return complete results
        return neighbors, compositions
    else:
        # search all neighbors at once
        if verbose == True:
            print('Searching neighbors and evaluating compositions ...')
        return _query(tree, query_points, query, types, **kwargs)