# -*- coding: utf-8 -*-
# Copyright (c) 2023, PyRETIS Development Team.
# Distributed under the LGPLv2.1+ License. See LICENSE for more info.
"""Methods for analysis of path ensembles.

Important methods defined here
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

analyse_path_ensemble (:py:func:`.analyse_path_ensemble`)
    Method to analyse a path ensemble. It calculates crossing
    probabilities and information about moves, etc. This method
    can be applied to files as well as path ensemble objects.

analyse_repptis_ensemble (:py:func:`.analyse_repptis_ensemble`)
    Method to analyse a path ensemble from a REPPTIS simulation.
    It is a modification of :py:func:`.analyse_path_ensemble` that
    also resolves the crossing probabilities for paths starting at
    the left and at the right interface.

match_probabilities (:py:func:`.match_probabilities`)
    Match probabilities from several path ensembles and calculate
    efficiencies and the error for the matched probability.

retis_flux (:py:func:`.retis_flux`)
    Calculate the initial flux with errors for a RETIS simulation.

retis_rate (:py:func:`.retis_rate`)
    Calculate the rate constant with errors for a RETIS simulation.
"""
import os
import logging
import numpy as np
from pyretis.analysis.analysis import running_average, block_error_corr
from pyretis.analysis.histogram import histogram, histogram_and_avg
from pyretis.inout.formats import OrderPathFile

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
logger.addHandler(logging.NullHandler())

np.set_printoptions(legacy='1.25')

__all__ = ['analyse_path_ensemble', 'analyse_repptis_ensemble',
           'match_probabilities', 'retis_flux', 'retis_rate',
           'perm_calculations', 'skip_paths']


SHOOTING_MOVES = {'sh', 'ss', 'wt', 'wf', 'ts'}
INITIAL_MOVES = {'ki', 'ld', 'is', 're'}


def _pcross_lambda_cumulative(orderparam, ordermin, ordermax, ngrid,
                              weights=None, ha_weights=None):
    """Obtain the crossing probability as a function of the order parameter.

    It will do the actual calculation of the crossing probability as
    a function of the order parameter. It is split off from
    `pcross_lambda` since the analysis is intended to be backward
    compatible with the output/results from the old ``TISMOL FORTRAN``
    program.

    Parameters
    ----------
    orderparam : numpy.array
        Array containing the order parameters.
    ordermin : float
        Minimum allowed order parameter.
    ordermax : float
        Maximum allowed order parameter.
    ngrid : int
        This is the number of grid points.
    weights : numpy.array, optional
        The weight of each order parameter. This is used in order to
        count a specific order parameter more than once. If not given,
        the values in `orderparam` will be weighted equally.
    ha_weights : numpy.array, optional
        The weights of the high acceptance scheme (e.g. Stone Skipping).

    Returns
    -------
    lamb : numpy.array
        The grid on which pcross is based.
    pcross : numpy.array
        Estimated crossing probability distribution.

    """
    lamb = np.linspace(ordermin, ordermax, ngrid)
    pcross = np.zeros(ngrid)
    delta_l = lamb[1] - lamb[0]
    sumw = 0.0
    for i, orderp in enumerate(orderparam):
        idx = np.floor((orderp - ordermin) / delta_l)
        idx = max(0, int(idx) + 1)
        # +1: idx is here defined so that lamb[idx-1] <= orderp < lamb[idx]
        # further this lambda will contribute up to and including lamb[idx]
        # this is accomplished by the idx+1 when summing weights below
        weight = 1. if weights is None else weights[i]
        ha_weight = 1. if ha_weights is None else ha_weights[i]
        sumw += weight*ha_weight
        if idx >= ngrid:
            pcross += weight*ha_weight
        else:
            pcross[:idx + 1] += weight*ha_weight  # +1 to include up to idx
    pcross /= sumw  # normalisation
    return lamb, pcross
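

# The example below is an illustrative sketch and not part of the PyRETIS
# API: it shows how `_pcross_lambda_cumulative` turns a set of (made-up)
# maximum order parameters into a cumulative crossing probability.
def _example_pcross_usage():
    """Illustrate `_pcross_lambda_cumulative` on toy data (sketch only)."""
    # Maximum order parameters of five hypothetical accepted paths:
    ordermax = np.array([1.1, 1.4, 1.2, 2.0, 1.05])
    # Grid from the first crossed interface (1.0) up to the last one (2.0):
    lamb, pcross = _pcross_lambda_cumulative(ordermax, 1.0, 2.0, 11)
    # pcross starts at 1 (all paths reach the first interface) and decays
    # towards the last interface, which only one of the toy paths reaches.
    return lamb, pcross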


def _get_path_length(path, ensemble_number):
    """Return the path length for different moves.

    Different moves may have a different way of obtaining the path
    length (example: time-reversal vs. shooting moves).

    Parameters
    ----------
    path : dict
        This is the dict containing the information about the path.
        It can typically be obtained by iterating over the path
        ensemble object, e.g. with a
        `for path in path_ensemble.get_paths():`.
    ensemble_number : int
        This integer identifies the ensemble. This is used for the
        swapping moves in [0^-] and [0^+].

    Returns
    -------
    out : int
        The path length.

    """
    move = path['generated'][0]
    return_table = {'tr': 0, 's+': 0, 's-': 0, '00': 0, 'mr': 0}
    if move in return_table:
        if move == 's+' and ensemble_number == 0:
            return path['length'] - 2
        if move == 's-' and ensemble_number == 1:
            return path['length'] - 2
        return return_table[move]
    if move in SHOOTING_MOVES:
        return path['length'] - 1
    if move in INITIAL_MOVES:
        logger.info('Skipped initial path (move "%s")', move)
        return None
    logger.warning('Skipped path with unknown mc move: %s', move)
    return None


def _update_shoot_stats(shoot_stats, path):
    """Update the shooting statistics with the status of the given path.

    Parameters
    ----------
    shoot_stats : dict
        This dict contains the results from the analysis of shooting
        moves. E.g. `shoot_stats[key]` contains the order parameters
        for the status `key`, which can be one of the statuses defined
        in `pyretis.core.path._STATUS` or 'REJ' (for rejected).
    path : dict
        This is the path information, represented as a dictionary.

    Returns
    -------
    out : None
        Returns `None` but will update `shoot_stats` for shooting
        moves.

    """
    move = path['generated'][0]
    if move in SHOOTING_MOVES:
        orderp = path['generated'][1]
        status = path['status']
        if status not in shoot_stats:
            shoot_stats[status] = []
        shoot_stats[status].append(orderp)
        if status != 'ACC':
            shoot_stats['REJ'].append(orderp)
        shoot_stats['ALL'].append(orderp)


def _create_shoot_histograms(shoot_stats, bins):
    """Create histograms and scale for the shoot analysis.

    Parameters
    ----------
    shoot_stats : dict
        This dict contains the results from the analysis of shooting
        moves. E.g. `shoot_stats[key]` contains the order parameters
        for the status `key`, which can be one of the statuses defined
        in `pyretis.core.path._STATUS` or 'REJ' (for rejected).
    bins : int
        The number of bins to use for the histograms.

    Returns
    -------
    out[0] : dict
        For each possible status ('ACC', 'BWI', etc.) this dict will
        contain a histogram as returned by the histogram function. It
        will also contain a 'REJ' key which is the concatenation of
        all rejections and an 'ALL' key which is simply all the values.
    out[1] : dict
        For each possible status ('ACC', 'BWI', etc.) this dict will
        contain the scale factors for the histograms. The scale
        factors are obtained by dividing with the 'ALL' value.

    See Also
    --------
    :py:func:`.histogram` in :py:mod:`.histogram`.

    """
    histograms = {}
    scale = {}
    for key in shoot_stats:
        if not shoot_stats[key]:
            logger.warning('No shoots data found for %s (empty histogram)',
                           key)
            shoot_stats[key] = np.array([])
            mind = 0.0
            maxd = 0.1
        else:
            shoot_stats[key] = np.array(shoot_stats[key])
            mind = shoot_stats[key].min()
            maxd = shoot_stats[key].max()
        histograms[key] = histogram(shoot_stats[key], bins=bins,
                                    limits=(mind, maxd), density=True)
        scale[key] = (float(len(shoot_stats[key])) /
                      float(len(shoot_stats['ALL'])))
    return histograms, scale


def analyse_path_ensemble(path_ensemble, settings):
    """Analyse a path ensemble.

    This function will make use of the different analysis functions
    and analyse a path ensemble. It is general in the sense that it
    should work on both `PathEnsemble` and `PathEnsembleFile` objects.
    The running average is updated on-the-fly, see Wikipedia for
    details [wikimov]_.

    Parameters
    ----------
    path_ensemble : object like :py:class:`.PathEnsemble`
        This is the path ensemble to analyse.
    settings : dict
        This dictionary contains settings for the analysis. We make
        use of the following keys:

        * `ngrid`: The number of grid points for calculating the
          crossing probability as a function of the order parameter.

        * `maxblock`: The max length of the blocks for the block error
          analysis. Note that this will at most be equal to half of
          the length of the data, see `block_error` in `.analysis`.

        * `blockskip`: Can be used to skip certain block lengths.
          A `blockskip` equal to `n` will consider every n'th block up
          to `maxblock`, i.e. it will use block lengths equal to `1`,
          `1+n`, `1+2n`, etc.

        * `bins`: The number of bins to use for creating histograms.

    Returns
    -------
    out : dict
        This dictionary contains the main results for the analysis
        which can be used for plotting or other kinds of output.

    See Also
    --------
    :py:func:`._update_shoot_stats`,
    :py:func:`._pcross_lambda_cumulative` and
    :py:func:`._create_shoot_histograms`.

    References
    ----------
    .. [wikimov] Wikipedia, "Moving Average",
       https://en.wikipedia.org/wiki/Moving_average

    """
    detect = settings['tis']['detect']
    ens_number = path_ensemble.ensemble_number
    if ens_number == 0:
        return analyse_path_ensemble0(path_ensemble, settings)
    result = {'cycle': [], 'detect': detect,
              'ensemble': path_ensemble.ensemble_name,
              'ensembleid': ens_number,
              'interfaces': list(path_ensemble.interfaces)}
    orderparam = []  # list of all accepted order parameters
    weights = []
    pdata = []
    length_acc = []
    length_all = []
    shoot_stats = {'REJ': [], 'ALL': []}
    ha_w = 0
    nacc = 0
    nacc_swap = 0
    npath = 0
    npath_swap = 0
    skip_lines = settings.get("analysis", {}).get("skip", 0)
    skip_weight = 0
    production_run = False  # True from the first ACC path not in I.M.
    for i, path in enumerate(path_ensemble.get_paths()):  # loop over paths
        if not production_run and path['status'] == 'ACC' and \
                path['generated'][0] in SHOOTING_MOVES:
            production_run = True
        if not production_run:
            continue
        npath += 1
        if path['generated'][0] in ('s-', 's+'):
            npath_swap += 1
            if path['status'] == 'ACC':
                nacc_swap += 1
        if path['status'] == 'ACC':
            nacc += 1
            weights.append(1)
            ha_w = 1./path.get('weight', 1.)
            orderparam.append(path['ordermax'][0])
            length_acc.append([path['length'], ha_w])
            if ens_number != 0:
                success = 1 if path['ordermax'][0] > detect else 0.
                # Store data for block analysis:
                pdata.append([success, ha_w])
        elif nacc != 0:  # just increase the weights
            weights[-1] += 1
        if i < skip_lines:
            skip_weight += 1
        # we also update the running average of the probability here:
        result['cycle'].append(path['cycle'])
        # get the length - note that this length depends on the type of
        # move, see the `_get_path_length` function.
        length = _get_path_length(path, ens_number)
        if length is not None:
            length_all.append([length, ha_w])
        # update the shoot stats, this will only be done for shooting moves
        _update_shoot_stats(shoot_stats, path)
    # Update the weights for skipped paths
    weights, nacc = skip_paths(weights, nacc, skip_weight)
    # Check if we have enough data to do analysis:
    assert nacc != 0, \
        f'No accepted paths to analyse in ensemble {ens_number}'
    # When restarting a simulation, or when stacking together different
    # simulations, the cycles might not be in order. We thus reset the
    # counter.
    result['cycle'] = np.arange(len(result['cycle']))
    # 1) result['prun'] is already calculated.
    pdata = np.array(pdata)
    result['pdata'] = np.array(pdata)
    # 2) lambda pcross:
    analysis = settings['analysis']
    if ens_number != 0:
        # 1) result['prun'] is calculated here with the correct path weights.
        result['prun'] = running_average(np.repeat(pdata, weights, axis=0))
        result['pdata'] = np.array(pdata)
        # 2) lambda pcross:
        orderparam = np.array(orderparam)
        ordermax = min(orderparam.max(), max(path_ensemble.interfaces))
        result['pcross'] = _pcross_lambda_cumulative(
            orderparam,
            path_ensemble.interfaces[1],
            ordermax,
            analysis['ngrid'],
            weights=weights,
            ha_weights=result['pdata'][:, 1])
    # 3) block error analysis:
    result['blockerror'] = block_error_corr(
        data=np.repeat(pdata, weights, axis=0),
        maxblock=analysis['maxblock'],
        blockskip=analysis['blockskip'])
    # 4) length analysis:
    length_acc = np.array(length_acc)
    length_all = np.array(length_all)
    hist1 = histogram_and_avg(np.repeat(length_acc[:, 0], weights),
                              analysis['bins'], density=True,
                              weights=np.repeat(length_acc[:, 1], weights))
    hist2 = histogram_and_avg(np.array(length_all[:, 0]), analysis['bins'],
                              density=True, weights=length_all[:, 1])
    result['pathlength'] = (hist1, hist2)
    # 5) shoots analysis:
    result['shoots'] = _create_shoot_histograms(shoot_stats,
                                                analysis['bins'])
    # 6) Add some simple efficiency metrics:
    result['tis-cycles'] = npath
    result['efficiency'] = [float(nacc - nacc_swap) /
                            float(npath - npath_swap)]
    if npath_swap == 0:
        result['efficiency'].append(np.nan)
    else:
        result['efficiency'].append(float(nacc_swap) / float(npath_swap))
    # extra analysis for the [0^-] and [0^+] ensembles in case we will
    # determine the initial flux:
    if ens_number in [0, 1]:
        result['lengtherror'] = block_error_corr(
            data=np.repeat(length_acc, weights, axis=0),
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
        lenge2 = result['lengtherror'][4] * hist1[2][0] / (hist1[2][0]-2.)
        result['fluxlength'] = [hist1[2][0]-2.0, lenge2,
                                lenge2 * (hist1[2][0]-2.)]
        result['fluxlength'].append(result['efficiency'][1] * lenge2**2)
    return result


def analyse_path_ensemble0(path_ensemble, settings):
    """Analyse the [0^-] ensemble.

    Parameters
    ----------
    path_ensemble : object like :py:class:`.PathEnsemble`
        This is the path ensemble to analyse.
    settings : dict
        This dictionary contains settings for the analysis. We make
        use of the following keys:

        * `ngrid`: The number of grid points for calculating the
          crossing probability as a function of the order parameter.

        * `maxblock`: The max length of the blocks for the block error
          analysis. Note that this will at most be equal to half of
          the length of the data, see `block_error` in `.analysis`.

        * `blockskip`: Can be used to skip certain block lengths.
          A `blockskip` equal to `n` will consider every n'th block up
          to `maxblock`, i.e. it will use block lengths equal to `1`,
          `1+n`, `1+2n`, etc.

        * `bins`: The number of bins to use for creating histograms.

    Returns
    -------
    result : dict
        The results from the analysis for the ensemble.

    """
    simulation_settings = settings.get('simulation', {})
    permeability = simulation_settings.get('permeability', False)
    detect = settings['tis']['detect']
    ensemble_number = path_ensemble.ensemble_number
    sub_cycles = settings.get('engine', {}).get('subcycles', 1)
    timestep = settings.get('engine', {}).get('timestep', 1)*sub_cycles
    if permeability:
        path_ensemble.interfaces = (
            settings['simulation'].get('zero_left', float('-inf')),
            path_ensemble.interfaces[1],
            path_ensemble.interfaces[2])
    result = {'cycle': [], 'detect': detect,
              'ensemble': path_ensemble.ensemble_name,
              'ensembleid': ensemble_number,
              'interfaces': list(path_ensemble.interfaces),
              'permeability': permeability}
    length_acc, length_all, weights = [], [], []
    # Regular settings['analysis'] is overwritten by the code that
    # generates out.rst, but this one is complete:
    analysis = settings['analysis']
    # Permeability analysis:
    bin_edges = analysis.get('tau_ref_bin', None)
    end_acc, tau_acc, tau_ref_acc = [], [], []
    tau_ref_bins = make_tau_ref_bins(list(path_ensemble.interfaces),
                                     nbins=10, tau_region=bin_edges)
    shoot_stats = {'REJ': [], 'ALL': []}
    nacc, npath = 0, 0
    nacc_swap, npath_swap = 0, 0
    production_run = True
    fn_op = path_ensemble.filename.rsplit("/", 1)[0] + "/order.txt"
    o_ps = None
    op_data = None
    skip_lines = settings.get("analysis", {}).get("skip", 0)
    skip_weight = 0
    if os.path.isfile(fn_op):
        op_file = OrderPathFile(fn_op, 'r')
        o_ps = op_file.load()
        op_file.close()
    for num, path in enumerate(path_ensemble.get_paths()):  # loop over paths
        if o_ps is not None:
            err = False
            try:
                o_p = next(o_ps)
            except StopIteration:
                o_ps = None
                err = True
            if not err:
                cycle_comment = [i for i in o_p['comment']
                                 if "Cycle" in i][-1]
                op_cycle = int(
                    cycle_comment.split(',')[0].split(':')[1].strip())
                err = op_cycle != path['cycle']
            if not err:
                op_data = o_p['data'].T
            if err:
                op_data = None
                msg = ("Order-file does not align with path ensemble, "
                       "setting op data to None")
                logger.warning(msg)
        if path['generated'][0] in INITIAL_MOVES:
            production_run = False
        if not production_run and path['status'] == 'ACC' and \
                path['generated'][0] in SHOOTING_MOVES:
            production_run = True
        if not production_run:
            continue
        npath += 1
        if path['generated'][0] in ('s-', 's+'):
            npath_swap += 1
            if path['status'] == 'ACC':
                nacc_swap += 1
        if path['status'] == 'ACC':
            nacc += 1
            ha_w = 1./path.get('weight', 1.)
            weights.append(1)
            length_acc.append([path['length'], ha_w])
            # Get the end point:
            end_acc.append(path['interface'][-1])
            # Calculate tau:
            tau_acc.append(_calc_tau(op_data, bin_edges, timestep=timestep))
            tau_ref_acc.append(
                [_calc_tau(op_data, edges, timestep=timestep)
                 for edges in tau_ref_bins]
            )
        elif nacc != 0:  # just increase the weights
            weights[-1] += 1
        if num < skip_lines:
            skip_weight += 1
        result['cycle'].append(path['cycle'])
        length = _get_path_length(path, ensemble_number)
        if length is not None:
            ha_w = 1./path.get('weight', 1.)
            length_all.append([length, ha_w])
        # update the shoot stats, this will only be done for shooting moves
        _update_shoot_stats(shoot_stats, path)
    # Update the weights to skip properly:
    weights, nacc = skip_paths(weights, nacc, skip_weight)
    length_acc = np.array(length_acc)
    length_all = np.array(length_all)
    # Check if we have enough data to do analysis:
    ens_number = 0
    assert nacc != 0, \
        f'No accepted paths to analyse in ensemble {ens_number}'
    # Perform the different analysis tasks:
    result['cycle'] = np.array(result['cycle'])
    # 1) length analysis:
    hist1 = histogram_and_avg(np.repeat(length_acc[:, 0], weights),
                              analysis['bins'], density=True)
    hist2 = histogram_and_avg(np.array(length_all[:, 0]), analysis['bins'],
                              density=True)
    result['pathlength'] = (hist1, hist2)
    # 2) block error of lengths:
    result['lengtherror'] = block_error_corr(
        data=np.repeat(length_acc, weights, axis=0),
        maxblock=analysis['maxblock'],
        blockskip=analysis['blockskip'])
    # 3) shoots analysis:
    result['shoots'] = _create_shoot_histograms(shoot_stats,
                                                analysis['bins'])
    # 4) Add some simple efficiency metrics:
    result['efficiency'] = [float(nacc - nacc_swap) /
                            float(npath - npath_swap)]
    if npath_swap == 0:
        result['efficiency'].append(np.nan)
    else:
        result['efficiency'].append(float(nacc_swap) / float(npath_swap))
    lenge2 = result['lengtherror'][4] * hist1[2][0] / (hist1[2][0]-2.)
    result['fluxlength'] = [hist1[2][0]-2.0, lenge2,
                            lenge2 * (hist1[2][0]-2.)]
    result['fluxlength'].append(result['efficiency'][1] * lenge2**2)
    result['tis-cycles'] = npath
    # 5) optional, do the permeability analysis:
    if permeability:
        end_acc = [1 if i == 'R' else 0 for i in end_acc]
        # Xi is 1/running average
        result['xi'] = running_average(np.repeat(end_acc, weights))
        result['xierror'] = block_error_corr(
            result['xi'],
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
        result['tau'] = running_average(np.repeat(tau_acc, weights))
        result['tauerror'] = block_error_corr(
            result['tau'],
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
        result['tau_bin'] = bin_edges
        result['tau_ref'] = running_average(
            np.repeat(tau_ref_acc, repeats=weights, axis=0))[-1]
        result['tau_ref_bins'] = tau_ref_bins
    return result


def analyse_repptis_ensemble(path_ensemble, settings):
    """Analyse a REPPTIS path ensemble.

    This function is a modification of :py:func:`.analyse_path_ensemble`
    that makes use of the different analysis functions to analyse a
    REPPTIS path ensemble.

    Parameters
    ----------
    path_ensemble : object like :py:class:`.PathEnsemble`
        This is the path ensemble to analyse.
    settings : dict
        This dictionary contains settings for the analysis. We make
        use of the following keys:

        * `ngrid`: The number of grid points for calculating the
          crossing probability as a function of the order parameter.

        * `maxblock`: The max length of the blocks for the block error
          analysis. Note that this will at most be equal to half of
          the length of the data, see `block_error` in `.analysis`.

        * `blockskip`: Can be used to skip certain block lengths.
          A `blockskip` equal to `n` will consider every n'th block up
          to `maxblock`, i.e. it will use block lengths equal to `1`,
          `1+n`, `1+2n`, etc.

        * `bins`: The number of bins to use for creating histograms.

    Returns
    -------
    out : dict
        This dictionary contains the main results for the analysis
        which can be used for plotting or other kinds of output.

    """
    detect = settings['tis']['detect']
    ens_number = path_ensemble.ensemble_number
    if ens_number == 0:
        return analyse_path_ensemble0(path_ensemble, settings)
    result = {'cycle': [], 'detect': detect,
              'ensemble': path_ensemble.ensemble_name,
              'ensembleid': ens_number,
              'interfaces': list(path_ensemble.interfaces)}
    orderparam = []  # list of all accepted order parameters
    weights = []
    nacc_sl, nacc_sr = 0, 0
    weights_sl, weights_sr = [], []
    pdata = []
    pdata_sl = []  # start left
    pdata_sr = []  # start right
    length_acc = []
    length_all = []
    shoot_stats = {'REJ': [], 'ALL': []}
    ha_w = 0
    nacc = 0
    nacc_swap = 0
    npath = 0
    npath_swap = 0
    skip_lines = settings.get("analysis", {}).get("skip", 0)
    skip_weight = 0
    production_run = False  # True from the first ACC path not in I.M.
    lmrs = []  # keep track of a path's left, middle and right
    ordermin = []
    intfs000 = path_ensemble.interfaces
    ptypes = [[0, 0, 0, 0]]  # LML, LMR, RMR, RML amounts
    for i, path in enumerate(path_ensemble.get_paths()):  # loop over paths
        # we skip the first number of lines that should not be part of
        # the analysis.
        if i < skip_lines:
            continue
        if not production_run and path['status'] == 'ACC' and \
                path['generated'][0] in SHOOTING_MOVES:
            production_run = True
        if not production_run:
            continue
        npath += 1
        if path['generated'][0] in ('s-', 's+'):
            npath_swap += 1
            if path['status'] == 'ACC':
                nacc_swap += 1
        if path['status'] == 'ACC':
            nacc += 1
            weights.append(1)
            ha_w = 1./path.get('weight', 1.)
            orderparam.append(path['ordermax'][0])
            ordermin.append(path['ordermin'][0])
            length_acc.append([path['length'], ha_w])
            lmrs.append(''.join(path['interface']))
            if ens_number != 0:
                success = 1 if path['ordermax'][0] > detect else 0.
                # Store data for block analysis:
                pdata.append([success, ha_w])
                if lmrs[-1][0] == 'L':
                    nacc_sl += 1
                    weights_sl.append(1)
                    success_sl = 1 if path['ordermax'][0] > intfs000[2] \
                        else 0.
                    pdata_sl.append([success_sl, ha_w])
                    ptype = [0, 1, 0, 0] if success_sl == 1 \
                        else [1, 0, 0, 0]
                elif lmrs[-1][0] == 'R':
                    weights_sr.append(1)
                    nacc_sr += 1
                    success_sr = 1 if path['ordermin'][0] < intfs000[0] \
                        else 0.
                    pdata_sr.append([success_sr, ha_w])
                    ptype = [0, 0, 0, 1] if success_sr == 1 \
                        else [0, 0, 1, 0]
        elif nacc != 0:  # just increase the weights
            weights[-1] += 1
            if ens_number != 0:
                if lmrs[-1][0] == 'L':
                    weights_sl[-1] += 1
                elif lmrs[-1][0] == 'R':
                    weights_sr[-1] += 1
        if i < skip_lines:
            skip_weight += 1
        # Update ptypes with the last ptype:
        ptypes.append(ptype)
        # we also update the running average of the probability here:
        result['cycle'].append(path['cycle'])
        # get the length - note that this length depends on the type of
        # move, see the `_get_path_length` function.
        length = _get_path_length(path, ens_number)
        if length is not None:
            length_all.append([length, ha_w])
        # update the shoot stats, this will only be done for shooting moves
        _update_shoot_stats(shoot_stats, path)
    # save the ptypes, which will be used for recursive block analysis
    result['ptypes'] = np.cumsum(np.array(ptypes), axis=0)
    # Check if we have enough data to do analysis:
    assert nacc != 0, \
        f'No accepted paths to analyse in ensemble {ens_number}'
    # When restarting a simulation, or when stacking together different
    # simulations, the cycles might not be in order. We thus reset the
    # counter.
    result['cycle'] = np.arange(len(result['cycle']))
    # 1) result['prun'] is already calculated.
    pdata = np.array(pdata)
    result['pdata'] = np.array(pdata)
    # 2) lambda pcross:
    analysis = settings['analysis']
    if ens_number != 0:
        # 1) result['prun'] is calculated here with the correct path weights.
        result['prun_sl'] = running_average(
            np.repeat(pdata_sl, weights_sl, axis=0))
        result['prun_sr'] = running_average(
            np.repeat(pdata_sr, weights_sr, axis=0))
        result['pdata'] = np.array(pdata)
        # 2) lambda pcross:
        orderparam = np.array(orderparam)
        ordermax = min(orderparam.max(), max(path_ensemble.interfaces))
        result['pcross'] = _pcross_lambda_cumulative(
            orderparam,
            path_ensemble.interfaces[1],
            ordermax,
            analysis['ngrid'],
            weights=weights,
            ha_weights=result['pdata'][:, 1])
    # 3) block error analysis:
    # If we get here, it means that pdata_sl and/or pdata_sr is not
    # empty. So we set the non-empty one first, and then equate the
    # empty one to zeros and copy the blockerror of the non-empty one.
    if len(pdata_sl) > 0:
        result['blockerror_sl'] = block_error_corr(
            data=np.repeat(pdata_sl, weights_sl, axis=0),
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
    if len(pdata_sr) > 0:
        result['blockerror_sr'] = block_error_corr(
            data=np.repeat(pdata_sr, weights_sr, axis=0),
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
    if len(pdata_sl) == 0:
        result['blockerror_sl'] = result['blockerror_sr']
        result['prun_sl'] = np.zeros_like(result['prun_sr'])
    if len(pdata_sr) == 0:
        result['blockerror_sr'] = result['blockerror_sl']
        result['prun_sr'] = np.zeros_like(result['prun_sl'])
    # REPPTIS analysis:
    pp_dic = {'lmrs': np.array(lmrs),
              'interfaces': path_ensemble.interfaces,
              'weights': np.array(weights),
              'ordermin': np.array(ordermin),
              'ordermax': np.array(orderparam)}
    pptup = cross_dist_distr(pp_dic)
    result['repptis'] = pptup
    # 4) length analysis:
    length_acc = np.array(length_acc)
    length_all = np.array(length_all)
    hist1 = histogram_and_avg(np.repeat(length_acc[:, 0], weights),
                              analysis['bins'], density=True,
                              weights=np.repeat(length_acc[:, 1], weights))
    hist2 = histogram_and_avg(np.array(length_all[:, 0]), analysis['bins'],
                              density=True, weights=length_all[:, 1])
    result['pathlength'] = (hist1, hist2)
    # 5) shoots analysis:
    result['shoots'] = _create_shoot_histograms(shoot_stats,
                                                analysis['bins'])
    # 6) Add some simple efficiency metrics:
    result['tis-cycles'] = npath
    result['efficiency'] = [float(nacc - nacc_swap) /
                            float(npath - npath_swap)]
    swap_acc_ratio = float(nacc_swap) / float(npath_swap) if npath_swap > 0 \
        else np.nan
    result['efficiency'].append(swap_acc_ratio)
    # extra analysis for the [0^-] and [0^+] ensembles in case we will
    # determine the initial flux:
    if ens_number in [0, 1]:
        result['lengtherror'] = block_error_corr(
            data=np.repeat(length_acc, weights, axis=0),
            maxblock=analysis['maxblock'],
            blockskip=analysis['blockskip'])
        lenge2 = result['lengtherror'][4] * hist1[2][0] / (hist1[2][0]-2.)
        result['fluxlength'] = [hist1[2][0]-2.0, lenge2,
                                lenge2 * (hist1[2][0]-2.)]
        result['fluxlength'].append(result['efficiency'][1] * lenge2**2)
    return result


def match_probabilities(path_results, detect, settings=None):
    """Match probabilities from several path ensembles.

    It calculates the efficiencies and errors for the matched
    probability.

    Parameters
    ----------
    path_results : list
        These are the results from the path analysis.
        `path_results[i]` contains the output from
        `analyse_path_ensemble` applied to ensemble no. `i`. Here we
        make use of the following keys from `path_results[i]`:

        * `pcross`: The crossing probability.

        * `prun`: The running average of the crossing probability.

        * `blockerror`: The output from the block error analysis.

        * `efficiency`: The output from the efficiency analysis.

    detect : list of floats
        These are the detect interfaces used in the analysis.
    settings : dict, optional
        This dictionary contains settings for the analysis. Here we
        make use of the following keys from the analysis section:

        * `ngrid`: The number of grid points for calculating the
          crossing probability as a function of the order parameter.

        * `maxblock`: The max length of the blocks for the block error
          analysis. Note that this will at most be equal to half of
          the length of the data, see `block_error` in `.analysis`.

        * `blockskip`: Can be used to skip certain block lengths.
          A `blockskip` equal to `n` will consider every n'th block up
          to `maxblock`, i.e. it will use block lengths equal to `1`,
          `1+n`, `1+2n`, etc.

        * `bins`: The number of bins to use for creating histograms.

    Returns
    -------
    results : dict
        These are results for the overall probability and error and
        also some overall TIS efficiencies.

    """
    results = {'matched-prob': [], 'overall-prun': [],
               'overall-err': [], 'overall-prob': [[], []]}
    accprob = 1.0
    accprob_err = 0.0
    minlen_pdata, minlen_prun = float('inf'), float('inf')
    for idet, result in zip(detect, path_results):
        # do match only in the part left of idet:
        idx = np.where(result['pcross'][0] <= idet)[0]
        results['overall-prob'][0].extend(result['pcross'][0][idx])
        results['overall-prob'][1].extend(result['pcross'][1][idx] * accprob)
        # update probabilities, error and efficiency:
        mat = np.column_stack((result['pcross'][0], result['pcross'][1]))
        mat[:, 1] *= accprob
        results['matched-prob'].append(mat)
        accprob *= result['prun'][-1]
        accprob_err += result['blockerror'][4]**2
        # Find the minimum number of cycles over the ensembles:
        minlen_pdata = min(minlen_pdata, len(result['pdata']))
        minlen_prun = min(minlen_prun, len(result['prun']))
    # Finally, construct the cumulative output:
    results['overall-cycle'] = np.arange(minlen_prun)
    results['overall-prun'] = [1]*minlen_prun
    results['overall-pdata'] = [[1, 1]]*minlen_pdata
    for result in path_results:
        results['overall-prun'] = np.multiply(result['prun'][-minlen_prun:],
                                              results['overall-prun'])
        results['overall-pdata'] = np.multiply(
            result['pdata'][-minlen_pdata:],
            results['overall-pdata'])
    if settings is not None:
        analysis = settings['analysis']
        results['overall-error'] = block_error_corr(
            results['overall-pdata'],
            analysis['maxblock'],
            analysis['blockskip'])
    results['overall-prob'] = np.transpose(results['overall-prob'])
    results['prob'] = accprob
    results['relerror'] = np.sqrt(accprob_err)
    return results
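

# Illustrative sketch (not part of the PyRETIS API): the matching above
# multiplies the per-ensemble crossing probabilities and adds the relative
# block errors in quadrature. The numbers below are made up.
def _example_match_probabilities():
    """Show how per-ensemble probabilities and errors combine (sketch)."""
    # Hypothetical running-average end points P(lambda_{i+1} | lambda_i):
    prun_last = [0.2, 0.3, 0.25]
    # Hypothetical relative block errors for the same ensembles:
    relerr = [0.05, 0.04, 0.06]
    prob = np.prod(prun_last)                      # overall probability
    relerror = np.sqrt(np.sum(np.square(relerr)))  # combined relative error
    return prob, relerror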


def retis_flux(results0, results1, timestep):
    """Calculate the initial flux for RETIS.

    Parameters
    ----------
    results0 : dict
        Results from the analysis of ensemble [0^-].
    results1 : dict
        Results from the analysis of ensemble [0^+].
    timestep : float
        The simulation timestep.

    Returns
    -------
    flux : float
        The initial flux.
    flux_error : float
        The relative error in the initial flux.

    """
    flux0 = results0['fluxlength']
    flux1 = results1['fluxlength']
    tsum = flux0[0] + flux1[0]
    flux = 1.0 / (tsum * timestep)
    flux_error = (np.sqrt((flux0[1]*flux0[0])**2 + (flux1[1]*flux1[0])**2) /
                  tsum)
    return flux, flux_error


def retis_rate(pcross, pcross_relerror, flux, flux_relerror):
    """Calculate the rate constant for RETIS.

    Parameters
    ----------
    pcross : float
        Estimated crossing probability.
    pcross_relerror : float
        Relative error in the crossing probability.
    flux : float
        The initial flux.
    flux_relerror : float
        Relative error in the initial flux.

    Returns
    -------
    rate : float
        The rate constant.
    rate_error : float
        The relative error in the rate constant.

    """
    rate = pcross * flux
    rate_error = np.sqrt(pcross_relerror**2 + flux_relerror**2)
    return rate, rate_error
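

# Illustrative sketch (not part of the PyRETIS API): how `retis_flux` and
# `retis_rate` are typically chained. The `fluxlength` entries, the timestep
# and the crossing probability below are hypothetical numbers.
def _example_flux_and_rate():
    """Chain the flux and rate helpers on made-up analysis output (sketch)."""
    results0 = {'fluxlength': [120.0, 0.02, 2.4, 0.05]}  # from [0^-]
    results1 = {'fluxlength': [35.0, 0.03, 1.05, 0.04]}  # from [0^+]
    flux, flux_relerror = retis_flux(results0, results1, timestep=0.002)
    # Combine with an overall crossing probability (e.g. from
    # match_probabilities) and its relative error:
    rate, rate_relerror = retis_rate(0.0015, 0.08, flux, flux_relerror)
    return rate, rate_relerror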


def perm_calculations(results0, pcross, pcross_err):
    """Calculate the permeability.

    This function collects the relevant permeability properties from
    the output data. It then calculates the permeability and its error
    based on the values of xi, tau, and the crossing probability
    pcross.

    Parameters
    ----------
    results0 : dict
        Results from the analysis of ensemble [0^-].
    pcross : float
        Estimated crossing probability.
    pcross_err : float
        Relative error in the crossing probability.

    Returns
    -------
    xxi : float
        The value of xi.
    tau : float
        The value of tau.
    perm : float
        The value of the permeability.
    xi_err : float
        The error in xi.
    tau_err : float
        The error in tau.
    perm_err : float
        The error in the permeability.

    """
    out = results0['out']
    permeability = out.get("permeability", False)
    xxi, tau, perm, xi_err, tau_err, perm_err = (
        None, None, None, float('inf'), float('inf'), float('inf'))
    if permeability:
        xxi = out['xi'][-1]
        xi_err = out['xierror'][4]
        tau = out['tau'][-1]
        tau_err = out['tauerror'][4]
        perm = xxi * (1 / tau) * pcross
        perm_err = np.sqrt(xi_err**2 + tau_err**2 + pcross_err**2)
    return xxi, tau, perm, xi_err, tau_err, perm_err


def _calc_tau(op_data, bin_edges=None, timestep=1):
    """Calculate the value of tau.

    This function calculates tau, a measure of the time spent within a
    certain interval (bin) in the [0^-'] ensemble. It does so by
    counting the number of times the order parameter falls within the
    interval defined by `bin_edges`. It then multiplies this count by
    the timestep and divides by the width of the interval. If
    `bin_edges` is None or not a 2-element list, or if the order
    parameter data `op_data` is None, it just returns 0.

    Parameters
    ----------
    op_data : list
        The order parameter data from the [0^-'] ensemble.
    bin_edges : list, optional
        The edges of the bin to calculate tau for. If None, it
        returns 0.
    timestep : float, optional
        The simulation timestep. Default is 1.

    Returns
    -------
    tau : float
        The value of tau.

    """
    if bin_edges is None or len(bin_edges) != 2 or op_data is None:
        return 0
    order = op_data[1]
    bin_l, bin_r = tuple(bin_edges)
    delta = bin_r - bin_l
    count = sum([1 for i in order if bin_l < i <= bin_r])
    return count*timestep/delta
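

# Illustrative sketch (not part of the PyRETIS API): a toy `_calc_tau` call.
# The order-parameter data and bin edges below are made up; row 1 of the
# transposed data is assumed to hold the order parameter, as in the caller.
def _example_calc_tau():
    """Compute tau for a short, made-up order-parameter series (sketch)."""
    op_data = np.array([[0, 1, 2, 3, 4],
                        [-0.45, -0.30, -0.20, -0.35, -0.55]])
    # Count samples with -0.4 < op <= -0.2, multiply by the timestep and
    # divide by the bin width (three samples here, so 3*0.002/0.2 = 0.03):
    return _calc_tau(op_data, bin_edges=[-0.4, -0.2], timestep=0.002)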


def make_tau_ref_bins(interfaces, nbins=10, tau_region=None):
    """Make the reference bins for calculating tau.

    This function makes reference bins for calculating tau based on
    the [0^-'] interfaces and a number of bins. If the left interface
    is -inf and no tau region is provided, it returns a list
    containing None. If a tau region is provided, it adjusts the left
    interface accordingly. It then divides the range from the left to
    the right interface into nbins equal parts and returns these as a
    list of tuples.

    Parameters
    ----------
    interfaces : list
        The interfaces from the [0^-'] ensemble.
    nbins : int, optional
        The number of bins to divide the range into. Default is 10.
    tau_region : list, optional
        The region to calculate tau for. If None, it uses the whole
        range.

    Returns
    -------
    ref_bins : list of tuples
        The reference bins for calculating tau.

    """
    left, _, right = interfaces
    if left == float('-inf') and (tau_region is None or tau_region == []):
        return [None]
    if left == float('-inf'):
        # Take the left of the tau_region to be the middle:
        left = right - 2*(right - tau_region[0])
    diff = (right - left)/nbins
    return [(left + diff*i, left + diff*(i+1)) for i in range(nbins)]
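

# Illustrative sketch (not part of the PyRETIS API): reference bins produced
# by `make_tau_ref_bins` for a finite left interface. The interface values
# are made up for the example.
def _example_tau_ref_bins():
    """Show the reference bins for a toy set of interfaces (sketch)."""
    # [left, middle, right] interfaces of a hypothetical [0^-'] ensemble:
    bins = make_tau_ref_bins([-1.0, -0.5, 0.0], nbins=5)
    # Five equal-width bins spanning (-1.0, 0.0), approximately:
    # [(-1.0, -0.8), (-0.8, -0.6), ..., (-0.2, 0.0)]
    return bins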


def skip_paths(weights, nacc, nskip):
    """Skip paths that should not be counted.

    Parameters
    ----------
    weights : array of integers
        The weights of the paths.
    nacc : integer
        Number of accepted paths.
    nskip : integer
        Number of paths to skip.

    Returns
    -------
    new_weights : array of integers
        The new weights of the paths once the nskip first ones have
        been skipped.
    new_acc : integer
        Recomputed number of accepted paths.

    """
    new_weights = []
    new_acc = nacc
    i = 0
    while nskip > 0:
        try:
            weight = weights[i]
        except IndexError as err:
            raise RuntimeError("Skipping more trajs than available") from err
        new_weight = max(weight - nskip, 0)
        diff = weight - new_weight
        new_acc -= 1
        nskip -= diff
        new_weights.append(new_weight)
        i += 1
    # Slicing on an index that does not exist gives an empty list
    # and always works:
    new_weights += weights[i:]
    return new_weights, new_acc
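

# Illustrative sketch (not part of the PyRETIS API): removing a total weight
# of 3 from the front of a made-up weight list with `skip_paths`.
def _example_skip_paths():
    """Show how weights are trimmed when skipping paths (sketch)."""
    new_weights, new_acc = skip_paths([2, 4, 1], nacc=3, nskip=3)
    # The first weight is exhausted and the second reduced by one:
    # new_weights == [0, 3, 1] and new_acc == 1.
    return new_weights, new_acc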


def cross_dist_distr(pp_dic):
    """Return the distribution of lambda values for the LMR and RML paths.

    It calculates the distribution of lambda_max values for LM* paths,
    and the distribution of lambda_min values for RM* paths.

    Parameters
    ----------
    pp_dic : dict
        The partial-path dictionary. It contains the following keys:

        * 'lmrs' : array of strings (type of paths: "LML", "LMR", ...)
        * 'weights' : array of integers (weights of the paths)
        * 'interfaces' : array of floats (interfaces of the path
          ensemble)
        * 'ordermin' : array of floats (lambda_mins of the paths)
        * 'ordermax' : array of floats (lambda_maxs of the paths)

    Returns
    -------
    left : float
        The left interface of the path ensemble.
    middle : float
        The middle interface of the path ensemble.
    right : float
        The right interface of the path ensemble.
    percents : array of floats
        The distribution of lambda_max values for LM* paths, given at
        lambda values 'lambs' between the middle and right interfaces.
    lambs : array of floats
        The lambda values at which the lambda_max distribution is
        given.
    percents2 : array of floats
        The distribution of lambda_min values for RM* paths, given at
        lambda values 'lambs2' between the left and middle interfaces.
    lambs2 : array of floats
        The lambda values at which the lambda_min distribution is
        given.

    """
    lmrs = pp_dic['lmrs']
    weights = pp_dic['weights']
    interfaces = pp_dic['interfaces']
    ordermin = pp_dic['ordermin']
    ordermax = pp_dic['ordermax']
    # LM*:
    paths = select_with_or_masks(ordermax, [lmrs == "LML", lmrs == "LMR"])
    repeat = np.repeat(paths, select_with_or_masks(weights,
                                                   [lmrs == "LML",
                                                    lmrs == "LMR"]))
    left, middle, right = interfaces[0], interfaces[1], interfaces[2]
    percents = []
    lambs = np.linspace(middle, np.max(paths), 100) if paths.size != 0 \
        else np.linspace(middle, right, 100)
    for i in lambs:
        percents.append(np.sum(repeat >= i))
    percents = percents / percents[0] if percents else percents
    # RM*:
    paths2 = select_with_or_masks(ordermin, [lmrs == "RMR", lmrs == "RML"])
    repeat2 = np.repeat(paths2, select_with_or_masks(weights,
                                                     [lmrs == "RMR",
                                                      lmrs == "RML"]))
    percents2 = []
    lambs2 = np.linspace(np.min(paths2), middle, 100) if paths2.size != 0 \
        else np.linspace(left, middle, 100)
    for i in lambs2:
        percents2.append(np.sum(repeat2 <= i))
    percents2 = percents2 / percents2[-1] if percents2 else percents2
    return left, middle, right, percents, lambs, percents2, lambs2


def select_with_or_masks(array, masks):
    """Select elements of array that are True in at least one of the masks.

    Parameters
    ----------
    array : np.array
        The array to select elements from.
    masks : list of np.arrays
        List of masks where each mask is a boolean array with the same
        shape as `array`.

    Returns
    -------
    np.array
        A new array with the elements of `array` that are True in at
        least one of the masks.

    """
    # first check whether the masks have the same shape as the array
    for mask in masks:
        assert mask.shape == array.shape
    # now we can use the masks to select the elements of the array
    union_mask = np.any(masks, axis=0).astype(bool)
    return array[union_mask]