# Source code for pyretis.testing.compare

# -*- coding: utf-8 -*-
# Copyright (c) 2023, PyRETIS Development Team.
# Distributed under the LGPLv2.1+ License. See LICENSE for more info.
"""Methods that might be useful for several tests.

This module defines methods that can be used for comparing results.

"""
import math
import os
import filecmp
import numpy as np
from pyretis.testing.helpers import search_for_files
from pyretis.inout.formats.energy import EnergyPathFile
from pyretis.inout.formats.order import OrderPathFile
from pyretis.inout.formats.path import PathExtFile


# Names of the expected output files in archive directories. Files with
# one of these names have their contents compared by
# ``compare_traj_archive``:
ARCHIVE_FILES = {'energy.txt', 'order.txt', 'traj.txt'}
# Names of other expected output files:
OUTPUT_FILES = {'energy.txt', 'order.txt', 'pathensemble.txt'}
# Define readers for loading data. The keys are the values accepted as
# ``file_type`` by ``compare_files_columns``, the values are the reader
# classes used to load the corresponding files:
READERS = {
    'energy': EnergyPathFile,
    'order': OrderPathFile,
    'traj': PathExtFile,
}


def read_files(*files, read_comments=True):
    """Load the lines of one or more files into memory.

    Every file is read in full, so this is intended for small files.

    Parameters
    ----------
    files : list of strings
        The paths to the files we are to read.
    read_comments : boolean, optional
        If False, lines whose first non-blank character is a "#" are
        left out of the result.

    Returns
    -------
    out : list of list of strings
        One list of lines (line endings included) per input file, in
        the same order as the files were given.

    """
    contents = []
    for path in files:
        with open(path, 'r', encoding="utf8") as handle:
            if read_comments:
                lines = list(handle)
            else:
                lines = [
                    row for row in handle
                    if not row.strip().startswith('#')
                ]
        contents.append(lines)
    return contents
def compare_files_lines(file1, file2, skip=None):
    """Check that two files contain exactly the same lines.

    Both files are read in full (comment lines included) and compared
    line by line.

    Parameters
    ----------
    file1 : string
        The path to the first file to compare.
    file2 : string
        The path to the second file to compare.
    skip : list of integers, optional
        Line numbers (counted from 0) that are left out of the
        comparison.

    Returns
    -------
    out[0] : boolean
        True if the files are deemed to be equal.
    out[1] : string
        A descriptive message of the result of the comparison.

    """
    contents = read_files(file1, file2, read_comments=True)
    assert len(contents) == 2
    lines1, lines2 = contents
    if len(lines1) != len(lines2):
        return False, 'The number of lines in the files differ'
    # An empty or missing skip list means that no line is ignored:
    ignored = skip if skip else ()
    for idx, (left, right) in enumerate(zip(lines1, lines2)):
        if idx in ignored or left == right:
            continue
        return False, (
            f'Line {idx} differs: {left.strip()} != {right.strip()}'
        )
    return True, 'Files are equal'
def compare_files_columns(file1, file2, file_type, skip=None):
    """Compare two output PyRETIS files.

    This method compares files where numbers are stored in columns
    and the columns have specific labels. Here, we also compare
    labels and comments. The files are loaded block by block with the
    reader selected by ``file_type`` and the blocks are compared
    pairwise, in order.

    Parameters
    ----------
    file1 : string
        The path to the first file to compare.
    file2 : string
        The path to the second file to compare.
    file_type : string
        A string used to determine the file type. It must be one of
        the keys in ``READERS``.
    skip : list of strings, optional
        A list of items from the loaded data we are to skip. This can,
        for instance, be certain energy terms that are not absolute
        and can't easily be compared.

    Returns
    -------
    out[0] : boolean
        True if the files are deemed to be equal.
    out[1] : string
        A descriptive message of the result of the comparison.

    Raises
    ------
    KeyError
        If ``file_type`` is not a key in ``READERS``.

    """
    reader = READERS[file_type]
    blocks1 = reader(file1, 'r').load()
    blocks2 = iter(reader(file2, 'r').load())
    # Sentinel used to detect that one file has run out of blocks. A
    # plain ``zip`` would silently stop at the shorter file and report
    # files with a different number of blocks as equal.
    exhausted = object()
    # Compare the files by comparing the blocks found in the files:
    for block1 in blocks1:
        block2 = next(blocks2, exhausted)
        if block2 is exhausted:
            return False, 'The number of blocks in the files differ'
        # Start with block comments:
        if block1['comment'] != block2['comment']:
            return False, 'Block comment differs'
        # Compare terms found in the blocks:
        if sorted(block1['data'].keys()) != sorted(block2['data'].keys()):
            return False, 'Different items in block data'
        # Compare numerical data:
        for key, val in block1['data'].items():
            if skip and key in skip:
                continue
            other = block2['data'][key]
            # np.allclose broadcasts its arguments, so the shapes are
            # checked first: data of different length must not be
            # reported as equal (or make np.allclose raise).
            if np.shape(val) != np.shape(other):
                return False, 'Block terms differ'
            if not np.allclose(val, other):
                return False, 'Block terms differ'
    # The first file is exhausted, the second one must be as well:
    if next(blocks2, exhausted) is not exhausted:
        return False, 'The number of blocks in the files differ'
    return True, 'Files are equal'
def compare_files_numerical(file1, file2):
    """Compare two output PyRETIS files.

    Here, we compare files that contain numerical data. We don't care
    about comments here, we just compare the actual numerical data.
    The files are loaded with :py:func:`numpy.loadtxt` and are deemed
    equal when the loaded arrays have the same shape and all numbers
    are close, as judged by :py:func:`numpy.allclose`.

    Parameters
    ----------
    file1 : string
        The path to the first file to compare.
    file2 : string
        The path to the second file to compare.

    Returns
    -------
    out[0] : boolean
        True if the files are deemed to be equal.
    out[1] : string
        A descriptive message of the result of the comparison.

    """
    data1 = np.loadtxt(file1)
    data2 = np.loadtxt(file2)
    # np.allclose broadcasts its arguments: without this check, a file
    # with a single row would be "equal" to a file repeating that row,
    # and incompatible shapes would raise instead of returning False.
    if data1.shape != data2.shape:
        return False, 'Numerical data differ in shape'
    if not np.allclose(data1, data2):
        return False, 'Numerical data differ'
    return True, 'Files are equal'
def compare_files(file1, file2, skip=None, mode='line'):
    """Compare two files using the selected comparison mode.

    Parameters
    ----------
    file1 : string
        The path to the first file to compare.
    file2 : string
        The path to the second file to compare.
    skip : list of strings or list of ints, optional
        A list of items that are to be skipped in the comparison.
        This is only passed on to the line-by-line comparison.
    mode : string, optional
        A string used to determine how we do the comparison:
        ``'numerical'`` will select a comparison in which the file
        is parsed and numerical data compared; ``'line'`` will select
        a line-by-line comparison; anything else will perform a
        comparison using :py:func:`filecmp.cmp`.

    Returns
    -------
    out[0] : boolean
        True if the files were found to be equal, False otherwise.
    out[1] : string
        A string with information about the comparison result.

    """
    if mode == 'numerical':
        outcome = compare_files_numerical(file1, file2)
    elif mode == 'line':
        outcome = compare_files_lines(file1, file2, skip=skip)
    else:
        # Fall back to a comparison of the file contents:
        identical = filecmp.cmp(file1, file2, shallow=False)
        if identical:
            outcome = (identical, 'Files are equal')
        else:
            outcome = (identical, 'Files are not equal')
    same, message = outcome
    return same, message
def compare_traj_archive(dir1, dir2):
    """Compare the files found in two trajectory archives.

    These archives consist of trajectory information such as
    energies, order parameters and positions. The files found in the
    two directories are sorted and paired up. For each pair, the file
    names must match, and files named as in ``ARCHIVE_FILES`` must
    also have identical contents. The contents of other files (e.g.
    the raw trajectory data) are not compared.

    Parameters
    ----------
    dir1 : string
        The path to the first directory to use in the comparison.
    dir2 : string
        The path to the second directory to use in the comparison.

    Returns
    -------
    out : list of tuples
        This list contains the files which differed, if any. If the
        number of files differ, the only item is ``(dir1, dir2)``.

    """
    found1 = sorted(search_for_files(dir1))
    found2 = sorted(search_for_files(dir2))
    # Without the same number of files, pairing them makes no sense:
    if len(found1) != len(found2):
        return [(dir1, dir2)]
    mismatches = []
    for path1, path2 in zip(found1, found2):
        name = os.path.basename(path1)
        if name != os.path.basename(path2):
            mismatches.append((path1, path2))
        elif name in ARCHIVE_FILES:
            # Only the files written by PyRETIS are compared:
            equal, _ = compare_files(path1, path2, mode='cmp')
            if not equal:
                mismatches.append((path1, path2))
    return mismatches
def compare_pathensemble_files(file1, file2, rel_tol=1e-5, skip=None):
    """Compare two path ensemble files.

    We compare line-by-line, but skip comments. Text columns must
    match exactly, while numbers must be close, as judged by the
    given relative tolerance (no absolute tolerance is used).

    Parameters
    ----------
    file1 : string
        The path to the first file to consider in the comparison.
    file2 : string
        The path to the second file to consider in the comparison.
    rel_tol : float, optional
        A relative tolerance which is used to determine if numbers
        are almost equal.
    skip : list of integers, optional
        These are columns (counted from 0) we are to skip in the
        comparison.

    Returns
    -------
    out[0] : boolean
        True if the files are equal, False otherwise.
    out[1] : string
        A message describing the result of the comparison.

    """
    contents = read_files(file1, file2, read_comments=False)
    assert len(contents) == 2
    if len(contents[0]) != len(contents[1]):
        return False, 'The number of lines in the files differ'
    # The expected data types for the columns in the path ensemble
    # files. The position in the tuple is the column number:
    column_types = (
        int, int, int, str, str, str, int, str,
        str, float, float, int, int, float, int, int,
    )
    for lineno, (row1, row2) in enumerate(zip(*contents)):
        fields1 = row1.split()
        fields2 = row2.split()
        for col, convert in enumerate(column_types):
            if skip and col in skip:
                continue
            if convert is str:
                same = fields1[col] == fields2[col]
            else:
                same = math.isclose(
                    convert(fields1[col]),
                    convert(fields2[col]),
                    rel_tol=rel_tol,
                )
            if not same:
                return False, f'Files differ on line {lineno}, column {col}'
    return True, 'Files are equal'