#!/usr/bin/env python2.7

from __future__ import print_function

# Text shown by argparse as the program description in --help output.
desc = (
    "Generate the difference of two YAML files into a new YAML file (works on "
    "pair of directories too). A new attribute 'Added' is set to True or False "
    "depending whether the entry is added or removed from the first input to "
    "the next. The tools requires PyYAML."
)

import yaml
# Try to use the C parser.
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

import optrecord
import argparse
from collections import defaultdict
from multiprocessing import cpu_count, Pool


def _build_arg_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` accepting two positional inputs (the
        old and the new record file or directory) plus the ``--jobs``,
        ``--no-progress-indicator`` and ``--output`` options.
    """
    cli = argparse.ArgumentParser(description=desc)

    # Positional inputs: the "old" side first, then the "new" side.
    cli.add_argument(
        'yaml_dir_or_file_1',
        help='An optimization record file or a directory searched for '
             'optimization '
             'record files that are used as the old version for the '
             'comparison')
    cli.add_argument(
        'yaml_dir_or_file_2',
        help='An optimization record file or a directory searched for '
             'optimization '
             'record files that are used as the new version for the '
             'comparison')

    # The default job count is sampled once, when the parser is built.
    cli.add_argument(
        '--jobs',
        '-j',
        default=cpu_count(),
        type=int,
        help='Max job count (defaults to %(default)s, the current CPU count)')
    cli.add_argument(
        '--no-progress-indicator',
        '-n',
        action='store_true',
        default=False,
        help='Do not display any indicator of how many YAML files were read.')
    cli.add_argument('--output', '-o', default='diff.opt.yaml')
    return cli


if __name__ == '__main__':
    args = _build_arg_parser().parse_args()

    # Locate the record files for each side of the comparison.
    old_files = optrecord.find_opt_files(args.yaml_dir_or_file_1)
    new_files = optrecord.find_opt_files(args.yaml_dir_or_file_2)

    show_progress = not args.no_progress_indicator

    # gather_results returns a 3-tuple; only its first element (a mapping
    # whose values are the remark objects) is needed here.
    old_remarks, _, _ = optrecord.gather_results(
        old_files, args.jobs, show_progress)
    new_remarks, _, _ = optrecord.gather_results(
        new_files, args.jobs, show_progress)

    # Build each value set once; the two differences are taken before any
    # remark is mutated below.
    old_set = set(old_remarks.values())
    new_set = set(new_remarks.values())
    added = new_set.difference(old_set)
    removed = old_set.difference(new_set)

    # Tag every differing remark with the direction of the change:
    # True for remarks only in the new input, False for those only in the old.
    for flag, group in ((True, added), (False, removed)):
        for remark in group:
            remark.Added = flag

    result = added.union(removed)
    # NOTE(review): recover_yaml_structure is defined in optrecord; presumably
    # it restores the remark to a form PyYAML can serialize -- confirm there.
    for remark in result:
        remark.recover_yaml_structure()

    # Write all differing remarks as a multi-document YAML stream.
    with open(args.output, 'w') as stream:
        yaml.dump_all(result, stream)