from itertools import product
import os
import json
import io
import zipfile

# HOW TO USE:
# This Python 2 script generates a set of input files for each of the 11 experiments mentioned in
# the paper 'Combining the k-CNF and XOR Phase-Transitions'
# (http://www.cs.rice.edu/~kgm2/Papers/ijcai16-cnfxor.pdf), designed to be combined with cnfxor.py
# and run on a remote server using Slurm.
#
# The instructions below demonstrate how to generate '_inputs.zip' for each experiment,
# and then how to use the data in '_inputs.zip' to run the experiment on slurm.
# The output files from each experiment are included in '_outputs.zip' for each experiment.
#
# All experiments used in the paper are contained in the "experiments" list.
#
# To generate _inputs.zip for the experiment "exp":
# 1) Adjust LOCAL_DIRECTORY_BASE to a local directory where experiment files can be created.
# 2) Adjust REMOTE_DIRECTORY_BASE to a remote directory where experiment code can be placed.
# 3) Adjust REMOTE_EXPERIMENT_BASE to a remote directory designed for large-scale job data access.
# 4) Adjust CPUS_PER_NODE to the number of cpus you would like each slurm node to use.
# 5) Adjust JOB_TIME to the time to be given to each slurm worker.
# 6) Choose an experiment "exp" to run from the "experiments" list and a number "num" of workers
#    to use on slurm. Each worker will use CPUS_PER_NODE cpus on a single node.
# 7) Run "exp.setup(num)" to generate input files at [LOCAL_DIRECTORY_BASE]/CNFXOR/experiments/[exp.id]
#
# To run the experiment "exp" after generating a new _inputs.zip for exp:
# 1) In [REMOTE_DIRECTORY_BASE]/cnfxor, place:
#      cnfxor.py
#      cryptominisat4_1 (Version 5 of cryptominisat should work as well)
#      (If running k=2 or k=3) cryptominisat2_9 (Release 2.9.11)
# 2) In [REMOTE_EXPERIMENT_BASE]/cnfxor/[exp.id], place:
#      the contents of [LOCAL_DIRECTORY_BASE]/CNFXOR/experiments/[exp.id]/_inputs.zip
#      (this zip file can be regenerated using the steps above).
# 3) On the remote server, execute the command returned by "exp.setup(num)" in step 7 above.
#
# For any errors or help customizing this for your use, email Jeffrey Dudek at jmd11@rice.edu

# Local directory to create project files. Files will be created in [LOCAL_DIRECTORY_BASE]/CNFXOR/experiments/
LOCAL_DIRECTORY_BASE = 'C:/Work/Projects/'

# Remote directory to store project files.
REMOTE_DIRECTORY_BASE = 'experiments/'

# Remote directory to store experiment data.
REMOTE_EXPERIMENT_BASE = '/scratch/jmd11/experiments/'

# The number of cpus used by the SAT solver on each slurm node.
CPUS_PER_NODE = 12

# The maximum time to be given to each worker, in the form hh:mm:ss
JOB_TIME = '8:00:00'

# All experiments run, in the form (k, n)
experiments_kn = [(2, 25), (2, 50), (2, 100), (2, 150),
                  (3, 25), (3, 50), (3, 100),
                  (4, 25), (4, 50),
                  (5, 25), (5, 50)]


# Helper methods to generate experiment ranges.
def float_fixed_range(start, step, entries):
    # Generate the list [start, start+step, start+2*step, ..., start+(entries-1)*step]
    return [start + (i * step) for i in xrange(entries)]
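# Illustration (arguments chosen for exposition, not taken from the experiments
# below): float_fixed_range(0, 0.02, 4) evaluates to [0.0, 0.02, 0.04, 0.06],
# up to floating-point rounding; these are the first four xor-density grid
# points generated below.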
def square(*c_args):
    # Compute the Cartesian product of multiple lists of parameters.
    # Each argument is a (name, values) pair; the result is a list of
    # dictionaries, one per point of the product.
    ranges = [a[1] for a in c_args]
    names = [a[0] for a in c_args]
    values = product(*ranges)
    res = []
    for changing_arg in values:
        fresh_args = {name: value for value, name in zip(changing_arg, names)}
        res.append(fresh_args)
    return res


# The maximum cnf-density used for each k value
cnfmax = {2: 2, 3: 6, 4: 12, 5: 26}

# Create the data points used for each experiment, a 61 x 61 grid:
#   for cnf density, from 0 to cnfmax[k] across 61 values
#   for xor density, from 0 to 1.2 across 61 values
experiment_data_points = {k: square(('cnf_density', float_fixed_range(0, float(cnfmax[k]) / 60, 61)),
                                    ('xor_density', float_fixed_range(0, 0.02, 61)))
                          for k in (2, 3, 4, 5)}


# Some methods to generate the proper solver command
def generate_solver(solver_exec, solver_args):
    # Generate the command line that invokes the given solver executable
    # with the given arguments.
    result = solver_exec
    for key, value in solver_args.iteritems():
        if key == 'alarmtime':
            continue
        result += ' --' + str(key)
        if value:
            result += '=' + str(value)
    if 'alarmtime' in solver_args:
        result = "doalarm -t profile " + str(solver_args['alarmtime']) + ' [project_directory]' + result
    return result


def generate_cms29(solver_args=None, gaussian_elimination=True):
    # Generate the command to use CryptoMiniSat 2.9
    if not solver_args:
        solver_args = {}
    solver_args = dict(solver_args)
    solver_args['nosolprint'] = None
    solver_args['verbosity'] = '0'
    if gaussian_elimination:
        solver_args['gaussuntil'] = '400'
    else:
        solver_args['gaussuntil'] = '0'
    return generate_solver('cryptominisat2_9', solver_args)


def generate_cms41(solver_args=None, gaussian_elimination=True):
    # Generate the command to use CryptoMiniSat 4.1
    if not solver_args:
        solver_args = {}
    solver_args = dict(solver_args)
    solver_args['printsol'] = '0'
    solver_args['verb'] = '0'
    if gaussian_elimination:
        solver_args['autodisablegauss'] = '0'
    else:
        solver_args['autodisablegauss'] = '1'
    return generate_solver('cryptominisat4_1', solver_args)
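# Illustration: the call used in the loops below,
# generate_cms41({'threads': CPUS_PER_NODE, 'maxtime': 1000}), returns a
# command string along the lines of
#   cryptominisat4_1 --threads=12 --maxtime=1000 --printsol=0 --verb=0 --autodisablegauss=0
# where the flag order depends on dictionary iteration order.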
# Include the solver information for each experiment
# (We switch between CryptoMiniSat version 2.9 and version 4.1 at different densities for the best performance)
for args in experiment_data_points[2]:
    if args['xor_density'] / 1.2 + args['cnf_density'] / 2 < .6:
        args['solver'] = generate_cms41({'threads': CPUS_PER_NODE, 'maxtime': 1000})
    else:
        args['solver'] = generate_cms29({'threads': CPUS_PER_NODE, 'maxtime': 1000})
for args in experiment_data_points[3]:
    if args['xor_density'] < 0.5 and args['cnf_density'] < 1.65:
        args['solver'] = generate_cms41({'threads': CPUS_PER_NODE, 'maxtime': 1000})
    else:
        args['solver'] = generate_cms29({'threads': CPUS_PER_NODE, 'maxtime': 1000})
for args in experiment_data_points[4]:
    args['solver'] = generate_cms41({'threads': CPUS_PER_NODE, 'maxtime': 1000})
for args in experiment_data_points[5]:
    args['solver'] = generate_cms41({'threads': CPUS_PER_NODE, 'maxtime': 1000})

# The slurm script used to start each job.
BASE_SCRIPT = """#!/bin/bash
#SBATCH --time=[$$TIME]
#SBATCH --job-name=[$$FULLNAME]
#SBATCH --partition=[$$PART]
#SBATCH --nodes=1
#SBATCH --cpus-per-task=[$$CPUS]
#SBATCH --mem-per-cpu=4000m
echo Experiment: [$$ID]
echo Worker Num: $SLURM_ARRAY_TASK_ID
echo Total Workers: $1
echo Base Arguments: [$$ARGS]
[$$MODULES]
python [$$SCRIPT] [$$ARGS] [$$IN]"""


# A class used to produce the input data files for each experiment.
class Experiment:
    def __init__(self, basename, exp_id, script, time, base_args,
                 modules=None, changing_args=None, dependencies=None):
        """
        :param basename: string name of the experiment ('CNFXOR')
        :param exp_id: string name of the experiment instance ('alpha1')
        :param script: name of the python script to run, relative to the experiment folder
        :param time: time to be given to each worker (10:00)
        :param base_args: dictionary of arguments to be passed to the script
        :param modules: a list of modules to load on the remote server before running the script
        :param changing_args: a list of dictionaries, iterating over all work items
        :param dependencies: a list of any other changeable file dependencies of the script.
                             The script and all dependencies will be reuploaded on every run.
        :return:
        """
        self.basename = basename
        self.id = exp_id
        self.script = script
        self.time = time
        self.args = base_args
        self.changing_args = changing_args
        self.modules = modules
        if not self.modules:
            self.modules = []
        self.args['experiment_name'] = basename
        self.args['experiment_id'] = exp_id
        self.args['script'] = script
        if dependencies is None:
            dependencies = []
        if script not in dependencies:
            dependencies = dependencies + [script]
        self.dependencies = dependencies

    def local_project_path(self, filename=''):
        return LOCAL_DIRECTORY_BASE + self.basename + '/' + filename

    def remote_project_path(self, filename=''):
        return REMOTE_DIRECTORY_BASE + self.basename.lower() + '/' + filename

    def local_experiment_path(self, filename=''):
        return self.local_project_path('experiments/' + self.id + '/' + filename)

    def remote_experiment_path(self, filename=''):
        return REMOTE_EXPERIMENT_BASE + self.basename.lower() + '/' + self.id + '/' + filename

    def setup(self, numworkers, partition='commons'):
        # Create the local directory structure
        if not os.path.exists(self.local_experiment_path()):
            os.makedirs(self.local_experiment_path())

        # Provide workers with a temporary directory to use
        self.args["temp_directory"] = self.remote_experiment_path("temp/")

        # Craft the job script for the experiment
        script = BASE_SCRIPT
        json_args = json.dumps(self.args).replace('"', '\\"')
        json_args = '"' + json_args + '"'
        script = script.replace("[$$ARGS]", json_args)
        script = script.replace("[$$SCRIPT]", self.remote_project_path(self.script))
        script = script.replace("[$$FULLNAME]", self.basename.lower() + '_' + self.id)
        script = script.replace("[$$ID]", str(self.id))
        script = script.replace("[$$IN]", '"' + self.remote_experiment_path('$SLURM_ARRAY_TASK_ID.in') + '"')
        script = script.replace("[$$TIME]", str(self.time))
        script = script.replace("[$$PART]", partition)
        script = script.replace("[$$CPUS]", str(CPUS_PER_NODE))
        modules_load = ''
        for m in self.modules:
            modules_load += 'module load ' + m + '\n'
        script = script.replace("[$$MODULES]", modules_load)

        # Save the job script locally for the experiment
        job_file = '_run.sh'
        with io.open(self.local_experiment_path(job_file), 'w', newline='\n') as f:
            f.write(unicode(script))

        # Craft input files for each worker, distributing the work items
        # round-robin across the workers
        input_files = [str(worker_id) + '.in' for worker_id in xrange(numworkers)]
        inputs = [io.open(self.local_experiment_path(input_file), 'w', newline='\n')
                  for input_file in input_files]
        for job_count, fresh_args in enumerate(self.changing_args):
            fresh_args = dict(fresh_args)
            number = str(job_count).zfill(len(str(len(self.changing_args))))
            fresh_args['output_file'] = self.remote_experiment_path(number + '.out')
            fresh_args['task_id'] = str(job_count)
            inputs[job_count % numworkers].write(unicode(json.dumps(fresh_args)))
            inputs[job_count % numworkers].write(unicode('\n'))
        for input_file in inputs:
            input_file.close()
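        # Each line of a worker's .in file is one JSON work item; a
        # representative line (values and paths illustrative) looks like:
        #   {"cnf_density": 1.0, "xor_density": 0.4, "solver": "cryptominisat2_9 ...",
        #    "output_file": "/scratch/.../cnfxor/ijcai16_k3_n50/1250.out", "task_id": "1250"}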
        # Include the job script in the list of files to copy
        input_files.append(job_file)
        print 'Created local files'

        # Compress input files locally
        input_zip = '_inputs.zip'
        with zipfile.ZipFile(self.local_experiment_path(input_zip), 'w') as zipf:
            for input_file in input_files:
                zipf.write(self.local_experiment_path(input_file), input_file)
        print 'Compressed local files'

        # Generate the sbatch command that submits the job array on the remote server
        command = 'sbatch'
        command += ' --output=' + self.remote_experiment_path('slurm-%A_%a.out')
        command += ' --array=0-' + str(numworkers - 1)
        command += ' ' + self.remote_experiment_path(job_file)
        command += ' ' + str(numworkers)
        return command

    def __str__(self):
        return "<" + self.basename + ":" + self.id + ">"


class CNFXORExperiment(Experiment):
    def __init__(self, experiment_id, time, base_args, changing_args, dependencies=None):
        Experiment.__init__(self, 'CNFXOR', experiment_id, 'cnfxor.py', time, base_args,
                            modules=['CMake', 'GCC/4.8.5'],
                            changing_args=changing_args, dependencies=dependencies)


# One experiment per (k, n) pair used in the paper.
experiments = [
    CNFXORExperiment('ijcai16_k{0}_n{1}'.format(k, n), JOB_TIME,
                     base_args={'trials': 100,
                                'cnf_length': k,
                                'xor_prob': 0.5,
                                'nvars': n},
                     changing_args=experiment_data_points[k])
    for k, n in experiments_kn
]
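# A minimal usage sketch, following the instructions at the top of this file.
# The experiment choice and the worker count of 50 are illustrative, not taken
# from the paper; setup() creates the local input files and returns the sbatch
# command to run on the remote server.
if __name__ == '__main__':
    example_experiment = next(e for e in experiments if e.id == 'ijcai16_k3_n50')
    print example_experiment.setup(50)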