Source code for rl_salamandra_alignment.utils.job_submission

"""Utilities for submitting jobs with SLURM
    """
import subprocess
    
[docs] def submit_job(job_file: str, dependency: str = None) -> str: """Function to submit a job and return its job ID Args: job_file (str): Path to Slurm job file dependency (str, optional): job id of dependency for submitting a new job. Defaults to None. Returns: str: job id of submitted job """ command = ["sbatch", "--parsable"] if dependency: command.append(f"--dependency=afterok:{dependency}") command.append(job_file) result = subprocess.run(command, capture_output=True, text=True) if result.returncode != 0: print(f"Error submitting job: {result.stderr}") return None job_id = result.stdout.strip() return job_id
[docs] def submit_job_with_retries( job_file: str, n_retries: int ) -> str: """Submit a slurm job n_retries times with dependencies Args: job_file (str): Path to Slurm job file n_retries (int): number of retries Returns: str: job id of last submitted job """ previous_job_id = None for i in range(1, n_retries): job_id = submit_job(job_file, dependency=previous_job_id) if job_id: print(f"Submitted job {i} with ID {job_id}") previous_job_id = job_id else: print(f"Failed to submit job {i}") break return job_id