YamboRestart

This is the basic workflow: it runs a single yambo calculation, wrapping the YamboCalculation class. Its added value is a restart logic, with a tolerance for calculations that fail due to:

  • Time exhaustion on the queue: a new calculation is run with 50% more walltime, copying the partial results obtained in the failed one.

  • Parallelization errors: the built-in parallelizer is used to attempt a fix.

  • Memory errors: the number of MPI tasks is halved and the number of threads is doubled to attempt a better memory distribution, and the parallelism options are redefined with the parallelizer. Resources are increased only if mpi = 1 (a sketch of this heuristic follows the list).
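
Below is a minimal, plain-Python sketch of the memory-error heuristic described in the last point; the function name and signature are illustrative only and are not part of the plugin API:

def rebalance_for_memory(num_mpiprocs, num_threads):
    # Halve the MPI tasks and double the OpenMP threads, keeping the total
    # number of cores (mpi * threads) constant, to improve memory distribution.
    if num_mpiprocs > 1:
        return num_mpiprocs // 2, num_threads * 2
    # With a single MPI task nothing can be redistributed: only an
    # increase of the resources can help at this point.
    return num_mpiprocs, num_threads

print(rebalance_for_memory(16, 1))  # -> (8, 2)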

After each calculation, the workflow checks the exit status (provided by the parser) and, if the calculation failed, tries to fix some parameters and resubmit it in order to obtain results. As input, we have to provide the maximum number of iterations that the workchain will perform. YamboRestart inherits from the BaseRestartWorkChain class included in the aiida-core package: this greatly simplifies the error detection/handling mechanism and provides a unified restart logic for all AiiDA plugins.
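
Once the workchain has finished, the restart history can be inspected directly from the process node; the following is a minimal sketch using standard AiiDA process-node attributes (the pk is a placeholder):

from aiida.orm import load_node

wc = load_node(1234)                       # pk of a finished YamboRestart (placeholder)
print(wc.is_finished_ok, wc.exit_status)   # overall success flag and exit status of the workchain
for calc in wc.called:                     # the YamboCalculation attempts launched by the restart logic
    print(calc.pk, calc.exit_status)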

As in a YamboCalculation, you have to provide all the necessary inputs, paying attention to the fact that the builder now has the attribute ‘yambo’ for the variables that refer to the YamboCalculation part of the workchain. The only exception is ‘parent_folder’, which is provided directly as a YamboRestart input. Other common YamboRestart inputs are:

builder.max_walltime #maximum walltime allowed on a given machine/cluster
builder.max_iterations #from BaseRestartWorkChain: maximum number of attempts to successfully complete the calculation.
builder.clean_workdir #from BaseRestartWorkChain: if `True`, the work directories of all called calculation jobs will be cleaned at the end of execution.
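
These top-level inputs are set on the builder as AiiDA nodes; a minimal sketch (the values are placeholders, and it is assumed that the ports accept Int/Bool nodes):

from aiida.orm import Int, Bool

builder.max_walltime = Int(24*60*60)    # upper bound (seconds) used when the walltime is increased on restart
builder.max_iterations = Int(5)         # give up after 5 failed attempts
builder.clean_workdir = Bool(False)     # keep the remote work directories for inspection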

Here is an example of a typical script used to run a YamboRestart workchain:

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
import sys
import os
from aiida.plugins import DataFactory, CalculationFactory
from aiida.orm import Dict, Int, load_node
from aiida.engine import submit
from aiida_yambo.workflows.yamborestart import YamboRestart
import argparse

def get_options():

    parser = argparse.ArgumentParser(description='YamboRestart calculation.')
    parser.add_argument(
        '--code',
        type=int,
        dest='code_pk',
        required=True,
        help='The pk of the yambo code to use')

    parser.add_argument(
        '--precode',
        type=int,
        dest='precode_pk',
        required=True,
        help='The pk of the p2y code to use')

    parser.add_argument(
        '--parent',
        type=int,
        dest='parent_pk',
        required=True,
        help='The pk of the parent calculation to use')

    parser.add_argument(
        '--restarts',
        type=int,
        dest='max_iterations',
        required=True,
        help='maximum number of restarts')

    parser.add_argument(
        '--time',
        type=int,
        dest='max_wallclock_seconds',
        required=False,
        default=30*60,
        help='max wallclock in seconds')

    parser.add_argument(
        '--nodes',
        type=int,
        dest='num_machines',
        required=False,
        default=1,
        help='number of machines')

    parser.add_argument(
        '--mpi',
        type=int,
        dest='num_mpiprocs_per_machine',
        required=False,
        default=1,
        help='number of mpi processes per machine')

    parser.add_argument(
        '--threads',
        type=int,
        dest='num_cores_per_mpiproc',
        required=False,
        default=1,
        help='number of threads per mpi process')

    parser.add_argument(
        '--queue_name',
        type=str,
        dest='queue_name',
        required=False,
        default=None,
        help='queue(PBS) or partition(SLURM) name')

    parser.add_argument(
        '--qos',
        type=str,
        dest='qos',
        required=False,
        default=None,
        help='qos name')

    parser.add_argument(
        '--account',
        type=str,
        dest='account',
        required=False,
        default=None,
        help='account name')

    args = parser.parse_args()

    ###### setting the machine options ######
    options = {
        'code_pk': args.code_pk,
        'precode_pk': args.precode_pk,
        'parent_pk': args.parent_pk,
        'max_iterations': args.max_iterations,
        'max_wallclock_seconds': args.max_wallclock_seconds,
        'resources': {
            "num_machines": args.num_machines,
            "num_mpiprocs_per_machine": args.num_mpiprocs_per_machine,
            "num_cores_per_mpiproc": args.num_cores_per_mpiproc,
        },
        'custom_scheduler_commands': u"export OMP_NUM_THREADS="+str(args.num_cores_per_mpiproc),
        }

    if args.queue_name:
        options['queue_name']=args.queue_name

    if args.qos:
        options['qos']=args.qos

    if args.account:
        options['account']=args.account

    return options

def main(options):

    ###### setting the gw parameters ######

    Dict = DataFactory('dict')

    params_gw = {
            'HF_and_locXC': True,
            'dipoles': True,
            'ppa': True,
            'gw0': True,
            'em1d': True,
            'Chimod': 'hartree',
            #'EXXRLvcs': 40,
            #'EXXRLvcs_units': 'Ry',
            'BndsRnXp': [1, 10],
            'NGsBlkXp': 2,
            'NGsBlkXp_units': 'Ry',
            'GbndRnge': [1, 10],
            'DysSolver': "n",
            'QPkrange': [[1, 1, 8, 9]],
            'DIP_CPU': "1 1 1",
            'DIP_ROLEs': "k c v",
            'X_CPU': "1 1 1 1",
            'X_ROLEs': "q k c v",
            'SE_CPU': "1 1 1",
            'SE_ROLEs': "q qp b",
        }
    params_gw = Dict(dict=params_gw)

    ###### creation of the YamboRestart ######

    builder = YamboRestart.get_builder()
    builder.yambo.metadata.options.max_wallclock_seconds = \
            options['max_wallclock_seconds']
    builder.yambo.metadata.options.resources = \
            options['resources']

    if 'queue_name' in options:
        builder.yambo.metadata.options.queue_name = options['queue_name']

    if 'qos' in options:
        builder.yambo.metadata.options.qos = options['qos']

    if 'account' in options:
        builder.yambo.metadata.options.account = options['account']
        
    builder.yambo.metadata.options.custom_scheduler_commands = options['custom_scheduler_commands']

    builder.yambo.parameters = params_gw

    builder.yambo.precode_parameters = Dict(dict={})
    builder.yambo.settings = Dict(dict={'INITIALISE': False, 'COPY_DBS': False})

    builder.yambo.code = load_node(options['code_pk'])
    builder.yambo.preprocessing_code = load_node(options['precode_pk'])

    builder.parent_folder = load_node(options['parent_pk']).outputs.remote_folder

    builder.max_iterations = Int(options['max_iterations'])

    return builder


if __name__ == "__main__":
    options = get_options()
    builder = main(options)
    running = submit(builder)
    print("Submitted YamboRestart workchain; with pk=<{}>".format(running.pk))

The outputs are the same as those of a YamboCalculation.
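
As a sketch of how to retrieve them once the workchain has finished (assuming the parsed results are exposed in the usual output_parameters Dict of YamboCalculation; the pk is a placeholder and the port name may differ between plugin versions):

from aiida.orm import load_node

wc = load_node(1234)                              # pk of a finished YamboRestart (placeholder)
print(list(wc.outputs))                           # available output ports
print(wc.outputs.output_parameters.get_dict())    # parsed yambo results, if this port is present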