Source code for aiida_yambo.parsers.parsers

# -*- coding: utf-8 -*-

from __future__ import absolute_import
from aiida.orm import FolderData
from aiida.parsers.parser import Parser
from aiida.common.exceptions import OutputParsingError
from aiida.common.exceptions import UniquenessError
from aiida.common.exceptions import ValidationError, ParsingError
import numpy
import copy
from aiida.orm import ArrayData
from aiida.orm import BandsData
from aiida.orm import KpointsData
from aiida.orm import Dict
from aiida.orm import StructureData
from aiida.plugins import DataFactory, CalculationFactory
import glob, os, re

from yamboparser.yambofile import *
from yamboparser.yambofolder import *

from aiida_yambo.calculations.yambo import YamboCalculation
from aiida_yambo.utils.common_helpers import *
from aiida_yambo.parsers.utils import *

from aiida_quantumespresso.calculations.pw import PwCalculation
from aiida_quantumespresso.calculations import _lowercase_dict, _uppercase_dict
from six.moves import range
import cmath
import netCDF4

import pathlib
import tempfile

# Data class used by the parser to store a single file as an output node
# (it wraps the ``ndb.QP`` database in ``YamboParser.parse``).
SingleFileData = DataFactory('core.singlefile')

# Package metadata.
__copyright__ = u"Copyright (c), 2014-2015, École Polytechnique Fédérale de Lausanne (EPFL), Switzerland, Laboratory of Theory and Simulation of Materials (THEOS). All rights reserved."
__license__ = "Non-Commercial, End-User Software License Agreement, see LICENSE.txt file"
__version__ = "0.4.1"
# Parenthesised implicit concatenation keeps this a single string; a stray
# comma between two fragments would silently turn it into a tuple.
__authors__ = (
    " Miki Bonacci (miki.bonacci@unimore.it),"
    " Gianluca Prandini (gianluca.prandini@epfl.ch),"
    " Antimo Marrazzo (antimo.marrazzo@epfl.ch),"
    " Michael Atambo (michaelontita.atambo@unimore.it),"
    " and the AiiDA team. The parser relies on the yamboparser module by Henrique Pereira Coutada Miranda."
)


[docs]class YamboParser(Parser):
    """This class is a wrapper class for the Parser class for Yambo calculators from yambopy.

    *IMPORTANT:* This plugin can parse netcdf files produced by yambo if the
    python netcdf libraries are installed, otherwise they are ignored.
    Accepts data from yambopy's YamboFolder  as a list of YamboFile instances.
    The instances of YamboFile have the following attributes:

    ::
      .data: A dict with k-point indices as keys; each value is a further dict of observable:value pairs, i.e. { '1' : {'Eo': 5, 'B':1,..}, '15':{'Eo':5.55,'B': 30}... }
      .warnings:     list of strings, one warning  per string.
      .errors:       list of errors, one error per string.
      .memory        list of string, info on memory allocated and freed
      .max_memory    maximum memory allocated or freed during the run
      .last_memory   last memory allocated or freed during the run
      .last_memory_time   last point in time at which  memory was  allocated or freed
      .*_units       units (e.g. Gb or seconds)
      .wall_time     duration of the run (as parsed from the log file)
      .last_time     last time reported (as parsed from the log file)
      .kpoints: When non-empty, a dict of kpoint_index: kpoint_triplet values, i.e. { '1':[0,0,0], '5':[0.5,0.0,5] .. }
      .type:   type of file according to YamboFile; types include:
      1. 'report'    : 'r-..' report files
      2. 'output_gw'  : 'o-...qp': quasiparticle output file   ...           .. etc
      N. 'unknown' : when YamboFile was unable to deduce what type of file
      .timing: list of timing info.

    Saved data:

    o-..qp : ArrayData is stored in a format similar to the internal yambo db format (two arrays):
             [[E_o,E-E_o,S_c],[...]]  and
             [[ik,ib,isp],...]
             First is the observables, and the second array contains the kpoint index, band index
             and spin index if spin polarized else 0. BandsData can not be used as the k-point triplets
             are not available in the o-.qp file.

    r-..    : BandsData is stored with the proper list of K-points, bands_labels.

    """

    def __init__(self, calculation):
        """Initialize the instance of YamboParser.

        :param calculation: the calculation node to be parsed; its
            ``process_type`` must be ``'aiida.calculations:yambo.yambo'``.
        :raises OutputParsingError: if ``calculation`` has any other process type.
        """
        from aiida.common import AIIDA_LOGGER
        self._logger = AIIDA_LOGGER.getChild('parser').getChild(
            self.__class__.__name__)

        # check for valid input
        if calculation.process_type != 'aiida.calculations:yambo.yambo':
            raise OutputParsingError(
                "Input calculation must be a YamboCalculation, not {}".format(calculation.process_type))

        self._calc = calculation
        self.last_job_info = self._calc.get_last_job_info()

        # link names under which the parsed outputs are emitted
        self._eels_array_linkname = 'array_eels'
        self._eps_array_linkname = 'array_eps'
        self._alpha_array_linkname = 'array_alpha'
        self._qp_array_linkname = 'array_qp'
        self._QP_db_linkname = 'QP_db'
        self._ndb_linkname = 'array_ndb'
        self._ndb_QP_linkname = 'array_ndb_QP'
        self._ndb_CHI_linkname = 'array_chi'
        self._ndb_EXC_linkname = 'array_excitonic_states'
        self._ndb_HF_linkname = 'array_ndb_HFlocXC'
        self._lifetime_bands_linkname = 'bands_lifetime'
        self._quasiparticle_bands_linkname = 'bands_quasiparticle'
        self._parameter_linkname = 'output_parameters'
        self._system_info_linkname = 'system_info'
        super(YamboParser, self).__init__(calculation)

[docs]    def parse(self, retrieved, **kwargs):
        """Parses the datafolder, stores results.

        This parser for this code ...
        """
        from aiida.common.exceptions import InvalidOperation
        from aiida.common import exceptions
        from aiida.common import AIIDA_LOGGER

        # suppose at the start that the job is unsuccess, unless proven otherwise
        success = False

        # check whether the yambo calc was an initialisation (p2y)
        try:
            settings_dict = self._calc.inputs.settings.get_dict()
            settings_dict = _uppercase_dict(
                settings_dict, dict_name='settings')
        except AttributeError:
            settings_dict = {}

        initialise = settings_dict.pop('INITIALISE', None)
        verbose_timing = settings_dict.pop('T_VERBOSE', False)
            
        # select the folder object
        try:
            retrieved = self.retrieved
        except exceptions.NotExistent:
            return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

        try:
            input_params = self._calc.inputs.parameters.get_dict()
        except AttributeError:
            if not initialise:
                raise ParsingError("Input parameters not found!")
            else:
                input_params = {}
        # retrieve the cell: if parent_calc is a YamboCalculation we must find the original PwCalculation
        # going back through the graph tree.

        parent_calc = find_pw_parent(self._calc)
        cell = parent_calc.inputs.structure.cell
        try:
            parent_save_path = take_calc_from_remote(self._calc.inputs.parent_folder).outputs.output_parameters.get_dict().pop('ns_db1_path',None)
        except:
            parent_save_path: parent_save_path = '.'
    

        output_params = {'warnings': [], 'yambo_wrote_dbs': False, 'game_over': False,
        'p2y_completed': False, 'last_time':0,\
        'requested_time':self._calc.attributes['max_wallclock_seconds'], 'last_time_units':'seconds',\
        'memstats':[], 'para_error':False, 'memory_error':False,'timing':[],'time_error': False, 'has_gpu': False,
        'yambo_version':'5.x', 'Fermi(eV)':0,'ns_db1_path':parent_save_path,'X_par_allocation_error':False,'errors':[],'corrupted_fragment':False}
        ndbqp = {}
        ndbhf = {}
        q = None
        chi = {}
        excitonic_states = {}

        # Create temporary directory
        with tempfile.TemporaryDirectory() as dirpath:
            # Open the output file from the AiiDA storage and copy content to the temporary file
            for filename in retrieved.base.repository.list_object_names():
                # Create the file with the desired name
                temp_file = pathlib.Path(dirpath) / filename
                with retrieved.open(filename, 'rb') as handle:
                    temp_file.write_bytes(handle.read())


            if 'ns.db1' in os.listdir(dirpath):
                output_params['ns_db1_path'] = dirpath

            for filename in os.listdir(dirpath):
                
                if 'stderr' in filename:
                    with retrieved.open(filename) as stderr:
                        parse_scheduler_stderr(stderr, output_params)
                
                elif 'ndb.BS_diago' in filename: #BSE in AiiDA 2.x still not supported
                    q, chi, excitonic_states = parse_BS(dirpath+'/',
                                                                filename,
                                                                output_params['ns_db1_path'])            
            
            try:
                results = YamboFolder(dirpath)
            except Exception as e:
                success = False
                return self.exit_codes.PARSER_ANOMALY

            for result in results.yambofiles:
                if results is None:
                    continue

                #This should be automatic in yambopy...
                if result.type=='log':
                    parse_log(result, output_params, timing = verbose_timing)
                if result.type=='report':
                    parse_report(result, output_params)

                #if 'eel' in result.filename:
                #    eels_array = self._aiida_array(result.data)
                #    self.out(self._eels_array_linkname, eels_array)
                #elif 'eps' in result.filename:
                #    eps_array = self._aiida_array(result.data)
                #    self.out(self._eps_array_linkname, eps_array)
                #elif 'alpha' in result.filename:
                #    alpha_array = self._aiida_array(result.data)
                #    self.out(self._alpha_array_linkname,alpha_array)

                
                if 'ndb.QP' == result.filename:
                    ndbqp = copy.deepcopy(result.data)
                    
                    if len(numpy.where(numpy.isnan(ndbqp['E-Eo'].data))[0])>0:
                        return self.exit_codes.NaN_AS_OUTPUT
                        
                    QP_db = SingleFileData(dirpath+'/'+result.filename)
                    self.out(self._QP_db_linkname,QP_db)
                    

                elif 'ndb.HF_and_locXC' == result.filename:
                    ndbhf = copy.deepcopy(result.data)

                elif 'gw0___' in input_params['arguments']:
                    if self._aiida_bands_data(result.data, cell, result.kpoints):
                        arr = self._aiida_bands_data(result.data, cell,
                                                    result.kpoints)
                        if type(arr) == BandsData:  # ArrayData is not BandsData, but BandsData is ArrayData
                            self.out(self._quasiparticle_bands_linkname,arr)
                        if type(arr) == ArrayData:  #
                            self.out(self._qp_array_linkname,arr)

                elif 'life___' in input_params['arguments']:
                    if self._aiida_bands_data(result.data, cell, result.kpoints):
                        arr = self._aiida_bands_data(result.data, cell,
                                                    result.kpoints)
                        if type(arr) == BandsData:
                            self.out(self._alpha_array_linkname+'_bands',arr)
                        elif type(arr) == ArrayData:
                            self.out(self._alpha_array_linkname + '_arr', arr)
            
            yambo_wrote_dbs(output_params)

        if output_params['game_over']:
            success = True
        elif output_params['p2y_completed'] and initialise:
            success = True
        
        #last check on time
        delta_time = (float(output_params['requested_time'])-float(output_params['last_time'])) \
                  / float(output_params['requested_time'])
        
        if success == False:
            if delta_time > -2 and delta_time < 0.16:
                    output_params['time_error']=True

        params=Dict(output_params)
        self.out(self._parameter_linkname,params)  # output_parameters

        if success and 'gw0' in input_params['arguments'] and not ndbqp and not initialise:
            success = False
        elif success and 'bse' in input_params['arguments'] and not initialise and not (chi or eels_array or eps_array or alpha_array):
            success = False

        if success == False:


            if output_params['corrupted_fragment']:
                return self.exit_codes.Variable_NOT_DEFINED
            elif output_params['time_error']:
                return self.exit_codes.WALLTIME_ERROR
            elif output_params['para_error']:
                return self.exit_codes.PARA_ERROR
            elif output_params['X_par_allocation_error']:
                return self.exit_codes.X_par_MEMORY_ERROR              
            elif output_params['memory_error']:
                return self.exit_codes.MEMORY_ERROR
            else:
                return self.exit_codes.NO_SUCCESS
        
        else: 
            # we store  all the information from the ndb.* files rather than in separate files
            # if possible, else we default to separate files. #to check MB
            if ndbqp and ndbhf:  #
                self.out(self._ndb_linkname,self._sigma_c(ndbqp, ndbhf))
            else:
                if ndbqp:
                    self.out(self._ndb_QP_linkname,self._aiida_ndb_qp(ndbqp))
                if ndbhf:
                    self.out(self._ndb_HF_linkname,self._aiida_ndb_hf(ndbhf))
            
            if chi:  #
                self.out(self._ndb_CHI_linkname,self._aiida_array(chi))
            
            if excitonic_states:  #
                self.out(self._ndb_EXC_linkname,self._aiida_array(excitonic_states))

[docs]    def _aiida_array_bse(self, data):
        arraydata = ArrayData()
        full = data.pop('0')
        for i in data.keys():
            for k in full.keys():
                full[k].append(data[i][k][0])
        for ky in full.keys():
            arraydata.set_array(ky.replace('-','_').replace('`','_prime_').replace('/','_'), np.array(full[ky]))
        return arraydata

[docs]    def _aiida_array(self, data):
        arraydata = ArrayData()
        for ky in data.keys():
            arraydata.set_array(ky.replace('-','_minus_'), data[ky])
        return arraydata

[docs]    def _aiida_bands_data(self, data, cell, kpoints_dict):
        if not data:
            return False
        kpt_idx = sorted(data.keys())  #  list of kpoint indices
        try:
            k_list = [kpoints_dict[i]
                      for i in kpt_idx]  # list of k-point triplet
        except KeyError:
            # kpoint triplets are not present (true  for .qp and so on, can not use BandsData)
            # We use the internal Yambo Format  [ [Eo_1, Eo_2,... ], ...[So_1,So_2,] ]
            #                                  QP_TABLE  [[ib_1,ik_1,isp_1]      ,[ib_n,ik_n,isp_n]]
            # Each entry in DATA has corresponding legend in QP_TABLE that defines its details
            # like   ib= Band index,  ik= kpoint index,  isp= spin polarization index.
            #  Eo_1 =>  at ib_1, ik_1 isp_1.
            pdata = ArrayData()
            QP_TABLE = []
            ORD = []
            Eo = []
            E_minus_Eo = []
            So = []
            Z = []
            for ky in data.keys():  # kp == kpoint index as a string  1,2,..
                for ind in range(len(data[ky]['Band'])):
                    try:
                        Eo.append(data[ky]['Eo'][ind])
                    except KeyError:
                        pass
                    try:
                        E_minus_Eo.append(data[ky]['E-Eo'][ind])
                    except KeyError:
                        pass
                    try:
                        So.append(data[ky]['Sc|Eo'][ind])
                    except KeyError:
                        pass
                    try:
                        Z.append(data[ky]['Z'][ind])
                    except KeyError:
                        pass
                    ik = int(ky)
                    ib = data[ky]['Band'][ind]
                    isp = 0
                    if 'Spin_Pol' in list(data[ky].keys()):
                        isp = data[ky]['Spin_Pol'][ind]
                    QP_TABLE.append([ik, ib, isp])
            pdata.set_array('Eo', numpy.array(Eo))
            pdata.set_array('E_minus_Eo', numpy.array(E_minus_Eo))
            pdata.set_array('So', numpy.array(So))
            pdata.set_array('Z', numpy.array(Z))
            pdata.set_array('qp_table', numpy.array(QP_TABLE))
            return pdata
        quasiparticle_bands = BandsData()
        quasiparticle_bands.set_cell(cell)
        quasiparticle_bands.set_kpoints(k_list, cartesian=True)
        # labels will come from any of the keys in the nested  kp_point data,
        # there is a uniform set of observables for each k-point, ie Band, Eo, ...
        # ***FIXME BUG does not seem to handle spin polarizes at all when constructing bandsdata***
        bands_labels = [
            legend for legend in sorted(data[list(data.keys())[0]].keys())
        ]
        append_list = [[] for i in bands_labels]
        for kp in kpt_idx:
            for i in range(len(bands_labels)):
                append_list[i].append(data[kp][bands_labels[i]])
        generalised_bands = [numpy.array(it) for it in append_list]
        quasiparticle_bands.set_bands(
            bands=generalised_bands, units='eV', labels=bands_labels)
        return quasiparticle_bands

[docs]    def _aiida_ndb_qp(self, data):
        """
        Save the data from ndb.QP to the db
        """
        pdata = ArrayData()
        for quantity in data.keys():
            name_quantity = quantity.replace('-','_minus_')
            pdata.set_array(name_quantity, numpy.array(data[quantity]))
        return pdata

[docs]    def _aiida_ndb_hf(self, data):
        """Save the data from ndb.HF_and_locXC

        """
        pdata = ArrayData()
        for quantity in data.keys():
            name_quantity = quantity.replace('-','_minus_')
            pdata.set_array(name_quantity, numpy.array(data[quantity]))
        return pdata

[docs]    def _sigma_c(self, ndbqp, ndbhf):
        """Calculate S_c if missing from  information parsed from the  ndb.*

         Sc = 1/Z[ E-Eo] -S_x + Vxc
        """
        Z = numpy.array(ndbqp['Z'])
        E_minus_Eo = numpy.array(ndbqp['E-Eo'])
        Sx = numpy.array(ndbhf['Sx'])
        Vxc = numpy.array(ndbhf['Vxc'])
        try:
            Sc = numpy.array(ndbqp['So'])
        except:
            Sc = 1 / Z * E_minus_Eo - Sx + Vxc
        pdata = ArrayData()
        for quantity in ndbqp.keys():
            name_quantity = quantity.replace('-','_minus_')
            pdata.set_array(name_quantity, numpy.array(ndbqp[quantity]))
        for quantity in ndbhf.keys():
            name_quantity = quantity.replace('-','_minus_')
            pdata.set_array(name_quantity, numpy.array(ndbhf[quantity]))
        pdata.set_array('Sc', Sc)
        return pdata