import quippy as qp import numpy as np import argparse def atomicno_to_sym(atno): pdict={1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', 19: 'K', 20: 'Ca', 21: 'Sc', 22: 'Ti', 23: 'V', 24: 'Cr', 25: 'Mn', 26: 'Fe', 27: 'Co', 28: 'Ni', 29: 'Cu', 30: 'Zn', 31: 'Ga', 32: 'Ge', 33: 'As', 34: 'Se', 35: 'Br', 36: 'Kr', 37: 'Rb', 38: 'Sr', 39: 'Y', 40: 'Zr', 41: 'Nb', 42: 'Mo', 43: 'Tc', 44: 'Ru', 45: 'Rh', 46: 'Pd', 47: 'Ag', 48: 'Cd', 49: 'In', 50: 'Sn', 51: 'Sb', 52: 'Te', 53: 'I', 54: 'Xe', 55: 'Cs', 56: 'Ba', 57: 'La', 58: 'Ce', 59: 'Pr', 60: 'Nd', 61: 'Pm', 62: 'Sm', 63: 'Eu', 64: 'Gd', 65: 'Tb', 66: 'Dy', 67: 'Ho', 68: 'Er', 69: 'Tm', 70: 'Yb', 71: 'Lu', 72: 'Hf', 73: 'Ta', 74: 'W', 75: 'Re', 76: 'Os', 77: 'Ir', 78: 'Pt', 79: 'Au', 80: 'Hg', 81: 'Tl', 82: 'Pb', 83: 'Bi', 84: 'Po', 85: 'At', 86: 'Rn', 87: 'Fr', 88: 'Ra', 89: 'Ac', 90: 'Th', 91: 'Pa', 92: 'U', 93: 'Np', 94: 'Pu', 95: 'Am', 96: 'Cm', 97: 'Bk', 98: 'Cf', 99: 'Es', 100: 'Fm', 101: 'Md', 102: 'No', 103: 'Lr', 104: 'Rf', 105: 'Ha', 106: 'Sg', 107: 'Ns', 108: 'Hs', 109: 'Mt', 110: 'Unn', 111: 'Unu'} return pdict[atno] def get_spkitMax(frames): ''' Get the set of species their maximum number across frames. :param frames: list of quippy frames object :return: Dictionary with species as key and return its largest number of occurrence ''' spkitMax = {} for frame in frames: atspecies = {} for z in frame.z: if z in atspecies: atspecies[z] += 1 else: atspecies[z] = 1 for (z, nz) in atspecies.iteritems(): if z in spkitMax: if nz > spkitMax[z]: spkitMax[z] = nz else: spkitMax[z] = nz return spkitMax def get_spkit(frame): ''' Get the set of species their number across frame. :param frame: One quippy frames object :return: ''' spkit = {} for z in frame.z: if z in spkit: spkit[z]+=1 else: spkit[z] = 1 return spkit def get_soap(frame, spkit, spkitMax, centerweight=1., gaussian_width=0.5, cutoff=5.0, cutoff_transition_width=0.5, nmax=8, lmax=6): ''' Get the soap vectors (power spectra) for each frameic environments in frame. :param frame: A quippy atomsList object :param spkit: Dictionary with specie as key and number of corresponding frame as item. Returned by get_spkit(frame). :param spkitMax: Dictionary with species as key and return its largest number of occurrence. Returned by get_spkitMax(frames) . :param centerweight: Center atom weight :param gaussian_width: Atom Gaussian std :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper :param nmax: Number of radial basis functions. :param lmax: Number of Spherical harmonics. :return: Soap vectors of Atoms quippy class. Dictionary (keys:atomic number of the central atom, items: list of power spectra for each central atom with corresponding atomic number ) ''' zsp = spkitMax.keys() zsp.sort() lspecies = 'n_species=' + str(len(zsp)) + ' species_Z={ ' for z in zsp: lspecies = lspecies + str(z) + ' ' lspecies = lspecies + '}' frame.set_cutoff(cutoff) frame.calc_connect() soap = {} for (z, nz) in spkit.iteritems(): soapstr = "soap central_reference_all_species=F central_weight=" + str(centerweight)+\ " covariance_sigma0=0.0 atom_sigma=" + str(gaussian_width) +\ " cutoff=" + str(cutoff) + \ " cutoff_transition_width=" + str(cutoff_transition_width) + \ " n_max=" + str(nmax) + " l_max=" + str(lmax) + ' ' + lspecies +\ ' Z=' + str(z) desc = qp.descriptors.Descriptor(soapstr) sps = desc.calc(frame)["descriptor"] soap[z] = sps return soap def Soap2AlchemySoap(rawsoap, spkit, nmax, lmax): ''' Convert the soap vector of an environment from quippy descriptor to soap vectors with chemical channels. :param rawsoap: numpy array dim:(N,) containing the soap vector of one environment :param spkit: Dictionary with specie as key and number of corresponding atom as item. Returned by get_spkit(frame). :param nmax: Number of radial basis functions. :param lmax: Number of Spherical harmonics. :return: Dictionary (keys: species tuples (sp1,sp2), items: soap vector, numpy array dim:(nmax ** 2 * (lmax + 1),) ) ''' # spkit keys are the center species in the full frame zspecies = sorted(spkit.keys()) nspecies = len(spkit.keys()) alchemySoap = {} ipair = {} # initialize the alchemical soap for s1 in xrange(nspecies): for s2 in xrange( nspecies): # range(s1+1): we actually need to store also the reverse pairs if we want to go alchemical alchemySoap[(zspecies[s2], zspecies[s1])] = np.zeros(nmax ** 2 * (lmax + 1), float) ipair[(zspecies[s2], zspecies[s1])] = 0 isoap = 0 isqrttwo = 1.0 / np.sqrt(2.0) # selpair and revpair are modified and in turn modify soaps because they are all pointing at the same memory block for s1 in xrange(nspecies): for n1 in xrange(nmax): for s2 in xrange(s1 + 1): selpair = alchemySoap[(zspecies[s2], zspecies[s1])] # we need to reconstruct the spectrum for the inverse species order, that also swaps n1 and n2. # This is again only needed to enable alchemical combination of e.g. alpha-beta and beta-alpha. Shit happens revpair = alchemySoap[(zspecies[s1], zspecies[s2])] isel = ipair[(zspecies[s2], zspecies[s1])] for n2 in xrange(nmax if s2 < s1 else n1 + 1): for l in xrange(lmax + 1): # print s1, s2, n1, n2, isel, l+(self.lmax+1)*(n2+self.nmax*n1), l+(self.lmax+1)*(n1+self.nmax*n2) # selpair[isel] = rawsoap[isoap] if (s1 != s2): selpair[isel] = rawsoap[ isoap] * isqrttwo # undo the normalization since we will actually sum over all pairs in all directions! revpair[l + (lmax + 1) * (n1 + nmax * n2)] = selpair[isel] else: # diagonal species (s1=s2) have only half of the elements. # this is tricky. we need to duplicate diagonal blocks "repairing" these to be full. # this is necessary to enable alchemical similarity matching, where we need to combine # alpha-alpha and alpha-beta environment fingerprints selpair[l + (lmax + 1) * (n2 + nmax * n1)] = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) selpair[l + (lmax + 1) * (n1 + nmax * n2)] = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) # selpair[l + (lmax + 1) * (n2 + nmax * n1)] = selpair[l + (lmax + 1) * (n1 + nmax * n2)] \ # = rawsoap[isoap] * (1 if n1 == n2 else isqrttwo) isoap += 1 isel += 1 ipair[(zspecies[s2], zspecies[s1])] = isel return alchemySoap def get_Soaps(frames,chem_channels=False, centerweight=1.0, gaussian_width=0.5, cutoff=3.5, cutoff_transition_width=0.5 , nmax=8, lmax=6): ''' Compute the SOAP vectors for each atomic environment in frames and reorder them into chemical channels. :param frames: list of quippy frames object :param centerweight: Center atom weight :param gaussian_width: Atom Gaussian std :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper :param nmax: Number of radial basis functions. :param lmax: Number of Spherical harmonics. :return: Nested List/Dictionary: list->frames, dict->(keys:atomic number, items:list of atomic environment), list->atomic environment, dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number inside the atomic environment), items: SOAP vector, flat numpy array) ''' Soaps = [] # get the set of species their maximum number across frames spkitMax = get_spkitMax(frames) for frame in frames: # to avoid side effect due to pointers atm = frame.copy() # get the set of species their number across atom spkit = get_spkit(atm) # get the soap vectors (power spectra) for each atomic environments in atm rawsoaps = get_soap(atm, spkit, spkitMax, centerweight, gaussian_width, cutoff, cutoff_transition_width, nmax, lmax) # chemical channel separation for each central atom species # and each atomic environment if chem_channels: alchemySoap = {} for (z, soap) in rawsoaps.iteritems(): Nenv, Npowerspectrum = soap.shape lsp = [] # loop over the local environments of specie z for it in xrange(Nenv): # soap[it] is (1,Npowerspectrum) so need to transpose it # convert the soap vector of an environment from quippy descriptor to soap vectors # with chemical channels. lsp.append(Soap2AlchemySoap(soap[it].T, spkit, nmax, lmax)) # gather list of environment over the atomic number alchemySoap[z] = lsp # gather soaps over the atom Soaps.append(alchemySoap) # out put rawSoap else: Soaps.append(rawsoaps) return Soaps def get_AvgSoaps(frames, chem_channels=False, centerweight=1.0, gaussian_width=0.5, cutoff=3.5, cutoff_transition_width=0.5, nmax=8, lmax=6): ''' Compute the average SOAP vectors for each atomic environment in frames and reorder them into chemical channels. :param frames: list of quippy frames object :param centerweight: Center atom weight :param gaussian_width: Atom Gaussian std :param cutoff: Cutoff radius for each atomic environment in the unit of cell and positions. :param cutoff_transition_width: Steepness of the smooth environmental cutoff radius. Smaller -> steeper :param nmax: Number of radial basis functions. :param lmax: Number of Spherical harmonics. :return: Nested List/Dictionary: list->frames, dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number inside the atomic environment), items: SOAP vector, flat numpy array) ''' AvgSoaps = [] # get the set of species their maximum number across frames spkitMax = get_spkitMax(frames) for frame in frames: # to avoid side effect due to pointers atm = frame.copy() # get the set of species their number across atom spkit = get_spkit(atm) # get the soap vectors (power spectra) for each atomic environments in atm rawsoaps = get_soap(atm, spkit, spkitMax, centerweight, gaussian_width, cutoff, cutoff_transition_width, nmax, lmax) # compute the average soap over an atomic environment (creates a matrix with each line a soap # vector of one atomic environement and then average over the rows) avgrawsoap = np.concatenate(rawsoaps.values(), axis=0).mean(axis=0) # chemical channel separation for each each atomic environment if chem_channels: AvgSoaps.append(Soap2AlchemySoap(avgrawsoap, spkit, nmax, lmax)) # output average rawSoaps else: AvgSoaps.append(avgrawsoap) return AvgSoaps def get_AvgDeltaKernel(AvgRawSoapListA, AvgRawSoapListB): return np.dot(np.array(AvgRawSoapListA),np.array(AvgRawSoapListB).T) def get_AvgDeltaSim(AvgSoapA, AvgSoapB, chem_channels=False): if chem_channels: AvgDeltaSim = 0 for spA in AvgSoapA: for spB in AvgSoapB: if np.all(spA != spB): continue elif np.all(spA == spB): AvgDeltaSim += np.vdot(AvgSoapA[spA], AvgSoapB[spB]) else: AvgDeltaSim = np.vdot(AvgSoapA, AvgSoapB) return AvgDeltaSim def get_ChemDelta(alchemyAvgSoapA,alchemyAvgSoapB): chemicalSim = {} for spA in alchemyAvgSoapA: for spB in alchemyAvgSoapB: if np.all(spA == spB): chemicalSim[spA + spB] = 1. else: chemicalSim[spA + spB] = 0. return chemicalSim def get_AvgSim(alchemyAvgSoapA, alchemyAvgSoapB, chemicalSimGen): chemicalSim = chemicalSimGen(alchemyAvgSoapA, alchemyAvgSoapB) AvgSim = 0 for spA in alchemyAvgSoapA: for spB in alchemyAvgSoapB: theta = chemicalSim[spA + spB] if theta == 0.: continue else: AvgSim += theta * np.vdot(alchemyAvgSoapA[spA], alchemyAvgSoapB[spB]) return AvgSim def dumpAlchemySoapstxt(alchemySoaps,fout): ''' Print in text format the alchemySoaps using the same format as in glosim --verbose :param alchemySoaps: Nested List/Dictionary: list->frames, dict->(keys:atomic number, items:list of atomic environment), list->atomic environment, dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number inside the atomic environment), items: SOAP vector, flat numpy array) :param fout: Writable python io object :return: None ''' for iframe, alchemySoap in enumerate(alchemySoaps): fout.write("# Frame %d \n" % (iframe)) for zatom, soapEnvList in alchemySoap.iteritems(): for ienv, soapEnv in enumerate(soapEnvList): fout.write("# Species %d Environment %d \n" % (zatom, ienv)) for (sp1, sp2), soap in soapEnv.iteritems(): fout.write("%d %d " % (sp1, sp2)) for sj in soap: fout.write("%8.4e " % (sj)) fout.write("\n") def dumpAlchemySoapspickle(alchemySoaps, fout): ''' Dump alchemySoaps in pickle binary format. Read with pck.load(filename) :param alchemySoaps: Nested List/Dictionary: list->frames, dict->(keys:atomic number, items:list of atomic environment), list->atomic environment, dict->(keys:chemical channel, (sp1,sp2) sp* is atomic number inside the atomic environment), items: SOAP vector, flat numpy array) :param fout: Writable python io object :return: None ''' import cPickle as pck pck.dump(alchemySoaps,fout,protocol=pck.HIGHEST_PROTOCOL) if __name__ == '__main__': parser = argparse.ArgumentParser(description="""Computes the SOAP vectors of a list of atomic frame and differenciate the chemical channels. Ready for alchemical kernel.""") parser.add_argument("filename", nargs=1, help="Name of the LibAtom formatted xyz input file") parser.add_argument("-n", type=int, default='8', help="Number of radial functions for the descriptor") parser.add_argument("-l", type=int, default='6', help="Maximum number of angular functions for the descriptor") parser.add_argument("-c", type=float, default='5.0', help="Radial cutoff") parser.add_argument("-cotw", type=float, default='0.5', help="Cutoff transition width") parser.add_argument("-g", type=float, default='0.5', help="Atom Gaussian sigma") parser.add_argument("-cw", type=float, default='1.0', help="Center atom weight") parser.add_argument("-prefix", type=str, default='', help="Prefix for output files (defaults to input file name)") parser.add_argument("-first", type=int, default='0', help="Index of first frame to be read in") parser.add_argument("-last", type=int, default='0', help="Index of last frame to be read in") parser.add_argument("-outformat", type=str, default='pickle', help="Choose how to dump the alchemySoaps, e.g. pickle (default) or text (same as from glosim --verbose)") args = parser.parse_args() filename = args.filename[0] prefix = args.prefix centerweight = args.cw gaussian_width = args.g cutoff = args.c cutoff_transition_width = args.cotw nmax = args.n lmax = args.l first = args.first if args.first>0 else None last = args.last if args.last>0 else None if args.outformat in ['text','pickle']: outformat = args.outformat else: raise Exception('outformat is not recognised') if prefix=="": prefix=filename if prefix.endswith('.xyz'): prefix=prefix[:-4] prefix += "-n"+str(nmax)+"-l"+str(lmax)+"-c"+str(cutoff)+\ "-g"+str(gaussian_width)+ "-cw"+str(centerweight)+ \ "-cotw" +str(cutoff_transition_width) print "using output prefix =", prefix # Reads input file using quippy print "Reading input file", filename # Reads the file and create a list of quippy frames object frames = qp.AtomsList(filename, start=first, stop=last) alchemySoaps = get_Soaps(frames, centerweight=centerweight, gaussian_width=gaussian_width, cutoff=cutoff, cutoff_transition_width=cutoff_transition_width, nmax=nmax, lmax=lmax,chem_channels=True) if outformat == 'text': with open(prefix + "-soap.dat", "w") as fout: dumpAlchemySoapstxt(alchemySoaps, fout) elif outformat == 'pickle': with open(prefix + "-soap.pck", "w") as fout: dumpAlchemySoapspickle(alchemySoaps, fout)