#
# The high-throughput toolkit (httk)
# Copyright (C) 2012-2015 Rickard Armiento
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import string
from fractions import Fraction
from httk.atomistic.data import periodictable
from httk.core import FracVector
import httk
from httk.atomistic import *
[docs]def out_to_struct(ioa):
"""
Example input::
OUTPUT CELL INFORMATION
Symmetry information:
Trigonal crystal system.
Space group number : 165
Hall symbol : -P 3 2"c
Hermann-Mauguin symbol : P-3c1
Bravais lattice vectors :
0.8660254 -0.5000000 0.0000000
0.0000000 1.0000000 0.0000000
0.0000000 0.0000000 1.0231037
All sites, (lattice coordinates):
Atom a1 a2 a3
La 0.6609000 0.0000000 0.2500000
La 0.3391000 0.0000000 0.7500000
...
F 0.0000000 0.0000000 0.2500000
F 0.0000000 0.0000000 0.7500000
Unit cell volume : 328.6477016 A^3
Unit cell density : 3.5764559 u/A^3 = 5.9388437 g/cm^3
"""
results = {}
results['output_cell'] = []
results['input_cell'] = []
results['coords'] = []
results['sgcoords'] = []
results['occupancies'] = []
results['sgoccupancies'] = []
results['in_output'] = False
results['in_input'] = False
results['in_input_coords'] = False
results['in_coords'] = False
results['in_cell'] = False
results['in_input_cell'] = False
results['in_bib'] = False
results['bib'] = ""
results['seen'] = {}
results['sgseen'] = {}
results['idx'] = 0
results['sgidx'] = 0
def read_cell(results, match):
if results['in_input']:
results['input_cell'].append([(match.group(1)), (match.group(2)), (match.group(3))])
elif results['in_output']:
results['output_cell'].append([(match.group(1)), (match.group(2)), (match.group(3))])
def read_coords_occs(results, match):
if results['in_input']:
coordstr = 'sgcoords'
occstr = 'sgoccupancies'
seenstr = 'sgseen'
idxstr = 'sgidx'
elif results['in_output']:
coordstr = 'coords'
occstr = 'occupancies'
seenstr = 'seen'
idxstr = 'idx'
else:
return
newcoord = (match.group(2), match.group(3), match.group(4))
#newcoord = FracVector.create([match.group(2),match.group(3),match.group(4)]).limit_denominator(5000000).simplify()
species = match.group(1).split("/")
occups = match.group(6).split("/")
for j in range(len(species)):
occup = {'atom': periodictable.atomic_number(species[j]), 'ratio': FracVector.create(occups[j]), }
if newcoord in results[seenstr]:
idx = results[seenstr][newcoord]
#print("OLD",results[occstr],idx)
results[occstr][idx].append(occup)
else:
results[seenstr][newcoord] = results[idxstr]
results[coordstr].append(newcoord)
results[occstr].append([occup])
results[idxstr] += 1
#print("NEW",results[occstr],results[idxstr])
def read_coords(results, match):
if results['in_input']:
coordstr = 'sgcoords'
occstr = 'sgoccupancies'
seenstr = 'sgseen'
idxstr = 'sgidx'
elif results['in_output']:
coordstr = 'coords'
occstr = 'occupancies'
seenstr = 'seen'
idxstr = 'idx'
else:
return
newcoord = (match.group(2), match.group(3), match.group(4))
#newcoord = FracVector.create([match.group(2),match.group(3),match.group(4)]).limit_denominator(5000000).simplify()
species = match.group(1)
occup = {'atom': periodictable.atomic_number(species)}
if newcoord in results[seenstr]:
idx = results[seenstr][newcoord]
#print("XOLD",results[occstr],idx)
results[occstr][idx].append(occup)
else:
results[seenstr][newcoord] = results[idxstr]
results[coordstr].append(newcoord)
results[occstr].append([occup])
results[idxstr] += 1
#print("XNEW",results[occstr],results['idx'])
#if results['in_input']:
# results['sgcoords'].append(newcoord)
# results['sgoccupancies'].append(periodictable.atomic_symbol(match.group(1)))
#elif results['in_output']:
# results['coords'].append(newcoord)
# results['occupancies'].append(periodictable.atomic_symbol(match.group(1)))
def read_volume(results, match):
results['volume'] = match.group(1)
def read_hall(results, match):
if results['in_input']:
results['sghall'] = match.group(1)
elif results['in_output']:
results['hall'] = match.group(1)
def read_id(results, match):
results['id'] = match.group(1)
def coords_stop(results, match):
results['in_coords'] = False
def coords_start(results, match):
if results['in_input']:
results['in_input_coords'] = True
elif results['in_output']:
results['in_coords'] = True
def cell_stop(results, match):
results['in_cell'] = False
def input_cell_stop(results, match):
results['in_input_cell'] = False
def cell_start(results, match):
results['in_cell'] = True
def input_cell_start(results, match):
results['in_input_cell'] = True
def output_start(results, match):
results['in_output'] = True
results['in_input'] = False
def read_version(results, match):
results['version'] = match.group(1)
def read_name(results, match):
expr = httk.basic.parse_parexpr(match.group(1))
# Grab the last, nested, parenthesed expression
p = ""
for x in expr:
if x[0] == 0:
p = x[1]
results['name'] = p
def read_bib(results, match):
if match.group(1).strip() != 'Failed to get author information, No journal information':
results['bib'] += match.group(1).strip()
def bib_start(results, match):
results['in_bib'] = True
def bib_stop_input_start(results, match):
results['in_bib'] = False
results['in_input'] = True
def read_source(results, match):
results['source'] = match.group(1).rstrip('.')
out = httk.basic.micro_pyawk(ioa, [
['^ *INPUT CELL INFORMATION *$', None, bib_stop_input_start],
['^ *CIF2CELL ([0-9.]*)', None, read_version],
['^ *Output for (.*\)) *$', None, read_name],
['^ *Database reference code: *([0-9]+)', None, read_id],
['^ *All sites, (lattice coordinates): *$', lambda results, match: results['in_cell'], cell_stop],
['^ *Representative sites *: *$', lambda results, match: results['in_input_cell'], input_cell_stop],
['^ *$', lambda results, match: results['in_coords'], coords_stop],
['^ *([-0-9.]+) +([-0-9.]+) +([-0-9.]+) *$', lambda results, match: results['in_cell'] or results['in_input_cell'], read_cell],
['^ *([a-zA-Z]+) +([-0-9.]+) +([-0-9.]+) +([-0-9.]+) *$', lambda results, match: results['in_coords'] or results['in_input_coords'], read_coords],
['^ *([a-zA-Z/]+) +([-0-9.]+) +([-0-9.]+) +([-0-9.]+)( +([-0-9./]+)) *$', lambda results, match: results['in_coords'] or results['in_input_coords'], read_coords_occs],
# ['^ *Hermann-Mauguin symbol *: *(.*)$',lambda results,match: results['in_output'],read_spacegroup],
['^ *Hall symbol *: *(.*)$', lambda results, match: results['in_output'] or results['in_input'], read_hall],
['^ *Unit cell volume *: *([-0-9.]+) +A\^3 *$', lambda results, match: results['in_output'], read_volume],
['^ *Bravais lattice vectors : *$', lambda results, match: results['in_output'], cell_start],
['^ *Lattice parameters: *$', lambda results, match: results['in_input'], input_cell_start],
['^ *Atom +a1 +a2 +a3', lambda results, match: results['in_output'] or results['in_input'], coords_start],
['^ *OUTPUT CELL INFORMATION *$', None, output_start],
['^([^\n]*)$', lambda results, match: results['in_bib'], read_bib],
['^ *BIBLIOGRAPHIC INFORMATION *$', None, bib_start],
['CIF file exported from +(.*) *$', None, read_source]
], debug=False, results=results)
out['bib'] = out['bib'].strip()
rc_a, rc_b, rc_c = [float(x) for x in out['input_cell'][0]]
rc_alpha, rc_beta, rc_gamma = [float(x) for x in out['input_cell'][1]]
uc_a, uc_b, uc_c = [float(x) for x in out['output_cell'][0]]
uc_alpha, uc_beta, uc_gamma = [float(x) for x in out['output_cell'][1]]
rc_cell = FracVector.create(out['input_cell'])
uc_cell = FracVector.create(out['output_cell'])
coords = FracVector.create(out['coords']).limit_denominator(5000000).simplify()
sgcoords = FracVector.create(out['sgcoords']).limit_denominator(5000000).simplify()
hall_symbol = out['hall']
sghall_symbol = out['sghall']
tags = {}
if 'source' in out:
tags['source'] = out['source']+":"+out['id']
if 'bib' in out and out['bib'] != '':
refs = [out['bib']]
else:
refs = None
if 'name' in out:
tags['name'] = filter(lambda x: x in string.printable, out['name'])
# This is to handle a weird corner case, where atoms in a disordered structure
# are placed on equivalent but different sites in the representative representation; then
# they will be mapped to the same sites in the filled cell. Our solution in that case is
# to throw away the rc_cell, since it is incorrect - it has equivalent sites that appear
# different even though they are the same.
remaining_filled_sites = list(out['occupancies'])
for i in range(len(out['sgoccupancies'])):
if out['sgoccupancies'][i] in remaining_filled_sites:
remaining_filled_sites = filter(lambda a: a != out['sgoccupancies'][i], remaining_filled_sites)
if len(remaining_filled_sites) > 0:
rc_cell_broken = True
else:
rc_cell_broken = False
if not rc_cell_broken:
struct = Structure.create(rc_lengths=rc_cell[0],
rc_angles=rc_cell[1],
rc_reduced_occupationscoords=sgcoords,
uc_cell=uc_cell,
uc_reduced_occupationscoords=coords,
uc_volume=out['volume'],
rc_occupancies=out['sgoccupancies'],
uc_occupancies=out['occupancies'],
spacegroup=sghall_symbol,
tags=tags, refs=refs, periodicity=0)
else:
struct = Structure.create(uc_cell=uc_cell,
uc_reduced_occupationscoords=coords,
uc_volume=out['volume'],
uc_occupancies=out['occupancies'],
spacegroup=sghall_symbol,
tags=tags, refs=refs, periodicity=0)
# A bit of santiy check to trigger on possible bugs from cif2cell
if len(struct.uc_sites.counts) != len(struct.rc_sites.counts):
print(struct.uc_sites.counts, struct.rc_sites.counts)
raise Exception("cif2cell_if.out_to_struct: non-sensible parsing of cif2cell output.")
#if 'volume' in out:
# vol = FracVector.create(out['volume'])
#else:
# volstruct = httk.Structure.create(a=a,b=b,c=c,alpha=alpha,beta=beta,gamma=gamma, occupancies=out['sgoccupancies'], coords=sgcoords, hall_symbol=sghall_symbol, refs=refs)
# vol = volstruct.volume
#struct = httk.Structure.create(cell,occupancies=out['occupancies'],coords=coords,volume=vol,tags=tags,hall_symbol=hall_symbol, refs=refs)
#struct._sgstructure = httk.SgStructure.create(a=a,b=b,c=c,alpha=alpha,beta=beta,gamma=gamma, occupancies=out['sgoccupancies'], coords=sgcoords, hall_symbol=sghall_symbol)
#print("HERE WE ARE:",out['sgoccupancies'],sgcoords,sghall_symbol)
#print("HERE WE ARE:",out['occupancies'],coords)
#struct = httk.Structure.create(cell, volume=vol, unique_occupations=out['sgoccupancies'], uc_occupations=out['occupancies'], unique_reduced_occupationscoords=sgcoords, uc_reduced_occupationscoords=coords, spacegroup=sghall_symbol, tags=tags, refs=refs, periodicity=0)
#print("HERE",sgcoords, coords)
#counts = [len(x) for x in out['occupancies']]
#p1structure = httk.Structure.create(cell,occupancies=out['occupancies'],coords=coords,volume=vol,tags=tags, refs=refs, periodicity=0)
#struct.set_p1structure(p1structure)
return struct