Source code for ipie.analysis.extraction
# Copyright 2022 The ipie Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Authors: Fionn Malone <fmalone@google.com>
# Joonho Lee <linusjoonho@gmail.com>
#
import json
import h5py
import numpy
import pandas as pd
from ipie.utils.misc import get_from_dict
[docs]def extract_hdf5_data(filename, block_idx=1):
shapes = {}
with h5py.File(filename, 'r') as fh5:
keys = fh5[f'block_size_{block_idx}/data/'].keys()
shape_keys = fh5[f'block_size_{block_idx}/shape/'].keys()
data = numpy.concatenate([fh5[f'block_size_{block_idx}/data/{d}'][:].real for d in keys])
for k in shape_keys:
shapes[k] = {
'names': fh5[f'block_size_{block_idx}/names/{k}'][()],
'shape': fh5[f'block_size_{block_idx}/shape/{k}'][:],
'offset': fh5[f'block_size_{block_idx}/offset/{k}'][()],
'size': fh5[f'block_size_{block_idx}/size/{k}'][()],
'scalar': bool(fh5[f'block_size_{block_idx}/scalar/{k}'][()]),
'num_walker_props': fh5[f'block_size_{block_idx}/num_walker_props'][()],
'walker_header': fh5[f'block_size_{block_idx}/walker_prop_header'][()]
}
size_keys = fh5[f'block_size_{block_idx}/max_block'].keys()
max_block = sum(fh5[f'block_size_{block_idx}/max_block/{d}'][()] for d in size_keys)
return data[:max_block+1], shapes
[docs]def extract_observable(filename, name='energy', block_idx=1):
data, info = extract_hdf5_data(filename, block_idx=block_idx)
obs_info = info.get(name)
if obs_info is None:
raise RuntimeError(f"Unknown value for name={name}")
obs_slice = slice(obs_info['offset'], obs_info['offset'] + obs_info['size'])
if obs_info['scalar']:
obs_data = data[:,obs_slice].reshape((-1,)+tuple(obs_info['shape']))
nwalk_prop = obs_info['num_walker_props']
weight_data = data[:,:nwalk_prop].reshape((-1,nwalk_prop))
results = pd.DataFrame(numpy.hstack([weight_data, obs_data]))
header = list(obs_info['walker_header']) + obs_info['names'].split()
results.columns = [n.decode('utf-8') for n in header]
return results
else:
obs_data = data[:,obs_slice]
nsamp = data.shape[0]
walker_averaged = obs_data[:,:-1] / obs_data[:,-1].reshape((nsamp, -1))
return walker_averaged.reshape((nsamp,) + tuple(obs_info['shape']))
[docs]def extract_data_sets(files, group, estimator, raw=False):
data = []
for f in files:
data.append(extract_data(f, group, estimator, raw))
return pd.concat(data)
[docs]def extract_data_from_textfile(filename):
output = []
start_collecting = False
header = ''
with open(filename, 'r') as f:
for line in f:
if 'End Time' in line:
break
if start_collecting and 'End Time' not in line:
data = [float(s) for s in line.split()]
output.append(data)
if ('Iteration' in line or 'Block' in line) and ':' not in line:
header = line.split()
start_collecting = True
elif 'Block' in line and ':' not in line:
header = line.split()
start_collecting = True
data = numpy.array(output)
results = pd.DataFrame({k: v for k, v in zip(header, data.T)})
return results
[docs]def extract_data(filename, group, estimator, raw=False):
fp = get_param(filename, ["propagators", "free_projection"])
with h5py.File(filename, "r") as fh5:
dsets = list(fh5[group][estimator].keys())
data = numpy.array([fh5[group][estimator][d][:] for d in dsets])
if "rdm" in estimator or raw:
return data
else:
header = fh5[group]["headers"][:]
header = numpy.array([h.decode("utf-8") for h in header])
df = pd.DataFrame(data)
df.columns = header
if not fp:
df = df.apply(numpy.real)
return df
[docs]def extract_mixed_estimates(filename, skip=0):
return extract_data(filename, "basic", "energies")[skip:]
[docs]def extract_bp_estimates(filename, skip=0):
return extract_data(filename, "back_propagated", "energies")[skip:]
[docs]def extract_rdm(filename, est_type="back_propagated", rdm_type="one_rdm", ix=None):
rdmtot = []
if ix is None:
splits = get_param(
filename, ["estimators", "estimators", "back_prop", "splits"]
)
ix = splits[0][-1]
if est_type == "back_propagated":
denom = extract_data(filename, est_type, "denominator_{}".format(ix), raw=True)
one_rdm = extract_data(
filename, est_type, rdm_type + "_{}".format(ix), raw=True
)
else:
one_rdm = extract_data(filename, est_type, rdm_type, raw=True)
denom = None
fp = get_param(filename, ["propagators", "free_projection"])
if fp:
print("# Warning analysis of FP RDM not implemented.")
return (one_rdm, denom)
else:
if denom is None:
return one_rdm
if len(one_rdm.shape) == 4:
return one_rdm / denom[:, None, None]
elif len(one_rdm.shape) == 5:
return one_rdm / denom[:, None, None, None]
elif len(one_rdm.shape) == 3:
return one_rdm / denom[:, None]
else:
return one_rdm / denom
[docs]def set_info(frame, md):
system = md.get("system")
qmc = md.get("qmc")
propg = md.get("propagators")
trial = md.get("trial")
ncols = len(frame.columns)
frame["dt"] = qmc.get("dt")
nwalkers = qmc.get("ntot_walkers")
if nwalkers is not None:
frame["nwalkers"] = nwalkers
fp = get_from_dict(md, ["propagators", "free_projection"])
if fp is not None:
frame["free_projection"] = fp
beta = qmc.get("beta")
bp = get_from_dict(md, ["estimates", "estimates", "back_prop"])
frame["nbasis"] = system.get("nbasis", 0)
if bp is not None:
frame["tau_bp"] = bp["tau_bp"]
if beta is not None:
frame["beta"] = beta
br = qmc.get("beta_scaled")
if br is not None:
frame["beta_red"] = br
mu = system.get("mu")
if mu is not None:
frame["mu"] = system.get("mu")
if trial is not None:
frame["mu_T"] = trial.get("mu")
frame["Nav_T"] = trial.get("nav")
# else:
# frame["E_T"] = trial.get("energy")
if system["name"] == "UEG":
frame["rs"] = system.get("rs")
frame["ecut"] = system.get("ecut")
frame["nup"] = system.get("nup")
frame["ndown"] = system["ndown"]
elif system["name"] == "Hubbard":
frame["U"] = system.get("U")
frame["nx"] = system.get("nx")
frame["ny"] = system.get("ny")
frame["nup"] = system.get("nup")
frame["ndown"] = system.get("ndown")
elif system["name"] == "Generic":
ints = system.get("integral_file")
if ints is not None:
frame["integrals"] = ints
chol = system.get("threshold")
if chol is not None:
frame["cholesky_treshold"] = chol
frame["nup"] = system.get("nup")
frame["ndown"] = system.get("ndown")
frame["nbasis"] = system.get("nbasis", 0)
return list(frame.columns[ncols:])
[docs]def get_metadata(filename):
try:
with h5py.File(filename, "r") as fh5:
metadata = json.loads(fh5["metadata"][()])
except:
print("# problem with file = {}".format(filename))
return metadata
[docs]def extract_test_data_hdf5(filename, skip=10):
"""For use with testcode"""
try:
data = extract_observable(filename, 'energy')[::skip].to_dict(orient="list")
except KeyError:
# Fall back to legacy
data = extract_mixed_estimates(filename)
# use list so can json serialise easily.
data = data.drop(["Iteration", "Time"], axis=1)[::skip].to_dict(orient="list")
data["sys_info"] = get_metadata(filename)["sys_info"]
try:
mrdm = extract_rdm(filename, est_type="mixed", rdm_type="one_rdm")
except (KeyError, TypeError, AttributeError):
mrdm = None
try:
brdm = extract_rdm(filename, est_type="back_propagated", rdm_type="one_rdm")
except (KeyError, TypeError, AttributeError):
brdm = None
if mrdm is not None:
mrdm = mrdm[::4].ravel()
# Don't compare small numbers
re = numpy.real(mrdm)
im = numpy.imag(mrdm)
re[numpy.abs(re) < 1e-12] = 0.0
im[numpy.abs(im) < 1e-12] = 0.0
data["Gmixed_re"] = list(mrdm)
data["Gmixed_im"] = list(mrdm)
if brdm is not None:
brdm = brdm[::4].flatten().copy()
re = numpy.real(brdm)
im = numpy.imag(brdm)
re[numpy.abs(re) < 1e-12] = 0.0
im[numpy.abs(im) < 1e-12] = 0.0
data["Gbp_re"] = list(re)
data["Gbp_im"] = list(im)
# if itcf is not None:
# itcf = itcf[abs(itcf) > 1e-10].flatten()
# data = pd.DataFrame(itcf)
return data