Source code for refl1d.errors

# -*- coding: utf-8 -*-
"""
Visual representation of model uncertainty.

For reflectivity models, this aligns and plots a set of profiles chosen
from the parameter uncertainty distribution, and plots the distribution
of the residual values.

Use *run_errors* in a model file to reload the results of a batch DREAM fit.
"""
from __future__ import print_function

__all__ = ['reload_errors', 'run_errors',
           'calc_errors', 'align_profiles',
           'show_errors', 'show_profiles', 'show_residuals',
          ]

import sys
import os

import numpy as np
from bumps.plotutil import next_color, dhsv, plot_quantiles, form_quantiles
from bumps.errplot import reload_errors

from .util import asbytes
from .reflectivity import BASE_GUIDE_ANGLE

#CONTOURS = (68, 95, 100)
#CONTOURS = (57, 68, 84, 95, 100)
CONTOURS = (68, 95)

# TODO: we should just keep a certain number of evaluations as a matter of
#       course during sampling rather than recomputing them after the fact.
# TODO: want similar code for covariance matrix based forward analysis
# TODO: need to delegate accumulation of models and plotting to Fitness
# TODO: move run_errors to align.py and use argparse to process sys.argv
# TODO: add controls for the additional parameters to the command line

[docs] def run_errors(**kw): """ Command line tool for generating error plots from models. Type the following to regenerate the profile contour plots plots: $ refl1d align <model>.py <store> [<align>] [0|1|2|n] Align is either auto for the current behaviour, or it is an interface number. You can align on the center of a layer by adding 0.5 to the interface number. You can count interfaces from the surface by prefixing with R. For example, 0 is the substrate interface, R1 is the surface interface, 2.5 is the the middle of layer 2 above the substrate. You can plot the profiles and residuals on one plot by setting plots to 1, on two separate plots by setting plots to 2, or each curve on its own plot by setting plots to n. Plots are saved in <store>/<model>-err#.png. If plots is 0, then no plots are created. Additional parameters include: *nshown*, *random* : see :func:`bumps.errplot.calc_errors_from_state` *contours*, *npoints*, *plots*, *save* : see :func:`show_errors` """ load = {'model': None, 'store': None, 'nshown': 50, 'random': True} show = {'align': 'auto', 'plots': 2, 'contours': CONTOURS, 'npoints': 400, 'save': None} for k, v in kw.items(): if k in load: load[k] = v elif k in show: show[k] = v else: raise TypeError("Unknown argument "+k) if len(sys.argv) > 2: load['model'], load['store'] = sys.argv[1], sys.argv[2] align_str = sys.argv[3] if len(sys.argv) >= 4 else '0' if align_str[0] == 'R': align_str = '-'+align_str[1:] show['align'] = float(align_str) if align_str != 'auto' else align_str plots_str = sys.argv[4] if len(sys.argv) >= 5 else '2' show['plots'] = int(plots_str) if plots_str != 'n' else plots_str #print align, align_str, len(sys.argv), sys.argv if not load['store'] or not load['model']: _usage() sys.exit(0) if show['save'] is None: name, _ = os.path.splitext(os.path.basename(load['model'])) show['save'] = os.path.join(load['store'], name) print("loading... this may take awhile") errors = reload_errors(**load) print("showing...") show_errors(errors, **show) if show['plots'] != 0: import matplotlib.pyplot as plt plt.show() sys.exit(0) # Force refl1d to terminate.
def _usage(): print(run_errors.__doc__)
[docs] def calc_errors(problem, points): """ Align the sample profiles and compute the residual difference from the measured reflectivity for a set of points. The points should be sampled from the posterior probability distribution computed from MCMC, bootstrapping or sampled from the error ellipse calculated at the minimum. Each of the returned arguments is a dictionary mapping model number to error sample data as follows: Returns (profiles, slabs, Q, residuals). *profiles* Arrays of (z, rho, irho) for non-magnetic models or arrays of (z, rho, irho, rhoM, thetaM) for magnetic models. There will be one set of arrays returned per error sample. *slabs* Array of slab thickness for the layers in the models. There will be one array returned per error sample. Using slab thickness, profiles can be aligned on interface boundaries and layer centers. *Q* Array of Q values for the data points in the model. The data points are the same for all error samples, so only one Q array is needed per model. *residuals* Array of (theory-data)/uncertainty for each data point in the measurement. There will be one array returned per error sample. """ # Grab the individual samples if hasattr(problem, 'models'): models = [m.fitness for m in problem.models] else: models = [problem.fitness] experiments = [] for m in models: if hasattr(m, 'parts'): experiments.extend(m.parts) else: experiments.append(m) #probes = [] #for m in experiments: # if hasattr(m.probe, 'probes'): # probes.extend(m.probe.probes) # elif hasattr(m.probe, 'xs'): # probes.extend([p for p in m.probe if p]) # else: # probes.append(p) # Find Q def residQ(m): if m.probe.polarized: return np.hstack([xs.Q for xs in m.probe.xs if xs is not None]) else: return m.probe.Q Q = dict((m, residQ(m)) for m in experiments) profiles = dict((m, []) for m in experiments) residuals = dict((m, []) for m in experiments) slabs = dict((m, []) for m in experiments) def record_point(): problem.chisq_str() # Force reflectivity recalculation for m in experiments: D = m.residuals() residuals[m].append(D+0) slabs_i = [L.thickness.value for L in m.sample[1:-1]] slabs[m].append(np.array(slabs_i)) if m.ismagnetic: z, rho, irho, rhoM, thetaM = m.magnetic_smooth_profile() profiles[m].append((z+0, rho+0, irho+0, rhoM+0, thetaM+0)) else: z, rho, irho = m.smooth_profile() profiles[m].append((z+0, rho+0, irho+0)) record_point() # Put best at slot 0, no alignment for p in points: problem.setp(p) record_point() # Turn residuals into arrays residuals = dict((k, np.asarray(v).T) for k, v in residuals.items()) return profiles, slabs, Q, residuals
[docs] def align_profiles(profiles, slabs, align): """ Align profiles for each sample """ return dict((m, _align_profile_set(profiles[m], slabs[m], align)) for m in profiles.keys())
[docs] def show_errors(errors, contours=CONTOURS, npoints=200, align='auto', plots=1, save=None): """ Plot the aligned profiles and the distribution of the residuals for profiles and residuals returned from calc_errors. *contours* can be a list of percentiles or []. If percentiles are given, then show uncertainty using a contour plot with the given levels, otherwise just overplot sample lines. *contours* defaults to [68, 95, 100]. *npoints* is the number of points to use when generating the profile contour. Since the z values for the various lines do not correspond, the contour generator interpolates the entire profile range with linear spacing using this number of points. *align* is the interface number plus fractional distance within the layer following the interface. For example, use 0 for the substrate interface, use -1 for the surface interface, or use 2.5 for the center of the second slab above the substrate. If *align='auto'* then choose an offset that minimizes the cross-correlation between the first profile and the current profile. *plots* is the number of plots to use (1, 2, or 'n'). *save* is the basename of the plot to save. This should usually be "<store>/<model>". The program will add '-err#.png' where '#' is the number of the plot. """ import matplotlib.pyplot as plt if plots == 0: # Don't create plots, just save the data _save_profile_data(errors, contours=contours, npoints=npoints, align=align, save=save) _save_residual_data(errors, contours=contours, save=save) elif plots == 1: # Subplots for profiles/residuals plt.subplot(211) show_profiles(errors, contours=contours, npoints=npoints, align=align) plt.subplot(212) show_residuals(errors, contours=contours) if save: plt.savefig(save+"-err.png") elif plots == 2: # Separate plots for profiles/residuals show_profiles(errors, contours=contours, npoints=npoints, align=align) if save: plt.savefig(save+"-err1.png") plt.figure() show_residuals(errors, contours=contours) if save: plt.savefig(save+"-err2.png") else: # Multiple plots profiles, slabs, Q, residuals = errors fignum = 1 for m in profiles.keys(): plt.figure() show_profiles(errors=({m: profiles[m]}, {m: slabs[m]}, None, None), contours=contours, npoints=npoints, align=align) if save: plt.savefig(save+"-err%d.png"%fignum) fignum += 1 for m in residuals.keys(): plt.figure() show_residuals(errors=(None, None, {m: Q[m]}, {m: residuals[m]}), contours=contours) if save: plt.savefig(save+"-err%d.png"%fignum) fignum += 1
[docs] def show_profiles(errors, align, contours, npoints): profiles, slabs, _, _ = errors if align is not None: profiles = align_profiles(profiles, slabs, align) if contours: _profiles_contour(profiles, contours, npoints) else: _profiles_overplot(profiles)
[docs] def show_residuals(errors, contours): _, _, Q, residuals = errors if False and contours: _residuals_contour(Q, residuals, contours=contours) else: _residuals_overplot(Q, residuals)
def _save_profile_data(errors, align, contours, npoints, save): profiles, slabs, _, _ = errors if align is not None: profiles = align_profiles(profiles, slabs, align) k = 1 for title, group in sorted((m.name, group) for m, group in profiles.items()): # Find limits of all profiles z = np.hstack([line[0] for line in group]) zp = np.linspace(np.min(z), np.max(z), npoints) absorbing = any((L[2] != 1e-4).any() for L in group) magnetic = (len(group[0]) > 3) twist = magnetic and any((L[4] != BASE_GUIDE_ANGLE).any() for L in group) data, columns = _build_profile_matrix(group, 1, zp, contours) _write_file(save + "_rho_contour%d.dat"%k, data, title, columns) if absorbing: data, columns = _build_profile_matrix(group, 2, zp, contours) _write_file(save + "_irho_contour%d.dat"%k, data, title, columns) if magnetic: data, columns = _build_profile_matrix(group, 3, zp, contours) _write_file(save + "_rhoM_contour%d.dat"%k, data, title, columns) if twist: data, columns = _build_profile_matrix(group, 4, zp, contours) _write_file(save + "_thetaM_contour%d.dat"%k, data, title, columns) k += 1 def _build_profile_matrix(group, index, zp, contours): # Interpolate to common z fp = np.vstack([np.interp(zp, L[0], L[index]) for L in group]) # Find quantiles q, qval = form_quantiles(fp, contours) # Build and return data columns columns = ["z", "best"] + list("%g%%"%v for v in 100*q.flatten()) data = np.vstack((zp, fp[0], np.reshape(qval, (-1, qval.shape[2])))) return data, columns def _save_residual_data(errors, contours, save): _, _, Q, residuals = errors k = 1 for title, x, r in sorted((m.name, Q[m], v) for m, v in residuals.items()): q, qval = form_quantiles(r.T, contours) # TODO: should have columns for R, dR as well. data = np.vstack((x, r[:, 0], np.reshape(qval, (-1, qval.shape[2])))) columns = ["q", "best"] + list("%g%%"%v for v in 100*q.flatten()) _write_file(save+"_resid_contour%d.dat"%k, data, title, columns) k += 1 def _write_file(path, data, title, columns): with open(path, "wb") as fid: fid.write(asbytes("# "+title+"\n")) fid.write(asbytes("# "+" ".join(columns)+"\n")) np.savetxt(fid, data.T) # ===== Plotting functions ===== def dark(color): return dhsv(color, dv=-0.2) def _profiles_overplot(profiles): for model, group in profiles.items(): name = model.name absorbing = any((L[2] != 1e-4).any() for L in group) magnetic = (len(group[0]) > 3) # Note: Use 3 colours per dataset for consistency _draw_overplot(group, 1, name + ' rho') if absorbing: _draw_overplot(group, 2, name + ' irho') else: next_color() if magnetic: _draw_overplot(group, 3, name + ' rhoM') else: next_color() _profile_labels() def _draw_overplot(group, index, label): import matplotlib.pyplot as plt alpha = 0.1 color = next_color() for L in group[1:]: plt.plot(L[0], L[index], '-', color=color, alpha=alpha) # Plot best L = group[0] plt.plot(L[0], L[index], '-', label=label, color=dark(color)) def _profiles_contour(profiles, contours=CONTOURS, npoints=200): for model, group in profiles.items(): name = model.name if model.name is not None else 'model' absorbing = any((L[2] > 1e-4).any() for L in group) magnetic = (len(group[0]) > 3) # Find limits of all profiles z = np.hstack([line[0] for line in group]) zp = np.linspace(np.min(z), np.max(z), npoints) # Note: Use 3 colours per dataset for consistency _draw_contours(group, 1, name + ' rho', zp, contours) if absorbing: _draw_contours(group, 2, name + ' irho', zp, contours) else: next_color() if magnetic: _draw_contours(group, 3, name + ' rhoM', zp, contours) else: next_color() _profile_labels() def _draw_contours(group, index, label, zp, contours): import matplotlib.pyplot as plt color = next_color() # Interpolate on common z fp = np.vstack([np.interp(zp, L[0], L[index]) for L in group]) # Plot the quantiles plot_quantiles(zp, fp, contours, color) # Plot the best plt.plot(zp, fp[0], '-', label=label, color=dark(color)) def _profile_labels(): import matplotlib.pyplot as plt plt.legend() plt.xlabel(u'z (Å)') plt.ylabel(u'SLD (10⁻⁶/Ų)') def _residuals_overplot(Q, residuals): import matplotlib.pyplot as plt alpha = 0.4 shift = 0 for m, r in residuals.items(): color = next_color() plt.plot(Q[m], shift+r[:, 1:], '.', markersize=1, color=color, alpha=alpha) plt.plot(Q[m], shift+r[:, 0], '.', label=m.name, markersize=1, color=dark(color)) # Use 3 colours from cycle so reflectivity matches rho for each dataset next_color() next_color() shift += 5 _residuals_labels() def _residuals_contour(Q, residuals, contours=CONTOURS): import matplotlib.pyplot as plt shift = 0 for m, r in residuals.items(): color = next_color() plot_quantiles(Q[m], shift+r.T, contours, color) plt.plot(Q[m], shift+r[:, 0], '.', label=m.name, markersize=1, color=dark(color)) # Use 3 colours from cycle so reflectivity matches rho for each dataset next_color() next_color() shift += 5 _residuals_labels() def _residuals_labels(): import matplotlib.pyplot as plt plt.legend() plt.xlabel(u'Q (1/Å)') plt.ylabel(u'Residuals') # ==== Helper functions ===== def _align_profile_set(profiles, slabs, align): """ Align all profiles to the first profile. """ p1 = profiles[0] t1_offset = _find_offset(slabs[0], align) if align != 'auto' else None offsets = [0] for p2, t2 in zip(profiles[1:], slabs[1:]): offsets.append(_align_profile_pair(p1[0], p1[1], t1_offset, p2[0], p2[1], t2, align)) profiles = [tuple([group[0]+offset]+list(group[1:])) for offset, group in zip(offsets, profiles)] return profiles def _align_profile_pair(z1, r1, t1_offset, z2, r2, t2, align): """ Use crosscorrelation to align r1 and r2. """ if align == 'auto': import scipy.signal # Assume z1, z2 have the same step size n2 = len(r2) idx = np.argmax(scipy.signal.correlate(r1, r2, 'full')) if idx < n2: offset = z2[(n2-1)-idx] - z1[0] else: offset = z2[0] - z1[idx-(n2-1)] return -offset else: return -(_find_offset(t2, align) - t1_offset) def _find_offset(v, align): """ Find the offset of k.p, where k is the interface number and p is the distance into that interface. This may even work for interfaces defined from the left, such as -1.5 to specify the middle of the final layer. """ idx = int(align) offset = np.sum(v[:idx]) + (align-idx)*v[idx] #print offset, idx, v[:idx], align return offset