Source code for refl1d.stitch

"""
Data stitching.

Join together datasets yielding unique sorted x.
"""

from numpy import hstack, vstack, argsort, sum, sqrt

[docs] def stitch(data, same_x=0.001, same_dx=0.001): """ Stitch together multiple measurements into one. *data* a list of datasets with x, dx, y, dy attributes *same_x* minimum point separation (default is 0.001). *same_dx* minimum change in resolution that may be averaged (default is 0.001). WARNING: the returned x values may be data dependent, with two measured sets having different x after stitching, even though the measurement conditions are identical!! Either add an intensity weight to the datasets:: probe.I = slitscan or use interpolation if you need to align two stitched scans:: import numpy as np x1, dx1, y1, dy1 = stitch([a1, b1, c1, d1]) x2, dx2, y2, dy2 = stitch([a2, b2, c2, d2]) x2[0], x2[-1] = x1[0], x1[-1] # Force matching end points y2 = np.interp(x1, x2, y2) dy2 = np.interp(x1, x2, dy2) x2 = x1 WARNING: the returned dx value underestimates the true x, depending on the relative weights of the averaged data points. """ if same_dx is None: same_dx = same_x x = hstack(p.x for p in data) dx = hstack(p.dx for p in data) y = hstack(p.y for p in data) dy = hstack(p.dy for p in data) if all(hasattr(p, 'I') for p in data): weight = hstack(p.I for p in data) else: weight = y/dy**2 # y/dy**2 is approximately the intensity # Sort the data by increasing x idx = argsort(x) data = vstack((x, dx, y, dy, weight)) data = data[:, idx] x = data[0, :] dx = data[1, :] # Skip through the data looking for regions of overlap. keep = [] n, last, next = len(x), 0, 1 while next < n: while next < n and abs(x[next]-x[last]) <= same_x: next += 1 if next - last == 1: # Only one point, so no averaging necessary keep.append(last) else: # Pick the x in [last:next] with the best resolution and average # them using Poisson averages. Repeat until all points are used remainder = data[:, last:next] avg = [] while remainder.shape[1] > 0: best_dx = min(remainder[1, :]) idx = (remainder[1, :]-best_dx <= same_dx) avg.append(poisson_average(remainder[:, idx])) remainder = remainder[:, ~idx] # Store the result in worst to best resolution order. for i, d in enumerate(reversed(avg)): data[:, last+i] = d keep.append(last+i) last = next return data[:4, keep]
[docs] def poisson_average(xdxydyw): """ Compute the poisson average of y/dy using a set of data points. The returned x, dx is the weighted average of the inputs:: x = sum(x*I)/sum(I) dx = sum(dx*I)/sum(I) The returned y, dy use Poisson averaging:: w = sum(y/dy^2) y = sum((y/dy)^2)/w dy = sqrt(y/w) The above formula gives the expected result for combining two measurements, assuming there is no uncertainty in the monitor. :: measure N counts during M monitors rate: r = N/M rate uncertainty: dr = sqrt(N)/M weighted rate: r/dr^2 = (N/M) / (N/M^2) = M weighted rate squared: r^2/dr^2 = (N^2/M^2) / (N/M^2) = N for two measurements Na, Nb w = ra/dra^2 + rb/drb^2 = Ma + Mb y = ((ra/dra)^2 + (rb/drb)^2)/w = (Na + Nb)/(Ma + Mb) dy = sqrt(y/w) = sqrt( (Na + Nb)/ w^2 ) = sqrt(Na+Nb)/(Ma + Mb) This formula isn't strictly correct when applied to values which have been scaled, for example to account for an attenuator in the counting system. """ # TODO: need better estimate of dx, with weighted broadening according # to the distance of the x's from the centers. # TODO: check the accuracy of the formula in the presence of # attenuators. x, dx, y, _dy, weight = xdxydyw w = sum(weight) x = sum(x*weight)/w dx = sum(dx*weight)/w y = sum(y*weight)/w dy = sqrt(y/w) #print "averaging", xdxydy, x, dx, y, dy return x, dx, y, dy, w