From 8e1eff634a33a4d2bc31844150fbb5d6b8f15fe7 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Wed, 22 Aug 2018 10:52:50 +0200 Subject: [PATCH 1/3] Rollup: New sampler API -> support for emcee (#68) * start InferenceFile -> BaseInferenceFile * rename hdf.py base_hdf.py * add parse_parameters function * add module for base mcmc io * make _read_samples_data the abstract method * added read_samples_data to base_mcmc * add emcee file handling * replace read/write functions with io in BaseSampler * add checkpoint requirement; rename samples raw_samples * start updating emcee * move emcee_pt to it's own module * add base_mcmc (needs work) * start changing the base sampler api * add write_metadata to models * move setting up checkpoint and run interval to sampler methods * rearrange read/write functions; add checkpoint and finalize methods; add run method to base_mcmc * fix whitespace * add acl support * update executable * add finalize to emcee, fix typos * change write_posterior to expect filename, not file * change burn in module to just have functions * start to define burn in support class * move burn in class to burn_in module; add evaluate * add write burn in to io * add from_config for burn-in class * more support for burn-in, calculation of independent samples * add thin_start/interval/end to the hdf file attrs * fix typos, whitespace in burn_in * fix whitespace, typos in base_hdf * rename EnsembleMCMCIO to MCMCIO; fix whitespace * fix typo * fix whitespace * write filetype to inference hdf files; provide a loadfile function * fix some import errors * remove sampler_class from io to avoid circular imports * fix bugs * fix bugs, move niterations/nsamples into config file * add halfchain, posterior_step, min_iterations back to burn_in * fix bugs to get acl working post burn in * fix bugs in nacl burn in test * write more information to the logging messages * fix bugs in min_iterations burn-in test * fix more bugs * fix pep8 issues * fix bugs for running with data * whitespace --- bin/gwin | 343 +++--------- gwin/burn_in.py | 621 +++++++++++---------- gwin/io/__init__.py | 187 ++++++- gwin/io/base_hdf.py | 659 +++++++++++++++++++++++ gwin/io/base_mcmc.py | 251 +++++++++ gwin/io/emcee.py | 75 +++ gwin/io/hdf.py | 801 ---------------------------- gwin/models/base.py | 14 + gwin/models/base_data.py | 16 + gwin/models/gaussian_noise.py | 28 + gwin/option_utils.py | 82 --- gwin/sampler/__init__.py | 41 +- gwin/sampler/base.py | 976 ++++++--------------------------- gwin/sampler/base_mcmc.py | 564 ++++++++++++++++++++ gwin/sampler/emcee.py | 978 +++++----------------------------- gwin/sampler/emcee_pt.py | 756 ++++++++++++++++++++++++++ 16 files changed, 3250 insertions(+), 3142 deletions(-) create mode 100644 gwin/io/base_hdf.py create mode 100644 gwin/io/base_mcmc.py create mode 100644 gwin/io/emcee.py delete mode 100644 gwin/io/hdf.py create mode 100644 gwin/sampler/base_mcmc.py create mode 100644 gwin/sampler/emcee_pt.py diff --git a/bin/gwin b/bin/gwin index 2d0439b..cacded7 100644 --- a/bin/gwin +++ b/bin/gwin @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (C) 2016 Christopher M. Biwer +# Copyright (C) 2016 Christopher M. 
Biwer, Collin Capano # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the @@ -32,18 +32,38 @@ from pycbc.waveform import generator import gwin from gwin import (__version__, burn_in, option_utils) -from gwin.io.hdf import InferenceFile -from gwin.option_utils import validate_checkpoint_files from gwin.calibration import Recalibrate # command line usage parser = argparse.ArgumentParser(usage=__file__ + " [--options]", description=__doc__) - -# version option parser.add_argument("--version", action="version", version=__version__, help="Prints version information.") - +parser.add_argument("--verbose", action="store_true", default=False, + help="Print logging messages.") +# output options +parser.add_argument("--output-file", type=str, required=True, + help="Output file path.") +parser.add_argument("--force", action="store_true", default=False, + help="If the output-file already exists, overwrite it. " + "Otherwise, an OSError is raised.") +parser.add_argument("--save-backup", action="store_true", + default=False, + help="Don't delete the backup file after the run has " + "completed.") +# parallelization options +parser.add_argument("--nprocesses", type=int, default=1, + help="Number of processes to use. If not given then only " + "a single core will be used.") +parser.add_argument("--use-mpi", action='store_true', default=False, + help="Use MPI to parallelize the sampler") +parser.add_argument("--samples-file", default=None, + help="Use an iteration from an InferenceFile as the " + "initial proposal distribution. The same " + "number of walkers and the same [variable_params] " + "section in the configuration file should be used. " + "The priors must allow encompass the initial " + "positions from the InferenceFile being read.") # add data options parser.add_argument("--instruments", type=str, nargs="+", help="IFOs, eg. H1 L1.") @@ -57,57 +77,8 @@ parser.add_argument("--psd-end-time", type=float, default=None, parser.add_argument("--seed", type=int, default=0, help="Seed to use for the random number generator that " "initially distributes the walkers. Default is 0.") -parser.add_argument("--samples-file", default=None, - help="Use an iteration from an InferenceFile as the " - "initial proposal distribution. The same " - "number of walkers and the same [variable_params] " - "section in the configuration file should be used. " - "The priors must allow encompass the initial " - "positions from the InferenceFile being read.") - -# add sampler options -option_utils.add_sampler_option_group(parser) - # add config options option_utils.add_config_opts_to_parser(parser) - -# output options -parser.add_argument("--output-file", type=str, required=True, - help="Output file path.") -parser.add_argument("--force", action="store_true", default=False, - help="If the output-file already exists, overwrite it. " - "Otherwise, an OSError is raised.") -parser.add_argument("--save-strain", action="store_true", default=False, - help="Save the conditioned strain time series to the " - "output file. If gate-overwhitened, this is done " - "before all gates have been applied.") -parser.add_argument("--save-stilde", action="store_true", default=False, - help="Save the conditioned strain frequency series to " - "the output file. 
This is done after all gates have " - "been applied.") -parser.add_argument("--save-psd", action="store_true", default=False, - help="Save the psd of each ifo to the output file.") -parser.add_argument("--checkpoint-interval", type=int, default=None, - help="Number of iterations to take before saving new " - "samples to file, calculating ACL, and updating " - "burn-in estimate.") -parser.add_argument("--resume-from-checkpoint", action="store_true", - default=False, - help="Automatically load results from checkpoint/backup " - "file.") -parser.add_argument("--save-backup", action="store_true", - default=False, - help="Don't delete the backup file after the run has " - "completed.") -parser.add_argument("--checkpoint-fast", action="store_true", - help="Do not calculate ACL after each checkpoint, only at " - "the end. Not applicable if n-independent-samples " - "have been specified.") - -# verbose option -parser.add_argument("--verbose", action="store_true", default=False, - help="Print logging messages.") - # add module pre-defined options fft.insert_fft_option_group(parser) opt.insert_optimization_option_group(parser) @@ -131,41 +102,6 @@ scheme.verify_processing_options(opts, parser) #strain.verify_strain_options(opts, parser) weave.verify_weave_options(opts, parser) -# check for the output file -if os.path.exists(opts.output_file) and not opts.force: - raise OSError("output-file already exists; use --force if you wish to " - "overwrite it.") - -# check for backup file(s) -checkpoint_file = opts.output_file + '.checkpoint' -backup_file = opts.output_file + '.bkup' -checkpoint_valid = validate_checkpoint_files(checkpoint_file, backup_file) - -# determine what to do with checkpoints -if checkpoint_valid and not opts.resume_from_checkpoint and not opts.force: - raise OSError("valid checkpoint file {} found, but " - "resume-from-checkpoint not on. 
If you wish to overwrite " - "use --force; otherwise, use --resume-from-checkpoint") -if not opts.resume_from_checkpoint and opts.force: - checkpoint_valid = False - -# check for how many iterations to run -max_iterations = opts.niterations -if opts.niterations is not None and opts.n_independent_samples is not None: - raise ValueError("Must specify either niterations or n-independent-" - "samples, not both") -elif opts.niterations is not None: - get_nsamples = opts.niterations -elif opts.n_independent_samples is not None: - if opts.checkpoint_interval is None: - raise ValueError("n-independent-samples requires a checkpoint-" - "interval; see help") - get_nsamples = opts.n_independent_samples -else: - raise ValueError("Must specify niterations or n-independent-samples; " - "see --help") - - # set seed numpy.random.seed(opts.seed) logging.info("Using seed %i", opts.seed) @@ -199,8 +135,9 @@ with ctx: # get ifo-specific instances of calibration model if cp.has_section('calibration'): logging.info("Initializing calibration model") - recalibration = {ifo: Recalibrate.from_config(cp, ifo, section='calibration') for - ifo in opts.instruments} + recalibration = {ifo: Recalibrate.from_config(cp, ifo, + section='calibration') + for ifo in opts.instruments} model_args['recalibration'] = recalibration # get gates for templates @@ -213,206 +150,66 @@ with ctx: # construct class that will return the natural logarithm of likelihood model = gwin.models.read_from_config(cp, **model_args) - burn_in_eval = burn_in.BurnIn(opts.burn_in_function, - min_iterations=opts.min_burn_in) - logging.info("Setting up sampler") - # create sampler that will run - sampler = option_utils.sampler_from_cli(opts, model) - - # save information about this data and settings - if not checkpoint_valid: - with InferenceFile(checkpoint_file, "w") as fp: - # save command line and data - logging.info("Creating and writing data to output file") - fp.write_data( - strain_dict=strain_dict if opts.save_strain else None, - stilde_dict=stilde_dict if opts.save_stilde else None, - psd_dict=psd_dict if opts.save_psd else None, - low_frequency_cutoff_dict=low_frequency_cutoff_dict) - - # save injection parameters - if opts.injection_file: - for ifo in opts.instruments: - logging.info("Writing %s injections to output file", ifo) - if ifo in opts.injection_file.keys(): - inj_file = opts.injection_file[ifo] - elif len(opts.injection_file) == 1: - inj_file = opts.injection_file.values()[0] - else: - logging.warn("Could not find injections for %s", ifo) - continue - fp.write_injections(opts.injection_file.values()[0], ifo) - # copy to backup - shutil.copy(checkpoint_file, backup_file) - - # write the command line, resume point - for fn in [checkpoint_file, backup_file]: - with InferenceFile(fn, "a") as fp: - fp.write_command_line() - if checkpoint_valid: - fp.write_resume_point() + # Create sampler that will run. + # Note: the pool is created at this point. This means that, + # unless you enjoy angering your cluster admins, + # NO SAMPLES FILE IO SHOULD BE DONE PRIOR TO THIS POINT!!! + sampler = gwin.sampler.load_from_config( + cp, model, nprocesses=opts.nprocesses, use_mpi=opts.use_mpi) + + # set up output/checkpoint file + # Note: PyCBC's multi-ifo parser uses key:ifo for + # the injection file, even though we will use the same + # injection file all detectors. This + # should be fixed in a future version of PyCBC. Once it is, + # update this. Until then, just use the first file. 
+ if opts.injection_file: + injection_file = opts.injection_file.values()[0] # None if not set + else: + injection_file = None + sampler.setup_output(opts.output_file, force=opts.force, + injection_file=injection_file) - # set the walkers initial positions from a pre-existing InferenceFile - # or a specific initial distribution listed in the configuration file - # or else use the prior distributions to set initial positions - logging.info("Setting walkers initial conditions for varying parameters") + # Figure out where to get the initial conditions from: a samples file, + # the checkpoint file, the prior, or an initial prior. samples_file = opts.samples_file # use the checkpoint file instead if resume from checkpoint - if opts.resume_from_checkpoint and checkpoint_valid: - samples_file = checkpoint_file + if not sampler.new_checkpoint: + samples_file = sampler.checkpoint_file if samples_file is not None: logging.info("Initial positions taken from last iteration in %s", samples_file) - samples_file = InferenceFile(samples_file, "r") init_prior = None - elif len(cp.get_subsections("initial")): - initial_dists = distributions.read_distributions_from_config( - cp, section="initial") - constraints = distributions.read_constraints_from_config(cp, - constraint_section="initial_constraint") - init_prior = distributions.JointDistribution(sampler.variable_params, - *initial_dists, **{"constraints" : constraints}) else: - init_prior = None - sampler.set_p0(samples_file=samples_file, prior=init_prior) - - # if getting samples from file then put sampler and random number generator - # back in its former state - if samples_file is not None: - sampler.set_state_from_file(samples_file) - samples_file.close() - - # run sampler's burn in if it is in the list of burn in functions - if "use_sampler" in burn_in_eval.burn_in_functions: - # remove the sampler's burn in so we don't run more than once - burn_in_eval.burn_in_functions.pop("use_sampler") - # we'll only do this if we don't have a valid checkpoint: since the - # checkpoint happens after the sampler's burn in, the sampler's burn in - # must have already run if we have a valid checkpoint file - if not checkpoint_valid: - with InferenceFile(checkpoint_file, "a") as fp: - logging.info("Running sampler's burn in function") - burnidx, is_burned_in = burn_in.use_sampler(sampler, fp) - sampler.write_burn_in_iterations(fp, burnidx, is_burned_in) - # write the burn in results - logging.info("Writing burn in samples to file") - sampler.write_results(fp, static_params=model.static_params, - ifos=opts.instruments) - # write to backup file - with InferenceFile(backup_file, "a") as fp: - sampler.write_burn_in_iterations(fp, burnidx, is_burned_in) - sampler.write_results(fp, static_params=model.static_params, - ifos=opts.instruments) - - - # get the starting number of samples: - # nsamples keeps track of the number of samples we've obtained (if - # --n-independent-samples is used, this is the number of independent - # samples; otherwise, this is the number of iterations); - # start is the number of iterations that the file already contains (either - # due to sampler burn-in, or a previous checkpoint) - try: - with InferenceFile(checkpoint_file, "r") as fp: - start = fp.niterations - except KeyError: - start = 0 - if opts.n_independent_samples is not None: - try: - with InferenceFile(checkpoint_file, "r") as fp: - nsamples = fp.n_independent_samples - except AttributeError: - nsamples = start - else: - nsamples = start - # to ensure iterations are counted properly, he 
sampler's lastclear should - # be the same as start - sampler.lastclear = start - - interval = opts.checkpoint_interval - if interval is None: - interval = get_nsamples - - # run sampler until we have the desired number of samples - while nsamples < get_nsamples: - - end = start + interval - - # adjust the interval if we would go past the number of iterations - if opts.n_independent_samples is None and end > get_nsamples: - interval = get_nsamples - start - end = start + interval - - # run sampler and set initial values to None so that sampler - # picks up from where it left off next call - logging.info("Running sampler for {} to {} iterations".format(start, - end)) - sampler.run(interval) - - # write new samples - with InferenceFile(checkpoint_file, "a") as fp: - - logging.info("Writing results to file") - sampler.write_results(fp, static_params=model.static_params, - ifos=opts.instruments) - logging.info("Updating burn in") - burnidx, is_burned_in = burn_in_eval.update(sampler, fp) - - # compute the acls and write - acls = None - if opts.n_independent_samples is not None or end >= get_nsamples \ - or not opts.checkpoint_fast: - logging.info("Computing acls") - acls = sampler.compute_acls(fp) - sampler.write_acls(fp, acls) - - # write to backup - with InferenceFile(backup_file, "a") as fp: - - logging.info("Writing to backup file") - sampler.write_results(fp, static_params=model.static_params, - ifos=opts.instruments) - sampler.write_burn_in_iterations(fp, burnidx, is_burned_in) - if acls is not None: - sampler.write_acls(fp, acls) - - # check validity - checkpoint_valid = validate_checkpoint_files(checkpoint_file, - backup_file) - if not checkpoint_valid: - raise IOError("error writing to checkpoint file") - - # update nsamples for next loop - if opts.n_independent_samples is not None: - with InferenceFile(checkpoint_file, 'r') as fp: - nsamples = fp.n_independent_samples - logging.info("Have {} independent samples".format(nsamples)) - else: - nsamples += interval + # try to load an initial distribution from the config file + init_prior = gwin.sampler.initial_dist_from_config(cp) + sampler.set_initial_conditions(initial_distribution=init_prior, + samples_file=samples_file) - # clear the in-memory chain to save memory - logging.info("Clearing chain") - sampler.clear_chain() + # Run the sampler + sampler.run() - start = end + # Finalize the output + sampler.finalize() - # compute evidence, if supported - with InferenceFile(checkpoint_file, 'a') as fp: - try: - lnz, dlnz = sampler.calculate_logevidence(fp) - logging.info("Saving evidence") - sampler.write_logevidence(fp, lnz, dlnz) - except NotImplementedError: - pass + # FIXME: move to emcee_pt's finalize method + #with InferenceFile(checkpoint_file, 'a') as fp: + # try: + # lnz, dlnz = sampler.calculate_logevidence(fp) + # logging.info("Saving evidence") + # sampler.write_logevidence(fp, lnz, dlnz) + # except NotImplementedError: + # pass # rename checkpoint to output and delete backup logging.info("Moving checkpoint to output") -os.rename(checkpoint_file, opts.output_file) +os.rename(sampler.checkpoint_file, opts.output_file) if not opts.save_backup: logging.info("Deleting backup file") - os.remove(backup_file) + os.remove(sampler.backup_file) # exit logging.info("Done") diff --git a/gwin/burn_in.py b/gwin/burn_in.py index bcb4ef6..d87bf69 100644 --- a/gwin/burn_in.py +++ b/gwin/burn_in.py @@ -13,380 +13,361 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 
02110-1301, USA. + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# """ This modules provides classes and functions for determining when Markov Chains have burned in. """ import numpy - from scipy.stats import ks_2samp +from pycbc.filter import autocorrelation +from pycbc.io.record import get_vars_from_arg -def ks_test(sampler, fp, threshold=0.9): - """Burn in based on whether the p-value of the KS test between the samples - at the last iteration and the samples midway along the chain for each - parameter is > ``threshold``. +# The value to use for a burn-in iteration if a chain is not burned in +NOT_BURNED_IN_ITER = -1 - Parameters - ---------- - sampler : gwin.sampler - Sampler to determine burn in for. May be either an instance of a - `gwin.sampler`, or the class itself. - fp : InferenceFile - Open inference hdf file containing the samples to load for determing - burn in. - threshold : float - The thershold to use for the p-value. Default is 0.9. - Returns - ------- - burn_in_idx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. - """ - nwalkers = fp.nwalkers - niterations = fp.niterations - # Create a dictionary which would have keys are the variable args and - # values are booleans indicating whether the p-value for the parameters - # satisfies the KS test - is_burned_in_param = {} - # iterate over the parameters - for param in fp.variable_params: - # read samples for the parameter from the last iteration of the chain - samples_last_iter = sampler.read_samples(fp, param, iteration=-1, - flatten=True)[param] - # read samples for the parameter from the iteration midway - # along the chain - samples_chain_midpt = sampler.read_samples( - fp, param, iteration=int(niterations/2), flatten=True)[param] - _, p_value = ks_2samp(samples_last_iter, samples_chain_midpt) - # check if p_value is > than the desired range - is_burned_in_param[param] = p_value > threshold - - # The chains are burned in if the p-value of the KS test lies - # in the range [0.1,0.9] for all the parameters. - # If the KS test is passed, the chains have burned in at their - # mid-way point. - if all(is_burned_in_param.values()): - is_burned_in = numpy.ones(nwalkers, dtype=bool) - burn_in_idx = numpy.repeat(niterations/2, nwalkers).astype(int) - else: - is_burned_in = numpy.zeros(nwalkers, dtype=bool) - burn_in_idx = numpy.repeat(niterations, nwalkers).astype(int) - return burn_in_idx, is_burned_in +# +# ============================================================================= +# +# Convenience functions +# +# ============================================================================= +# -def n_acl(sampler, fp, nacls=10): - """Burn in based on ACL. +def ks_test(samples1, samples2, threshold=0.9): + """Applies a KS test to determine if two sets of samples are the same. - The sampler is considered burned in if the number of itertions is >= - ``nacls`` times the maximum ACL over all parameters, as measured from the - first iteration. + The ks test is applied parameter-by-parameter. If the two-tailed p-value + returned by the test is greater than ``threshold``, the samples are + considered to be the same. Parameters ---------- - sampler : pycbc.inference.sampler - Sampler to determine burn in for. May be either an instance of a - `inference.sampler`, or the class itself. 
-    fp : InferenceFile
-        Open inference hdf file containing the samples to load for determing
-        burn in.
-    nacls : int
-        Number of ACLs to use for burn in. Default is 10.
+    samples1 : dict
+        Dictionary mapping parameters to the first set of samples.
+    samples2 : dict
+        Dictionary mapping parameters to the second set of samples.
+    threshold : float
+        The threshold to use for the p-value. Default is 0.9.
 
     Returns
     -------
-    burn_in_idx : array
-        Array of indices giving the burn-in index for each chain. By definition
-        of this function, all chains reach burn in at the same iteration. Thus
-        the returned array is the burn-in index repeated by the number of
-        chains.
-    is_burned_in : array
-        Array of booleans indicating whether each chain is burned in. Since
-        all chains obtain burn in at the same time, this is either an array
-        of all False or True.
+    dict :
+        Dictionary mapping parameter names to booleans indicating whether the
+        given parameter passes the KS test.
     """
-    acl = numpy.array(sampler.compute_acls(fp, start_index=0).values()).max()
-    burn_idx = nacls * acl
-    is_burned_in = burn_idx < fp.niterations
-    if not is_burned_in:
-        burn_idx = fp.niterations
-    nwalkers = fp.nwalkers
-    return numpy.repeat(burn_idx, nwalkers).astype(int), \
-        numpy.repeat(is_burned_in, nwalkers).astype(bool)
+    is_the_same = {}
+    assert set(samples1.keys()) == set(samples2.keys()), (
+        "samples1 and 2 must have the same parameters")
+    # iterate over the parameters
+    for param in samples1:
+        s1 = samples1[param]
+        s2 = samples2[param]
+        _, p_value = ks_2samp(s1, s2)
+        is_the_same[param] = p_value > threshold
+    return is_the_same
 
 
-def max_posterior(sampler, fp):
+def max_posterior(lnps_per_walker, dim):
     """Burn in based on samples being within dim/2 of maximum posterior.
 
     Parameters
     ----------
-    sampler : gwin.sampler
-        Sampler to determine burn in for. May be either an instance of a
-        `gwin.sampler`, or the class itself.
-    fp : InferenceFile
-        Open inference hdf file containing the samples to load for determing
-        burn in.
+    lnps_per_walker : 2D array
+        Array of values that are proportional to the log posterior values. Must
+        have shape ``nwalkers x niterations``.
+    dim : int
+        The dimension of the parameter space.
 
     Returns
     -------
-    burn_in_idx : array
-        Array of indices giving the burn-in index for each chain.
-    is_burned_in : array
-        Array of booleans indicating whether each chain is burned in.
+    burn_in_idx : array of int
+        The burn in indices of each walker. If a walker is not burned in, its
+        index will be set to ``NOT_BURNED_IN_ITER``.
+    is_burned_in : array of bool
+        Whether or not a walker is burned in.
""" - # get the posteriors - # Note: multi-tempered samplers should just return the coldest chain by - # default - chain_stats = sampler.read_samples( - fp, ['loglr', 'logprior'], samples_group=fp.stats_group, - thin_interval=1, thin_start=0, thin_end=None, flatten=False) - chain_posteriors = chain_stats['loglr'] + chain_stats['logprior'] - dim = float(len(fp.variable_params)) - - # find the posterior to compare against - max_p = chain_posteriors.max() - criteria = max_p - dim/2 - nwalkers = chain_posteriors.shape[-2] - niterations = chain_posteriors.shape[-1] - burn_in_idx = numpy.repeat(niterations, nwalkers).astype(int) - is_burned_in = numpy.zeros(nwalkers, dtype=bool) - - # find the first iteration in each chain where the logplr has exceeded + if len(lnps_per_walker.shape) != 2: + raise ValueError("lnps_per_walker must have shape " + "nwalkers x niterations") + # find the value to compare against + max_p = lnps_per_walker.max() + criteria = max_p - dim/2. + nwalkers, niterations = lnps_per_walker.shape + burn_in_idx = numpy.empty(nwalkers, dtype=int) + is_burned_in = numpy.empty(nwalkers, dtype=bool) + # find the first iteration in each chain where the logpost has exceeded # max_p - dim/2 for ii in range(nwalkers): - chain = chain_posteriors[..., ii, :] - # numpy.where will return a tuple with multiple arrays if the chain is - # more than 1D (which can happen for multi-tempered samplers). Always - # taking the last array ensures we are looking at the indices that - # count out iterations - idx = numpy.where(chain >= criteria)[-1] - if idx.size != 0: - burn_in_idx[ii] = idx[0] - is_burned_in[ii] = True + chain = lnps_per_walker[ii, :] + passedidx = numpy.where(chain >= criteria)[0] + is_burned_in[ii] = passedidx.size > 0 + if is_burned_in[ii]: + burn_in_idx[ii] = passedidx[0] + else: + burn_in_idx[ii] = NOT_BURNED_IN_ITER return burn_in_idx, is_burned_in -def posterior_step(sampler, fp): - """Burn in based on the last time a chain made a jump > dim/2. +def posterior_step(logposts, dim): + """Finds the last time a chain made a jump > dim/2. Parameters ---------- - sampler : gwin.sampler - Sampler to determine burn in for. May be either an instance of a - `gwin.sampler`, or the class itself. - fp : InferenceFile - Open inference hdf file containing the samples to load for determing - burn in. + logposts : array + 1D array of values that are proportional to the log posterior values. + dim : int + The dimension of the parameter space. Returns ------- - burn_in_idx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. - By definition of this function, all values are set to True. + int + The index of the last time the logpost made a jump > dim/2. If that + never happened, returns 0. """ - # get the posteriors - # Note: multi-tempered samplers should just return the coldest chain by - # default - chain_stats = sampler.read_samples( - fp, ['loglr', 'logprior'], samples_group=fp.stats_group, - thin_interval=1, thin_start=0, thin_end=None, flatten=False) - chain_posteriors = chain_stats['loglr'] + chain_stats['logprior'] - nwalkers = chain_posteriors.shape[-2] - dim = float(len(fp.variable_params)) - burn_in_idx = numpy.zeros(nwalkers).astype(int) + if logposts.ndim > 1: + raise ValueError("logposts must be a 1D array") criteria = dim/2. 
+ dp = numpy.diff(logposts) + indices = numpy.where(dp >= criteria)[0] + if indices.size > 0: + idx = indices[-1] + 1 + else: + idx = 0 + return idx - # find the last iteration in each chain where the logplr has - # jumped by more than dim/2 - for ii in range(nwalkers): - chain = chain_posteriors[..., ii, :] - dp = abs(numpy.diff(chain)) - idx = numpy.where(dp >= criteria)[-1] - if idx.size != 0: - burn_in_idx[ii] = idx[-1] + 1 - return burn_in_idx, numpy.ones(nwalkers, dtype=bool) - - -def half_chain(sampler, fp): - """Takes the second half of the iterations as post-burn in. - - Parameters - ---------- - sampler : gwin.sampler - This option is not used; it is just here give consistent API as the - other burn in functions. - fp : InferenceFile - Open inference hdf file containing the samples to load for determing - burn in. - - Returns - ------- - burn_in_idx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. - By definition of this function, all values are set to True. - """ - nwalkers = fp.nwalkers - niterations = fp.niterations - return ( - numpy.repeat(niterations/2, nwalkers).astype(int), - numpy.ones(nwalkers, dtype=bool), - ) - - -def use_sampler(sampler, fp=None): - """Uses the sampler's burn_in function. - - Parameters - ---------- - sampler : gwin.sampler - Sampler to determine burn in for. Must be an instance of an - `gwin.sampler` that has a `burn_in` function. - fp : InferenceFile, optional - This option is not used; it is just here give consistent API as the - other burn in functions. - - Returns - ------- - burn_in_idx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. - Since the sampler's burn in function will run until all chains - are burned, all values are set to True. - """ - sampler.burn_in() - return ( - sampler.burn_in_iterations, - numpy.ones(len(sampler.burn_in_iterations), dtype=bool), - ) - - -burn_in_functions = { - 'ks_test': ks_test, - 'n_acl': n_acl, - 'max_posterior': max_posterior, - 'posterior_step': posterior_step, - 'half_chain': half_chain, - 'use_sampler': use_sampler, - } - - -class BurnIn(object): - """Class to estimate the number of burn in iterations. - Parameters - ---------- - function_names : list, optional - List of name of burn in functions to use. All names in the provided - list muset be in the `burn_in_functions` dict. If none provided, will - use no burn-in functions. - min_iterations : int, optional - Minimum number of burn in iterations to use. The burn in iterations - returned by evaluate will be the maximum of this value - and the values returned by the burn in functions provided in - `function_names`. Default is 0. 
- - Examples - -------- - Initialize a `BurnIn` instance that will use `max_posterior` and - `posterior_step` as the burn in criteria: - - >>> import gwin - >>> burn_in = gwin.BurnIn(['max_posterior', 'posterior_step']) - - Use this `BurnIn` instance to find the burn-in iteration of each walker - in an inference result file: - - >>> from pycbc.io import InferenceFile - >>> fp = InferenceFile('gwin.hdf', 'r') - >>> burn_in.evaluate(gwin.samplers[fp.sampler_name], fp) - array([11486, 11983, 11894, ..., 11793, 11888, 11981]) +# +# ============================================================================= +# +# Burn in classes +# +# ============================================================================= +# - """ - def __init__(self, function_names, min_iterations=0): - if function_names is None: - function_names = [] - self.min_iterations = min_iterations - self.burn_in_functions = {fname: burn_in_functions[fname] - for fname in function_names} +class MCMCBurnInTests(object): + """Provides methods for estimating burn-in of an ensemble MCMC.""" + + available_tests = ('halfchain', 'min_iterations', 'max_posterior', + 'posterior_step', 'nacl', 'ks_test', + ) + + def __init__(self, sampler, burn_in_test, **kwargs): + self.sampler = sampler + # determine the burn-in tests that are going to be done + self.do_tests = get_vars_from_arg(burn_in_test) + self.burn_in_test = burn_in_test + self.burn_in_data = {t: {} for t in self.do_tests} + self.is_burned_in = False + self.burn_in_iteration = NOT_BURNED_IN_ITER + # Arguments specific to each test... + # for nacl: + self._nacls = int(kwargs.pop('nacls', 5)) + # for kstest: + self._ksthreshold = float(kwargs.pop('ks_threshold', 0.9)) + # for max_posterior and posterior_step + self._ndim = int(kwargs.pop('ndim', len(sampler.variable_params))) + # for min iterations + self._min_iterations = int(kwargs.pop('min_iterations', 0)) + + def _getniters(self, filename): + """Convenience function to get the number of iterations in the file. + + If `niterations` hasn't been written to the file yet, just returns 0. + """ + with self.sampler.io(filename, 'r') as fp: + try: + niters = fp.niterations + except KeyError: + niters = 0 + return niters - def evaluate(self, sampler, fp): - """Evaluates sampler's chains to find burn in. + def _getlogposts(self, filename): + """Convenience function for retrieving log posteriors. Parameters ---------- - sampler : gwin.sampler - Sampler to determine burn in for. May be either an instance of a - `gwin.sampler`, or the class itself. - fp : InferenceFile - Open inference hdf file containing the samples to load for - determing burn in. + filename : str + The file to read. Returns ------- - burnidx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. + array + The log posterior values. They are not flattened, so have dimension + nwalkers x niterations. 
""" - # if the number of iterations is < than the minimium desired, - # just return the number of iterations and all False - if fp.niterations < self.min_iterations: - return numpy.repeat(self.min_iterations, fp.nwalkers), \ - numpy.zeros(fp.nwalkers, dtype=bool) - # if the file already has burn in iterations saved, use those as a - # base - try: - burnidx = fp['burn_in_iterations'][:] - except KeyError: - # just use the minimum - burnidx = numpy.repeat(self.min_iterations, fp.nwalkers) - # start by assuming is burned in; the &= below will make this false - # if any test yields false - is_burned_in = numpy.ones(fp.nwalkers, dtype=bool) - if self.burn_in_functions != {}: - newidx = [] - for func in self.burn_in_functions.values(): - idx, state = func(sampler, fp) - newidx.append(idx) - is_burned_in &= state - newidx = numpy.vstack(newidx).max(axis=0) - # update the burn in idx if any test yields a larger iteration - mask = burnidx < newidx - burnidx[mask] = newidx[mask] - # if any burn-in idx are less than the min iterations, set to the - # min iterations - burnidx[burnidx < self.min_iterations] = self.min_iterations - return burnidx, is_burned_in - - def update(self, sampler, fp): - """Evaluates burn in and saves the updated indices to the given file. - - Parameters - ---------- - sampler : gwin.sampler - Sampler to determine burn in for. May be either an instance of a - `gwin.sampler`, or the class itself. - fp : InferenceFile - Open inference hdf file containing the samples to load for - determing burn in. - - Returns - ------- - burnidx : array - Array of indices giving the burn-in index for each chain. - is_burned_in : array - Array of booleans indicating whether each chain is burned in. + with self.sampler.io(filename, 'r') as fp: + samples = fp.read_raw_samples( + ['loglikelihood', 'logprior'], thin_start=0, thin_interval=1, + flatten=False) + logposts = samples['loglikelihood'] + samples['logprior'] + return logposts + + def halfchain(self, filename): + """Just uses half the chain as the burn-in iteration. + """ + niters = self._getniters(filename) + data = self.burn_in_data['halfchain'] + # this test cannot determine when something will burn in + # only when it was not burned in in the past + data['is_burned_in'] = True + data['burn_in_iteration'] = niters/2 + + def min_iterations(self, filename): + """Just checks that the sampler has been run for the minimum number + of iterations. 
+ """ + niters = self._getniters(filename) + data = self.burn_in_data['min_iterations'] + data['is_burned_in'] = self._min_iterations < niters + if data['is_burned_in']: + data['burn_in_iteration'] = self._min_iterations + else: + data['burn_in_iteration'] = NOT_BURNED_IN_ITER + + def max_posterior(self, filename): + """Applies max posterior test to self.""" + logposts = self._getlogposts(filename) + burn_in_idx, is_burned_in = max_posterior(logposts, self._ndim) + data = self.burn_in_data['max_posterior'] + # required things to store + data['is_burned_in'] = is_burned_in.all() + if data['is_burned_in']: + data['burn_in_iteration'] = burn_in_idx.max() + else: + data['burn_in_iteration'] = NOT_BURNED_IN_ITER + # additional info + data['iteration_per_walker'] = burn_in_idx + data['status_per_walker'] = is_burned_in + + def posterior_step(self, filename): + """Applies the posterior-step test.""" + logposts = self._getlogposts(filename) + burn_in_idx = numpy.array([posterior_step(logps, self._ndim) + for logps in logposts]) + data = self.burn_in_data['posterior_step'] + # this test cannot determine when something will burn in + # only when it was not burned in in the past + data['is_burned_in'] = True + data['burn_in_iteration'] = burn_in_idx.max() + # additional info + data['iteration_per_walker'] = burn_in_idx + + def nacl(self, filename): + """Burn in based on ACL. + + This applies the following test to determine burn in: + + 1. The first half of the chain is ignored. + + 2. An ACL is calculated from the second half. + + 3. If ``nacls`` times the ACL is < the number of iterations / 2, + the chain is considered to be burned in at the half-way point. """ - burnidx, is_burned_in = self.evaluate(sampler, fp) - sampler.burn_in_iterations = burnidx - sampler.write_burn_in_iterations(fp, burnidx, is_burned_in) - return burnidx, is_burned_in + niters = self._getniters(filename) + kstart = int(niters / 2.) + acls = self.sampler.compute_acl(filename, start_index=kstart) + is_burned_in = {param: (self._nacls * acl) < kstart + for (param, acl) in acls.items()} + data = self.burn_in_data['nacl'] + # required things to store + data['is_burned_in'] = all(is_burned_in.values()) + if data['is_burned_in']: + data['burn_in_iteration'] = kstart + else: + data['burn_in_iteration'] = NOT_BURNED_IN_ITER + # additional information + data['status_per_parameter'] = is_burned_in + # since we calculated it, save the acls to the sampler + self.sampler.acls = acls + + def ks_test(self, filename): + """Applies ks burn-in test.""" + with self.sampler.io(filename, 'r') as fp: + niters = fp.niterations + # get the samples from the mid point + samples1 = fp.read_raw_samples( + ['loglikelihood', 'logprior'], iteration=int(niters/2.)) + # get the last samples + samples2 = fp.read_raw_samples( + ['loglikelihood', 'logprior'], iteration=-1) + # do the test + # is_the_same is a dictionary of params --> bool indicating whether or + # not the 1D marginal is the same at the half way point + is_the_same = ks_test(samples1, samples2, threshold=self._ksthreshold) + data = self.burn_in_data['ks_test'] + # required things to store + data['is_burned_in'] = all(is_the_same.values()) + if data['is_burned_in']: + data['burn_in_iteration'] = int(niters/2.) 
+        else:
+            data['burn_in_iteration'] = NOT_BURNED_IN_ITER
+        # additional
+        data['status_per_parameter'] = is_the_same
+
+    def evaluate(self, filename):
+        """Runs all of the burn-in tests."""
+        for tst in self.do_tests:
+            getattr(self, tst)(filename)
+        # The iteration to use for burn-in depends on the logic in the burn-in
+        # test string. For example, if the test was 'max_posterior | nacl' and
+        # max_posterior burned-in at iteration 5000 while nacl burned in at
+        # iteration 6000, we'd want to use 5000 as the burn-in iteration.
+        # However, if the test was 'max_posterior & nacl', we'd want to use
+        # 6000 as the burn-in iteration. The code below handles all cases by
+        # doing the following: first, take the collection of burn in iterations
+        # from all the burn in tests that were applied. Next, cycle over the
+        # iterations in increasing order, checking which tests have burned in
+        # by that point. Then evaluate the burn-in string at that point to see
+        # if it passes, and if so, what the iteration is. The first point that
+        # the test passes is used as the burn-in iteration.
+        data = self.burn_in_data
+        burn_in_iters = numpy.unique([data[t]['burn_in_iteration']
+                                      for t in self.do_tests])
+        burn_in_iters.sort()
+        for ii in burn_in_iters:
+            test_results = {t: (data[t]['is_burned_in'] and
+                                0 <= data[t]['burn_in_iteration'] <= ii)
+                            for t in self.do_tests}
+            is_burned_in = eval(self.burn_in_test, {"__builtins__": None},
+                                test_results)
+            if is_burned_in:
+                break
+        self.is_burned_in = is_burned_in
+        if is_burned_in:
+            self.burn_in_iteration = ii
+        else:
+            self.burn_in_iteration = NOT_BURNED_IN_ITER
+
+    @classmethod
+    def from_config(cls, cp, sampler):
+        """Loads burn in from section [sampler-burn_in]."""
+        section = 'sampler'
+        tag = 'burn_in'
+        burn_in_test = cp.get_opt_tag(section, 'burn-in-test', tag)
+        kwargs = {}
+        if cp.has_option_tag(section, 'nacl', tag):
+            kwargs['nacl'] = int(cp.get_opt_tag(section, 'nacl', tag))
+        if cp.has_option_tag(section, 'ks-threshold', tag):
+            kwargs['ks_threshold'] = float(
+                cp.get_opt_tag(section, 'ks-threshold', tag))
+        if cp.has_option_tag(section, 'ndim', tag):
+            kwargs['ndim'] = int(
+                cp.get_opt_tag(section, 'ndim', tag))
+        if cp.has_option_tag(section, 'min-iterations', tag):
+            kwargs['min_iterations'] = int(
+                cp.get_opt_tag(section, 'min-iterations', tag))
+        return cls(sampler, burn_in_test, **kwargs)
diff --git a/gwin/io/__init__.py b/gwin/io/__init__.py
index 2e19621..c284bf6 100644
--- a/gwin/io/__init__.py
+++ b/gwin/io/__init__.py
@@ -17,5 +17,190 @@
 """I/O utilities for GWIn
 """
-from .hdf import InferenceFile
+from __future__ import absolute_import
+
+import os
+import shutil
+import logging
+import h5py as _h5py
+
+from .emcee import EmceeFile
 from .txt import InferenceTXTFile
+
+filetypes = {
+    EmceeFile.name: EmceeFile,
+}
+
+
+def loadfile(path, mode=None, filetype=None, **kwargs):
+    """Loads the given file using the appropriate InferenceFile class.
+
+    If ``filetype`` is not provided, this will try to retrieve the ``filetype``
+    from the file's ``attrs``. If the file does not exist yet, an IOError will
+    be raised if ``filetype`` is not provided.
+
+    Parameters
+    ----------
+    path : str
+        The filename to load.
+    mode : str, optional
+        What mode to load the file with, e.g., 'w' for write, 'r' for read,
+        'a' for append. Defaults to h5py.File's default mode, which is 'a'.
+    filetype : str, optional
+        Force the file to be loaded with the given class name. This must be
+        provided if creating a new file.
+ + Returns + ------- + filetype instance + An open file handler to the file. The class used for IO with the file + is determined by the ``filetype`` keyword (if provided) or the + ``filetype`` stored in the file (if not provided). + """ + if filetype is None: + # try to read the file to get its filetype + try: + with _h5py.File(path, 'r') as fp: + filetype = fp.attrs['filetype'] + except IOError: + # file doesn't exist, filetype must be provided + raise IOError("The file appears not to exist. In this case, " + "filetype must be provided.") + return filetypes[filetype](path, mode=mode, **kwargs) + +# +# ============================================================================= +# +# HDF Utilities +# +# ============================================================================= +# + + +def check_integrity(filename): + """Checks the integrity of an InferenceFile. + + Checks done are: + + * can the file open? + * do all of the datasets in the samples group have the same shape? + * can the first and last sample in all of the datasets in the samples + group be read? + + If any of these checks fail, an IOError is raised. + + Parameters + ---------- + filename: str + Name of an InferenceFile to check. + + Raises + ------ + ValueError + If the given file does not exist. + KeyError + If the samples group does not exist. + IOError + If any of the checks fail. + """ + # check that the file exists + if not os.path.exists(filename): + raise ValueError("file {} does not exist".format(filename)) + # if the file is corrupted such that it cannot be opened, the next line + # will raise an IOError + with loadfile(filename, 'r') as fp: + # check that all datasets in samples have the same shape + parameters = fp[fp.samples_group].keys() + group = fp.samples_group + '/{}' + # use the first parameter as a reference shape + ref_shape = fp[group.format(parameters[0])].shape + if not all(fp[group.format(param)].shape == ref_shape + for param in parameters): + raise IOError("not all datasets in the samples group have the " + "same shape") + # check that we can read the first/last sample + firstidx = tuple([0]*len(ref_shape)) + lastidx = tuple([-1]*len(ref_shape)) + for param in parameters: + fp[group.format(param)][firstidx] + fp[group.format(param)][lastidx] + + +def validate_checkpoint_files(checkpoint_file, backup_file): + """Checks if the given checkpoint and/or backup files are valid. + + The checkpoint file is considered valid if: + + * it passes all tests run by ``check_integrity``; + * it has at least one sample written to it (indicating at least one + checkpoint has happened). + + The same applies to the backup file. The backup file must also have the + same number of samples as the checkpoint file, otherwise, the backup is + considered invalid. + + If the checkpoint (backup) file is found to be valid, but the backup + (checkpoint) file is not valid, then the checkpoint (backup) is copied to + the backup (checkpoint). Thus, this function ensures that checkpoint and + backup files are either both valid or both invalid. + + Parameters + ---------- + checkpoint_file : string + Name of the checkpoint file. + backup_file : string + Name of the backup file. + + Returns + ------- + checkpoint_valid : bool + Whether or not the checkpoint (and backup) file may be used for loading + samples. 
+ """ + # check if checkpoint file exists and is valid + try: + check_integrity(checkpoint_file) + checkpoint_valid = True + except (ValueError, KeyError, IOError): + checkpoint_valid = False + # backup file + try: + check_integrity(backup_file) + backup_valid = True + except (ValueError, KeyError, IOError): + backup_valid = False + # check if there are any samples in the file; if not, we'll just start from + # scratch + if checkpoint_valid: + with loadfile(checkpoint_file, 'r') as fp: + try: + group = '{}/{}'.format(fp.samples_group, fp.variable_params[0]) + nsamples = fp[group].size + checkpoint_valid = nsamples != 0 + except KeyError: + checkpoint_valid = False + # check if there are any samples in the backup file + if backup_valid: + with loadfile(backup_file, 'r') as fp: + try: + group = '{}/{}'.format(fp.samples_group, fp.variable_params[0]) + backup_nsamples = fp[group].size + backup_valid = backup_nsamples != 0 + except KeyError: + backup_valid = False + # check that the checkpoint and backup have the same number of samples; + # if not, assume the checkpoint has the correct number + if checkpoint_valid and backup_valid: + backup_valid = nsamples == backup_nsamples + # decide what to do based on the files' statuses + if checkpoint_valid and not backup_valid: + # copy the checkpoint to the backup + logging.info("Backup invalid; copying checkpoint file") + shutil.copy(checkpoint_file, backup_file) + backup_valid = True + elif backup_valid and not checkpoint_valid: + logging.info("Checkpoint invalid; copying backup file") + # copy the backup to the checkpoint + shutil.copy(backup_file, checkpoint_file) + checkpoint_valid = True + return checkpoint_valid diff --git a/gwin/io/base_hdf.py b/gwin/io/base_hdf.py new file mode 100644 index 0000000..8a1665c --- /dev/null +++ b/gwin/io/base_hdf.py @@ -0,0 +1,659 @@ +# Copyright (C) 2016 Christopher M. Biwer, Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# self.option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""This modules defines functions for reading and writing samples that the +inference samplers generate. +""" + +from __future__ import absolute_import + +import os +import sys +import logging +from abc import ABCMeta, abstractmethod, abstractproperty + +import numpy + +import h5py + +from pycbc import DYN_RANGE_FAC +from pycbc.io import FieldArray +from pycbc.types import FrequencySeries +from pycbc.waveform import parameters as wfparams + + +class BaseInferenceFile(h5py.File): + """Base class for all inference hdf files. + + This is a subclass of the h5py.File object. It adds functions for + handling reading and writing the samples from the samplers. + + Parameters + ----------- + path : str + The path to the HDF file. 
+ mode : {None, str} + The mode to open the file, eg. "w" for write and "r" for read. + """ + __metaclass__ = ABCMeta + + name = None + samples_group = 'samples' + sampler_group = 'sampler_info' + data_group = 'data' + injections_group = 'injections' + + def __init__(self, path, mode=None, **kwargs): + super(BaseInferenceFile, self).__init__(path, mode, **kwargs) + # check that file type matches self + try: + filetype = self.attrs['filetype'] + except KeyError: + if mode == 'w': + # first time creating the file, add this class's name + filetype = self.name + self.attrs['filetype'] = filetype + else: + filetype = None + if filetype != self.name: + raise ValueError("This file has filetype {}, whereas this class " + "is named {}. This indicates that the file was " + "not written by this class, and so cannot be " + "read by this class.".format(filetype, self.name)) + + def __getattr__(self, attr): + """Things stored in ``.attrs`` are promoted to instance attributes. + + Note that properties will be called before this, so if there are any + properties that share the same name as something in ``.attrs``, that + property will get returned. + """ + return self.attrs[attr] + + @abstractmethod + def write_samples(self, samples, **kwargs): + """This should write all of the provided samples. + + This function should be used to write both samples and model stats. + + Parameters + ---------- + fp : open hdf file + The file to write to. + samples : dict + Samples should be provided as a dictionary of numpy arrays. + \**kwargs : + Any other keyword args the sampler needs to write data. + """ + pass + + @abstractmethod + def write_sampler_metadata(self, sampler): + """This should write the given sampler's metadata to the file. + + This should also include the model's metadata. + """ + pass + + def parse_parameters(self, parameters, array_class=None): + """Parses a parameters arg to figure out what fields need to be loaded. + + Parameters + ---------- + parameters : (list of) strings + The parameter(s) to retrieve. A parameter can be the name of any + field in ``samples_group``, a virtual field or method of + ``FieldArray`` (as long as the file contains the necessary fields + to derive the virtual field or method), and/or a function of + these. + array_class : array class, optional + The type of array to use to parse the parameters. The class must + have a ``parse_parameters`` method. Default is to use a + ``FieldArray``. + + Returns + ------- + list : + A list of strings giving the fields to load from the file. + """ + # get the type of array class to use + if array_class is None: + array_class = FieldArray + # get the names of fields needed for the given parameters + possible_fields = self[self.samples_group].keys() + return array_class.parse_parameters(parameters, possible_fields) + + def read_samples(self, parameters, array_class=None, **kwargs): + """Reads samples for the given parameter(s). + + The ``parameters`` can be the name of any dataset in ``samples_group``, + a virtual field or method of ``FieldArray`` (as long as the file + contains the necessary fields to derive the virtual field or method), + and/or any numpy function of these. + + The ``parameters`` are parsed to figure out what datasets are needed. + Only those datasets will be loaded, and will be the base-level fields + of the returned ``FieldArray``. + + The ``static_params`` are also added as attributes of the returned + ``FieldArray``. + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. 
+ parameters : (list of) strings + The parameter(s) to retrieve. + array_class : FieldArray-like class, optional + The type of array to return. The class must have ``from_kwargs`` + and ``parse_parameters`` methods. If None, will return a + ``FieldArray``. + \**kwargs : + All other keyword arguments are passed to ``read_raw_samples``. + + Returns + ------- + FieldArray : + The samples as a ``FieldArray``. + """ + # get the type of array class to use + if array_class is None: + array_class = FieldArray + # get the names of fields needed for the given parameters + possible_fields = self[self.samples_group].keys() + loadfields = array_class.parse_parameters(parameters, possible_fields) + samples = self.read_raw_samples(loadfields, **kwargs) + # convert to FieldArray + samples = array_class.from_kwargs(**samples) + # add the static params + for (p, val) in self.static_params.items(): + setattr(samples, p, val) + return samples + + @abstractmethod + def read_raw_samples(self, fields, **kwargs): + """Low level function for reading datasets in the samples group. + + This should return a dictionary of numpy arrays. + """ + pass + + @abstractmethod + def write_posterior(self, posterior_file, **kwargs): + """This should write a posterior plus any other metadata to the given + file. + + Parameters + ---------- + posterior_file : str + Name of the file to write to. + \**kwargs : + Any other keyword args the sampler needs to write the posterior. + """ + pass + + @property + def static_params(self): + """Returns a dictionary of the static_params. The keys are the argument + names, values are the value they were set to. + """ + return {arg: self.attrs[arg] for arg in self.attrs["static_params"]} + + @property + def effective_nsamples(self): + """Returns the effective number of samples stored in the file. + """ + try: + return self.attrs['effective_nsamples'] + except KeyError: + return 0 + + def write_effective_nsamples(self, effective_nsamples): + """Writes the effective number of samples stored in the file.""" + self.attrs['effective_nsamples'] = effective_nsamples + + @property + def thin_start(self): + """The default start index to use when reading samples. + + This tries to read from ``thin_start`` in the ``attrs``. If it isn't + there, just returns 0.""" + try: + return self.attrs['thin_start'] + except KeyError: + return 0 + + @property + def thin_interval(self): + """The default interval to use when reading samples. + + This tries to read from ``thin_interval`` in the ``attrs``. If it + isn't there, just returns 1. + """ + try: + return self.attrs['thin_interval'] + except KeyError: + return 1 + + @property + def thin_end(self): + """The defaut end index to use when reading samples. + + This tries to read from ``thin_end`` in the ``attrs``. If it isn't + there, just returns None. + """ + try: + return self.attrs['thin_end'] + except KeyError: + return None + + @property + def cmd(self): + """Returns the (last) saved command line. + + If the file was created from a run that resumed from a checkpoint, only + the last command line used is returned. + + Returns + ------- + cmd : string + The command line that created this InferenceFile. + """ + cmd = self.attrs["cmd"] + if isinstance(cmd, numpy.ndarray): + cmd = cmd[-1] + return cmd + + def write_logevidence(self, lnz, dlnz): + """Writes the given log evidence and its error. + + Results are saved to file's 'log_evidence' and 'dlog_evidence' + attributes. + + Parameters + ---------- + lnz : float + The log of the evidence. 
+ dlnz : float + The error in the estimate of the log evidence. + """ + self.attrs['log_evidence'] = lnz + self.attrs['dlog_evidence'] = dlnz + + @property + def log_evidence(self): + """Returns the log of the evidence and its error, if they exist in the + file. Raises a KeyError otherwise. + """ + return self.attrs["log_evidence"], self.attrs["dlog_evidence"] + + def write_random_state(self, group=None, state=None): + """Writes the state of the random number generator from the file. + + The random state is written to ``sampler_group``/random_state. + + Parameters + ---------- + group : str + Name of group to write random state to. + state : tuple, optional + Specify the random state to write. If None, will use + ``numpy.random.get_state()``. + """ + group = self.sampler_group if group is None else group + dataset_name = "/".join([group, "random_state"]) + if state is None: + state = numpy.random.get_state() + s, arr, pos, has_gauss, cached_gauss = state + if dataset_name in self: + self[dataset_name][:] = arr + else: + self.create_dataset(dataset_name, arr.shape, fletcher32=True, + dtype=arr.dtype) + self[dataset_name][:] = arr + self[dataset_name].attrs["s"] = s + self[dataset_name].attrs["pos"] = pos + self[dataset_name].attrs["has_gauss"] = has_gauss + self[dataset_name].attrs["cached_gauss"] = cached_gauss + + def read_random_state(self, group=None): + """Reads the state of the random number generator from the file. + + Parameters + ---------- + group : str + Name of group to read random state from. + + Returns + ------- + tuple + A tuple with 5 elements that can be passed to numpy.set_state. + """ + group = self.sampler_group if group is None else group + dataset_name = "/".join([group, "random_state"]) + arr = self[dataset_name][:] + s = self[dataset_name].attrs["s"] + pos = self[dataset_name].attrs["pos"] + has_gauss = self[dataset_name].attrs["has_gauss"] + cached_gauss = self[dataset_name].attrs["cached_gauss"] + return s, arr, pos, has_gauss, cached_gauss + + def write_strain(self, strain_dict, group=None): + """Writes strain for each IFO to file. + + Parameters + ----------- + strain : {dict, FrequencySeries} + A dict of FrequencySeries where the key is the IFO. + group : {None, str} + The group to write the strain to. If None, will write to the top + level. + """ + subgroup = self.data_group + "/{ifo}/strain" + if group is None: + group = subgroup + else: + group = '/'.join([group, subgroup]) + for ifo, strain in strain_dict.items(): + self[group.format(ifo=ifo)] = strain + self[group.format(ifo=ifo)].attrs['delta_t'] = strain.delta_t + self[group.format(ifo=ifo)].attrs['start_time'] = \ + float(strain.start_time) + + def write_stilde(self, stilde_dict, group=None): + """Writes stilde for each IFO to file. + + Parameters + ----------- + stilde : {dict, FrequencySeries} + A dict of FrequencySeries where the key is the IFO. + group : {None, str} + The group to write the strain to. If None, will write to the top + level. + """ + subgroup = self.data_group + "/{ifo}/stilde" + if group is None: + group = subgroup + else: + group = '/'.join([group, subgroup]) + for ifo, stilde in stilde_dict.items(): + self[group.format(ifo=ifo)] = stilde + self[group.format(ifo=ifo)].attrs['delta_f'] = stilde.delta_f + self[group.format(ifo=ifo)].attrs['epoch'] = float(stilde.epoch) + + def write_psd(self, psds, group=None): + """Writes PSD for each IFO to file. + + Parameters + ----------- + psds : {dict, FrequencySeries} + A dict of FrequencySeries where the key is the IFO. 
+        group : {None, str}
+            The group to write the psd to. Default is ``data_group``.
+        """
+        subgroup = self.data_group + "/{ifo}/psds/0"
+        if group is None:
+            group = subgroup
+        else:
+            group = '/'.join([group, subgroup])
+        for ifo in psds:
+            self[group.format(ifo=ifo)] = psds[ifo]
+            self[group.format(ifo=ifo)].attrs['delta_f'] = psds[ifo].delta_f
+
+    def write_injections(self, injection_file):
+        """Writes injection parameters from the given injection file.
+
+        Everything in the injection file is copied to ``injections_group``.
+
+        Parameters
+        ----------
+        injection_file : str
+            Path to HDF injection file.
+        """
+        try:
+            with h5py.File(injection_file, "r") as fp:
+                super(BaseInferenceFile, self).copy(fp, self.injections_group)
+        except IOError:
+            logging.warn("Could not read %s as an HDF file", injection_file)
+
+    def write_command_line(self):
+        """Writes command line to attributes.
+
+        The command line is written to the file's ``attrs['cmd']``. If this
+        attribute already exists in the file (this can happen when resuming
+        from a checkpoint), ``attrs['cmd']`` will be a list storing the current
+        command line and all previous command lines.
+        """
+        cmd = [" ".join(sys.argv)]
+        try:
+            previous = self.attrs["cmd"]
+            if isinstance(previous, str):
+                # convert to list
+                previous = [previous]
+            elif isinstance(previous, numpy.ndarray):
+                previous = previous.tolist()
+        except KeyError:
+            previous = []
+        self.attrs["cmd"] = cmd + previous
+
+    @abstractmethod
+    def write_resume_point(self):
+        """Should write the point at which a sampler starts up.
+
+        How the resume point is indexed is up to the sampler. For example,
+        MCMC samplers use the number of iterations that are stored in the
+        checkpoint file.
+        """
+        pass
+
+    def get_slice(self, thin_start=None, thin_interval=None, thin_end=None):
+        """Formats a slice using the given arguments that can be used to
+        retrieve a thinned array from an InferenceFile.
+
+        Parameters
+        ----------
+        thin_start : int, optional
+            The starting index to use. If None, will use the ``thin_start``
+            attribute.
+        thin_interval : int, optional
+            The interval to use. If None, will use the ``thin_interval``
+            attribute.
+        thin_end : int, optional
+            The end index to use. If None, will use the ``thin_end`` attribute.
+
+        Returns
+        -------
+        slice :
+            The slice needed.
+        """
+        if thin_start is None:
+            thin_start = self.thin_start
+        if thin_interval is None:
+            thin_interval = self.thin_interval
+        if thin_end is None:
+            thin_end = self.thin_end
+        return slice(thin_start, thin_end, thin_interval)
+
+    def copy_metadata(self, other):
+        """Copies all metadata from this file to the other file.
+
+        Metadata is defined as everything in the top-level ``.attrs``.
+
+        Parameters
+        ----------
+        other : InferenceFile
+            An open inference file to write the data to.
+        """
+        logging.info("Copying metadata")
+        # copy attributes
+        for key in self.attrs.keys():
+            other.attrs[key] = self.attrs[key]
+
+    def copy_info(self, other, ignore=None):
+        """Copies "info" from this file to the other.
+
+        "Info" is defined as all groups that are not the samples group.
+
+        Parameters
+        ----------
+        other : output file
+            The output file. Must be an hdf file.
+        ignore : (list of) str
+            Don't copy the given groups.
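+
+        Examples
+        --------
+        An illustrative sketch (assuming ``fp`` is an open inference file and
+        ``other`` is an open output file), copying everything except the
+        injections group:
+
+        >>> fp.copy_info(other, ignore='injections')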
+        """
+        logging.info("Copying info")
+        # copy non-samples/stats data
+        if ignore is None:
+            ignore = []
+        if isinstance(ignore, (str, unicode)):
+            ignore = [ignore]
+        ignore = set(ignore + [self.samples_group])
+        copy_groups = set(self.keys()) - ignore
+        for key in copy_groups:
+            super(BaseInferenceFile, self).copy(key, other)
+
+    def copy_samples(self, other, parameters=None, parameter_names=None,
+                     read_args=None, write_args=None):
+        """Copies samples to the other file.
+
+        Parameters
+        ----------
+        other : InferenceFile
+            An open inference file to write to.
+        parameters : list of str, optional
+            List of parameters to copy. If None, will copy all parameters.
+        parameter_names : dict, optional
+            Rename one or more parameters to the given name. The dictionary
+            should map parameter -> parameter name. If None, will just use the
+            original parameter names.
+        read_args : dict, optional
+            Arguments to pass to ``read_samples``.
+        write_args : dict, optional
+            Arguments to pass to ``write_samples``.
+        """
+        if read_args is None:
+            read_args = {}
+        if write_args is None:
+            write_args = {}
+        # select the samples to copy
+        logging.info("Reading samples to copy")
+        if parameters is None:
+            parameters = self.variable_params
+        # if list of desired parameters is different, rename
+        if set(parameters) != set(self.variable_params):
+            other.attrs['variable_params'] = parameters
+        samples = self.read_samples(parameters, **read_args)
+        logging.info("Copying {} samples".format(samples.size))
+        # if different parameter names are desired, get them from the samples
+        if parameter_names:
+            arrs = {pname: samples[p] for p, pname in parameter_names.items()}
+            arrs.update({p: samples[p] for p in parameters if
+                         p not in parameter_names})
+            samples = FieldArray.from_kwargs(**arrs)
+            other.attrs['variable_params'] = samples.fieldnames
+        logging.info("Writing samples")
+        other.write_samples(samples, **write_args)
+
+    def copy(self, other, ignore=None, parameters=None, parameter_names=None,
+             read_args=None, write_args=None):
+        """Copies metadata, info, and samples in this file to another file.
+
+        Parameters
+        ----------
+        other : str or InferenceFile
+            The file to write to. May be either a string giving a filename,
+            or an open hdf file. If the former, the file will be opened with
+            the write attribute (note that if a file already exists with that
+            name, it will be deleted).
+        ignore : (list of) strings
+            Don't copy the given groups. If the samples group is included, no
+            samples will be copied.
+        parameters : list of str, optional
+            List of parameters in the samples group to copy. If None, will copy
+            all parameters.
+        parameter_names : dict, optional
+            Rename one or more parameters to the given name. The dictionary
+            should map parameter -> parameter name. If None, will just use the
+            original parameter names.
+        read_args : dict, optional
+            Arguments to pass to ``read_samples``.
+        write_args : dict, optional
+            Arguments to pass to ``write_samples``.
+
+        Returns
+        -------
+        InferenceFile
+            The open file handler to other.
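+
+        Examples
+        --------
+        A hypothetical sketch (the file name and arguments shown are
+        illustrative), copying a thinned posterior to a new file:
+
+        >>> posterior_file = fp.copy('posterior.hdf', ignore='sampler_info',
+        ...                          read_args={'thin_interval': 10})
+        >>> posterior_file.close()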
+ """ + if not isinstance(other, h5py.File): + # check that we're not trying to overwrite this file + if other == self.name: + raise IOError("destination is the same as this file") + other = self.__class__(other, 'w') + # metadata + self.copy_metadata(other) + # info + if ignore is None: + ignore = [] + if isinstance(ignore, (str, unicode)): + ignore = [ignore] + self.copy_info(other, ignore=ignore) + # samples + if self.samples_group not in ignore: + self.copy_samples(other, parameters=parameters, + parameter_names=parameter_names, + read_args=read_args, + write_args=write_args) + # if any down selection was done, re-set the default + # thin-start/interval/end + p = self[self.samples_group].keys()[0] + my_shape = self[self.samples_group][p].shape + p = other[other.samples_group].keys()[0] + other_shape = other[other.samples_group][p].shape + if my_shape != other_shape: + other.attrs['thin_start'] = 0 + other.attrs['thin_interval'] = 1 + other.attrs['thin_end'] = None + return other + + +def write_kwargs_to_hdf_attrs(attrs, **kwargs): + """Writes the given keywords to the given ``attrs``. + + If any keyword argument points to a dict, the keyword will point to a + list of the dict's keys. Each key is then written to the attrs with its + corresponding value. + + Parameters + ---------- + attrs : an HDF attrs + Can be either the ``attrs`` of the hdf file, or any group in a file. + \**kwargs : + The keywords to write. + """ + for arg, val in kwargs.items(): + if val is None: + val = str(None) + if isinstance(val, dict): + attrs[arg] = val.keys() + # just call self again with the dict as kwargs + write_kwargs_to_hdf_attrs(attrs, **val) + else: + attrs[arg] = val diff --git a/gwin/io/base_mcmc.py b/gwin/io/base_mcmc.py new file mode 100644 index 0000000..f77247f --- /dev/null +++ b/gwin/io/base_mcmc.py @@ -0,0 +1,251 @@ +# Copyright (C) 2016 Christopher M. Biwer, Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# self.option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""Provides I/O that is specific to MCMC samplers. +""" + +from __future__ import absolute_import + +from abc import (ABCMeta, abstractmethod) + +import numpy +from .base_hdf import write_kwargs_to_hdf_attrs + + +class MCMCIO(object): + """Abstract base class that provides some IO functions for ensemble MCMCs. + """ + __metaclass__ = ABCMeta + + @abstractmethod + def read_acls(self): + """Should return all of the individual chains' acls. + """ + pass + + def write_samples(self, samples, parameters=None, + start_iteration=None, max_iterations=None): + """Writes samples to the given file. + + Results are written to: + + ``fp[samples_group/{vararg}]``, + + where ``{vararg}`` is the name of a model params. 
The samples are + written as an ``nwalkers x niterations`` array. + + Parameters + ----------- + samples : dict + The samples to write. Each array in the dictionary should have + shape nwalkers x niterations. + parameters : list, optional + Only write the specified parameters to the file. If None, will + write all of the keys in the ``samples`` dict. + start_iteration : int, optional + Write results to the file's datasets starting at the given + iteration. Default is to append after the last iteration in the + file. + max_iterations : int, optional + Set the maximum size that the arrays in the hdf file may be resized + to. Only applies if the samples have not previously been written + to file. The default (None) is to use the maximum size allowed by + h5py. + """ + nwalkers, niterations = samples.values()[0].shape + assert all(p.shape == (nwalkers, niterations) + for p in samples.values()), ( + "all samples must have the same shape") + if max_iterations is not None and max_iterations < niterations: + raise IndexError("The provided max size is less than the " + "number of iterations") + group = self.samples_group + '/{name}' + if parameters is None: + parameters = samples.keys() + # loop over number of dimensions + for param in parameters: + dataset_name = group.format(name=param) + istart = start_iteration + try: + fp_niterations = self[dataset_name].shape[-1] + if istart is None: + istart = fp_niterations + istop = istart + niterations + if istop > fp_niterations: + # resize the dataset + self[dataset_name].resize(istop, axis=1) + except KeyError: + # dataset doesn't exist yet + if istart is not None and istart != 0: + raise ValueError("non-zero start_iteration provided, " + "but dataset doesn't exist yet") + istart = 0 + istop = istart + niterations + self.create_dataset(dataset_name, (nwalkers, istop), + maxshape=(nwalkers, max_iterations), + dtype=samples[param].dtype, + fletcher32=True) + self[dataset_name][:, istart:istop] = samples[param] + + def read_raw_samples(self, fields, + thin_start=None, thin_interval=None, thin_end=None, + iteration=None, walkers=None, flatten=True): + """Base function for reading samples. + + Parameters + ----------- + fields : list + The list of field names to retrieve. Must be names of datasets in + the ``samples_group``. + + Returns + ------- + dict + A dictionary of field name -> numpy array pairs. 
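+
+        Examples
+        --------
+        An illustrative sketch (the field name and indices are hypothetical),
+        keeping every 10th iteration from the first two walkers without
+        flattening:
+
+        >>> arrays = fp.read_raw_samples(['x'], walkers=[0, 1],
+        ...                              thin_interval=10, flatten=False)
+        >>> arrays['x'].ndim
+        2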
+        """
+        if isinstance(fields, (str, unicode)):
+            fields = [fields]
+        # walkers to load
+        if walkers is not None:
+            widx = numpy.zeros(self.nwalkers, dtype=bool)
+            widx[walkers] = True
+        else:
+            widx = slice(0, None)
+        # get the slice to use
+        if iteration is not None:
+            get_index = iteration
+        else:
+            get_index = self.get_slice(thin_start=thin_start,
+                                       thin_end=thin_end,
+                                       thin_interval=thin_interval)
+        # load
+        group = self.samples_group + '/{name}'
+        arrays = {}
+        for name in fields:
+            arr = self[group.format(name=name)][widx, get_index]
+            if flatten:
+                arr = arr.flatten()
+            arrays[name] = arr
+        return arrays
+
+    def write_resume_point(self):
+        """Keeps a list of the number of iterations that were in a file when a
+        run was resumed from a checkpoint."""
+        try:
+            resume_pts = self.attrs["resume_points"].tolist()
+        except KeyError:
+            resume_pts = []
+        try:
+            niterations = self.niterations
+        except KeyError:
+            niterations = 0
+        resume_pts.append(niterations)
+        self.attrs["resume_points"] = resume_pts
+
+    def write_niterations(self, niterations):
+        """Writes the given number of iterations to the sampler group."""
+        self[self.sampler_group].attrs['niterations'] = niterations
+
+    @property
+    def niterations(self):
+        """Returns the number of iterations the sampler was run for."""
+        return self[self.sampler_group].attrs['niterations']
+
+    def write_sampler_metadata(self, sampler):
+        """Writes the sampler's metadata."""
+        self.attrs['sampler'] = sampler.name
+        if self.sampler_group not in self.keys():
+            # create the sampler group
+            self.create_group(self.sampler_group)
+        self[self.sampler_group].attrs['nwalkers'] = sampler.nwalkers
+        # write the model's metadata
+        sampler.model.write_metadata(self)
+
+    def write_acls(self, acls):
+        """Writes the given autocorrelation lengths.
+
+        The ACL of each parameter is saved to
+        ``[sampler_group]/acls/{param}``. The maximum over all the
+        parameters is saved to the file's 'acl' attribute.
+
+        Parameters
+        ----------
+        acls : dict
+            A dictionary of ACLs keyed by the parameter.
+
+        Returns
+        -------
+        acl :
+            The maximum of the ACLs over all parameters that was written to
+            the file.
+        """
+        group = self.sampler_group + '/acls/{}'
+        # write the individual acls
+        for param in acls:
+            try:
+                # we need to use the write_direct function because it's
+                # apparently the only way to update scalars in h5py
+                self[group.format(param)].write_direct(
+                    numpy.array(acls[param]))
+            except KeyError:
+                # dataset doesn't exist yet
+                self[group.format(param)] = acls[param]
+        # write the maximum over all params
+        acl = numpy.array(acls.values()).max()
+        self[self.sampler_group].attrs['acl'] = acl
+        # set the default thin interval to be the acl (if it is finite)
+        if numpy.isfinite(acl):
+            self.attrs['thin_interval'] = acl
+        return acl
+
+    def read_acls(self):
+        """Reads the acls of all the parameters.
+
+        Returns
+        -------
+        dict
+            A dictionary of the ACLs, keyed by the parameter name.
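+
+        Examples
+        --------
+        For example (assuming ACLs have already been written with
+        ``write_acls``):
+
+        >>> acls = fp.read_acls()
+        >>> max_acl = max(acls.values())  # the longest ACL over all params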
+        """
+        group = self[self.sampler_group]['acls']
+        return {param: group[param].value for param in group.keys()}
+
+    def write_burn_in(self, burn_in):
+        """Write the given burn-in data to the file."""
+        group = self[self.sampler_group]
+        group.attrs['burn_in_test'] = burn_in.burn_in_test
+        group.attrs['is_burned_in'] = burn_in.is_burned_in
+        group.attrs['burn_in_iteration'] = burn_in.burn_in_iteration
+        # set the default thin_start to be the burn_in_iteration
+        self.attrs['thin_start'] = burn_in.burn_in_iteration
+        # write individual test data
+        for tst in burn_in.burn_in_data:
+            key = 'burn_in_tests/{}'.format(tst)
+            try:
+                attrs = group[key].attrs
+            except KeyError:
+                group.create_group(key)
+                attrs = group[key].attrs
+            write_kwargs_to_hdf_attrs(attrs, **burn_in.burn_in_data[tst])
diff --git a/gwin/io/emcee.py b/gwin/io/emcee.py
new file mode 100644
index 0000000..8331226
--- /dev/null
+++ b/gwin/io/emcee.py
@@ -0,0 +1,75 @@
+# Copyright (C) 2018 Collin Capano
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+#
+# =============================================================================
+#
+#                                   Preamble
+#
+# =============================================================================
+#
+"""Provides IO for the emcee sampler.
+"""
+
+import numpy
+
+from .base_hdf import BaseInferenceFile
+from .base_mcmc import MCMCIO
+
+
+class EmceeFile(MCMCIO, BaseInferenceFile):
+    """Class to handle file IO for the ``emcee`` sampler."""
+
+    name = 'emcee_file'
+
+    def read_acceptance_fraction(self, walkers=None):
+        """Reads the acceptance fraction.
+
+        Parameters
+        -----------
+        walkers : {None, (list of) int}
+            The walker index (or a list of indices) to retrieve. If None,
+            samples from all walkers will be obtained.
+
+        Returns
+        -------
+        array
+            Array of acceptance fractions with shape (requested walkers,).
+        """
+        group = self.sampler_group + '/acceptance_fraction'
+        if walkers is None:
+            wmask = numpy.ones(self.nwalkers, dtype=bool)
+        else:
+            wmask = numpy.zeros(self.nwalkers, dtype=bool)
+            wmask[walkers] = True
+        return self[group][wmask]
+
+    def write_acceptance_fraction(self, acceptance_fraction):
+        """Write acceptance_fraction data to file. Results are written to
+        the ``[sampler_group]/acceptance_fraction``.
+
+        Parameters
+        -----------
+        acceptance_fraction : numpy.ndarray
+            Array of acceptance fractions to write.
+        """
+        group = self.sampler_group + '/acceptance_fraction'
+        try:
+            self[group][:] = acceptance_fraction
+        except KeyError:
+            # dataset doesn't exist yet, create it
+            self[group] = acceptance_fraction
+
+    def write_posterior(self, filename, **kwargs):
+        pass
diff --git a/gwin/io/hdf.py b/gwin/io/hdf.py
deleted file mode 100644
index 1799694..0000000
--- a/gwin/io/hdf.py
+++ /dev/null
@@ -1,801 +0,0 @@
-# Copyright (C) 2016 Christopher M.
Biwer -# This program is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3 of the License, or (at your -# self.option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -# Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - -# -# ============================================================================= -# -# Preamble -# -# ============================================================================= -# -"""This modules defines functions for reading and writing samples that the -inference samplers generate. -""" - -import os -import sys -import logging - -import numpy - -import h5py - -from pycbc import DYN_RANGE_FAC -from pycbc.io import FieldArray -from pycbc.types import FrequencySeries -from pycbc.waveform import parameters as wfparams - -from .. import sampler as gwin_sampler - - -class _PosteriorOnlyParser(object): - """Provides interface for reading/writing samples from/to an InferenceFile - that contains flattened posterior samples. - """ - @staticmethod - def _read_fields(fp, fields_group, fields, array_class, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None): - """Reads fields from the given file. - """ - if iteration is not None: - get_index = iteration - else: - get_index = fp.get_slice(thin_start=thin_start, thin_end=thin_end, - thin_interval=thin_interval) - # load - arrays = {} - group = fields_group + '/{}' - arrays = {field: fp[group.format(field)][get_index] - for field in fields} - return array_class.from_kwargs(**arrays) - - @classmethod - def read_samples(cls, fp, parameters, samples_group=None, - thin_start=0, thin_end=None, thin_interval=1, - iteration=None, array_class=None): - """Reads posterior samples from a posterior-only file. - """ - # get the group to load from - if samples_group is None: - samples_group = fp.samples_group - # get the type of array class to use - if array_class is None: - array_class = FieldArray - # get the names of fields needed for the given parameters - possible_fields = fp[samples_group].keys() - loadfields = array_class.parse_parameters(parameters, possible_fields) - return cls._read_fields(fp, samples_group, loadfields, array_class, - thin_start=thin_start, - thin_interval=thin_interval, thin_end=thin_end, - iteration=iteration) - - @staticmethod - def write_samples_group(fp, samples_group, fields, samples): - """Writes the given samples to the given samples group. - """ - for field in samples.fieldnames: - grp = '{}/{}'.format(samples_group, field) - fp[grp] = samples[field] - - @classmethod - def n_independent_samples(cls, fp): - """Returns the number of independent samples stored in the file. - """ - return cls.read_samples(fp, fp.variable_params[0]).size - - -class InferenceFile(h5py.File): - """ A subclass of the h5py.File object that has extra functions for - handling reading and writing the samples from the samplers. - - Parameters - ----------- - path : str - The path to the HDF file. - mode : {None, str} - The mode to open the file, eg. "w" for write and "r" for read. 
- """ - name = "hdf" - samples_group = 'samples' - stats_group = 'model_stats' - sampler_group = 'sampler_states' - - def __init__(self, path, mode=None, **kwargs): - super(InferenceFile, self).__init__(path, mode, **kwargs) - - @property - def posterior_only(self): - """Whether the file only contains flattened posterior samples. - """ - try: - return self.attrs['posterior_only'] - except KeyError: - return False - - @property - def sampler_name(self): - """Returns the name of the sampler that was used.""" - return self.attrs["sampler"] - - @property - def sampler_class(self): - """Returns the sampler class that was used.""" - try: - sampler = self.sampler_name - except KeyError: - return None - return gwin_sampler.samplers[sampler] - - @property - def samples_parser(self): - """Returns the class to use to read/write samples from/to the file.""" - if self.posterior_only: - return _PosteriorOnlyParser - else: - return self.sampler_class - - @property - def model_name(self): - """Returns the name of the model that was used.""" - return self.attrs["model"] - - @property - def variable_params(self): - """Returns list of variable_params. - - Returns - ------- - variable_params : {list, str} - List of str that contain variable_params keys. - """ - return self.attrs["variable_params"] - - @property - def static_params(self): - """Returns a dictionary of the static_params. The keys are the argument - names, values are the value they were set to. - """ - return {arg: self.attrs[arg] for arg in self.attrs["static_params"]} - - @property - def sampling_params(self): - """Returns the parameters that were used to sample. - - Returns - ------- - sampling_params : {list, str} - List of the sampling params. - """ - return self.attrs["sampling_params"] - - @property - def lognl(self): - """Returns the log noise likelihood.""" - return self.attrs["lognl"] - - @property - def niterations(self): - """Returns number of iterations performed. - - Returns - ------- - niterations : int - Number of iterations performed. - """ - return self.attrs["niterations"] - - @property - def n_independent_samples(self): - """Returns the number of independent samples stored in the file. - """ - return self.samples_parser.n_independent_samples(self) - - @property - def burn_in_iterations(self): - """Returns number of iterations in the burn in. - """ - return self.attrs["burn_in_iterations"] - - @property - def is_burned_in(self): - """Returns whether or not the sampler is burned in. - """ - return self.attrs["is_burned_in"] - - @property - def nwalkers(self): - """Returns number of walkers used. - - Returns - ------- - nwalkesr : int - Number of walkers used. - """ - return self.attrs["nwalkers"] - - @property - def ntemps(self): - """Returns number of temperatures used.""" - return self.attrs["ntemps"] - - @property - def acl(self): - """ Returns the saved autocorelation length (ACL). - - Returns - ------- - acl : {int, float} - The ACL. - """ - return self.attrs["acl"] - - @property - def cmd(self): - """Returns the (last) saved command line. - - If the file was created from a run that resumed from a checkpoint, only - the last command line used is returned. - - Returns - ------- - cmd : string - The command line that created this InferenceFile. - """ - cmd = self.attrs["cmd"] - if isinstance(cmd, numpy.ndarray): - cmd = cmd[-1] - return cmd - - @property - def resume_points(self): - """The iterations at which a run was resumed from checkpoint. 
- - Returns - ------- - resume_points : array or None - An array of integers giving the points at which the run resumed. - - Raises - ------ - KeyError - If the run never resumed from a checkpoint. - """ - return self.attrs['resume_points'] - - @property - def log_evidence(self): - """Returns the log of the evidence and its error, if they exist in the - file. Raises a KeyError otherwise. - """ - return self.attrs["log_evidence"], self.attrs["dlog_evidence"] - - def read_samples(self, parameters, samples_group=None, **kwargs): - """Reads samples from the file. - - Parameters - ----------- - parameters : (list of) strings - The parameter(s) to retrieve. A parameter can be the name of any - field in `samples_group`, a virtual field or method of - `FieldArray` (as long as the file contains the necessary fields - to derive the virtual field or method), and/or a function of - these. - samples_group : str - Group in HDF InferenceFile that parameters belong to. - **kwargs : - The rest of the keyword args are passed to the sampler's - `read_samples` method. - - Returns - ------- - FieldArray - Samples for the given parameters, as an instance of a - FieldArray. - """ - # get the appropriate sampler class - samples_group = samples_group if samples_group else self.samples_group - return self.samples_parser.read_samples(self, parameters, - samples_group=samples_group, - **kwargs) - - def read_model_stats(self, **kwargs): - """Reads model stats from self. - - Parameters - ----------- - **kwargs : - The keyword args are passed to the sampler's - ``read_model_stats`` method. - - Returns - ------- - stats : {FieldArray, None} - Likelihood stats in the file, as a FieldArray. The fields of the - array are the names of the stats that are in the ``model_stats`` - group. - """ - parameters = self[self.stats_group].keys() - return self.read_samples(parameters, samples_group=self.stats_group, - **kwargs) - - def read_acceptance_fraction(self, **kwargs): - """Returns the acceptance fraction that was written to the file. - - Parameters - ---------- - **kwargs : - All keyword arguments are passed to the sampler's - `read_acceptance_fraction` function. - Returns - ------- - numpy.array - The acceptance fraction. - """ - return self.sampler_class.read_acceptance_fraction(self, **kwargs) - - def read_acls(self): - """Returns all of the individual chains' acls. See the `read_acls` - function of this file's sampler for more details. - """ - return self.sampler_class.read_acls(self) - - def read_label(self, parameter, error_on_none=False): - """Returns the label for the parameter. - - Parameters - ----------- - parameter : str - Name of parameter to get a label for. Will first try to retrieve - a label from this file's "label" attributes. If the parameter - is not found there, will look for a label from - pycbc.waveform.parameters. - error_on_none : {False, bool} - If True, will raise a ValueError if a label cannot be found, or if - the label is None. Otherwise, the parameter will just be returned - if no label can be found. - - Returns - ------- - label : str - A formatted string for the name of the paramter. 
- """ - # get label - try: - label = self[parameter].attrs["label"] - except KeyError: - # try looking in pycbc.waveform.parameters - try: - label = getattr(wfparams, parameter).label - except AttributeError: - label = None - if label is None: - if error_on_none: - raise ValueError("Cannot find a label for paramter %s" % ( - parameter)) - else: - return parameter - return label - - def read_random_state(self, group=None): - """ Reads the state of the random number generator from the file. - - Parameters - ---------- - group : str - Name of group to read random state from. - - Returns - ------- - tuple - A tuple with 5 elements that can be passed to numpy.set_state. - """ - group = self.sampler_group if group is None else group - dataset_name = "/".join([group, "random_state"]) - arr = self[dataset_name][:] - s = self[dataset_name].attrs["s"] - pos = self[dataset_name].attrs["pos"] - has_gauss = self[dataset_name].attrs["has_gauss"] - cached_gauss = self[dataset_name].attrs["cached_gauss"] - return s, arr, pos, has_gauss, cached_gauss - - def write_strain(self, strain_dict, group=None): - """Writes strain for each IFO to file. - - Parameters - ----------- - strain : {dict, FrequencySeries} - A dict of FrequencySeries where the key is the IFO. - group : {None, str} - The group to write the strain to. If None, will write to the top - level. - """ - subgroup = "{ifo}/strain" - if group is None: - group = subgroup - else: - group = '/'.join([group, subgroup]) - for ifo, strain in strain_dict.items(): - self[group.format(ifo=ifo)] = strain - self[group.format(ifo=ifo)].attrs['delta_t'] = strain.delta_t - self[group.format(ifo=ifo)].attrs['start_time'] = \ - float(strain.start_time) - - def write_stilde(self, stilde_dict, group=None): - """Writes stilde for each IFO to file. - - Parameters - ----------- - stilde : {dict, FrequencySeries} - A dict of FrequencySeries where the key is the IFO. - group : {None, str} - The group to write the strain to. If None, will write to the top - level. - """ - subgroup = "{ifo}/stilde" - if group is None: - group = subgroup - else: - group = '/'.join([group, subgroup]) - for ifo, stilde in stilde_dict.items(): - self[group.format(ifo=ifo)] = stilde - self[group.format(ifo=ifo)].attrs['delta_f'] = stilde.delta_f - self[group.format(ifo=ifo)].attrs['epoch'] = float(stilde.epoch) - - def write_psd(self, psds, low_frequency_cutoff, group=None): - """Writes PSD for each IFO to file. - - Parameters - ----------- - psds : {dict, FrequencySeries} - A dict of FrequencySeries where the key is the IFO. - low_frequency_cutoff : {dict, float} - A dict of the low-frequency cutoff where the key is the IFO. The - minimum value will be stored as an attr in the File. - group : {None, str} - The group to write the strain to. If None, will write to the top - level. - """ - subgroup = "{ifo}/psds/0" - if group is None: - group = subgroup - else: - group = '/'.join([group, subgroup]) - self.attrs["low_frequency_cutoff"] = min(low_frequency_cutoff.values()) - for ifo in psds: - self[group.format(ifo=ifo)] = psds[ifo] - self[group.format(ifo=ifo)].attrs['delta_f'] = psds[ifo].delta_f - - def write_data(self, strain_dict=None, stilde_dict=None, - psd_dict=None, low_frequency_cutoff_dict=None, - group=None): - """Writes the strain/stilde/psd. - - Parameters - ---------- - strain_dict : {None, dict} - A dictionary of strains. If None, no strain will be written. - stilde_dict : {None, dict} - A dictionary of stilde. If None, no stilde will be written. 
- psd_dict : {None, dict} - A dictionary of psds. If None, no psds will be written. - low_freuency_cutoff_dict : {None, dict} - A dictionary of low frequency cutoffs used for each detector in - `psd_dict`; must be provided if `psd_dict` is not None. - group : {None, str} - The group to write the strain to. If None, will write to the top - level. - """ - # save PSD - if psd_dict is not None: - if low_frequency_cutoff_dict is None: - raise ValueError("must provide low_frequency_cutoff_dict if " - "saving psds to output") - # apply dynamic range factor for saving PSDs since - # plotting code expects it - psd_dyn_dict = {} - for key, val in psd_dict.iteritems(): - psd_dyn_dict[key] = FrequencySeries(val*DYN_RANGE_FAC**2, - delta_f=val.delta_f) - self.write_psd(psds=psd_dyn_dict, - low_frequency_cutoff=low_frequency_cutoff_dict, - group=group) - - # save stilde - if stilde_dict is not None: - self.write_stilde(stilde_dict, group=group) - - # save strain if desired - if strain_dict is not None: - self.write_strain(strain_dict, group=group) - - def write_injections(self, injection_file, ifo): - """ Writes injection parameters for an IFO to file. - - Parameters - ---------- - injection_file : str - Path to HDF injection file. - ifo : str - IFO name. - """ - subgroup = "{ifo}/injections" - self.create_group(subgroup.format(ifo=ifo)) - try: - with h5py.File(injection_file, "r") as fp: - for param in fp.keys(): - self[subgroup.format(ifo=ifo)][param] = fp[param][:] - for key in fp.attrs.keys(): - self[subgroup.format(ifo=ifo)].attrs[key] = fp.attrs[key] - except IOError: - logging.warn("Could not read %s as an HDF file", injection_file) - - def write_command_line(self): - """Writes command line to attributes. - - The command line is written to the file's ``attrs['cmd']``. If this - attribute already exists in the file (this can happen when resuming - from a checkpoint), ``attrs['cmd']`` will be a list storing the current - command line and all previous command lines. - """ - cmd = [" ".join(sys.argv)] - try: - previous = self.attrs["cmd"] - if isinstance(previous, str): - # convert to list - previous = [previous] - elif isinstance(previous, numpy.ndarray): - previous = previous.tolist() - except KeyError: - previous = [] - self.attrs["cmd"] = cmd + previous - - def write_resume_point(self): - """Keeps a list of the number of iterations that were in a file when a - run was resumed from a checkpoint.""" - try: - resume_pts = self.attrs["resume_points"].tolist() - except KeyError: - resume_pts = [] - try: - niterations = self.niterations - except KeyError: - niterations = 0 - resume_pts.append(niterations) - self.attrs["resume_points"] = resume_pts - - def write_random_state(self, group=None, state=None): - """ Writes the state of the random number generator from the file. - - Parameters - ---------- - group : str - Name of group to read random state to. - state : tuple, optional - Specify the random state to write. If None, will use - ``numpy.random.get_state()``. 
- """ - group = self.sampler_group if group is None else group - dataset_name = "/".join([group, "random_state"]) - if state is None: - state = numpy.random.get_state() - s, arr, pos, has_gauss, cached_gauss = state - if group in self: - self[dataset_name][:] = arr - else: - self.create_dataset(dataset_name, arr.shape, fletcher32=True, - dtype=arr.dtype) - self[dataset_name][:] = arr - self[dataset_name].attrs["s"] = s - self[dataset_name].attrs["pos"] = pos - self[dataset_name].attrs["has_gauss"] = has_gauss - self[dataset_name].attrs["cached_gauss"] = cached_gauss - - def get_slice(self, thin_start=None, thin_interval=None, thin_end=None): - """Formats a slice using the given arguments that can be used to - retrieve a thinned array from an InferenceFile. - - Parameters - ---------- - thin_start : {None, int} - The starting index to use. If None, will try to retrieve the - `burn_in_iterations` from the given file. If no - `burn_in_iterations` exists, will default to the start of the - array. - thin_interval : {None, int} - The interval to use. If None, will try to retrieve the acl from the - given file. If no acl attribute exists, will default to 1. - thin_end : {None, int} - The end index to use. If None, will retrieve to the end of the - array. - - Returns - ------- - slice : - The slice needed. - """ - - # default is to skip burn in samples - if thin_start is None: - try: - thin_start = self.burn_in_iterations - # if the sampler hasn't burned in, the burn_in_iterations will - # be the same as the number of iterations, which would result - # in 0 samples. In that case, just use the last one - if thin_start == self.niterations: - thin_start = thin_start - 1 - except KeyError: - pass - - # default is to use stored ACL and accept every i-th sample - if thin_interval is None: - try: - thin_interval = int(numpy.ceil(self.acl)) - except KeyError: - pass - return slice(thin_start, thin_end, thin_interval) - - def copy_metadata(self, other): - """Copies all metadata from this file to the other file. - - Metadata is defined as all data that is not in either the samples or - stats group. - - Parameters - ---------- - other : InferenceFile - An open inference file to write the data to. - """ - logging.info("Copying metadata") - # copy non-samples/stats data - for key in self.keys(): - if key not in [self.samples_group, self.stats_group]: - super(InferenceFile, self).copy(key, other) - # copy attributes - for key in self.attrs.keys(): - other.attrs[key] = self.attrs[key] - - def copy(self, other, parameters=None, parameter_names=None, - posterior_only=False, **kwargs): - """Copies data in this file to another file. - - The samples and stats to copy may be down selected using the given - kwargs. All other data (the "metadata") are copied exactly. - - Parameters - ---------- - other : str or InferenceFile - The file to write to. May be either a string giving a filename, - or an open hdf file. If the former, the file will be opened with - the write attribute (note that if a file already exists with that - name, it will be deleted). - parameters : list of str, optional - List of parameters to copy. If None, will copy all parameters. - parameter_names : dict, optional - Rename one or more parameters to the given name. The dictionary - should map parameter -> parameter name. If None, will just use the - original parameter names. - posterior_only : bool, optional - Write the samples and model stats as flattened arrays, and - set other's posterior_only attribute. 
For example, if this file - has a parameter's samples written to - `{samples_group}/{param}/walker{x}`, then other will have all of - the selected samples from all walkers written to - `{samples_group}/{param}/`. - **kwargs : - All other keyword arguments are passed to `read_samples`. - - Returns - ------- - InferenceFile - The open file handler to other. - """ - if not isinstance(other, h5py.File): - # check that we're not trying to overwrite this file - if other == self.name: - raise IOError("destination is the same as this file") - other = InferenceFile(other, 'w') - # copy metadata over - self.copy_metadata(other) - # update other's posterior attribute - if posterior_only: - other.attrs['posterior_only'] = posterior_only - # select the samples to copy - logging.info("Reading samples to copy") - if parameters is None: - parameters = self.variable_params - # if list of desired parameters is different, rename model params - if set(parameters) != set(self.variable_params): - other.attrs['variable_params'] = parameters - # if only the posterior is desired, we'll flatten the results - if not posterior_only and not self.posterior_only: - kwargs['flatten'] = False - samples = self.read_samples(parameters, **kwargs) - logging.info("Copying {} samples".format(samples.size)) - # if different parameter names are desired, get them from the samples - if parameter_names: - arrs = {pname: samples[p] for p, pname in parameter_names.items()} - arrs.update({p: samples[p] for p in parameters if - p not in parameter_names}) - samples = FieldArray.from_kwargs(**arrs) - other.attrs['variable_params'] = samples.fieldnames - logging.info("Writing samples") - other.samples_parser.write_samples_group(other, self.samples_group, - samples.fieldnames, samples) - # do the same for the model stats - logging.info("Reading stats to copy") - stats = self.read_model_stats(**kwargs) - logging.info("Writing stats") - other.samples_parser.write_samples_group(other, self.stats_group, - stats.fieldnames, stats) - # if any down selection was done, re-set the burn in iterations and - # the acl, and the niterations. - # The last dimension of the samples returned by the sampler should - # be the number of iterations. - if samples.shape[-1] != self.niterations: - other.attrs['acl'] = 1 - other.attrs['burn_in_iterations'] = 0 - other.attrs['niterations'] = samples.shape[-1] - return other - - -def check_integrity(filename): - """Checks the integrity of an InferenceFile. - - Checks done are: - - * can the file open? - * do all of the datasets in the samples group have the same shape? - * can the first and last sample in all of the datasets in the samples - group be read? - - If any of these checks fail, an IOError is raised. - - Parameters - ---------- - filename: str - Name of an InferenceFile to check. - - Raises - ------ - ValueError - If the given file does not exist. - KeyError - If the samples group does not exist. - IOError - If any of the checks fail. 
- """ - # check that the file exists - if not os.path.exists(filename): - raise ValueError("file {} does not exist".format(filename)) - # if the file is corrupted such that it cannot be opened, the next line - # will raise an IOError - with InferenceFile(filename, 'r') as fp: - # check that all datasets in samples have the same shape - parameters = fp[fp.samples_group].keys() - group = fp.samples_group + '/{}' - # use the first parameter as a reference shape - ref_shape = fp[group.format(parameters[0])].shape - if not all(fp[group.format(param)].shape == ref_shape - for param in parameters): - raise IOError("not all datasets in the samples group have the " - "same shape") - # check that we can read the first/last sample - firstidx = tuple([0]*len(ref_shape)) - lastidx = tuple([-1]*len(ref_shape)) - for param in parameters: - fp[group.format(param)][firstidx] - fp[group.format(param)][lastidx] diff --git a/gwin/models/base.py b/gwin/models/base.py index f4c4378..e15dc57 100644 --- a/gwin/models/base.py +++ b/gwin/models/base.py @@ -34,6 +34,7 @@ from pycbc.io import FieldArray from pycbc.workflow import ConfigParser +from gwin.io.base_hdf import write_kwargs_to_hdf_attrs # # ============================================================================= @@ -742,3 +743,16 @@ def from_config(cls, cp, **kwargs): args['sampling_transforms'] = sampling_transforms args.update(kwargs) return cls(**args) + + def write_metadata(self, fp): + """Writes metadata to the given file handler. + + Parameters + ---------- + fp : gwin.io.BaseInferenceFile instance + The inference file to write to. + """ + fp.attrs['model'] = self.name + fp.attrs['variable_params'] = list(self.variable_params) + fp.attrs['sampling_params'] = list(self.sampling_params) + write_kwargs_to_hdf_attrs(fp.attrs, static_params=self.static_params) diff --git a/gwin/models/base_data.py b/gwin/models/base_data.py index 0c2095e..ebb5723 100644 --- a/gwin/models/base_data.py +++ b/gwin/models/base_data.py @@ -150,6 +150,11 @@ def data(self): """Returns the data that was set.""" return self._data + @property + def detectors(self): + """Returns the detectors used.""" + return self._data.keys() + def _transform_params(self, **params): """Adds waveform transforms to parent's ``_transform_params``.""" params = super(BaseDataModel, self)._transform_params(**params) @@ -231,3 +236,14 @@ def from_config(cls, cp, data, delta_f=None, delta_t=None, args['waveform_generator'] = waveform_generator return cls(**args) + + def write_metadata(self, fp): + """Adds data to the metadata that's written. + + Parameters + ---------- + fp : gwin.io.BaseInferenceFile instance + The inference file to write to. 
+ """ + super(BaseDataModel, self).write_metadata(fp) + fp.write_stilde(self.data) diff --git a/gwin/models/gaussian_noise.py b/gwin/models/gaussian_noise.py index a2279de..81dfb4e 100644 --- a/gwin/models/gaussian_noise.py +++ b/gwin/models/gaussian_noise.py @@ -244,6 +244,7 @@ def __init__(self, variable_params, data, waveform_generator, d = data.values()[0] N = len(d) # figure out the kmin, kmax to use + self._f_lower = f_lower kmin, kmax = filter.get_cutoff_indices(f_lower, f_upper, d.delta_f, (N-1)*2) self._kmin = kmin @@ -252,9 +253,12 @@ def __init__(self, variable_params, data, waveform_generator, norm = 4*d.delta_f # we'll store the weight to apply to the inner product if psds is None: + self._psds = None w = Array(numpy.sqrt(norm)*numpy.ones(N)) self._weight = {det: w for det in data} else: + # store a copy of the psds + self._psds = {ifo: d.copy() for (ifo, d) in psds.items()} # temporarily suppress numpy divide by 0 warning numpysettings = numpy.seterr(divide='ignore') self._weight = {det: Array(numpy.sqrt(norm/psds[det])) @@ -432,3 +436,27 @@ def det_optimal_snrsq(self, det): self.loglr # now try returning again return getattr(self._current_stats, '{}_optimal_snrsq'.format(det)) + + def write_metadata(self, fp): + """Adds writing the psds and lognl, since it's a constant. + + The lognl is written to the sample group's ``attrs``. + + Parameters + ---------- + fp : gwin.io.BaseInferenceFile instance + The inference file to write to. + """ + super(GaussianNoise, self).write_metadata(fp) + fp.attrs['f_lower'] = self._f_lower + if self._psds is not None: + fp.write_psd(self._psds) + try: + attrs = fp[fp.samples_group].attrs + except KeyError: + # group doesn't exist, create it + fp.create_group(fp.samples_group) + attrs = fp[fp.samples_group].attrs + attrs['lognl'] = self.lognl + for det in self.detectors: + attrs['{}_lognl'.format(det)] = self.det_lognl(det) diff --git a/gwin/option_utils.py b/gwin/option_utils.py index 5fe539e..47ff79c 100644 --- a/gwin/option_utils.py +++ b/gwin/option_utils.py @@ -191,86 +191,6 @@ def sampler_from_cli(opts, model, pool=None): # # ----------------------------------------------------------------------------- -def validate_checkpoint_files(checkpoint_file, backup_file): - """Checks if the given checkpoint and/or backup files are valid. - - The checkpoint file is considered valid if: - - * it passes all tests run by ``InferenceFile.check_integrity``; - * it has at least one sample written to it (indicating at least one - checkpoint has happened). - - The same applies to the backup file. The backup file must also have the - same number of samples as the checkpoint file, otherwise, the backup is - considered invalid. - - If the checkpoint (backup) file is found to be valid, but the backup - (checkpoint) file is not valid, then the checkpoint (backup) is copied to - the backup (checkpoint). Thus, this function ensures that checkpoint and - backup files are either both valid or both invalid. - - Parameters - ---------- - checkpoint_file : string - Name of the checkpoint file. - backup_file : string - Name of the backup file. - - Returns - ------- - checkpoint_valid : bool - Whether or not the checkpoint (and backup) file may be used for loading - samples. 
- """ - # check if checkpoint file exists and is valid - logging.info("Validating checkpoint and backup files") - try: - check_integrity(checkpoint_file) - checkpoint_valid = True - except (ValueError, KeyError, IOError): - checkpoint_valid = False - # backup file - try: - check_integrity(backup_file) - backup_valid = True - except (ValueError, KeyError, IOError): - backup_valid = False - # check if there are any samples in the file; if not, we'll just start from - # scratch - if checkpoint_valid: - with InferenceFile(checkpoint_file, 'r') as fp: - try: - group = '{}/{}'.format(fp.samples_group, fp.variable_params[0]) - nsamples = fp[group].size - checkpoint_valid = nsamples != 0 - except KeyError: - checkpoint_valid = False - # check if there are any samples in the backup file - if backup_valid: - with InferenceFile(backup_file, 'r') as fp: - try: - group = '{}/{}'.format(fp.samples_group, fp.variable_params[0]) - backup_nsamples = fp[group].size - backup_valid = backup_nsamples != 0 - except KeyError: - backup_valid = False - # check that the checkpoint and backup have the same number of samples; - # if not, assume the checkpoint has the correct number - if checkpoint_valid and backup_valid: - backup_valid = nsamples == backup_nsamples - # decide what to do based on the files' statuses - if checkpoint_valid and not backup_valid: - # copy the checkpoint to the backup - logging.info("Backup invalid; copying checkpoint file") - shutil.copy(checkpoint_file, backup_file) - backup_valid = True - elif backup_valid and not checkpoint_valid: - logging.info("Checkpoint invalid; copying backup file") - # copy the backup to the checkpoint - shutil.copy(backup_file, checkpoint_file) - checkpoint_valid = True - return checkpoint_valid - def add_low_frequency_cutoff_opt(parser): """Adds the low-frequency-cutoff option to the given parser.""" @@ -325,7 +245,6 @@ def data_from_cli(opts): precision="double") # apply gates if not waiting to overwhiten if not opts.gate_overwhitened: - logging.info("Applying gates to strain data") strain_dict = apply_gates_to_td(strain_dict, gates) # get strain time series to use for PSD estimation @@ -350,7 +269,6 @@ def data_from_cli(opts): # FFT strain and save each of the length of the FFT, delta_f, and # low frequency cutoff to a dict - logging.info("FFT strain") stilde_dict = {} length_dict = {} delta_f_dict = {} diff --git a/gwin/sampler/__init__.py b/gwin/sampler/__init__.py index 6154aee..aa7cf3a 100644 --- a/gwin/sampler/__init__.py +++ b/gwin/sampler/__init__.py @@ -17,14 +17,43 @@ This modules provides a list of implemented samplers for parameter estimation. """ -from .kombine import KombineSampler -from .emcee import (EmceeEnsembleSampler, EmceePTSampler) -from .mcmc import MCMCSampler +from __future__ import absolute_import + +from .base import (initial_dist_from_config, create_new_output_file) +# from .kombine import KombineSampler +from .emcee import EmceeEnsembleSampler +# from .emcee_pt import EmceePTSampler +# from .mcmc import MCMCSampler # list of available samplers samplers = {cls.name: cls for cls in ( - KombineSampler, + # KombineSampler, EmceeEnsembleSampler, - EmceePTSampler, - MCMCSampler, + # EmceePTSampler, + # MCMCSampler, )} + + +def load_from_config(cp, model, **kwargs): + """Loads a sampler from the given config file. + + This looks for a name in the section ``[sampler]`` to determine which + sampler class to load. That sampler's ``from_config`` is then called. 
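+
+    For example, a hypothetical ``[sampler]`` section might look like the
+    following (the option names and values besides ``name`` are
+    illustrative)::
+
+        [sampler]
+        name = emcee
+        nwalkers = 200
+        niterations = 1000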
+ + Parameters + ---------- + cp : WorkflowConfigParser + Config parser to read from. + model : gwin.model + Which model to pass to the sampler. + \**kwargs : + All other keyword arguments are passed directly to the sampler's + ``from_config`` file. + + Returns + ------- + sampler : + The initialized sampler. + """ + name = cp.get('sampler', 'name') + return samplers[name].from_config(cp, model, **kwargs) diff --git a/gwin/sampler/base.py b/gwin/sampler/base.py index 3601c5b..41bc2b0 100644 --- a/gwin/sampler/base.py +++ b/gwin/sampler/base.py @@ -22,47 +22,50 @@ # ============================================================================= # """ -This modules provides classes and functions for using different sampler -packages for parameter estimation. +Defines the base sampler class to be inherited by all samplers. """ +from abc import ABCMeta, abstractmethod, abstractproperty +import os import numpy +import shutil from pycbc.io import FieldArray from pycbc.filter import autocorrelation import h5py import logging +from ..io import validate_checkpoint_files # # ============================================================================= # -# Samplers +# Base Sampler definition # # ============================================================================= # -class _BaseSampler(object): - """Base container class for running the inference sampler that will - generate the posterior distributions. + +class BaseSampler(object): + """Base container class for inference samplers. Parameters ---------- model : Model An instance of a model from ``gwin.models``. """ + __metaclass__ = ABCMeta name = None def __init__(self, model): self.model = model - self.lastclear = 0 - @classmethod - def from_cli(cls, opts, model, pool=None, - model_call=None): - """This function create an instance of this sampler from the given - command-line options. + # @classmethod <--uncomment when we move to python 3.3 + @abstractmethod + def from_config(cls, cp, model, nprocesses=1, use_mpi=False, + **kwargs): + """This should initialize the sampler given a config file. """ - raise NotImplementedError("from_cli function not set") + pass @property def variable_params(self): @@ -72,841 +75,196 @@ def variable_params(self): @property def sampling_params(self): - """Returns the sampling args used by the model. + """Returns the sampling params used by the model. """ return self.model.sampling_params @property - def chain(self): - """This function should return the past samples as a - [additional dimensions x] niterations x ndim array, where ndim are the - number of model params, niterations the number of iterations, and - additional dimeionions are any additional dimensions used by the - sampler (e.g, walkers, temperatures). + def static_params(self): + """Returns the model's fixed parameters. """ - return NotImplementedError("chain function not set.") + return self.model.static_params - @property + @abstractproperty def samples(self): - """This function should return the past samples as a [additional - dimensions x] niterations field array, where the fields are union - of the sampling args and the model params. - """ - return NotImplementedError("samples function not set.") + """A dict mapping variable_params to arrays of samples currently + in memory. The dictionary may also contain sampling_params. - @property - def clear_chain(self): - """This function should clear the current chain of samples from memory. + The sample arrays may have any shape, and may or may not be thinned. 
""" - return NotImplementedError("clear chain function not set.") + pass - @property - def niterations(self): - """Get the current number of iterations.""" - return self.chain.shape[-2] + self.lastclear - - @property - def acceptance_fraction(self): - """This function should return the fraction of steps accepted by each - walker as an array. - """ - return NotImplementedError("acceptance_fraction function not set.") - - @property - def lnpost(self): - """This function should return the natural logarithm of the likelihood - function used by the sampler as an - [additional dimensions] x niterations array. - """ - return NotImplementedError("lnpost function not set.") - - @property + @abstractproperty def model_stats(self): - """This function should return the prior and likelihood ratio of - samples as an [additional dimensions] x niterations - array. If the model did not return that info to the - sampler, it should return None. - """ - return NotImplementedError("model stats not set") + """A dict mapping model's metadata fields to arrays of values for + each sample in ``raw_samples``. - def burn_in(self, initial_values): - """This function should burn in the sampler. + The arrays may have any shape, and may or may not be thinned. """ - raise NotImplementedError("This sampler has no burn_in function.") + pass - def run(self, niterations): + @abstractmethod + def run(self): """This function should run the sampler. - """ - raise NotImplementedError("run function not set.") - @classmethod - def calculate_logevidence(cls, fp): - """This function should calculate the log evidence and its error using - the results in the given file. If the sampler does not support evidence - calculation, then this will raise a NotImplementedError. + Any checkpointing should be done internally in this function. """ - raise NotImplementedError("this sampler does not support evidence " - "calculation") + pass - # write and read functions - def write_metadata(self, fp, **kwargs): - """Writes metadata about this sampler to the given file. Metadata is - written to the file's `attrs`. + @abstractproperty + def io(self): + """A class that inherits from ``BaseInferenceFile`` to handle IO with + an hdf file. - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - **kwargs : - All keyword arguments are saved as separate arguments in the - file attrs. If any keyword argument is a dictionary, the keyword - will point to the list of keys in the the file's ``attrs``. Each - key is then stored as a separate attr with its corresponding value. - """ - fp.attrs['sampler'] = self.name - fp.attrs['model'] = self.model.name - fp.attrs['variable_params'] = list(self.variable_params) - fp.attrs['sampling_params'] = list(self.sampling_params) - fp.attrs["niterations"] = self.niterations - try: - fp.attrs["lognl"] = self.model.lognl - except AttributeError: - pass - for arg, val in kwargs.items(): - if val is None: - val = str(None) - if isinstance(val, dict): - fp.attrs[arg] = val.keys() - for key, item in val.items(): - if item is None: - item = str(None) - fp.attrs[key] = item - else: - fp.attrs[arg] = val - - @staticmethod - def write_logevidence(fp, lnz, dlnz): - """Writes the given log evidence and its error to the given file. - Results are saved to the file's 'log_evidence' and 'dlog_evidence' - attributes. - - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - lnz : float - The log of the evidence. 
- dlnz : float - The error in the estimate of the log evidence. + This should be a class, not an instance of class, so that the sampler + can initialize it when needed. """ - fp.attrs['log_evidence'] = lnz - fp.attrs['dlog_evidence'] = dlnz + pass - @staticmethod - def write_burn_in_iterations(fp, burn_in_iterations, is_burned_in=None): - """Writes the burn in iterations to the given file. + @abstractmethod + def set_initial_conditions(self, initial_distribution=None, + samples_file=None): + """Sets up the starting point for the sampler. - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - burn_in_iterations : array - Array of values giving the iteration of the burn in of each walker. - is_burned_in : array - Array of booleans indicating which chains are burned in. + Should also set the sampler's random state. """ - try: - fp['burn_in_iterations'][:] = burn_in_iterations - except KeyError: - fp['burn_in_iterations'] = burn_in_iterations - fp.attrs['burn_in_iterations'] = burn_in_iterations.max() - if is_burned_in is not None: - try: - fp['is_burned_in'][:] = is_burned_in - except KeyError: - fp['is_burned_in'] = is_burned_in - fp.attrs['is_burned_in'] = is_burned_in.all() - - @staticmethod - def write_state(fp): - """Saves the state of the sampler in a file. - """ - fp.write_random_state() + pass - @staticmethod - def set_state_from_file(fp): - """Sets the state of the sampler back to the instance saved in a file. + @abstractmethod + def checkpoint(self): + """The sampler must have a checkpoint method for dumping raw samples + and stats to the file type defined by ``io``. """ - numpy.random.set_state(fp.read_random_state()) + pass + @abstractmethod + def finalize(self): + """Do any finalization to the samples file before exiting.""" + pass -class BaseMCMCSampler(_BaseSampler): - """This class is used to construct the MCMC sampler from the kombine-like - packages. + def setup_output(self, output_file, force=False, injection_file=None): + """Sets up the sampler's checkpoint and output files. - Parameters - ---------- - sampler : sampler instance - An instance of an MCMC sampler similar to kombine or emcee. - model : model class - A model from ``gwin.models``. + The checkpoint file has the same name as the output file, but with + ``.checkpoint`` appended to the name. A backup file will also be + created. - Attributes - ---------- - sampler : - The MCMC sampler instance used. - p0 : nwalkers x ndim array - The initial position of the walkers. Set by using set_p0. If not set - yet, a ValueError is raised when the attribute is accessed. - pos : {None, array} - An array of the current walker positions. - """ - name = None - - def __init__(self, sampler, model): - self._sampler = sampler - self._pos = None - self._p0 = None - self._currentblob = None - self._nwalkers = None - self.lastclear = 0 - self.burn_in_iterations = None - # initialize - super(BaseMCMCSampler, self).__init__(model) - - @property - def sampler(self): - return self._sampler - - @property - def pos(self): - return self._pos - - def set_p0(self, samples_file=None, prior=None): - """Sets the initial position of the walkers. + If the output file already exists, an ``OSError`` will be raised. + This can be overridden by setting ``force`` to ``True``. Parameters ---------- - samples_file : InferenceFile, optional - If provided, use the last iteration in the given file for the - starting positions. 
- prior : JointDistribution, optional - Use the given prior to set the initial positions rather than - ``model``'s prior. - - Returns - ------- - p0 : array - An nwalkers x ndim array of the initial positions that were set. - """ - # create a (nwalker, ndim) array for initial positions - nwalkers = self.nwalkers - ndim = len(self.variable_params) - p0 = numpy.ones((nwalkers, ndim)) - # if samples are given then use those as initial positions - if samples_file is not None: - samples = self.read_samples(samples_file, self.variable_params, - iteration=-1) - # transform to sampling parameter space - samples = self.model.apply_sampling_transforms(samples) - # draw random samples if samples are not provided - else: - samples = self.model.prior_rvs(size=nwalkers, prior=prior) - # convert to 2D array - for i, param in enumerate(self.sampling_params): - p0[:, i] = samples[param] - self._p0 = p0 - return p0 - - @property - def p0(self): - if self._p0 is None: - raise ValueError("initial positions not set; run set_p0") - return self._p0 - - @property - def nwalkers(self): - """Get the number of walkers.""" - return self._nwalkers - - @property - def acceptance_fraction(self): - """Get the fraction of steps accepted by each walker as an array. - """ - return self._sampler.acceptance_fraction - - @property - def samples(self): - """Returns the samples in the chain as a FieldArray. - - If the sampling args are not the same as the model params, the - returned samples will have both the sampling and the model params. - - The returned FieldArray has dimension [additional dimensions x] - nwalkers x niterations. - """ - # chain is a [additional dimensions x] niterations x ndim array - samples = self.chain - sampling_params = self.sampling_params - # convert to dictionary to apply boundary conditions - samples = {param: samples[..., ii] for - ii, param in enumerate(sampling_params)} - samples = self.model.prior_distribution.apply_boundary_conditions( - **samples) - # now convert to field array - samples = FieldArray.from_arrays([samples[param] - for param in sampling_params], - names=sampling_params) - # apply transforms to go to model params space - if self.model.sampling_transforms is not None: - samples = self.model.sampling_transforms.apply(samples, - inverse=True) - return samples - - @property - def model_stats(self): - """Returns the model stats as a FieldArray, with field names - corresponding to the type of data returned by the model. - The returned array has shape nwalkers x niterations. If no additional - stats were returned to the sampler by the model, returns - None. - """ - stats = numpy.array(self._sampler.blobs) - if stats.size == 0: - return None - # we'll force arrays to float; this way, if there are `None`s in the - # blobs, they will be changed to `nan`s - arrays = {field: stats[..., fi].astype(float) - for fi, field in - enumerate(self.model.default_stats)} - return FieldArray.from_kwargs(**arrays).transpose() - - # write and read functions - def write_metadata(self, fp, **kwargs): - """Writes metadata about this sampler to the given file. Metadata is - written to the file's `attrs`. - - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - **kwargs : - All keyword args are written to the file's ``attrs``. - """ - super(BaseMCMCSampler, self).write_metadata(fp, **kwargs) - # add info about walkers, burn in - fp.attrs["nwalkers"] = self.nwalkers + sampler : sampler instance + Sampler + output_file : str + Name of the output file. 
+ force : bool, optional + If the output file already exists, overwrite it. + injection_file : str, optional + If an injection was added to the data, write its information. + """ + # check for backup file(s) + checkpoint_file = output_file + '.checkpoint' + backup_file = output_file + '.bkup' + # check if we have a good checkpoint and/or backup file + logging.info("Looking for checkpoint file") + checkpoint_valid = validate_checkpoint_files(checkpoint_file, + backup_file) + # Create a new file if the checkpoint doesn't exist, or if it is + # corrupted + self.new_checkpoint = False # keeps track if this is a new file or not + if not checkpoint_valid: + logging.info("Checkpoint not found or not valid") + create_new_output_file(self, checkpoint_file, force=force, + injection_file=injection_file) + # now the checkpoint is valid + self.new_checkpoint = True + # copy to backup + shutil.copy(checkpoint_file, backup_file) + # write the command line, startup + for fn in [checkpoint_file, backup_file]: + with self.io(fn, "a") as fp: + fp.write_command_line() + fp.write_resume_point() + # store + self.checkpoint_file = checkpoint_file + self.backup_file = backup_file + self.checkpoint_valid = checkpoint_valid - @staticmethod - def write_samples_group(fp, samples_group, parameters, samples, - start_iteration=None, max_iterations=None): - """Writes samples to the given file. - Results are written to: - - ``fp[samples_group/{vararg}]``, - - where ``{vararg}`` is the name of a model params. The samples are - written as an ``nwalkers x niterations`` array. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - samples_group : str - Name of samples group to write. - parameters : list - The parameters to write to the file. - samples : FieldArray - The samples to write. Should be a FieldArray with fields containing - the samples to write and shape nwalkers x niterations. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - """ - nwalkers, niterations = samples.shape - if max_iterations is not None and max_iterations < niterations: - raise IndexError("The provided max size is less than the " - "number of iterations") - group = samples_group + '/{name}' - # loop over number of dimensions - for param in parameters: - dataset_name = group.format(name=param) - istart = start_iteration - try: - fp_niterations = fp[dataset_name].shape[-1] - if istart is None: - istart = fp_niterations - istop = istart + niterations - if istop > fp_niterations: - # resize the dataset - fp[dataset_name].resize(istop, axis=1) - except KeyError: - # dataset doesn't exist yet - if istart is not None and istart != 0: - raise ValueError("non-zero start_iteration provided, " - "but dataset doesn't exist yet") - istart = 0 - istop = istart + niterations - fp.create_dataset(dataset_name, (nwalkers, istop), - maxshape=(nwalkers, max_iterations), - dtype=float, fletcher32=True) - fp[dataset_name][:, istart:istop] = samples[param] - - def write_chain(self, fp, start_iteration=None, max_iterations=None): - """Writes the samples from the current chain to the given file. 
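Concretely (output name illustrative), ``setup_output`` arranges the run's files around the requested output path, delegating creation to ``create_new_output_file`` defined below:

    sampler.setup_output("samples.hdf", force=True)
    # after this call:
    #   sampler.checkpoint_file == "samples.hdf.checkpoint"  (written during the run)
    #   sampler.backup_file     == "samples.hdf.bkup"        (kept as a synchronized copy)
    #   sampler.new_checkpoint is True if no valid checkpoint existed and a
    #   new file had to be created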
- - Results are written to: - - `fp[fp.samples_group/{field}/(temp{k}/)walker{i}]`, - - where `{i}` is the index of a walker, `{field}` is the name of each - field returned by ``model_stats``, and, if the sampler is - multitempered, `{k}` is the temperature. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - samples_group : str - Name of samples group to write. - """ - # samples is a nwalkers x niterations field array - samples = self.samples - parameters = self.variable_params - samples_group = fp.samples_group - # write data - self.write_samples_group(fp, samples_group, parameters, samples, - start_iteration=start_iteration, - max_iterations=max_iterations) - - def write_model_stats(self, fp, start_iteration=None, - max_iterations=None): - """Writes the ``model_stats`` to the given file. - - Results are written to: - - `fp[fp.stats_group/{field}/(temp{k}/)walker{i}]`, - - where `{i}` is the index of a walker, `{field}` is the name of each - field returned by ``model_stats``, and, if the sampler is - multitempered, `{k}` is the temperature. If nothing is returned by - ``model_stats``, this does nothing. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - - Returns - ------- - stats : {FieldArray, None} - The stats that were written, as a FieldArray. If there were no - stats, returns None. - """ - samples = self.model_stats - if samples is None: - return None - # ensure the prior is in the model params parameter space - if 'logjacobian' in samples.fieldnames: - samples['logprior'] -= samples['logjacobian'] - parameters = samples.fieldnames - samples_group = fp.stats_group - # write data - self.write_samples_group(fp, samples_group, parameters, samples, - start_iteration=start_iteration, - max_iterations=max_iterations) - return samples - - def write_acceptance_fraction(self, fp): - """Write acceptance_fraction data to file. Results are written to - `fp[acceptance_fraction]`. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - """ - dataset_name = "acceptance_fraction" - try: - fp[dataset_name][:] = self.acceptance_fraction - except KeyError: - # dataset doesn't exist yet, create it - fp[dataset_name] = self.acceptance_fraction - - def write_results(self, fp, start_iteration=None, - max_iterations=None, **metadata): - """Writes metadata, samples, model stats, and acceptance fraction - to the given file. Also computes and writes the autocorrleation lengths - of the chains. See the various write function for details. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. 
- start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the acceptance fraction has not previously been - written to the file. The default (None) is to use the maximum size - allowed by h5py. - \**metadata : - All other keyword arguments are passed to ``write_metadata``. - """ - self.write_metadata(fp, **metadata) - self.write_chain(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_model_stats(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_acceptance_fraction(fp) - self.write_state(fp) - - @staticmethod - def _read_fields(fp, fields_group, fields, array_class, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, walkers=None, flatten=True): - """Base function for reading samples and model stats. See - `read_samples` and `read_model_stats` for details. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - fields_group : str - The name of the group to retrieve the desired fields. - fields : list - The list of field names to retrieve. Must be names of groups in - `fp[fields_group/]`. - array_class : FieldArray or similar - The type of array to return. Must have a `from_kwargs` attribute. - - For other details on keyword arguments, see `read_samples` and - `read_model_stats`. - - Returns - ------- - array_class - An instance of the given array class populated with values - retrieved from the fields. - """ - # walkers to load - if walkers is not None: - widx = numpy.zeros(fp.nwalkers, dtype=bool) - widx[walkers] = True - else: - widx = slice(0, None) - # get the slice to use - if iteration is not None: - get_index = iteration - else: - if thin_end is None: - # use the number of current iterations - thin_end = fp.niterations - get_index = fp.get_slice(thin_start=thin_start, thin_end=thin_end, - thin_interval=thin_interval) - # load - arrays = {} - group = fields_group + '/{name}' - for name in fields: - arr = fp[group.format(name=name)][widx, get_index] - if flatten: - arr = arr.flatten() - arrays[name] = arr - return array_class.from_kwargs(**arrays) - - @classmethod - def read_samples(cls, fp, parameters, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, walkers=None, flatten=True, - samples_group=None, array_class=None): - """Reads samples for the given parameter(s). - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - parameters : (list of) strings - The parameter(s) to retrieve. A parameter can be the name of any - field in `fp[fp.samples_group]`, a virtual field or method of - `FieldArray` (as long as the file contains the necessary fields - to derive the virtual field or method), and/or a function of - these. - thin_start : int - Index of the sample to begin returning samples. Default is to read - samples after burn in. To start from the beginning set thin_start - to 0. - thin_interval : int - Interval to accept every i-th sample. Default is to use the - `fp.acl`. If `fp.acl` is not set, then use all samples - (set thin_interval to 1). - thin_end : int - Index of the last sample to read. If not given then - `fp.niterations` is used. - iteration : int - Get a single iteration. If provided, will override the - `thin_{start/interval/end}` arguments. 
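The class-method readers removed here are replaced by the file-handler classes in ``gwin.io``; a hedged sketch of the new read pattern (file and parameter names are illustrative):

    from gwin.io import EmceeFile

    with EmceeFile("samples.hdf.checkpoint", "r") as fp:
        # samples of all variable parameters from the last iteration
        last = fp.read_samples(fp.variable_params, iteration=-1)
        # unthinned per-walker chain for a single parameter
        raw = fp.read_raw_samples("mass1", thin_interval=1,
                                  flatten=False)["mass1"]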
- walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - flatten : {True, bool} - The returned array will be one dimensional, with all desired - samples from all desired walkers concatenated together. If False, - the returned array will have dimension requested walkers - x requested iterations. - samples_group : {None, str} - The group in `fp` from which to retrieve the parameter fields. If - None, searches in `fp.samples_group`. - array_class : {None, array class} - The type of array to return. The class must have a `from_kwargs` - class method and a `parse_parameters` method. If None, will return - a FieldArray. - - Returns - ------- - array_class - Samples for the given parameters, as an instance of a the given - `array_class` (`FieldArray` if `array_class` is None). - """ - # get the group to load from - if samples_group is None: - samples_group = fp.samples_group - # get the type of array class to use - if array_class is None: - array_class = FieldArray - # get the names of fields needed for the given parameters - possible_fields = fp[samples_group].keys() - loadfields = array_class.parse_parameters(parameters, possible_fields) - return cls._read_fields(fp, samples_group, loadfields, array_class, - thin_start=thin_start, - thin_interval=thin_interval, thin_end=thin_end, - iteration=iteration, walkers=walkers, - flatten=flatten) - - @classmethod - def n_independent_samples(cls, fp): - """Returns the number of independent samples stored in a file. - - The number of independent samples are counted starting from after - burn-in. If the sampler hasn't burned in yet, then 0 is returned. +# +# ============================================================================= +# +# Convenience functions +# +# ============================================================================= +# - Parameters - ----------- - fp : InferenceFile - An open file handler to read. - - Returns - ------- - int - The number of independent samples. - """ - # check if burned in - if not fp.is_burned_in: - return 0 - # we'll just read a single parameter from the file - samples = cls.read_samples(fp, fp.variable_params[0]) - return samples.size +def create_new_output_file(sampler, filename, force=False, injection_file=None, + **kwargs): + """Creates a new output file. - @staticmethod - def read_acceptance_fraction(fp, walkers=None): - """Reads the acceptance fraction from the given file. + If the output file already exists, an ``OSError`` will be raised. This can + be overridden by setting ``force`` to ``True``. - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - - Returns - ------- - array - Array of acceptance fractions with shape (requested walkers,). - """ - group = 'acceptance_fraction' - if walkers is None: - wmask = numpy.ones(fp.nwalkers, dtype=bool) + Parameters + ---------- + sampler : sampler instance + Sampler + filename : str + Name of the file to create. + force : bool, optional + Create the file even if it already exists. Default is False. + injection_file : str, optional + If an injection was added to the data, write its information. + \**kwargs : + All other keyword arguments are passed through to the file's + ``write_metadata`` function. 
+ """ + if os.path.exists(filename): + if force: + os.remove(filename) else: - wmask = numpy.zeros(fp.nwalkers, dtype=bool) - wmask[walkers] = True - return fp[group][wmask] + raise OSError("output-file already exists; use force if you " + "wish to overwrite it.") + logging.info("Creating file {}".format(filename)) + with sampler.io(filename, "w") as fp: + # save the sampler's metadata + fp.write_sampler_metadata(sampler) + # save injection parameters + if injection_file is not None: + logging.info("Writing injection file to output") + # just use the first one + fp.write_injections(injection_file) - @classmethod - def compute_acfs(cls, fp, start_index=None, end_index=None, - per_walker=False, walkers=None, parameters=None): - """Computes the autocorrleation function of the model params in the - given file. - By default, parameter values are averaged over all walkers at each - iteration. The ACF is then calculated over the averaged chain. An - ACF per-walker will be returned instead if ``per_walker=True``. +def initial_dist_from_config(cp): + """Loads a distribution for the sampler start from the given config file. - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - per_walker : optional, bool - Return the ACF for each walker separately. Default is False. - walkers : optional, int or array - Calculate the ACF using only the given walkers. If None (the - default) all walkers will be used. - parameters : optional, str or array - Calculate the ACF for only the given parameters. If None (the - default) will calculate the ACF for all of the model params. - - Returns - ------- - FieldArray - A ``FieldArray`` of the ACF vs iteration for each parameter. If - `per-walker` is True, the FieldArray will have shape - ``nwalkers x niterations``. - """ - acfs = {} - if parameters is None: - parameters = fp.variable_params - if isinstance(parameters, str) or isinstance(parameters, unicode): - parameters = [parameters] - for param in parameters: - if per_walker: - # just call myself with a single walker - if walkers is None: - walkers = numpy.arange(fp.nwalkers) - arrays = [cls.compute_acfs(fp, start_index=start_index, - end_index=end_index, - per_walker=False, walkers=ii, - parameters=param)[param] - for ii in walkers] - acfs[param] = numpy.vstack(arrays) - else: - samples = cls.read_samples(fp, param, - thin_start=start_index, - thin_interval=1, thin_end=end_index, - walkers=walkers, - flatten=False)[param] - samples = samples.mean(axis=0) - acfs[param] = autocorrelation.calculate_acf(samples).numpy() - return FieldArray.from_kwargs(**acfs) - - @classmethod - def compute_acls(cls, fp, start_index=None, end_index=None): - """Computes the autocorrleation length for all model params in the - given file. - - Parameter values are averaged over all walkers at each iteration. - The ACL is then calculated over the averaged chain. If the returned ACL - is `inf`, will default to the number of current iterations. + A distribution will only be loaded if the config file has a [initial-*] + section(s). - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. 
If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - - Returns - ------- - dict - A dictionary giving the ACL for each parameter. - """ - acls = {} - for param in fp.variable_params: - samples = cls.read_samples(fp, param, - thin_start=start_index, - thin_interval=1, thin_end=end_index, - flatten=False)[param] - samples = samples.mean(axis=0) - acl = autocorrelation.calculate_acl(samples) - if numpy.isinf(acl): - acl = samples.size - acls[param] = acl - return acls - - @staticmethod - def write_acls(fp, acls): - """Writes the given autocorrelation lengths to the given file. - - The ACL of each parameter is saved to ``fp['acls/{param}']``. - The maximum over all the parameters is saved to the file's 'acl' - attribute. - - Parameters - ---------- - fp : InferenceFile - An open file handler to write the samples to. - acls : dict - A dictionary of ACLs keyed by the parameter. - - Returns - ------- - ACL - The maximum of the acls that was written to the file. - """ - group = 'acls/{}' - # write the individual acls - for param in acls: - try: - # we need to use the write_direct function because it's - # apparently the only way to update scalars in h5py - fp[group.format(param)].write_direct(numpy.array(acls[param])) - except KeyError: - # dataset doesn't exist yet - fp[group.format(param)] = acls[param] - # write the maximum over all params - fp.attrs['acl'] = numpy.array(acls.values()).max() - return fp.attrs['acl'] - - @staticmethod - def read_acls(fp): - """Reads the acls of all the parameters in the given file. - - Parameters - ---------- - fp : InferenceFile - An open file handler to read the acls from. - - Returns - ------- - dict - A dictionary of the ACLs, keyed by the parameter name. - """ - group = fp['acls'] - return {param: group[param].value for param in group.keys()} + Parameters + ---------- + cp : Config parser + The config parser to try to load from. + + Returns + ------- + JointDistribution or None : + The initial distribution. If no [initial-*] section found in the + config file, will just return None. + """ + if len(cp.get_subsections("initial")): + logging.info("Using a different distribution for the starting points " + "than the prior.") + initial_dists = distributions.read_distributions_from_config( + cp, section="initial") + constraints = distributions.read_constraints_from_config( + cp, constraint_section="initial_constraint") + init_dist = distributions.JointDistribution( + sampler.variable_params, *initial_dists, + **{"constraints": constraints}) + else: + init_dist = None + return init_dist diff --git a/gwin/sampler/base_mcmc.py b/gwin/sampler/base_mcmc.py new file mode 100644 index 0000000..d5afbc0 --- /dev/null +++ b/gwin/sampler/base_mcmc.py @@ -0,0 +1,564 @@ +# Copyright (C) 2016 Christopher M. Biwer, Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. 
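For example, ``initial_dist_from_config`` above would pick up a section such as the following (parameter name and bounds illustrative, following pycbc's distribution-from-config option syntax), and the walkers' starting positions would then be drawn from it instead of from the prior. Note that, as written, the function refers to ``sampler.variable_params``, which is not defined in its scope, so the variable parameters still have to be supplied from elsewhere.

    [initial-mass1]
    name = uniform
    min-mass1 = 10
    max-mass1 = 80

    init_dist = initial_dist_from_config(cp)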
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""Provides constructor classes and convenience functions for MCMC samplers.""" + +from __future__ import absolute_import + +from abc import (ABCMeta, abstractmethod, abstractproperty) +import logging +import numpy +from pycbc.filter import autocorrelation + +from ..io import validate_checkpoint_files + +# +# ============================================================================= +# +# Convenience functions +# +# ============================================================================= +# + + +def raw_samples_to_dict(sampler, raw_samples): + """Convenience function for converting ND array to a dict of samples. + + The samples are assumed to have dimension + ``[sampler.base_shape x] niterations x len(sampler.sampling_params)``. + + Parameters + ---------- + sampler : sampler instance + An instance of an MCMC sampler. + raw_samples : array + The array of samples to convert. + + Returns + ------- + dict : + A dictionary mapping the raw samples to the variable params. If the + sampling params are not the same as the variable params, they will + also be included. Each array will have shape + ``[sampler.base_shape x] niterations``. + """ + sampling_params = sampler.sampling_params + # convert to dictionary + samples = {param: raw_samples[..., ii] for + ii, param in enumerate(sampling_params)} + # apply boundary conditions + samples = sampler.model.prior_distribution.apply_boundary_conditions( + **samples) + # apply transforms to go to model's variable params space + if sampler.model.sampling_transforms is not None: + samples = sampler.model.sampling_transforms.apply( + samples, inverse=True) + return samples + + +def raw_stats_to_dict(sampler, raw_stats): + """Converts an ND array of model stats to a dict. + + The ``raw_stats`` may either be a numpy array or a list. If the + former, the stats are assumed to have shape + ``[sampler.base_shape x] niterations x nstats, where nstats are the number + of stats returned by ``sampler.model.default_stats``. If the latter, the + list is cast to an array that is assumed to be the same shape as if an + array was given. + + Parameters + ---------- + sampler : sampler instance + An instance of an MCMC sampler. + raw_stats : array or list + The stats to convert. + + Returns + ------- + dict : + A dictionary mapping the model's ``default_stats`` to arrays of values. + Each array will have shape ``[sampler.base_shape x] niterations``. + """ + if not isinstance(raw_stats, numpy.ndarray): + # Assume list. Since the model returns a tuple of values, this should + # be a [sampler.base_shape x] x niterations list of tuples. We can + # therefore immediately convert this to a ND array. + raw_stats = numpy.array(raw_stats) + return {stat: raw_stats[..., ii] + for (ii, stat) in enumerate(sampler.model.default_stats)} + +# +# ============================================================================= +# +# BaseMCMC definition +# +# ============================================================================= +# + + +class BaseMCMC(object): + """This class provides methods common to MCMCs. + + It is not a sampler class itself. 
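To make the two helper functions above concrete (shapes, parameter names, and stat names are illustrative): for an ensemble sampler with 200 walkers, no temperatures, and sampling parameters ``mass1``, ``mass2``:

    # raw_samples from the underlying sampler has shape (200, niterations, 2)
    samples = raw_samples_to_dict(sampler, raw_samples)
    samples['mass1'].shape        # -> (200, niterations)

    # raw_stats may be a (200, niterations, nstats) array, or the equivalent
    # nested list of tuples returned by the model
    stats = raw_stats_to_dict(sampler, raw_stats)
    stats['loglikelihood'].shape  # -> (200, niterations); keys are the
                                  # model's default_stats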
Sampler classes can inherit from this + along with ``BaseSampler``. + + Attributes + ---------- + p0 : dict + A dictionary of the initial position of the walkers. Set by using + ``set_p0``. If not set yet, a ``ValueError`` is raised when the + attribute is accessed. + pos : dict + A dictionary of the current walker positions. If the sampler hasn't + been run yet, returns p0. + """ + __metaclass__ = ABCMeta + + _lastclear = None # the iteration when samples were cleared from memory + _itercounter = None # the number of iterations since the last clear + _pos = None + _p0 = None + _nwalkers = None + _burn_in = None + _checkpoint_interval = None + _target_niterations = None + _target_eff_nsamples = None + + @abstractproperty + def base_shape(self): + """What shape the sampler's samples arrays are in, excluding + the iterations dimension. + + For example, if a sampler uses 20 walkers and 3 temperatures, this + would be ``(3, 20)``. If a sampler only uses a single walker and no + temperatures this would be ``()``. + """ + pass + + @property + def nwalkers(self): + """Get the number of walkers.""" + if self._nwalkers is None: + raise ValueError("number of walkers not set") + return self._nwalkers + + @property + def niterations(self): + """Get the current number of iterations.""" + itercounter = self._itercounter + if itercounter is None: + itercounter = 0 + lastclear = self._lastclear + if lastclear is None: + lastclear = 0 + return itercounter + lastclear + + @property + def checkpoint_interval(self): + """The number of iterations to do between checkpoints.""" + return self._checkpoint_interval + + @property + def target_niterations(self): + """The number of iterations the sampler should run for.""" + return self._target_niterations + + @property + def target_eff_nsamples(self): + """The target number of effective samples the sampler should get.""" + return self._target_eff_nsamples + + def set_target(self, niterations=None, eff_nsamples=None): + """Sets the target niterations/nsamples for the sampler. + + One or the other must be provided, not both. + """ + if niterations is None and eff_nsamples is None: + raise ValueError("Must provide a target niterations or " + "eff_nsamples") + if niterations is not None and eff_nsamples is not None: + raise ValueError("Must provide a target niterations or " + "eff_nsamples, not both") + self._target_niterations = int(niterations) \ + if niterations is not None else None + self._target_eff_nsamples = int(eff_nsamples) \ + if eff_nsamples is not None else None + + @abstractmethod + def clear_samples(self): + """A method to clear samples from memory.""" + pass + + @property + def pos(self): + pos = self._pos + if pos is None: + return self.p0 + # convert to dict + pos = {param: self._pos[..., k] + for (k, param) in enumerate(self.sampling_params)} + return pos + + @property + def p0(self): + """The starting position of the walkers in the sampling param space. + + The returned object is a dict mapping the sampling parameters to the + values. + """ + if self._p0 is None: + raise ValueError("initial positions not set; run set_p0") + # convert to dict + p0 = {param: self._p0[..., k] + for (k, param) in enumerate(self.sampling_params)} + return p0 + + def set_p0(self, samples_file=None, prior=None): + """Sets the initial position of the walkers. + + Parameters + ---------- + samples_file : InferenceFile, optional + If provided, use the last iteration in the given file for the + starting positions. 
+ prior : JointDistribution, optional + Use the given prior to set the initial positions rather than + ``model``'s prior. + + Returns + ------- + p0 : dict + A dictionary maping sampling params to the starting positions. + """ + # if samples are given then use those as initial positions + if samples_file is not None: + with self.io(samples_file, 'r') as fp: + samples = fp.read_samples(self.variable_params, + iteration=-1) + # make sure we have the same shape + assert samples.shape == self.base_shape, ( + "samples in file {} have shape {}, but I have shape {}". + format(samples_file, samples.shape, self.base_shape)) + # transform to sampling parameter space + if self.model.sampling_transforms is not None: + samples = self.model.sampling_transforms.apply(samples) + # draw random samples if samples are not provided + else: + nsamples = numpy.prod(self.base_shape) + samples = self.model.prior_rvs(size=nsamples, prior=prior).reshape( + self.base_shape) + # store as ND array with shape [base_shape] x nparams + ndim = len(self.variable_params) + p0 = numpy.ones(list(self.base_shape)+[ndim]) + for i, param in enumerate(self.sampling_params): + p0[..., i] = samples[param] + self._p0 = p0 + return self.p0 + + def set_initial_conditions(self, initial_distribution=None, + samples_file=None): + """Sets the initial starting point for the MCMC. + + If a starting samples file is provided, will also load the random + state from it. + """ + self.set_p0(samples_file=samples_file, prior=initial_distribution) + # if a samples file was provided, use it to set the state of the + # sampler + if samples_file is not None: + self.set_state_from_file(samples_file) + + @abstractmethod + def set_state_from_file(self, filename): + """Sets the state of the sampler to the instance saved in a file. + """ + pass + + def run(self): + """Runs the sampler.""" + if self.target_eff_nsamples and self.checkpoint_interval is None: + raise ValueError("A checkpoint interval must be set if " + "targetting an effective number of samples") + # get the starting number of samples: + # "nsamples" keeps track of the number of samples we've obtained (if + # target_eff_nsamples is not None, this is the effective number of + # samples; otherwise, this is the total number of samples). 
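Putting the ``BaseMCMC`` pieces together, a driver script would use these methods roughly as follows (a hedged sketch, not the gwin executable's actual code; ``burn_in_tests`` and ``init_dist`` are hypothetical objects):

    sampler.set_target(eff_nsamples=5000)  # requires a checkpoint interval, since
                                           # the effective count is re-evaluated
                                           # at each checkpoint
    sampler.set_burn_in(burn_in_tests)     # optional, e.g. an MCMCBurnInTests instance
    sampler.setup_output("samples.hdf", force=True)
    sampler.set_initial_conditions(initial_distribution=init_dist)
    sampler.run()      # loops run_mcmc(interval) -> checkpoint() until the target is met
    sampler.finalize()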
+ # _lastclear is the number of iterations that the file already + # contains (either due to sampler burn-in, or a previous checkpoint) + if self.new_checkpoint: + self._lastclear = 0 + else: + with self.io(self.checkpoint_file, "r") as fp: + self._lastclear = fp.niterations + if self.target_eff_nsamples is not None: + target_nsamples = self.target_eff_nsamples + with self.io(self.checkpoint_file, "r") as fp: + nsamples = fp.effective_nsamples + elif self.target_niterations is not None: + # the number of samples is the number of iterations times the + # number of walkers + target_nsamples = self.nwalkers * self.target_niterations + nsamples = self._lastclear * self.nwalkers + else: + raise ValueError("must set either target_eff_nsamples or " + "target_niterations; see set_target") + self._itercounter = 0 + # figure out the interval to use + iterinterval = self.checkpoint_interval + if iterinterval is None: + iterinterval = self.target_niterations + # run sampler until we have the desired number of samples + while nsamples < target_nsamples: + # adjust the interval if we would go past the number of iterations + if self.target_niterations is not None and ( + self.niterations + iterinterval > self.target_niterations): + iterinterval = self.target_niterations - self.niterations + # run sampler and set initial values to None so that sampler + # picks up from where it left off next call + logging.info("Running sampler for {} to {} iterations".format( + self.niterations, self.niterations + iterinterval)) + # run the underlying sampler for the desired interval + self.run_mcmc(iterinterval) + # update the itercounter + self._itercounter = self._itercounter + iterinterval + # dump the current results + self.checkpoint() + # update nsamples for next loop + if self.target_eff_nsamples is not None: + nsamples = self.effective_nsamples + logging.info("Have {} effective samples post burn in".format( + nsamples)) + else: + nsamples += iterinterval * self.nwalkers + + @property + def burn_in(self): + """The class for doing burn-in tests (if specified).""" + return self._burn_in + + def set_burn_in(self, burn_in): + """Sets the object to use for doing burn-in tests.""" + self._burn_in = burn_in + + @property + def effective_nsamples(self): + """The effective number of samples post burn-in that the sampler has + acquired so far.""" + try: + acl = numpy.array(self.acls.values()).max() + except (AttributeError, TypeError): + acl = numpy.inf + if self.burn_in is None: + nperwalker = max(int(self.niterations // acl), 1) + elif self.burn_in.is_burned_in: + nperwalker = int( + (self.niterations - self.burn_in.burn_in_iteration) // acl) + # after burn in, we always have atleast 1 sample per walker + nperwalker = max(nperwalker, 1) + else: + nperwalker = 0 + return self.nwalkers * nperwalker + + @abstractmethod + def run_mcmc(self, niterations): + """Run the MCMC for the given number of iterations.""" + pass + + @abstractmethod + def write_results(self, filename): + """Should write all samples currently in memory to the given file.""" + pass + + def checkpoint(self): + """Dumps current samples to the checkpoint file.""" + # write new samples + logging.info("Writing samples to files") + for fn in [self.checkpoint_file, self.backup_file]: + self.write_results(fn) + with self.io(fn, "a") as fp: + # write the current number of iterations + fp.write_niterations(self.niterations) + # check for burn in, compute the acls + self.acls = None + if self.burn_in is not None: + logging.info("Updating burn in") + 
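For a concrete sense of the effective-sample bookkeeping (numbers purely illustrative): with 200 walkers, 12000 iterations, burn-in at iteration 2000, and a maximum ACL of 25,

    nperwalker = (12000 - 2000) // 25      # = 400 independent samples per walker
    effective_nsamples = 200 * nperwalker  # = 80000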
self.burn_in.evaluate(self.checkpoint_file) + burn_in_iter = self.burn_in.burn_in_iteration + logging.info("Is burned in: {}".format(self.burn_in.is_burned_in)) + if self.burn_in.is_burned_in: + logging.info("Burn-in iteration: {}".format( + self.burn_in.burn_in_iteration)) + else: + burn_in_iter = 0 + # Compute acls; the burn_in test may have calculated an acl and saved + # it, in which case we don't need to do it again. + if self.acls is None: + logging.info("Computing acls") + self.acls = self.compute_acl(self.checkpoint_file, + start_index=burn_in_iter) + logging.info("ACL: {}".format(numpy.array(self.acls.values()).max())) + # write + for fn in [self.checkpoint_file, self.backup_file]: + with self.io(fn, "a") as fp: + if self.burn_in is not None: + fp.write_burn_in(self.burn_in) + if self.acls is not None: + fp.write_acls(self.acls) + # write effective number of samples + fp.write_effective_nsamples(self.effective_nsamples) + # check validity + logging.info("Validating checkpoint and backup files") + checkpoint_valid = validate_checkpoint_files( + self.checkpoint_file, self.backup_file) + if not checkpoint_valid: + raise IOError("error writing to checkpoint file") + # clear the in-memory chain to save memory + logging.info("Clearing samples from memory") + self.clear_samples() + + @abstractmethod + def compute_acf(cls, filename, **kwargs): + """A method to compute the autocorrelation function of samples in the + given file.""" + pass + + @abstractmethod + def compute_acl(cls, filename, **kwargs): + """A method to compute the autocorrelation length of samples in the + given file.""" + pass + + +class MCMCAutocorrSupport(object): + """Provides class methods for calculating ensemble ACFs/ACLs. + """ + + @classmethod + def compute_acf(cls, filename, start_index=None, end_index=None, + per_walker=False, walkers=None, parameters=None): + """Computes the autocorrleation function of the model params in the + given file. + + By default, parameter values are averaged over all walkers at each + iteration. The ACF is then calculated over the averaged chain. An + ACF per-walker will be returned instead if ``per_walker=True``. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACFs for. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + per_walker : optional, bool + Return the ACF for each walker separately. Default is False. + walkers : optional, int or array + Calculate the ACF using only the given walkers. If None (the + default) all walkers will be used. + parameters : optional, str or array + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker`` is True, the arrays will have shape + ``nwalkers x niterations``. 
+ """ + acfs = {} + with cls._io(filename, 'r') as fp: + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, str) or isinstance(parameters, unicode): + parameters = [parameters] + for param in parameters: + if per_walker: + # just call myself with a single walker + if walkers is None: + walkers = numpy.arange(fp.nwalkers) + arrays = [ + cls.compute_acf(filename, start_index=start_index, + end_index=end_index, + per_walker=False, walkers=ii, + parameters=param)[param] + for ii in walkers] + acfs[param] = numpy.vstack(arrays) + else: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, walkers=walkers, + flatten=False)[param] + samples = samples.mean(axis=0) + acfs[param] = autocorrelation.calculate_acf( + samples).numpy() + return acfs + + @classmethod + def compute_acl(cls, filename, start_index=None, end_index=None): + """Computes the autocorrleation length for all model params in the + given file. + + Parameter values are averaged over all walkers at each iteration. + The ACL is then calculated over the averaged chain. If the returned ACL + is `inf`, will default to the number of current iterations. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + + Returns + ------- + dict + A dictionary giving the ACL for each parameter. + """ + acls = {} + with cls._io(filename, 'r') as fp: + for param in fp.variable_params: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, flatten=False)[param] + samples = samples.mean(axis=0) + # if < 10 samples, just set to inf + # Note: this should be done inside of pycbc's autocorrelation + # function + if samples.size < 10: + acl = numpy.inf + else: + acl = autocorrelation.calculate_acl(samples) + if acl <= 0: + acl = numpy.inf + acls[param] = acl + return acls diff --git a/gwin/sampler/emcee.py b/gwin/sampler/emcee.py index 97786b5..443f89d 100644 --- a/gwin/sampler/emcee.py +++ b/gwin/sampler/emcee.py @@ -29,10 +29,16 @@ from __future__ import absolute_import import numpy -from pycbc.io import FieldArray -from pycbc.filter import autocorrelation +import emcee +from pycbc.pool import choose_pool +from pycbc.workflow import ConfigParser -from .base import BaseMCMCSampler +from .base import BaseSampler +from .base_mcmc import (BaseMCMC, MCMCAutocorrSupport, raw_samples_to_dict, + raw_stats_to_dict) +from ..burn_in import MCMCBurnInTests +from ..io import EmceeFile +from .. import models # @@ -43,7 +49,7 @@ # ============================================================================= # -class EmceeEnsembleSampler(BaseMCMCSampler): +class EmceeEnsembleSampler(MCMCAutocorrSupport, BaseMCMC, BaseSampler): """This class is used to construct an MCMC sampler from the emcee package's EnsembleSampler. @@ -59,896 +65,168 @@ class EmceeEnsembleSampler(BaseMCMCSampler): cores/nodes/etc. 
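Because these are class methods that read from a samples file, they can be used offline, without a live sampler instance; a sketch using the emcee sampler defined next (file and parameter names are illustrative):

    from gwin.sampler import EmceeEnsembleSampler

    # per-parameter autocorrelation lengths, ignoring the first 2000 iterations
    acls = EmceeEnsembleSampler.compute_acl("samples.hdf.checkpoint",
                                            start_index=2000)
    # per-walker autocorrelation functions for selected parameters
    acfs = EmceeEnsembleSampler.compute_acf("samples.hdf.checkpoint",
                                            parameters=["mass1", "mass2"],
                                            per_walker=True)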
""" name = "emcee" - - def __init__(self, model, nwalkers, pool=None, - model_call=None): - try: - import emcee - except ImportError: - raise ImportError("emcee is not installed.") - - if model_call is None: - model_call = model - + _io = EmceeFile + burn_in_class = MCMCBurnInTests + + def __init__(self, model, nwalkers, checkpoint_interval=None, + logpost_function=None, nprocesses=1, use_mpi=False): + + self.model = model + # create a wrapper for calling the model + if logpost_function is None: + logpost_function = 'logposterior' + model_call = models.CallModel(model, logpost_function) + + # Set up the pool + if nprocesses > 1: + # these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + model_call = models._call_global_model + pool = choose_pool(mpi=use_mpi, processes=nprocesses) + if pool is not None: + pool.count = nprocesses + + # set up emcee + self._nwalkers = nwalkers ndim = len(model.variable_params) - sampler = emcee.EnsembleSampler(nwalkers, ndim, - model_call, - pool=pool) + self._sampler = emcee.EnsembleSampler(nwalkers, ndim, model_call, + pool=pool) # emcee uses it's own internal random number generator; we'll set it # to have the same state as the numpy generator rstate = numpy.random.get_state() - sampler.random_state = rstate - # initialize - super(EmceeEnsembleSampler, self).__init__( - sampler, model) - self._nwalkers = nwalkers - - @classmethod - def from_cli(cls, opts, model, pool=None, - model_call=None): - """Create an instance of this sampler from the given command-line - options. + self._sampler.random_state = rstate + self._checkpoint_interval = checkpoint_interval - Parameters - ---------- - opts : ArgumentParser options - The options to parse. - model : LikelihoodEvaluator - The model to use with the sampler. + @property + def io(self): + return self._io - Returns - ------- - EmceeEnsembleSampler - An emcee sampler initialized based on the given arguments. - """ - return cls(model, opts.nwalkers, - pool=pool, model_call=model_call) + @property + def base_shape(self): + return (self.nwalkers,) @property - def lnpost(self): - """Get the natural logarithm of the likelihood as an - nwalkers x niterations array. + def samples(self): + """A dict mapping ``variable_params`` to arrays of samples currently + in memory. + + The arrays have shape ``nwalkers x niterations``. """ - # emcee returns nwalkers x niterations - return self._sampler.lnprobability + # emcee stores samples to it's chain attribute as a + # nwalker x niterations x ndim array + raw_samples = self._sampler.chain + return raw_samples_to_dict(self, raw_samples) @property - def chain(self): - """Get all past samples as an nwalker x niterations x ndim array.""" - # emcee returns the chain as nwalker x niterations x ndim - return self._sampler.chain + def model_stats(self): + """A dict mapping the model's ``default_stats`` to arrays of values. - def clear_chain(self): - """Clears the chain and blobs from memory. + The returned array has shape ``nwalkers x niterations``. + """ + raw_stats = numpy.array(self._sampler.blobs) + # raw_stats has shape niterations x nwalkers x nstats; transpose + # so that it has shape nwalkers x niterations x nstats + raw_stats = raw_stats.transpose((1, 0, 2)) + return raw_stats_to_dict(self, raw_stats) + + def clear_samples(self): + """Clears the samples and stats from memory. 
""" # store the iteration that the clear is occuring on - self.lastclear = self.niterations + self._lastclear = self.niterations + self._itercounter = 0 # now clear the chain self._sampler.reset() self._sampler.clear_blobs() - def set_p0(self, samples_file=None, prior=None): - """Sets the initial position of the walkers. - - Parameters - ---------- - samples_file : InferenceFile, optional - If provided, use the last iteration in the given file for the - starting positions. - prior : JointDistribution, optional - Use the given prior to set the initial positions rather than - ``model``'s prior. - - Returns - ------- - p0 : array - An nwalkers x ndim array of the initial positions that were set. - """ - # we define set_p0 here to ensure that emcee's internal random number - # generator is set to numpy's after the distributions' rvs functions - # are called - super(EmceeEnsembleSampler, self).set_p0(samples_file=samples_file, - prior=prior) - # update the random state - self._sampler.random_state = numpy.random.get_state() - - def write_state(self, fp): - """Saves the state of the sampler in a file. - """ - fp.write_random_state(state=self._sampler.random_state) - - def set_state_from_file(self, fp): + def set_state_from_file(self, filename): """Sets the state of the sampler back to the instance saved in a file. """ - rstate = fp.read_random_state() + with self.io(filename, 'r') as fp: + rstate = fp.read_random_state() # set the numpy random state numpy.random.set_state(rstate) # set emcee's generator to the same state self._sampler.random_state = rstate - def run(self, niterations, **kwargs): + def run_mcmc(self, niterations, **kwargs): """Advance the ensemble for a number of samples. Parameters ---------- niterations : int - Number of samples to get from sampler. - - Returns - ------- - p : numpy.array - An array of current walker positions with shape (nwalkers, ndim). - lnpost : numpy.array - The list of log posterior probabilities for the walkers at - positions p, with shape (nwalkers, ndim). - rstate : - The current state of the random number generator. + Number of iterations to run the sampler for. + \**kwargs : + All other keyword arguments are passed to the emcee sampler. """ pos = self._pos if pos is None: - pos = self.p0 + pos = self._p0 res = self._sampler.run_mcmc(pos, niterations, **kwargs) - p, lnpost, rstate = res[0], res[1], res[2] + p, _, _ = res[0], res[1], res[2] # update the positions self._pos = p - return p, lnpost, rstate - def write_results(self, fp, start_iteration=None, - max_iterations=None, **metadata): - """Writes metadata, samples, model stats, and acceptance fraction - to the given file. See the write function for each of those for - details. + def write_results(self, filename): + """Writes samples, model stats, acceptance fraction, and random state + to the given file. Parameters ----------- - fp : InferenceFile - A file handler to an open inference file. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - \**metadata : - All other keyword arguments are passed to ``write_metadata``. 
- """ - self.write_metadata(fp, **metadata) - self.write_chain(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_model_stats(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_acceptance_fraction(fp) - self.write_state(fp) - - -# This is needed for two reason -# 1) pools freeze state when created and so classes *cannot be updated* -# 2) methods cannot be pickled. -class _callprior(object): - """Calls the model's prior function, and ensures that no - metadata is returned.""" - def __init__(self, model_call): - self.callable = model_call - - def __call__(self, args): - prior = self.callable(args, callstat='logprior', - return_all_stats=False) - return prior - - -class _callloglikelihood(object): - """Calls the model's loglikelihood function. - """ - def __init__(self, model_call): - self.callable = model_call - - def __call__(self, args): - return self.callable(args, callstat='loglikelihood', - return_all_stats=False) - - -class EmceePTSampler(BaseMCMCSampler): - """This class is used to construct a parallel-tempered MCMC sampler from - the emcee package's PTSampler. - - Parameters - ---------- - model : model - A model from ``gwin.models``. - ntemps : int - Number of temeratures to use in the sampler. - nwalkers : int - Number of walkers to use in sampler. - pool : function with map, Optional - A provider of a map function that allows a function call to be run - over multiple sets of arguments and possibly maps them to - cores/nodes/etc. - """ - name = "emcee_pt" - - def __init__(self, model, ntemps, nwalkers, pool=None, - model_call=None): - - try: - import emcee - except ImportError: - raise ImportError("emcee is not installed.") - - if model_call is None: - model_call = model - - # construct the sampler: PTSampler needs the likelihood and prior - # functions separately - ndim = len(model.variable_params) - sampler = emcee.PTSampler(ntemps, nwalkers, ndim, - _callloglikelihood(model_call), - _callprior(model_call), - pool=pool) - # initialize - super(EmceePTSampler, self).__init__( - sampler, model) - self._nwalkers = nwalkers - self._ntemps = ntemps + filename : str + The file to write to. The file is opened using the ``io`` class + in an an append state. + """ + with self.io(filename, 'a') as fp: + # write samples + fp.write_samples(self.samples, self.model.variable_params) + # write stats + fp.write_samples(self.model_stats) + # write accpetance + fp.write_acceptance_fraction(self._sampler.acceptance_fraction) + # write random state + fp.write_random_state(state=self._sampler.random_state) + + def finalize(self): + """All data is written by the last checkpoint in the run method, so + this just passes.""" + pass @classmethod - def from_cli(cls, opts, model, pool=None, - model_call=None): - """Create an instance of this sampler from the given command-line - options. - - Parameters - ---------- - opts : ArgumentParser options - The options to parse. - model : LikelihoodEvaluator - The model to use with the sampler. - - Returns - ------- - EmceePTSampler - An emcee sampler initialized based on the given arguments. - """ - return cls(model, opts.ntemps, opts.nwalkers, - pool=pool, model_call=model_call) - - @property - def ntemps(self): - return self._ntemps - - @property - def chain(self): - """Get all past samples as an ntemps x nwalker x niterations x ndim - array. 
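
Taken together, ``run_mcmc``, ``write_results``, ``clear_samples``, and
``finalize`` support a checkpointed run. The loop below is only a sketch of that
flow, not the actual driver (which lives in the MCMC base class's ``run``
method); the file name and iteration counts are illustrative::

    # assume `sampler` is an initialized EmceeEnsembleSampler whose initial
    # walker positions have already been set
    checkpoint_file = 'run.hdf.checkpoint'   # illustrative name
    total_iterations = 2000
    interval = 500                           # e.g. the checkpoint-interval
    completed = 0
    while completed < total_iterations:
        niter = min(interval, total_iterations - completed)
        sampler.run_mcmc(niter)
        # dump samples, stats, acceptance fraction, and rng state to disk
        sampler.write_results(checkpoint_file)
        # free memory before the next chunk
        sampler.clear_samples()
        completed += niter
    sampler.finalize()
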
- """ - # emcee returns the chain as ntemps x nwalker x niterations x ndim - return self._sampler.chain - - def clear_chain(self): - """Clears the chain and blobs from memory. - """ - # store the iteration that the clear is occuring on - self.lastclear = self.niterations - # now clear the chain - self._sampler.reset() - - @property - def model_stats(self): - """Returns the log likelihood ratio and log prior as a FieldArray. - The returned array has shape ntemps x nwalkers x niterations. - """ - # likelihood has shape ntemps x nwalkers x niterations - logl = self._sampler.lnlikelihood - # get prior from posterior - logp = self._sampler.lnprobability - logl - # compute the likelihood ratio - loglr = logl - self.model.lognl - kwargs = {'loglr': loglr, 'logprior': logp} - # if different coordinates were used for sampling, get the jacobian - if self.model.sampling_transforms is not None: - samples = self.samples - # convert to dict - d = {param: samples[param] for param in samples.fieldnames} - logj = self.model.logjacobian(**d) - kwargs['logjacobian'] = logj - return FieldArray.from_kwargs(**kwargs) - - @property - def lnpost(self): - """Get the natural logarithm of the likelihood + the prior as an - ntemps x nwalkers x niterations array. - """ - # emcee returns ntemps x nwalkers x niterations - return self._sampler.lnprobability - - def set_p0(self, samples_file=None, prior=None): - """Sets the initial position of the walkers. - - Parameters - ---------- - samples_file : InferenceFile, optional - If provided, use the last iteration in the given file for the - starting positions. - prior : JointDistribution, optional - Use the given prior to set the initial positions rather than - ``model``'s prior. - - Returns - ------- - p0 : array - An ntemps x nwalkers x ndim array of the initial positions that - were set. - """ - # create a (nwalker, ndim) array for initial positions - ntemps = self.ntemps - nwalkers = self.nwalkers - ndim = len(self.variable_params) - p0 = numpy.ones((ntemps, nwalkers, ndim)) - # if samples are given then use those as initial positions - if samples_file is not None: - samples = self.read_samples(samples_file, self.variable_params, - iteration=-1, temps='all', - flatten=False)[..., 0] - # transform to sampling parameter space - samples = self.model.apply_sampling_transforms( - samples) - # draw random samples if samples are not provided + def from_config(cls, cp, model, nprocesses=1, use_mpi=False): + """Loads the sampler from the given config file.""" + section = "sampler" + # check name + assert cp.get(section, "name") == cls.name, ( + "name in section [sampler] must match mine") + # get the number of walkers to use + nwalkers = int(cp.get(section, "nwalkers")) + # get the checkpoint interval, if it's specified + if cp.has_option(section, "checkpoint-interval"): + checkpoint_interval = int(cp.get(section, "checkpoint-interval")) else: - samples = self.model.prior_rvs( - size=nwalkers*ntemps, prior=prior).reshape((ntemps, nwalkers)) - # convert to array - for i, param in enumerate(self.sampling_params): - p0[..., i] = samples[param] - self._p0 = p0 - return p0 - - def run(self, niterations, **kwargs): - """Advance the ensemble for a number of samples. - - Parameters - ---------- - niterations : int - Number of samples to get from sampler. - - Returns - ------- - p : numpy.array - An array of current walker positions with shape (nwalkers, ndim). - lnpost : numpy.array - The list of log posterior probabilities for the walkers at - positions p, with shape (nwalkers, ndim). 
- rstate : - The current state of the random number generator. - """ - pos = self._pos - if pos is None: - pos = self.p0 - res = self._sampler.run_mcmc(pos, niterations, **kwargs) - p, lnpost, rstate = res[0], res[1], res[2] - # update the positions - self._pos = p - return p, lnpost, rstate - - # read/write functions - - # add ntemps and betas to metadata - def write_metadata(self, fp, **kwargs): - """Writes metadata about this sampler to the given file. Metadata is - written to the file's `attrs`. - - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - **kwargs : - All keyword arguments are saved as separate arguments in the - file attrs. If any keyword argument is a dictionary, the keyword - will point to the list of keys in the the file's ``attrs``. Each - key is then stored as a separate attr with its corresponding value. - """ - super(EmceePTSampler, self).write_metadata(fp, **kwargs) - fp.attrs["ntemps"] = self.ntemps - fp.attrs["betas"] = self._sampler.betas - - def write_acceptance_fraction(self, fp): - """Write acceptance_fraction data to file. Results are written to - `fp[acceptance_fraction/temp{k}]` where k is the temperature. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - """ - group = "acceptance_fraction/temp{tk}" - # acf has shape ntemps x nwalkers - acf = self.acceptance_fraction - for tk in range(fp.ntemps): - try: - fp[group.format(tk=tk)][:] = acf[tk, :] - except KeyError: - # dataset doesn't exist yet, create it - fp[group.format(tk=tk)] = acf[tk, :] - - @staticmethod - def read_acceptance_fraction(fp, temps=None, walkers=None): - """Reads the acceptance fraction from the given file. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - temps : {None, (list of) int} - The temperature index (or a list of indices) to retrieve. If None, - acfs from all temperatures and all walkers will be retrieved. - walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - - Returns - ------- - array - Array of acceptance fractions with shape (requested temps, - requested walkers). - """ - group = 'acceptance_fraction/temp{tk}' - if temps is None: - temps = numpy.arange(fp.ntemps) - if walkers is None: - wmask = numpy.ones(fp.nwalkers, dtype=bool) - else: - wmask = numpy.zeros(fp.nwalkers, dtype=bool) - wmask[walkers] = True - arrays = [] - for tk in temps: - arrays.extend(fp[group.format(tk=tk)][wmask]) - return arrays - - @staticmethod - def write_samples_group(fp, samples_group, parameters, samples, - start_iteration=None, max_iterations=None): - """Writes samples to the given file. - - Results are written to: - - ``fp[samples_group/{vararg}]``, - - where ``{vararg}`` is the name of a variable arg. The samples are - written as an ``ntemps x nwalkers x niterations`` array. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - samples_group : str - Name of samples group to write. - parameters : list - The parameters to write to the file. - samples : FieldArray - The samples to write. Should be a FieldArray with fields containing - the samples to write and shape nwalkers x niterations. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. 
- max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - """ - ntemps, nwalkers, niterations = samples.shape - if max_iterations is not None and max_iterations < niterations: - raise IndexError("The provided max size is less than the " - "number of iterations") - group = samples_group + '/{name}' - # loop over number of dimensions - for param in parameters: - dataset_name = group.format(name=param) - istart = start_iteration - try: - fp_niterations = fp[dataset_name].shape[-1] - if istart is None: - istart = fp_niterations - istop = istart + niterations - if istop > fp_niterations: - # resize the dataset - fp[dataset_name].resize(istop, axis=2) - except KeyError: - # dataset doesn't exist yet - if istart is not None and istart != 0: - raise ValueError("non-zero start_iteration provided, but " - "dataset doesn't exist yet") - istart = 0 - istop = istart + niterations - fp.create_dataset(dataset_name, (ntemps, nwalkers, istop), - maxshape=(ntemps, nwalkers, max_iterations), - dtype=float, fletcher32=True) - fp[dataset_name][:, :, istart:istop] = samples[param] - - def write_results(self, fp, start_iteration=None, max_iterations=None, - **metadata): - """Writes metadata, samples, model stats, and acceptance fraction - to the given file. See the write function for each of those for - details. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - \**metadata : - All other keyword arguments are passed to ``write_metadata``. - """ - self.write_metadata(fp, **metadata) - self.write_chain(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_model_stats(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_acceptance_fraction(fp) - self.write_state(fp) - - @staticmethod - def _read_fields(fp, fields_group, fields, array_class, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, temps=None, walkers=None, flatten=True): - """Base function for reading samples and model stats. See - `read_samples` and `read_model_stats` for details. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - fields_group : str - The name of the group to retrieve the desired fields. - fields : list - The list of field names to retrieve. Must be names of groups in - `fp[fields_group/]`. - array_class : FieldArray or similar - The type of array to return. Must have a `from_kwargs` attribute. - - For other details on keyword arguments, see `read_samples` and - `read_model_stats`. - - Returns - ------- - array_class - An instance of the given array class populated with values - retrieved from the fields. 
- """ - # walkers to load - if walkers is not None: - widx = numpy.zeros(fp.nwalkers, dtype=bool) - widx[walkers] = True - nwalkers = widx.sum() + checkpoint_interval = None + if cp.has_option(section, "logpost-function"): + lnpost = cp.get(section, "logpost-function") else: - widx = slice(None, None) - nwalkers = fp.nwalkers - # temperatures to load - selecttemps = False - if temps is None: - tidx = 0 - ntemps = 1 - elif isinstance(temps, int): - tidx = temps - ntemps = 1 + lnpost = None + obj = cls(model, nwalkers, checkpoint_interval=checkpoint_interval, + logpost_function=lnpost, nprocesses=nprocesses, + use_mpi=use_mpi) + # get target + if cp.has_option(section, "niterations"): + niterations = int(cp.get(section, "niterations")) else: - # temps is either 'all' or a list of temperatures; - # in either case, we'll get all of the temperatures from the file; - # if not 'all', then we'll pull out the ones we want - tidx = slice(None, None) - selecttemps = temps != 'all' - if selecttemps: - ntemps = len(temps) - else: - ntemps = fp.ntemps - # get the slice to use - if iteration is not None: - get_index = iteration - niterations = 1 - else: - if thin_end is None: - # use the number of current iterations - thin_end = fp.niterations - get_index = fp.get_slice(thin_start=thin_start, thin_end=thin_end, - thin_interval=thin_interval) - # we'll just get the number of iterations from the returned shape niterations = None - # load - arrays = {} - group = fields_group + '/{name}' - for name in fields: - arr = fp[group.format(name=name)][tidx, widx, get_index] - if niterations is None: - niterations = arr.shape[-1] - # pull out the temperatures we need - if selecttemps: - arr = arr[temps, ...] - if flatten: - arr = arr.flatten() - else: - # ensure that the returned array is 3D - arr = arr.reshape((ntemps, nwalkers, niterations)) - arrays[name] = arr - return array_class.from_kwargs(**arrays) - - @classmethod - def read_samples(cls, fp, parameters, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, temps=0, walkers=None, flatten=True, - samples_group=None, array_class=None): - """Reads samples for the given parameter(s). - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - parameters : (list of) strings - The parameter(s) to retrieve. A parameter can be the name of any - field in `fp[fp.samples_group]`, a virtual field or method of - `FieldArray` (as long as the file contains the necessary fields - to derive the virtual field or method), and/or a function of - these. - thin_start : int - Index of the sample to begin returning samples. Default is to read - samples after burn in. To start from the beginning set thin_start - to 0. - thin_interval : int - Interval to accept every i-th sample. Default is to use the - `fp.acl`. If `fp.acl` is not set, then use all samples - (set thin_interval to 1). - thin_end : int - Index of the last sample to read. If not given then - `fp.niterations` is used. - iteration : int - Get a single iteration. If provided, will override the - `thin_{start/interval/end}` arguments. - walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - temps : {None, (list of) int, 'all'} - The temperature index (or list of indices) to retrieve. If None, - only samples from the coldest (= 0) temperature chain will be - retrieved. To retrieve all temperates pass 'all', or a list of - all of the temperatures. 
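
For reference, the options parsed by ``from_config`` (including the
``effective-nsamples`` and burn-in handling a little further below) correspond
to a ``[sampler]`` section along these lines; the values shown are only
examples::

    [sampler]
    name = emcee
    nwalkers = 200
    niterations = 5000
    checkpoint-interval = 1000
    ; optional:
    ; logpost-function = logposterior
    ; effective-nsamples = 2000

Given a config parser ``cp`` built from such a file and an instantiated
``model``, the sampler would then be created via
``EmceeEnsembleSampler.from_config(cp, model, nprocesses=..., use_mpi=...)``.
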
- flatten : {True, bool} - The returned array will be one dimensional, with all desired - samples from all desired walkers concatenated together. If False, - the returned array will have dimension requested temps x requested - walkers x requested iterations. - samples_group : {None, str} - The group in `fp` from which to retrieve the parameter fields. If - None, searches in `fp.samples_group`. - array_class : {None, array class} - The type of array to return. The class must have a `from_kwargs` - class method and a `parse_parameters` method. If None, will return - a FieldArray. - - Returns - ------- - array_class - Samples for the given parameters, as an instance of a the given - `array_class` (`FieldArray` if `array_class` is None). - """ - # get the group to load from - if samples_group is None: - samples_group = fp.samples_group - # get the type of array class to use - if array_class is None: - array_class = FieldArray - # get the names of fields needed for the given parameters - possible_fields = fp[samples_group].keys() - loadfields = array_class.parse_parameters(parameters, possible_fields) - return cls._read_fields( - fp, samples_group, loadfields, array_class, - thin_start=thin_start, thin_interval=thin_interval, - thin_end=thin_end, iteration=iteration, temps=temps, - walkers=walkers, flatten=flatten) - - @classmethod - def compute_acfs(cls, fp, start_index=None, end_index=None, - per_walker=False, walkers=None, parameters=None, - temps=None): - """Computes the autocorrleation function of the model params in the - given file. - - By default, parameter values are averaged over all walkers at each - iteration. The ACF is then calculated over the averaged chain for each - temperature. An ACF per-walker will be returned instead if - ``per_walker=True``. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - per_walker : optional, bool - Return the ACF for each walker separately. Default is False. - walkers : optional, int or array - Calculate the ACF using only the given walkers. If None (the - default) all walkers will be used. - parameters : optional, str or array - Calculate the ACF for only the given parameters. If None (the - default) will calculate the ACF for all of the model params. - temps : optional, (list of) int or 'all' - The temperature index (or list of indices) to retrieve. If None - (the default), the ACF will only be computed for the coldest (= 0) - temperature chain. To compute an ACF for all temperates pass 'all', - or a list of all of the temperatures. - - Returns - ------- - FieldArray - A ``FieldArray`` of the ACF vs iteration for each parameter. If - `per-walker` is True, the FieldArray will have shape - ``ntemps x nwalkers x niterations``. Otherwise, the returned - array will have shape ``ntemps x niterations``. 
- """ - acfs = {} - if parameters is None: - parameters = fp.variable_params - if isinstance(parameters, str) or isinstance(parameters, unicode): - parameters = [parameters] - if isinstance(temps, int): - temps = [temps] - elif temps == 'all': - temps = numpy.arange(fp.ntemps) - elif temps is None: - temps = [0] - for param in parameters: - subacfs = [] - for tk in temps: - if per_walker: - # just call myself with a single walker - if walkers is None: - walkers = numpy.arange(fp.nwalkers) - arrays = [cls.compute_acfs(fp, start_index=start_index, - end_index=end_index, - per_walker=False, walkers=ii, - parameters=param, - temps=tk)[param][0, :] - for ii in walkers] - # we'll stack all of the walker arrays to make a single - # nwalkers x niterations array; when these are stacked - # below, we'll get a ntemps x nwalkers x niterations array - subacfs.append(numpy.vstack(arrays)) - else: - samples = cls.read_samples(fp, param, - thin_start=start_index, - thin_interval=1, - thin_end=end_index, - walkers=walkers, temps=tk, - flatten=False)[param] - # contract the walker dimension using the mean, and flatten - # the (length 1) temp dimension - samples = samples.mean(axis=1)[0, :] - thisacf = autocorrelation.calculate_acf(samples).numpy() - subacfs.append(thisacf) - # stack the temperatures - # FIXME: the following if/else can be condensed to a single line - # using numpy.stack, once the version requirements are bumped to - # numpy >= 1.10 - if per_walker: - nw, ni = subacfs[0].shape - acfs[param] = numpy.zeros((len(temps), nw, ni), dtype=float) - for tk in range(len(temps)): - acfs[param][tk, ...] = subacfs[tk] - else: - acfs[param] = numpy.vstack(subacfs) - return FieldArray.from_kwargs(**acfs) - - @classmethod - def compute_acls(cls, fp, start_index=None, end_index=None): - """Computes the autocorrleation length for all model params and - temperatures in the given file. - - Parameter values are averaged over all walkers at each iteration and - temperature. The ACL is then calculated over the averaged chain. If - the returned ACL is `inf`, will default to the number of current - iterations. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - - Returns - ------- - dict - A dictionary of ntemps-long arrays of the ACLs of each parameter. - """ - acls = {} - if end_index is None: - end_index = fp.niterations - tidx = numpy.arange(fp.ntemps) - for param in fp.variable_params: - these_acls = numpy.zeros(fp.ntemps, dtype=int) - for tk in tidx: - samples = cls.read_samples(fp, param, thin_start=start_index, - thin_interval=1, thin_end=end_index, - temps=tk, flatten=False)[param] - # contract the walker dimension using the mean, and flatten - # the (length 1) temp dimension - samples = samples.mean(axis=1)[0, :] - acl = autocorrelation.calculate_acl(samples) - if numpy.isinf(acl): - acl = samples.size - these_acls[tk] = acl - acls[param] = these_acls - return acls - - @classmethod - def calculate_logevidence(cls, fp, thin_start=None, thin_end=None, - thin_interval=None): - """Calculates the log evidence from the given file using emcee's - thermodynamic integration. 
- - Parameters - ---------- - fp : InferenceFile - An open file handler to read the stats from. - thin_start : int - Index of the sample to begin returning stats. Default is to read - stats after burn in. To start from the beginning set thin_start - to 0. - thin_interval : int - Interval to accept every i-th sample. Default is to use the - `fp.acl`. If `fp.acl` is not set, then use all stats - (set thin_interval to 1). - thin_end : int - Index of the last sample to read. If not given then - `fp.niterations` is used. - - Returns - ------- - lnZ : float - The estimate of log of the evidence. - dlnZ : float - The error on the estimate. - """ + if cp.has_option(section, "effective-nsamples"): + nsamples = int(cp.get(section, "effective-nsamples")) + else: + nsamples = None + obj.set_target(niterations=niterations, eff_nsamples=nsamples) + # add burn-in if it's specified try: - import emcee - except ImportError: - raise ImportError("emcee is not installed.") - - stats_group = fp.stats_group - parameters = fp[stats_group].keys() - logstats = cls.read_samples(fp, parameters, samples_group=stats_group, - thin_start=thin_start, thin_end=thin_end, - thin_interval=thin_interval, - temps='all', flatten=False) - # get the likelihoods - logls = logstats['loglr'] + fp.lognl - # we need the betas that were used - betas = fp.attrs['betas'] - # annoyingly, theromdynaimc integration in PTSampler is an instance - # method, so we'll implement a dummy one - ntemps = fp.ntemps - nwalkers = fp.nwalkers - ndim = len(fp.variable_params) - dummy_sampler = emcee.PTSampler(ntemps, nwalkers, ndim, None, - None, betas=betas) - return dummy_sampler.thermodynamic_integration_log_evidence( - logls=logls, fburnin=0.) + bit = obj.burn_in_class.from_config(cp, obj) + except ConfigParser.Error: + bit = None + obj.set_burn_in(bit) + return obj diff --git a/gwin/sampler/emcee_pt.py b/gwin/sampler/emcee_pt.py new file mode 100644 index 0000000..cef83fd --- /dev/null +++ b/gwin/sampler/emcee_pt.py @@ -0,0 +1,756 @@ +# Copyright (C) 2016 Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +""" +This modules provides classes and functions for using the emcee sampler +packages for parameter estimation. +""" + +# The following two classes are needed for two reason +# 1) pools freeze state when created and so classes *cannot be updated* +# 2) methods cannot be pickled. 
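
The constraint described in the two comments above is the usual multiprocessing
one: bound methods cannot be pickled under Python 2, and pool workers only ever
see the state an object had when the pool was created. Module-level callable
classes, as used below, sidestep both problems; a self-contained sketch of the
pattern (independent of gwin)::

    import multiprocessing
    import numpy

    class CallQuadratic(object):
        """A picklable wrapper: instances can be shipped to pool workers."""
        def __init__(self, scale):
            self.scale = scale

        def __call__(self, x):
            return -0.5 * self.scale * numpy.sum(x ** 2)

    if __name__ == '__main__':
        func = CallQuadratic(scale=2.)
        pool = multiprocessing.Pool(2)
        points = [numpy.array([1., 2.]), numpy.array([0., 3.])]
        # each point is evaluated in a worker process
        print(pool.map(func, points))
        pool.close()
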
+ + +class _callprior(object): + """Calls the model's prior function, and ensures that no + metadata is returned.""" + def __init__(self, model_call): + self.callable = model_call + + def __call__(self, args): + prior = self.callable(args, callfunc='prior') + return prior if isinstance(prior, numpy.float64) else prior[0] + + +class _callloglikelihood(object): + """Calls the model's loglikelihood function. + """ + def __init__(self, model_call): + self.callable = model_call + + def __call__(self, args): + return self.callable(args, callfunc='loglikelihood') + + +class EmceePTSampler(BaseMCMCSampler): + """This class is used to construct a parallel-tempered MCMC sampler from + the emcee package's PTSampler. + + Parameters + ---------- + model : model + A model from ``gwin.models``. + ntemps : int + Number of temeratures to use in the sampler. + nwalkers : int + Number of walkers to use in sampler. + pool : function with map, Optional + A provider of a map function that allows a function call to be run + over multiple sets of arguments and possibly maps them to + cores/nodes/etc. + """ + name = "emcee_pt" + + def __init__(self, model, ntemps, nwalkers, pool=None, + model_call=None): + + try: + import emcee + except ImportError: + raise ImportError("emcee is not installed.") + + if model_call is None: + model_call = model + + # construct the sampler: PTSampler needs the likelihood and prior + # functions separately + ndim = len(model.variable_params) + sampler = emcee.PTSampler(ntemps, nwalkers, ndim, + _callloglikelihood(model_call), + _callprior(model_call), + pool=pool) + # initialize + super(EmceePTSampler, self).__init__( + sampler, model) + self._nwalkers = nwalkers + self._ntemps = ntemps + + @classmethod + def from_cli(cls, opts, model, pool=None, + model_call=None): + """Create an instance of this sampler from the given command-line + options. + + Parameters + ---------- + opts : ArgumentParser options + The options to parse. + model : LikelihoodEvaluator + The model to use with the sampler. + + Returns + ------- + EmceePTSampler + An emcee sampler initialized based on the given arguments. + """ + return cls(model, opts.ntemps, opts.nwalkers, + pool=pool, model_call=model_call) + + @property + def ntemps(self): + return self._ntemps + + @property + def chain(self): + """Get all past samples as an ntemps x nwalker x niterations x ndim + array. + """ + # emcee returns the chain as ntemps x nwalker x niterations x ndim + return self._sampler.chain + + def clear_chain(self): + """Clears the chain and blobs from memory. + """ + # store the iteration that the clear is occuring on + self.lastclear = self.niterations + # now clear the chain + self._sampler.reset() + + @property + def model_stats(self): + """Returns the log likelihood ratio and log prior as a FieldArray. + The returned array has shape ntemps x nwalkers x niterations. 
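
A stripped-down version of what this constructor sets up, using plain functions
in place of the model wrappers, looks roughly like the following (emcee 2.x,
which provides ``PTSampler``; the standard-normal likelihood and flat prior are
purely illustrative)::

    import numpy
    import emcee

    def loglikelihood(x):
        return -0.5 * numpy.sum(x ** 2)

    def logprior(x):
        return 0.  # flat prior

    ntemps, nwalkers, ndim = 4, 50, 2
    sampler = emcee.PTSampler(ntemps, nwalkers, ndim,
                              loglikelihood, logprior)
    p0 = numpy.random.uniform(-1, 1, size=(ntemps, nwalkers, ndim))
    sampler.run_mcmc(p0, 100)
    # the chain has shape ntemps x nwalkers x niterations x ndim
    print(sampler.chain.shape)
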
+ """ + # likelihood has shape ntemps x nwalkers x niterations + logl = self._sampler.lnlikelihood + # get prior from posterior + logp = self._sampler.lnprobability - logl + # compute the likelihood ratio + loglr = logl - self.model.lognl + kwargs = {'loglr': loglr, 'prior': logp} + # if different coordinates were used for sampling, get the jacobian + if self.model.sampling_transforms is not None: + samples = self.samples + # convert to dict + d = {param: samples[param] for param in samples.fieldnames} + logj = self.model.logjacobian(**d) + kwargs['logjacobian'] = logj + return FieldArray.from_kwargs(**kwargs) + + @property + def lnpost(self): + """Get the natural logarithm of the likelihood + the prior as an + ntemps x nwalkers x niterations array. + """ + # emcee returns ntemps x nwalkers x niterations + return self._sampler.lnprobability + + def set_p0(self, samples_file=None, prior=None): + """Sets the initial position of the walkers. + + Parameters + ---------- + samples_file : InferenceFile, optional + If provided, use the last iteration in the given file for the + starting positions. + prior : JointDistribution, optional + Use the given prior to set the initial positions rather than + ``model``'s prior. + + Returns + ------- + p0 : array + An ntemps x nwalkers x ndim array of the initial positions that + were set. + """ + # create a (nwalker, ndim) array for initial positions + ntemps = self.ntemps + nwalkers = self.nwalkers + ndim = len(self.variable_params) + p0 = numpy.ones((ntemps, nwalkers, ndim)) + # if samples are given then use those as initial positions + if samples_file is not None: + samples = self.read_samples(samples_file, self.variable_params, + iteration=-1, temps='all', + flatten=False)[..., 0] + # transform to sampling parameter space + samples = self.model.apply_sampling_transforms( + samples) + # draw random samples if samples are not provided + else: + samples = self.model.prior_rvs( + size=nwalkers*ntemps, prior=prior).reshape((ntemps, nwalkers)) + # convert to array + for i, param in enumerate(self.sampling_params): + p0[..., i] = samples[param] + self._p0 = p0 + return p0 + + def run(self, niterations, **kwargs): + """Advance the ensemble for a number of samples. + + Parameters + ---------- + niterations : int + Number of samples to get from sampler. + + Returns + ------- + p : numpy.array + An array of current walker positions with shape (nwalkers, ndim). + lnpost : numpy.array + The list of log posterior probabilities for the walkers at + positions p, with shape (nwalkers, ndim). + rstate : + The current state of the random number generator. + """ + pos = self._pos + if pos is None: + pos = self.p0 + res = self._sampler.run_mcmc(pos, niterations, **kwargs) + p, lnpost, rstate = res[0], res[1], res[2] + # update the positions + self._pos = p + return p, lnpost, rstate + + # read/write functions + + # add ntemps and betas to metadata + def write_metadata(self, fp, **kwargs): + """Writes metadata about this sampler to the given file. Metadata is + written to the file's `attrs`. + + Parameters + ---------- + fp : InferenceFile + A file handler to an open inference file. + **kwargs : + All keyword arguments are saved as separate arguments in the + file attrs. If any keyword argument is a dictionary, the keyword + will point to the list of keys in the the file's ``attrs``. Each + key is then stored as a separate attr with its corresponding value. 
+ """ + super(EmceePTSampler, self).write_metadata(fp, **kwargs) + fp.attrs["ntemps"] = self.ntemps + fp.attrs["betas"] = self._sampler.betas + + def write_acceptance_fraction(self, fp): + """Write acceptance_fraction data to file. Results are written to + `fp[acceptance_fraction/temp{k}]` where k is the temperature. + + Parameters + ----------- + fp : InferenceFile + A file handler to an open inference file. + """ + group = "acceptance_fraction/temp{tk}" + # acf has shape ntemps x nwalkers + acf = self.acceptance_fraction + for tk in range(fp.ntemps): + try: + fp[group.format(tk=tk)][:] = acf[tk, :] + except KeyError: + # dataset doesn't exist yet, create it + fp[group.format(tk=tk)] = acf[tk, :] + + @staticmethod + def read_acceptance_fraction(fp, temps=None, walkers=None): + """Reads the acceptance fraction from the given file. + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. + temps : {None, (list of) int} + The temperature index (or a list of indices) to retrieve. If None, + acfs from all temperatures and all walkers will be retrieved. + walkers : {None, (list of) int} + The walker index (or a list of indices) to retrieve. If None, + samples from all walkers will be obtained. + + Returns + ------- + array + Array of acceptance fractions with shape (requested temps, + requested walkers). + """ + group = 'acceptance_fraction/temp{tk}' + if temps is None: + temps = numpy.arange(fp.ntemps) + if walkers is None: + wmask = numpy.ones(fp.nwalkers, dtype=bool) + else: + wmask = numpy.zeros(fp.nwalkers, dtype=bool) + wmask[walkers] = True + arrays = [] + for tk in temps: + arrays.extend(fp[group.format(tk=tk)][wmask]) + return arrays + + @staticmethod + def write_samples_group(fp, samples_group, parameters, samples, + start_iteration=None, max_iterations=None): + """Writes samples to the given file. + + Results are written to: + + ``fp[samples_group/{vararg}]``, + + where ``{vararg}`` is the name of a variable arg. The samples are + written as an ``ntemps x nwalkers x niterations`` array. + + Parameters + ----------- + fp : InferenceFile + A file handler to an open inference file. + samples_group : str + Name of samples group to write. + parameters : list + The parameters to write to the file. + samples : FieldArray + The samples to write. Should be a FieldArray with fields containing + the samples to write and shape nwalkers x niterations. + start_iteration : int, optional + Write results to the file's datasets starting at the given + iteration. Default is to append after the last iteration in the + file. + max_iterations : int, optional + Set the maximum size that the arrays in the hdf file may be resized + to. Only applies if the samples have not previously been written + to file. The default (None) is to use the maximum size allowed by + h5py. 
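
Since these quantities end up as plain HDF5 attributes and datasets, they can
also be inspected directly with h5py when debugging; a minimal sketch (the file
name is illustrative)::

    import h5py

    with h5py.File('pt_run.hdf', 'r') as fp:
        ntemps = fp.attrs['ntemps']
        betas = fp.attrs['betas']                  # the temperature ladder
        # per-temperature acceptance fractions, one nwalkers-long array each
        acf0 = fp['acceptance_fraction/temp0'][:]  # coldest chain
        print(ntemps, betas.shape, acf0.mean())
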
+ """ + ntemps, nwalkers, niterations = samples.shape + if max_iterations is not None and max_iterations < niterations: + raise IndexError("The provided max size is less than the " + "number of iterations") + group = samples_group + '/{name}' + # loop over number of dimensions + for param in parameters: + dataset_name = group.format(name=param) + istart = start_iteration + try: + fp_niterations = fp[dataset_name].shape[-1] + if istart is None: + istart = fp_niterations + istop = istart + niterations + if istop > fp_niterations: + # resize the dataset + fp[dataset_name].resize(istop, axis=2) + except KeyError: + # dataset doesn't exist yet + if istart is not None and istart != 0: + raise ValueError("non-zero start_iteration provided, but " + "dataset doesn't exist yet") + istart = 0 + istop = istart + niterations + fp.create_dataset(dataset_name, (ntemps, nwalkers, istop), + maxshape=(ntemps, nwalkers, max_iterations), + dtype=float, fletcher32=True) + fp[dataset_name][:, :, istart:istop] = samples[param] + + def write_results(self, fp, start_iteration=None, max_iterations=None, + **metadata): + """Writes metadata, samples, model stats, and acceptance fraction + to the given file. See the write function for each of those for + details. + + Parameters + ----------- + fp : InferenceFile + A file handler to an open inference file. + start_iteration : int, optional + Write results to the file's datasets starting at the given + iteration. Default is to append after the last iteration in the + file. + max_iterations : int, optional + Set the maximum size that the arrays in the hdf file may be resized + to. Only applies if the samples have not previously been written + to file. The default (None) is to use the maximum size allowed by + h5py. + \**metadata : + All other keyword arguments are passed to ``write_metadata``. + """ + self.write_metadata(fp, **metadata) + self.write_chain(fp, start_iteration=start_iteration, + max_iterations=max_iterations) + self.write_model_stats(fp, start_iteration=start_iteration, + max_iterations=max_iterations) + self.write_acceptance_fraction(fp) + self.write_state(fp) + + @staticmethod + def _read_fields(fp, fields_group, fields, array_class, + thin_start=None, thin_interval=None, thin_end=None, + iteration=None, temps=None, walkers=None, flatten=True): + """Base function for reading samples and model stats. See + `read_samples` and `read_model_stats` for details. + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. + fields_group : str + The name of the group to retrieve the desired fields. + fields : list + The list of field names to retrieve. Must be names of groups in + `fp[fields_group/]`. + array_class : FieldArray or similar + The type of array to return. Must have a `from_kwargs` attribute. + + For other details on keyword arguments, see `read_samples` and + `read_model_stats`. + + Returns + ------- + array_class + An instance of the given array class populated with values + retrieved from the fields. 
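
The append logic in ``write_samples_group`` relies on h5py's resizable
datasets. The pattern in isolation, with toy shapes and a throwaway file, looks
like this::

    import numpy
    import h5py

    ntemps, nwalkers, chunk = 2, 8, 10
    with h5py.File('resize_demo.hdf', 'w') as fp:
        # leave the iterations axis unlimited so it can grow later
        dset = fp.create_dataset('samples/x', (ntemps, nwalkers, chunk),
                                 maxshape=(ntemps, nwalkers, None),
                                 dtype=float)
        dset[:, :, :] = numpy.zeros((ntemps, nwalkers, chunk))
        # append another chunk of iterations by resizing axis 2
        dset.resize(2 * chunk, axis=2)
        dset[:, :, chunk:] = numpy.ones((ntemps, nwalkers, chunk))
        print(dset.shape)  # (2, 8, 20)
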
+ """ + # walkers to load + if walkers is not None: + widx = numpy.zeros(fp.nwalkers, dtype=bool) + widx[walkers] = True + nwalkers = widx.sum() + else: + widx = slice(None, None) + nwalkers = fp.nwalkers + # temperatures to load + selecttemps = False + if temps is None: + tidx = 0 + ntemps = 1 + elif isinstance(temps, int): + tidx = temps + ntemps = 1 + else: + # temps is either 'all' or a list of temperatures; + # in either case, we'll get all of the temperatures from the file; + # if not 'all', then we'll pull out the ones we want + tidx = slice(None, None) + selecttemps = temps != 'all' + if selecttemps: + ntemps = len(temps) + else: + ntemps = fp.ntemps + # get the slice to use + if iteration is not None: + get_index = iteration + niterations = 1 + else: + if thin_end is None: + # use the number of current iterations + thin_end = fp.niterations + get_index = fp.get_slice(thin_start=thin_start, thin_end=thin_end, + thin_interval=thin_interval) + # we'll just get the number of iterations from the returned shape + niterations = None + # load + arrays = {} + group = fields_group + '/{name}' + for name in fields: + arr = fp[group.format(name=name)][tidx, widx, get_index] + if niterations is None: + niterations = arr.shape[-1] + # pull out the temperatures we need + if selecttemps: + arr = arr[temps, ...] + if flatten: + arr = arr.flatten() + else: + # ensure that the returned array is 3D + arr = arr.reshape((ntemps, nwalkers, niterations)) + arrays[name] = arr + return array_class.from_kwargs(**arrays) + + @classmethod + def read_samples(cls, fp, parameters, + thin_start=None, thin_interval=None, thin_end=None, + iteration=None, temps=0, walkers=None, flatten=True, + samples_group=None, array_class=None): + """Reads samples for the given parameter(s). + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. + parameters : (list of) strings + The parameter(s) to retrieve. A parameter can be the name of any + field in `fp[fp.samples_group]`, a virtual field or method of + `FieldArray` (as long as the file contains the necessary fields + to derive the virtual field or method), and/or a function of + these. + thin_start : int + Index of the sample to begin returning samples. Default is to read + samples after burn in. To start from the beginning set thin_start + to 0. + thin_interval : int + Interval to accept every i-th sample. Default is to use the + `fp.acl`. If `fp.acl` is not set, then use all samples + (set thin_interval to 1). + thin_end : int + Index of the last sample to read. If not given then + `fp.niterations` is used. + iteration : int + Get a single iteration. If provided, will override the + `thin_{start/interval/end}` arguments. + walkers : {None, (list of) int} + The walker index (or a list of indices) to retrieve. If None, + samples from all walkers will be obtained. + temps : {None, (list of) int, 'all'} + The temperature index (or list of indices) to retrieve. If None, + only samples from the coldest (= 0) temperature chain will be + retrieved. To retrieve all temperates pass 'all', or a list of + all of the temperatures. + flatten : {True, bool} + The returned array will be one dimensional, with all desired + samples from all desired walkers concatenated together. If False, + the returned array will have dimension requested temps x requested + walkers x requested iterations. + samples_group : {None, str} + The group in `fp` from which to retrieve the parameter fields. If + None, searches in `fp.samples_group`. 
+ array_class : {None, array class} + The type of array to return. The class must have a `from_kwargs` + class method and a `parse_parameters` method. If None, will return + a FieldArray. + + Returns + ------- + array_class + Samples for the given parameters, as an instance of a the given + `array_class` (`FieldArray` if `array_class` is None). + """ + # get the group to load from + if samples_group is None: + samples_group = fp.samples_group + # get the type of array class to use + if array_class is None: + array_class = FieldArray + # get the names of fields needed for the given parameters + possible_fields = fp[samples_group].keys() + loadfields = array_class.parse_parameters(parameters, possible_fields) + return cls._read_fields( + fp, samples_group, loadfields, array_class, + thin_start=thin_start, thin_interval=thin_interval, + thin_end=thin_end, iteration=iteration, temps=temps, + walkers=walkers, flatten=flatten) + + @classmethod + def compute_acfs(cls, fp, start_index=None, end_index=None, + per_walker=False, walkers=None, parameters=None, + temps=None): + """Computes the autocorrleation function of the model params in the + given file. + + By default, parameter values are averaged over all walkers at each + iteration. The ACF is then calculated over the averaged chain for each + temperature. An ACF per-walker will be returned instead if + ``per_walker=True``. + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + per_walker : optional, bool + Return the ACF for each walker separately. Default is False. + walkers : optional, int or array + Calculate the ACF using only the given walkers. If None (the + default) all walkers will be used. + parameters : optional, str or array + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + temps : optional, (list of) int or 'all' + The temperature index (or list of indices) to retrieve. If None + (the default), the ACF will only be computed for the coldest (= 0) + temperature chain. To compute an ACF for all temperates pass 'all', + or a list of all of the temperatures. + + Returns + ------- + FieldArray + A ``FieldArray`` of the ACF vs iteration for each parameter. If + `per-walker` is True, the FieldArray will have shape + ``ntemps x nwalkers x niterations``. Otherwise, the returned + array will have shape ``ntemps x niterations``. 
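
In practice this reader is called on an open inference file; a usage sketch
(the parameter names and thinning values are illustrative)::

    # `fp` is assumed to be an open InferenceFile from a completed run
    samples = EmceePTSampler.read_samples(fp, ['mchirp', 'q'],
                                          thin_start=1000, thin_interval=10,
                                          temps=0, flatten=True)
    print(samples['mchirp'].size)

    # keep the temperature/walker structure instead of flattening
    samples3d = EmceePTSampler.read_samples(fp, 'mchirp', temps='all',
                                            flatten=False)
    print(samples3d['mchirp'].shape)  # ntemps x nwalkers x niterations
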
+ """ + acfs = {} + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, str) or isinstance(parameters, unicode): + parameters = [parameters] + if isinstance(temps, int): + temps = [temps] + elif temps == 'all': + temps = numpy.arange(fp.ntemps) + elif temps is None: + temps = [0] + for param in parameters: + subacfs = [] + for tk in temps: + if per_walker: + # just call myself with a single walker + if walkers is None: + walkers = numpy.arange(fp.nwalkers) + arrays = [cls.compute_acfs(fp, start_index=start_index, + end_index=end_index, + per_walker=False, walkers=ii, + parameters=param, + temps=tk)[param][0, :] + for ii in walkers] + # we'll stack all of the walker arrays to make a single + # nwalkers x niterations array; when these are stacked + # below, we'll get a ntemps x nwalkers x niterations array + subacfs.append(numpy.vstack(arrays)) + else: + samples = cls.read_samples(fp, param, + thin_start=start_index, + thin_interval=1, + thin_end=end_index, + walkers=walkers, temps=tk, + flatten=False)[param] + # contract the walker dimension using the mean, and flatten + # the (length 1) temp dimension + samples = samples.mean(axis=1)[0, :] + thisacf = autocorrelation.calculate_acf(samples).numpy() + subacfs.append(thisacf) + # stack the temperatures + # FIXME: the following if/else can be condensed to a single line + # using numpy.stack, once the version requirements are bumped to + # numpy >= 1.10 + if per_walker: + nw, ni = subacfs[0].shape + acfs[param] = numpy.zeros((len(temps), nw, ni), dtype=float) + for tk in range(len(temps)): + acfs[param][tk, ...] = subacfs[tk] + else: + acfs[param] = numpy.vstack(subacfs) + return FieldArray.from_kwargs(**acfs) + + @classmethod + def compute_acls(cls, fp, start_index=None, end_index=None): + """Computes the autocorrleation length for all model params and + temperatures in the given file. + + Parameter values are averaged over all walkers at each iteration and + temperature. The ACL is then calculated over the averaged chain. If + the returned ACL is `inf`, will default to the number of current + iterations. + + Parameters + ----------- + fp : InferenceFile + An open file handler to read the samples from. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + + Returns + ------- + dict + A dictionary of ntemps-long arrays of the ACLs of each parameter. + """ + acls = {} + if end_index is None: + end_index = fp.niterations + tidx = numpy.arange(fp.ntemps) + for param in fp.variable_params: + these_acls = numpy.zeros(fp.ntemps, dtype=int) + for tk in tidx: + samples = cls.read_samples(fp, param, thin_start=start_index, + thin_interval=1, thin_end=end_index, + temps=tk, flatten=False)[param] + # contract the walker dimension using the mean, and flatten + # the (length 1) temp dimension + samples = samples.mean(axis=1)[0, :] + acl = autocorrelation.calculate_acl(samples) + if numpy.isinf(acl): + acl = samples.size + these_acls[tk] = acl + acls[param] = these_acls + return acls + + @classmethod + def calculate_logevidence(cls, fp, thin_start=None, thin_end=None, + thin_interval=None): + """Calculates the log evidence from the given file using emcee's + thermodynamic integration. 
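
Stripped of the file handling, the core of the ACL estimate is just the walker
average followed by pycbc's autocorrelation length calculation; a sketch with a
synthetic AR(1) chain (finite, known correlation length)::

    import numpy
    from pycbc.filter import autocorrelation

    # toy chain for one parameter at one temperature: nwalkers x niterations
    nwalkers, niterations = 16, 4000
    chain = numpy.zeros((nwalkers, niterations))
    noise = numpy.random.normal(size=(nwalkers, niterations))
    for i in range(1, niterations):
        chain[:, i] = 0.9 * chain[:, i - 1] + noise[:, i]
    # average over walkers at each iteration, as compute_acls does
    mean_chain = chain.mean(axis=0)
    acl = autocorrelation.calculate_acl(mean_chain)
    if numpy.isinf(acl):
        # fall back to the total number of iterations
        acl = mean_chain.size
    print(acl)
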
+ + Parameters + ---------- + fp : InferenceFile + An open file handler to read the stats from. + thin_start : int + Index of the sample to begin returning stats. Default is to read + stats after burn in. To start from the beginning set thin_start + to 0. + thin_interval : int + Interval to accept every i-th sample. Default is to use the + `fp.acl`. If `fp.acl` is not set, then use all stats + (set thin_interval to 1). + thin_end : int + Index of the last sample to read. If not given then + `fp.niterations` is used. + + Returns + ------- + lnZ : float + The estimate of log of the evidence. + dlnZ : float + The error on the estimate. + """ + try: + import emcee + except ImportError: + raise ImportError("emcee is not installed.") + + stats_group = fp.stats_group + parameters = fp[stats_group].keys() + logstats = cls.read_samples(fp, parameters, samples_group=stats_group, + thin_start=thin_start, thin_end=thin_end, + thin_interval=thin_interval, + temps='all', flatten=False) + # get the likelihoods + logls = logstats['loglr'] + fp.lognl + # we need the betas that were used + betas = fp.attrs['betas'] + # annoyingly, theromdynaimc integration in PTSampler is an instance + # method, so we'll implement a dummy one + ntemps = fp.ntemps + nwalkers = fp.nwalkers + ndim = len(fp.variable_params) + dummy_sampler = emcee.PTSampler(ntemps, nwalkers, ndim, None, + None, betas=betas) + return dummy_sampler.thermodynamic_integration_log_evidence( + logls=logls, fburnin=0.) From 3f603715fa7e4688c58b4ab036cbd66b2d40b793 Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Tue, 25 Sep 2018 11:13:14 +0200 Subject: [PATCH 2/3] Update plotting (#71) * move results from cli functions to io module; add ability to specify different options for different input files * make write_kwargs_to_attrs a classmethod to cyclic imports * use argparse action to parse parameters option * move injections_from_cli to __init__; add a read_injections method to base_hdf * use parameters opt to load parameters in base_mcmc * move get_file_type to io; fix bugs * fix import error * create a custom ArgumentParser * allow input-file option to accept labels * move param parsing to option_utils and remove unused functions * update create_multidim plot * update plot_posterior * switch from normed to density to please matplotlib * update plot_posterior * fix plotting injection parameters * move thin options to base_mcmc; add ability to skip arguments * update plot_movie * fix pep8 issues --- bin/gwin_plot_movie | 61 ++-- bin/gwin_plot_posterior | 87 ++---- gwin/io/__init__.py | 439 +++++++++++++++++++++++++- gwin/io/base_hdf.py | 169 ++++++++-- gwin/io/base_mcmc.py | 75 ++++- gwin/models/base.py | 3 +- gwin/option_utils.py | 478 ++++++----------------------- gwin/results/scatter_histograms.py | 12 +- 8 files changed, 824 insertions(+), 500 deletions(-) diff --git a/bin/gwin_plot_movie b/bin/gwin_plot_movie index 7c79530..0face6a 100644 --- a/bin/gwin_plot_movie +++ b/bin/gwin_plot_movie @@ -41,7 +41,7 @@ from matplotlib import pyplot import pycbc.results from pycbc import transforms -from gwin import (__version__, option_utils) +from gwin import (__version__, option_utils, io) from gwin.results.scatter_histograms import (create_multidim_plot, get_scale_fac) @@ -91,12 +91,12 @@ def integer_logspace(start, end, num): out[start_idx:len(x)+start_idx] = x return out -parser = argparse.ArgumentParser() - +# we won't add thinning arguments nor iteration, since this is determined by +# the frame number/step options +skip_args = ['thin-start', 
'thin-interval', 'thin-end', 'iteration'] +parser = io.ResultsArgumentParser(skip_args=skip_args) parser.add_argument("--version", action="version", version=__version__, help="show version number and exit") -parser.add_argument("--input-file", type=str, required=True, - help="Results file path.") parser.add_argument("--start-sample", type=int, default=1, help="Start sample for the first frame. Note: sample " "counting starts from 1. Default is 1.") @@ -117,10 +117,6 @@ parser.add_argument("--log-steps", action="store_true", default=False, parser.add_argument("--output-prefix", type=str, required=True, help="Output path and prefix for the frame files " "(without extension).") -parser.add_argument("--parameters", type=str, nargs="+", - metavar="PARAM[:LABEL]", - help="Name of parameters to plot in same format " - "as for pycbc_inference_plot_posterior.") parser.add_argument('--verbose', action='store_true') parser.add_argument('--dpi', type=int, default=200, help='Set the dpi for each frame; default is 200') @@ -144,10 +140,12 @@ option_utils.add_density_option_group(parser) opts = parser.parse_args() pycbc.init_logging(opts.verbose) +if len(opts.input_file) > 1: + raise ValueError("this program can only plot one file at a time") + # Get data logging.info('Loading parameters') -fp, parameters, labels, _ = option_utils.results_from_cli(opts, - load_samples=False) +fp, parameters, labels, _ = io.results_from_cli(opts, load_samples=False) if opts.end_sample is None: opts.end_sample = fp.niterations @@ -190,31 +188,29 @@ else: raise ValueError("At least one of frame-number or frame-step must be " "provided.") -# get samples from InferenceFile +# get the samples file_parameters, trans = transforms.get_common_cbc_transforms( parameters, fp.variable_params) -samples = fp.read_samples(file_parameters, thin_start=thin_start, - thin_interval=thinint, thin_end=thin_end, - iteration=itermask, flatten=False) +samples = fp.samples_from_cli(opts, file_parameters, thin_start=thin_start, + thin_interval=thinint, thin_end=thin_end, + iteration=itermask, flatten=False) samples = transforms.apply_transforms(samples, trans) if samples.ndim > 2: - # multi-tempered samplers will return a 3 dims, so flatten + # multi-tempered samplers will return 3 dims, so flatten _, ii, jj = samples.shape samples = samples.reshape((ii, jj)) # Get z-values if opts.z_arg is not None: - logging.info("Getting model stats") - z_arg, zlbl = option_utils.parse_parameters_opt([opts.z_arg]) - z_arg = z_arg[0] - zlbl = zlbl[z_arg] - model_stats = fp.read_model_stats(thin_start=thin_start, - thin_end=thin_end, thin_interval=thinint, iteration=itermask, - flatten=False) - if model_stats.ndim > 2: - _, ii, jj = model_stats.shape - model_stats = model_stats.reshape((ii, jj)) - zvals = model_stats[z_arg] + logging.info("Getting samples for colorbar") + zsamples = fp.samples_from_cli(opts, opts.z_arg, thin_start=thin_start, + thin_interval=thinint, thin_end=thin_end, + iteration=itermask, flatten=False) + zlbl = opts.z_arg_labels[opts.z_arg] + if zsamples.ndim > 2: + _, ii, jj = zsamples.shape + zsamples = zsamples.reshape((ii, jj)) + zvals = zsamples[opts.z_arg] show_colorbar = True # Set common min and max for colorbar in all plots if opts.vmin is None: @@ -236,10 +232,16 @@ fp.close() # get injection values if desired expected_parameters = {} if opts.plot_injection_parameters: - injections = option_utils.injections_from_cli(opts) + injections = io.injections_from_cli(opts) for p in parameters: # check that all of the injections are the same - 
unique_vals = numpy.unique(injections[p]) + try: + vals = injections[p] + except NameError: + # injection doesn't have this parameter, skip + logging.warn("Could not find injection parameter {}".format(p)) + continue + unique_vals = numpy.unique(vals) if unique_vals.size != 1: raise ValueError("More than one injection found! To use " "plot-injection-parameters, there must be a single unique " @@ -247,6 +249,7 @@ if opts.plot_injection_parameters: "option to specify an expected parameter instead.") # passed: use the value for the expected expected_parameters[p] = unique_vals[0] + # get expected parameter values from command line expected_parameters.update(option_utils.expected_parameters_from_cli(opts)) expected_parameters_color = opts.expected_parameters_color diff --git a/bin/gwin_plot_posterior b/bin/gwin_plot_posterior index ce2026f..95598ab 100644 --- a/bin/gwin_plot_posterior +++ b/bin/gwin_plot_posterior @@ -37,88 +37,62 @@ from matplotlib import (patches, use) import pycbc import pycbc.version from pycbc.results import metadata -from pycbc.results.scatter_histograms import create_multidim_plot -from gwin import (__version__, option_utils) +from gwin import (__version__, option_utils, io) +from gwin.results.scatter_histograms import create_multidim_plot use('agg') # add options to command line -parser = argparse.ArgumentParser() +parser = io.ResultsArgumentParser() +# program-specific parser.add_argument("--version", action="version", version=__version__, help="Prints version information.") parser.add_argument("--output-file", type=str, required=True, help="Output plot path.") parser.add_argument("--verbose", action="store_true", default=False, help="Be verbose") -parser.add_argument("--input-file-labels", nargs="+", default=None, - help="Labels to add to plot if using more than one" - "input file.") - # add options for what plots to create option_utils.add_plot_posterior_option_group(parser) - # scatter configuration option_utils.add_scatter_option_group(parser) - # density configuration option_utils.add_density_option_group(parser) -# add standard option utils -option_utils.add_inference_results_option_group(parser) - # parse command line opts = parser.parse_args() # set logging pycbc.init_logging(opts.verbose) -# get parameters -logging.info("Loading parameters") -fp, parameters, labels, samples = option_utils.results_from_cli(opts) +# load the samples +fps, parameters, labels, samples = io.results_from_cli(opts) # typecast to list so the input files can be iterated over -fp = fp if isinstance(fp, list) else [fp] -parameters = parameters if isinstance(parameters[0], list) else [parameters] -labels = labels if isinstance(labels[0], list) else [labels] +fps = fps if isinstance(fps, list) else [fps] samples = samples if isinstance(samples, list) else [samples] -# get likelihood statistic values +# if a z-arg is specified, load samples for it if opts.z_arg is not None: - logging.info("Getting model stats") - - z_arg, f_zlbl = option_utils.parse_parameters_opt([opts.z_arg]) - z_arg = z_arg[0] - f_zlbl = f_zlbl[z_arg] - - # lists to hold z-axis values and labels for each input file + logging.info("Getting samples for colorbar") + zlbl = opts.z_arg_labels[opts.z_arg] zvals = [] - zlbl = [] - - # loop over each input file and append z-axis values and labels to lists - for f in fp: - model_stats = f.read_model_stats( - thin_start=opts.thin_start, thin_end=opts.thin_end, - thin_interval=opts.thin_interval, iteration=opts.iteration) - zvals.append(model_stats[z_arg]) - zlbl.append(f_zlbl) - 
f.close() - -# else there are no z-axis values + for fp in fps: + zsamples = fp.samples_from_cli(opts, parameters=opts.z_arg) + zvals.append(zsamples[opts.z_arg]) else: zvals = None zlbl = None - for f in fp: - f.close() -# determine if colorbar should be shown -show_colorbar = True if opts.z_arg else False +# closet the files, we don't need them anymore +for fp in fps: + fp.close() # if no plotting options selected, then the default options are based # on the number of parameters plot_options = [opts.plot_marginal, opts.plot_scatter, opts.plot_density] if not numpy.any(plot_options): - if len(parameters[0]) == 1: + if len(parameters) == 1: opts.plot_marginal = True else: opts.plot_scatter = True @@ -132,7 +106,7 @@ if not numpy.any(plot_options): mins, maxs = option_utils.plot_ranges_from_cli(opts) # add any missing parameters -for p in parameters[0]: +for p in parameters: if p not in mins: mins[p] = numpy.array([s[p].min() for s in samples]).min() if p not in maxs: @@ -141,10 +115,16 @@ for p in parameters[0]: # get injection values if desired expected_parameters = {} if opts.plot_injection_parameters: - injections = option_utils.injections_from_cli(opts) - for p in parameters[0]: + injections = io.injections_from_cli(opts) + for p in parameters: # check that all of the injections are the same - unique_vals = numpy.unique(injections[p]) + try: + vals = injections[p] + except NameError: + # injection doesn't have this parameter, skip + logging.warn("Could not find injection parameter {}".format(p)) + continue + unique_vals = numpy.unique(vals) if unique_vals.size != 1: raise ValueError("More than one injection found! To use " "plot-injection-parameters, there must be a single unique " @@ -162,7 +142,7 @@ colors = itertools.cycle(["black"] + ["C{}".format(i) for i in range(10)]) # plot each input file logging.info("Plotting") hist_colors = [] -for i, (p, l, s) in enumerate(zip(parameters, labels, samples)): +for (i, s) in enumerate(samples): # on first iteration create figure otherwise update old figure if i == 0: @@ -185,13 +165,13 @@ for i, (p, l, s) in enumerate(zip(parameters, labels, samples)): # plot fig, axis_dict = create_multidim_plot( - p, s, labels=l, fig=fig, axis_dict=axis_dict, + parameters, s, labels=labels, fig=fig, axis_dict=axis_dict, plot_marginal=opts.plot_marginal, marginal_percentiles=opts.marginal_percentiles, plot_scatter=opts.plot_scatter, zvals=zvals[i] if zvals is not None else None, - show_colorbar=show_colorbar, - cbar_label=zlbl[i] if zlbl is not None else None, + show_colorbar=opts.z_arg is not None, + cbar_label=zlbl, vmin=opts.vmin, vmax=opts.vmax, scatter_cmap=opts.scatter_cmap, plot_density=opts.plot_density, @@ -208,7 +188,7 @@ for i, (p, l, s) in enumerate(zip(parameters, labels, samples)): expected_parameters_color=opts.expected_parameters_color) # add legend to upper right for input files -if opts.input_file_labels: +if len(opts.input_file) > 1: handles = [] for color, label in zip(hist_colors, opts.input_file_labels): handles.append(patches.Patch(color=color, label=label)) @@ -218,9 +198,6 @@ if opts.input_file_labels: # set DPI fig.set_dpi(200) -# set tight layout -fig.set_tight_layout(True) - # save metadata.save_fig_with_metadata( fig, opts.output_file, {}, diff --git a/gwin/io/__init__.py b/gwin/io/__init__.py index c284bf6..7c15d3b 100644 --- a/gwin/io/__init__.py +++ b/gwin/io/__init__.py @@ -18,12 +18,21 @@ """ from __future__ import absolute_import +from __future__ import print_function import os +import sys +import argparse import shutil 
+import textwrap +import numpy import logging import h5py as _h5py +from pycbc.io.record import FieldArray, _numpy_function_lib +from pycbc import transforms as _transforms +from pycbc import waveform as _waveform +from ..option_utils import (ParseLabelArg, ParseParametersArg) from .emcee import EmceeFile from .txt import InferenceTXTFile @@ -32,6 +41,32 @@ } +def get_file_type(filename): + """ Returns I/O object to use for file. + + Parameters + ---------- + filename : str + Name of file. + + Returns + ------- + file_type : {InferenceFile, InferenceTXTFile} + The type of inference file object to use. + """ + txt_extensions = [".txt", ".dat", ".csv"] + hdf_extensions = [".hdf", ".h5", ".bkup", ".checkpoint"] + for ext in hdf_extensions: + if filename.endswith(ext): + with _h5py.File(filename, 'r') as fp: + filetype = fp.attrs['filetype'] + return filetypes[filetype] + for ext in txt_extensions: + if filename.endswith(ext): + return InferenceTXTFile + raise TypeError("Extension is not supported.") + + def loadfile(path, mode=None, filetype=None, **kwargs): """Loads the given file using the appropriate InferenceFile class. @@ -60,13 +95,15 @@ def loadfile(path, mode=None, filetype=None, **kwargs): if filetype is None: # try to read the file to get its filetype try: - with _h5py.File(path, 'r') as fp: - filetype = fp.attrs['filetype'] + fileclass = get_file_type(path) except IOError: # file doesn't exist, filetype must be provided raise IOError("The file appears not to exist. In this case, " "filetype must be provided.") - return filetypes[filetype](path, mode=mode, **kwargs) + else: + fileclass = filetypes[filetype] + return fileclass(path, mode=mode, **kwargs) + # # ============================================================================= @@ -204,3 +241,399 @@ def validate_checkpoint_files(checkpoint_file, backup_file): shutil.copy(backup_file, checkpoint_file) checkpoint_valid = True return checkpoint_valid + + +# +# ============================================================================= +# +# Command-line Utilities +# +# ============================================================================= +# +def get_common_parameters(input_files, collection=None): + """Gets a list of variable params that are common across all input files. + + If no common parameters are found, a ``ValueError`` is raised. + + Parameters + ---------- + input_files : list of str + List of input files to load. + collection : str, optional + What group of parameters to load. Can be the name of a list of + parameters stored in the files' attrs (e.g., "variable_params"), or + "all". If "all", will load all of the parameters in the files' + samples group. Default is to load all. + + Returns + ------- + list : + List of the parameter names. + """ + if collection is None: + collection = "all" + parameters = [] + for fn in input_files: + fp = loadfile(fn, 'r') + if collection == 'all': + ps = fp[fp.samples_group].keys() + else: + ps = fp.attrs[collection] + parameters.append(set(ps)) + fp.close() + parameters = list(set.intersection(*parameters)) + if parameters == []: + raise ValueError("no common parameters found for collection {} in " + "files {}".format(collection, ', '.join(input_files))) + return parameters + + +class NoInputFileError(Exception): + """Raised in custom argparse Actions by arguments needing input-files when + no file(s) were provided.""" + pass + + +class PrintFileParams(argparse.Action): + """Argparse action that will load input files and print possible parameters + to screen. 
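The new I/O entry points above give scripts a sampler-agnostic way to open results files: ``get_file_type`` and ``loadfile`` dispatch on the ``filetype`` attribute that every results file now carries, so calling code does not need to know which sampler produced a file. A minimal usage sketch (``results.hdf`` is a hypothetical existing emcee results file; the write-mode call is illustrative only):

from gwin import io
from gwin.io import EmceeFile

# existing file: the class is picked from the file's 'filetype' attr
fp = io.loadfile('results.hdf', 'r')
print(fp.variable_params)
fp.close()

# new file: there is nothing on disk to inspect, so the filetype must be given
fp = io.loadfile('new.hdf', 'w', filetype=EmceeFile.name)
fp.close()

# helper used by the CLI machinery below to find parameters common to all files
params = io.get_common_parameters(['results.hdf'], collection='variable_params')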
Once this is done, the program is forced to exit immediately. + + The behvior is similar to --help, except that the input-file is read. + + .. note:: + The ``input_file`` attribute must be set in the parser namespace before + this action is called. Otherwise, a ``NoInputFileError`` is raised. + """ + def __init__(self, skip_args=None, nargs=0, **kwargs): + if nargs != 0: + raise ValueError("nargs for this action must be 0") + super(PrintFileParams, self).__init__(nargs=nargs, **kwargs) + self.skip_args = skip_args + + def __call__(self, parser, namespace, values, option_string=None): + # get the input file(s) + input_files = namespace.input_file + if input_files is None: + # see if we should raise an error + try: + raise_err = not parser.no_input_file_err + except AttributeError: + raise_err = True + if raise_err: + raise NoInputFileError("must provide at least one input file") + else: + # just return to stop further processing + return + filesbytype = {} + fileparsers = {} + for fn in input_files: + fp = loadfile(fn, 'r') + try: + filesbytype[fp.name].append(fn) + except KeyError: + filesbytype[fp.name] = [fn] + # get any extra options + fileparsers[fp.name], _ = fp.extra_args_parser( + skip_args=self.skip_args, add_help=False) + fp.close() + # now print information about the intersection of all parameters + parameters = get_common_parameters(input_files, collection='all') + print("\n"+textwrap.fill("Parameters available with this (these) " + "input file(s):"), end="\n\n") + print(textwrap.fill(' '.join(sorted(parameters))), + end="\n\n") + # information about the pycbc functions + pfuncs = sorted(FieldArray.functionlib.fget(FieldArray).keys()) + print(textwrap.fill("Available pycbc functions (see " + "http://pycbc.org/pycbc/latest/html for " + "more details):"), end="\n\n") + print(textwrap.fill(', '.join(pfuncs)), end="\n\n") + # numpy funcs + npfuncs = sorted([name for (name, obj) in _numpy_function_lib.items() + if isinstance(obj, numpy.ufunc)]) + print(textwrap.fill("Available numpy functions:"), + end="\n\n") + print(textwrap.fill(', '.join(npfuncs)), end="\n\n") + # misc + consts = "e euler_gamma inf nan pi" + print(textwrap.fill("Recognized constants:"), + end="\n\n") + print(consts, end="\n\n") + print(textwrap.fill("Python arthimetic (+ - * / // ** %), " + "binary (&, |, etc.), and comparison (>, <, >=, " + "etc.) operators may also be used."), end="\n\n") + # print out the extra arguments that may be used + outstr = textwrap.fill("The following are additional command-line " + "options that may be provided, along with the " + "input files that understand them:") + print("\n"+outstr, end="\n\n") + for ftype, fparser in fileparsers.items(): + fnames = ', '.join(filesbytype[ftype]) + if fparser is None: + outstr = textwrap.fill( + "File(s) {} use no additional options.".format(fnames)) + print(outstr, end="\n\n") + else: + fparser.usage = fnames + fparser.print_help() + parser.exit(0) + + +class ResultsArgumentParser(argparse.ArgumentParser): + """Wraps argument parser, and preloads arguments needed for loading samples + from a file. + + This parser class should be used by any program that wishes to use the + standard arguments for loading samples. It provides functionality to parse + file specific options. These file-specific arguments are not included in + the standard ``--help`` (since they depend on what input files are given), + but can be seen by running ``--file-help/-H``. 
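The listings this action prints are built by introspection rather than a hard-coded table, so the same lookups can be run interactively when writing or debugging a new file-type class. A short sketch of the two lookups the action relies on:

import numpy
from pycbc.io.record import FieldArray, _numpy_function_lib

# parameter functions FieldArray understands (usable in --parameters expressions)
pfuncs = sorted(FieldArray.functionlib.fget(FieldArray).keys())

# numpy ufuncs that may also appear in --parameters expressions
npfuncs = sorted(name for (name, obj) in _numpy_function_lib.items()
                 if isinstance(obj, numpy.ufunc))

print(', '.join(pfuncs))
print(', '.join(npfuncs))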
The ``--file-help`` will + also print off information about what parameters may be used given the + input files. + + As with the standard ``ArgumentParser``, running this class's + ``parse_args`` will result in an error if arguments are provided that are + not recognized by the parser, nor by any of the file-specific arguments. + For example, ``parse_args`` would work on the command + ``--input-file results.hdf --walker 0`` if + ``results.hdf`` was created by a sampler that recognizes a ``--walker`` + argument, but would raise an error if ``results.hdf`` was created by a + sampler that does not recognize a ``--walker`` argument. The extra + arguments that are recognized are determined by the sampler IO class's + ``extra_args_parser``. + + Some arguments may be excluded from the parser using the ``skip_args`` + optional parameter. + + Parameters + ---------- + skip_args : list of str, optional + Do not add the given arguments to the parser. Arguments should be + specified as the option string minus the leading '--'; e.g., + ``skip_args=['thin-start']`` would cause the ``thin-start`` argument + to not be included. May also specify sampler-specific arguments. Note + that ``input-file``, ``file-help``, and ``parameters`` are always + added. + \**kwargs : + All other keyword arguments are passed to ``argparse.ArgumentParser``. + """ + def __init__(self, skip_args=None, **kwargs): + super(ResultsArgumentParser, self).__init__(**kwargs) + # add attribute to communicate to arguments what to do when there is + # no input files + self.no_input_file_err = False + if skip_args is None: + skip_args = [] + self.skip_args = skip_args + # add the results option grup + self.add_results_option_group() + + @property + def actions(self): + """Exposes the actions this parser can do as a dictionary. + + The dictionary maps the ``dest`` to actions. + """ + return {act.dest: act for act in self._actions} + + def _unset_required(self): + """Convenience function to turn off required arguments for first parse. + """ + self._required_args = [act for act in self._actions if act.required] + for act in self._required_args: + act.required = False + + def _reset_required(self): + """Convenience function to turn required arguments back on. + """ + for act in self._required_args: + act.required = True + + def parse_known_args(self, args=None, namespace=None): + """Parse args method to handle input-file dependent arguments.""" + # run parse args once to make sure the name space is populated with + # the input files. 
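The double call to the parent ``parse_known_args`` that follows is the key trick: a first, permissive pass (requirements relaxed, no input-file errors raised) collects ``--input-file`` so the files can be opened, then a strict second pass re-parses with the requirements restored and the file-derived options available. A stripped-down sketch of the same two-pass idea, independent of the gwin class:

import argparse

class TwoPassParser(argparse.ArgumentParser):
    """Sketch only: permissive first pass, strict second pass."""
    def parse_known_args(self, args=None, namespace=None):
        required = [act for act in self._actions if act.required]
        for act in required:   # relax requirements for the first pass
            act.required = False
        opts, extra = super(TwoPassParser, self).parse_known_args(args, namespace)
        for act in required:   # restore them for the second pass
            act.required = True
        # ...here opts.input_file could be inspected to add file-specific args...
        return super(TwoPassParser, self).parse_known_args(args, opts)

parser = TwoPassParser()
parser.add_argument('--input-file', nargs='+', required=True)
opts, extra = parser.parse_known_args(['--input-file', 'results.hdf', '--walker', '0'])
# opts.input_file == ['results.hdf']; extra == ['--walker', '0']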
We'll turn off raising NoInputFileErrors on this + # pass + self.no_input_file_err = True + self._unset_required() + opts, extra_opts = super(ResultsArgumentParser, self).parse_known_args( + args, namespace) + # now do it again + self.no_input_file_err = False + self._reset_required() + opts, extra_opts = super(ResultsArgumentParser, self).parse_known_args( + args, opts) + # populate the parameters option if it wasn't specified + if opts.parameters is None: + parameters = get_common_parameters(opts.input_file, + collection='variable_params') + # now call parse parameters action to populate the namespace + self.actions['parameters'](self, opts, parameters) + # parse the sampler-specific options and check for any unknowns + unknown = [] + for fn in opts.input_file: + fp = loadfile(fn, 'r') + sampler_parser, _ = fp.extra_args_parser(skip_args=self.skip_args) + if sampler_parser is not None: + opts, still_unknown = sampler_parser.parse_known_args( + extra_opts, namespace=opts) + unknown.append(set(still_unknown)) + # the intersection of the unknowns are options not understood by + # any of the files + unknown = set.intersection(*unknown) + return opts, list(unknown) + + def add_results_option_group(self): + """Adds the options used to call gwin.io.results_from_cli function + to the parser. + + These are options releated to loading the results from a run of + gwin, for purposes of plotting and/or creating tables. + + Any argument strings included in the ``skip_args`` attribute will not + be added. + """ + results_reading_group = self.add_argument_group( + title="Arguments for loading results", + description="Additional, file-specific arguments may also be " + "provided, depending on what input-files are given. See " + "--file-help for details.") + results_reading_group.add_argument( + "--input-file", type=str, required=True, nargs="+", + action=ParseLabelArg, metavar='FILE[:LABEL]', + help="Path to input HDF file(s). A label may be specified for " + "each input file to use for plots when multiple files are " + "specified.") + # advanced help + results_reading_group.add_argument( + "-H", "--file-help", + action=PrintFileParams, skip_args=self.skip_args, + help="Based on the provided input-file(s), print all available " + "parameters that may be retrieved and all possible functions " + "on those parameters. Also print available additional " + "arguments that may be passed. This option is like an " + "advanced --help: if run, the program will just print the " + "information to screen, then exit.") + results_reading_group.add_argument( + "--parameters", type=str, nargs="+", metavar="PARAM[:LABEL]", + action=ParseParametersArg, + help="Name of parameters to load. If none provided will load all " + "of the model params in the input-file. If provided, the " + "parameters can be any of the model params or posterior " + "stats (loglikelihood, logprior, etc.) in the input file(s), " + "derived parameters from them, or any function of them. If " + "multiple files are provided, any parameter common to all " + "files may be used. Syntax for functions is python; any math " + "functions in the numpy libary may be used. Can optionally " + "also specify a LABEL for each parameter. If no LABEL is " + "provided, PARAM will used as the LABEL. If LABEL is the " + "same as a parameter in pycbc.waveform.parameters, the label " + "property of that parameter will be used (e.g., if LABEL " + "were 'mchirp' then {} would be used). 
To see all possible " + "parameters that may be used with the given input file(s), " + "as well as all avaiable functions, run --file-help, along " + "with one or more input files.".format( + _waveform.parameters.mchirp.label)) + return results_reading_group + + +def results_from_cli(opts, load_samples=True): + """Loads an inference result file along with any labels associated with it + from the command line options. + + Parameters + ---------- + opts : ArgumentParser options + The options from the command line. + load_samples : bool, optional + Load the samples from the file. + + Returns + ------- + fp_all : (list of) BaseInferenceFile type + The result file as an hdf file. If more than one input file, + then it returns a list. + parameters : list of str + List of the parameters to use, parsed from the parameters option. + labels : dict + Dictionary of labels to associate with the parameters. + samples_all : (list of) FieldArray(s) or None + If load_samples, the samples as a FieldArray; otherwise, None. + If more than one input file, then it returns a list. + """ + + # lists for files and samples from all input files + fp_all = [] + samples_all = [] + + input_files = opts.input_file + if isinstance(input_files, str): + input_files = [input_files] + + # loop over all input files + for input_file in input_files: + logging.info("Reading input file %s", input_file) + + # read input file + fp = loadfile(input_file, "r") + + # load the samples + if load_samples: + logging.info("Loading samples") + + # check if need extra parameters for a non-sampling parameter + file_parameters, ts = _transforms.get_common_cbc_transforms( + opts.parameters, fp.variable_params) + + # read samples from file + samples = fp.samples_from_cli(opts, parameters=file_parameters) + + logging.info("Using {} samples".format(samples.size)) + + # add parameters not included in file + samples = _transforms.apply_transforms(samples, ts) + + # else do not read samples + else: + samples = None + + # add results to lists from all input files + if len(input_files) > 1: + fp_all.append(fp) + samples_all.append(samples) + + # else only one input file then do not return lists + else: + fp_all = fp + samples_all = samples + + return fp_all, opts.parameters, opts.parameters_labels, samples_all + + +def injections_from_cli(opts): + """Gets injection parameters from the inference file(s). + + Parameters + ---------- + opts : argparser + Argparser object that has the command-line objects to parse. + + Returns + ------- + FieldArray + Array of the injection parameters from all of the input files given + by ``opts.input_file``. + """ + input_files = opts.input_file + if isinstance(input_files, str): + input_files = [input_files] + injections = None + # loop over all input files getting the injection files + for input_file in input_files: + fp = loadfile(input_file, 'r') + these_injs = fp.read_injections() + if injections is None: + injections = these_injs + else: + injections = injections.append(these_injs) + return injections diff --git a/gwin/io/base_hdf.py b/gwin/io/base_hdf.py index 8a1665c..cd66929 100644 --- a/gwin/io/base_hdf.py +++ b/gwin/io/base_hdf.py @@ -40,7 +40,7 @@ from pycbc.io import FieldArray from pycbc.types import FrequencySeries from pycbc.waveform import parameters as wfparams - +from pycbc.inject import InjectionSet class BaseInferenceFile(h5py.File): """Base class for all inference hdf files. 
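Taken together, ``ResultsArgumentParser``, ``results_from_cli``, and ``injections_from_cli`` replace the old option_utils helpers for post-processing executables. A minimal skeleton of how a results-reading program can use them (a sketch, not one of the shipped executables; the ``--verbose`` flag and the ``skip_args`` choice are illustrative):

#!/usr/bin/env python
"""Sketch of a results-reading executable."""
import pycbc
from gwin import io

parser = io.ResultsArgumentParser(skip_args=['walkers'])
parser.add_argument("--verbose", action="store_true", default=False)
opts = parser.parse_args()
pycbc.init_logging(opts.verbose)

# fps and samples are lists when more than one --input-file is given
fps, parameters, labels, samples = io.results_from_cli(opts)
if not isinstance(fps, list):
    fps, samples = [fps], [samples]
for fp, s in zip(fps, samples):
    print("{}: {} samples".format(fp.filename, s.size))
    fp.close()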
@@ -212,6 +212,114 @@ def write_posterior(self, posterior_file, **kwargs): """ pass + @abstractmethod + def samples_from_cli(self, opts, extra_opts=None, parameters=None, + **kwargs): + """This should load samples using the given command-line options. + """ + pass + + @staticmethod + def extra_args_parser(parser=None, skip_args=None, **kwargs): + """Provides a parser that can be used to parse sampler-specific command + line options for loading samples. + + This is optional. Inheriting classes may override this if they want to + implement their own options. + + Parameters + ---------- + parser : argparse.ArgumentParser, optional + Instead of creating a parser, add arguments to the given one. If + none provided, will create one. + skip_args : list, optional + Don't include the given options. Options should be given as the + option string, minus the '--'. For example, + ``skip_args=['iteration']`` would cause the ``--iteration`` + argument not to be included. + \**kwargs : + All other keyword arguments are passed to the parser that is + created. + + Returns + ------- + parser : argparse.ArgumentParser or None + If this class adds extra arguments, an argument parser with the + extra arguments. Otherwise, will just return whatever was passed + for the ``parser`` argument (default is None). + actions : list of argparse.Action + List of the actions that were added. + """ + return parser, [] + + @staticmethod + def _get_optional_args(args, opts, err_on_missing=False, **kwargs): + """Convenience function to retrieve arguments from an argparse + namespace. + + Parameters + ---------- + args : list of str + List of arguments to retreive. + opts : argparse.namespace + Namespace to retreive arguments for. + err_on_missing : bool, optional + If an argument is not found in the namespace, raise an + AttributeError. Otherwise, just pass. Default is False. + \**kwargs : + All other keyword arguments are added to the return dictionary. + Any keyword argument that is the same as an argument in ``args`` + will override what was retrieved from ``opts``. + + Returns + ------- + dict : + Dictionary mapping arguments to values retrieved from ``opts``. If + keyword arguments were provided, these will also be included in the + dictionary. + """ + parsed = {} + for arg in args: + try: + parsed[arg] = getattr(opts, arg) + except AttributeError as e: + if err_on_missing: + raise AttributeError(e) + else: + continue + parsed.update(kwargs) + return parsed + + def samples_from_cli(self, opts, parameters=None, **kwargs): + """Reads samples from the given command-line options. + + Parameters + ---------- + opts : argparse Namespace + The options with the settings to use for loading samples (the sort + of thing returned by ``ArgumentParser().parse_args``). + parameters : (list of) str, optional + A list of the parameters to load. If none provided, will try to + get the parameters to load from ``opts.parameters``. + \**kwargs : + All other keyword arguments are passed to ``read_samples``. These + will override any options with the same name. + + Returns + ------- + FieldArray : + Array of the loaded samples. 
+ """ + if parameters is None and opts.parameters is None: + parameters = self.variable_args + elif parameters is None: + parameters = opts.parameters + # parse optional arguments + _, extra_actions = self.extra_args_parser() + extra_args = [act.dest for act in extra_actions] + kwargs = self._get_optional_args(extra_args, opts, **kwargs) + return self.read_samples(parameters, **kwargs) + @property def static_params(self): """Returns a dictionary of the static_params. The keys are the argument @@ -415,7 +523,6 @@ def write_psd(self, psds, group=None): if group is None: group = subgroup else: - print group, subgroup group = '/'.join([group, subgroup]) for ifo in psds: self[group.format(ifo=ifo)] = psds[ifo] @@ -437,6 +544,20 @@ def write_injections(self, injection_file): except IOError: logging.warn("Could not read %s as an HDF file", injection_file) + def read_injections(self): + """Gets injection parameters. + + Returns + ------- + FieldArray + Array of the injection parameters. + """ + injset = InjectionSet(self.filename, hdf_group=self.injections_group) + injections = injset.table.view(FieldArray) + # close the new open filehandler to self + injset._injhandler.filehandler.close() + return injections + def write_command_line(self): """Writes command line to attributes. @@ -633,27 +754,27 @@ def copy(self, other, ignore=None, parameters=None, parameter_names=None, other.attrs['thin_end'] = None return other + @classmethod + def write_kwargs_to_attrs(cls, attrs, **kwargs): + """Writes the given keywords to the given ``attrs``. -def write_kwargs_to_hdf_attrs(attrs, **kwargs): - """Writes the given keywords to the given ``attrs``. - - If any keyword argument points to a dict, the keyword will point to a - list of the dict's keys. Each key is then written to the attrs with its - corresponding value. + If any keyword argument points to a dict, the keyword will point to a + list of the dict's keys. Each key is then written to the attrs with its + corresponding value. - Parameters - ---------- - attrs : an HDF attrs - Can be either the ``attrs`` of the hdf file, or any group in a file. - \**kwargs : - The keywords to write. - """ - for arg, val in kwargs.items(): - if val is None: - val = str(None) - if isinstance(val, dict): - attrs[arg] = val.keys() - # just call self again with the dict as kwargs - write_kwargs_to_hdf_attrs(attrs, **val) - else: - attrs[arg] = val + Parameters + ---------- + attrs : an HDF attrs + The ``attrs`` of an hdf file or a group in an hdf file. + \**kwargs : + The keywords to write. + """ + for arg, val in kwargs.items(): + if val is None: + val = str(None) + if isinstance(val, dict): + attrs[arg] = val.keys() + # just call self again with the dict as kwargs + cls.write_kwargs_to_attrs(attrs, **val) + else: + attrs[arg] = val diff --git a/gwin/io/base_mcmc.py b/gwin/io/base_mcmc.py index f77247f..0d60935 100644 --- a/gwin/io/base_mcmc.py +++ b/gwin/io/base_mcmc.py @@ -29,7 +29,7 @@ from abc import (ABCMeta, abstractmethod) import numpy -from .base_hdf import write_kwargs_to_hdf_attrs +import argparse class MCMCIO(object): @@ -148,6 +148,77 @@ def read_raw_samples(self, fields, arrays[name] = arr return arrays + @staticmethod + def extra_args_parser(parser=None, skip_args=None, **kwargs): + """Create a parser to parse sampler-specific arguments for loading + samples. + + Parameters + ---------- + parser : argparse.ArgumentParser, optional + Instead of creating a parser, add arguments to the given one. If + none provided, will create one. 
+ skip_args : list, optional + Don't parse the given options. Options should be given as the + option string, minus the '--'. For example, + ``skip_args=['iteration']`` would cause the ``--iteration`` + argument not to be included. + \**kwargs : + All other keyword arguments are passed to the parser that is + created. + + Returns + ------- + parser : argparse.ArgumentParser + An argument parser with th extra arguments added. + actions : list of argparse.Action + A list of the actions that were added. + """ + if parser is None: + parser = argparse.ArgumentParser(**kwargs) + elif kwargs: + raise ValueError("No other keyword arguments should be provded if " + "a parser is provided.") + if skip_args is None: + skip_args = [] + actions = [] + if 'thin-start' not in skip_args: + act = parser.add_argument( + "--thin-start", type=int, default=None, + help="Sample number to start collecting samples to plot. If " + "none provided, will use the input file's `thin_start` " + "attribute.") + actions.append(act) + if 'thin-interval' not in skip_args: + act = parser.add_argument( + "--thin-interval", type=int, default=None, + help="Interval to use for thinning samples. If none provided, " + "will use the input file's `thin_interval` attribute.") + actions.append(act) + if 'thin-end' not in skip_args: + act = parser.add_argument( + "--thin-end", type=int, default=None, + help="Sample number to stop collecting samples to plot. If " + "none provided, will use the input file's `thin_end` " + "attribute.") + actions.append(act) + if 'iteration' not in skip_args: + act = parser.add_argument( + "--iteration", type=int, default=None, + help="Only retrieve the given iteration. To load " + "the last n-th sampe use -n, e.g., -1 will " + "load the last iteration. This overrides " + "the thin-start/interval/end options.") + actions.append(act) + if 'walkers' not in skip_args: + act = parser.add_argument( + "--walkers", type=int, nargs="+", default=None, + help="Only retrieve samples from the listed " + "walkers. 
Default is to retrieve from all " + "walkers.") + actions.append(act) + return parser, actions + def write_resume_point(self): """Keeps a list of the number of iterations that were in a file when a run was resumed from a checkpoint.""" @@ -248,4 +319,4 @@ def write_burn_in(self, burn_in): except KeyError: group.create_group(key) attrs = group[key].attrs - write_kwargs_to_hdf_attrs(attrs, **burn_in.burn_in_data[tst]) + self.write_kwargs_to_attrs(attrs, **burn_in.burn_in_data[tst]) diff --git a/gwin/models/base.py b/gwin/models/base.py index e15dc57..a734ee5 100644 --- a/gwin/models/base.py +++ b/gwin/models/base.py @@ -34,7 +34,6 @@ from pycbc.io import FieldArray from pycbc.workflow import ConfigParser -from gwin.io.base_hdf import write_kwargs_to_hdf_attrs # # ============================================================================= @@ -755,4 +754,4 @@ def write_metadata(self, fp): fp.attrs['model'] = self.name fp.attrs['variable_params'] = list(self.variable_params) fp.attrs['sampling_params'] = list(self.sampling_params) - write_kwargs_to_hdf_attrs(fp.attrs, static_params=self.static_params) + fp.write_kwargs_to_attrs(fp.attrs, static_params=self.static_params) diff --git a/gwin/option_utils.py b/gwin/option_utils.py index 47ff79c..813cb37 100644 --- a/gwin/option_utils.py +++ b/gwin/option_utils.py @@ -19,6 +19,7 @@ import logging import shutil +import argparse from pycbc import (conversions, inject, transforms) from pycbc.distributions import (bounded, constraints) @@ -29,10 +30,7 @@ from pycbc.strain import from_cli_multi_ifos as strain_from_cli_multi_ifos from pycbc.strain import (gates_from_cli, psd_gates_from_cli, apply_gates_to_td, apply_gates_to_fd) - -from gwin import (burn_in, models, sampler) -from gwin.io.hdf import InferenceFile, check_integrity -from gwin.io.txt import InferenceTXTFile +from pycbc import waveform # ----------------------------------------------------------------------------- @@ -67,124 +65,6 @@ def config_parser_from_cli(opts): return WorkflowConfigParser(opts.config_files, overrides) -# ----------------------------------------------------------------------------- -# -# Utilities for setting up a sampler -# -# ----------------------------------------------------------------------------- - -def add_sampler_option_group(parser): - """Adds the options needed to set up an inference sampler. - - Parameters - ---------- - parser : object - ArgumentParser instance. - """ - sampler_group = parser.add_argument_group( - "Arguments for setting up a sampler") - - # required options - sampler_group.add_argument( - "--sampler", required=True, choices=sampler.samplers.keys(), - help="Sampler class to use for finding posterior.") - sampler_group.add_argument( - "--niterations", type=int, - help="Number of iterations to perform. If 'use_sampler' is given to " - "burn-in-function, this will be counted after the sampler's burn " - "function has run. Otherwise, this is the total number of " - "iterations, including any burn in.") - sampler_group.add_argument( - "--n-independent-samples", type=int, - help="Run the sampler until the specified number of " - "independent samples is obtained, at minimum. Requires " - "checkpoint-interval. At each checkpoint the burn-in iteration " - "and ACL is updated. The number of independent samples is the " - "number of samples across all walkers starting at the " - "burn-in-iteration and skipping every `ACL`th iteration. 
" - "Either this or niteration should be specified (but not both).") - # sampler-specific options - sampler_group.add_argument( - "--nwalkers", type=int, default=None, - help="Number of walkers to use in sampler. Required for MCMC " - "samplers.") - sampler_group.add_argument( - "--ntemps", type=int, default=None, - help="Number of temperatures to use in sampler. Required for parallel " - "tempered MCMC samplers.") - sampler_group.add_argument( - "--burn-in-function", default=None, nargs='+', - choices=burn_in.burn_in_functions.keys(), - help="Use the given function to determine when chains are burned in. " - "If none provided, no burn in will be estimated. " - "If multiple functions are provided, will use the maximum " - "iteration from all functions.") - sampler_group.add_argument( - "--min-burn-in", type=int, default=0, - help="Force the burn-in to be at least the given number of " - "iterations.") - sampler_group.add_argument( - "--update-interval", type=int, default=None, - help="If using kombine, specify the number of steps to take between " - "proposal updates. Note: for purposes of updating, kombine " - "counts iterations since the last checkpoint. This interval " - "should therefore be less than the checkpoint interval, else " - "no updates will occur. To ensure that updates happen at equal " - "intervals, make checkpoint-interval a multiple of " - "update-interval.") - sampler_group.add_argument( - "--nprocesses", type=int, default=None, - help="Number of processes to use. If not given then use maximum.") - sampler_group.add_argument( - "--use-mpi", action='store_true', default=False, - help="Use MPI to parallelize the sampler") - sampler_group.add_argument( - "--logpost-function", default="logposterior", - help="Which attribute of the model to use for the logposterior. " - "The default is logposterior. For example, if using the " - "gaussian_noise model, you may wish to set this to logplr, since " - "the logposterior includes a large constant contribution from " - "log noise likelihood.") - - return sampler_group - - -def sampler_from_cli(opts, model, pool=None): - """Parses the given command-line options to set up a sampler. - - Parameters - ---------- - opts : object - ArgumentParser options. - model : model - The model to use with the sampler. - - Returns - ------- - gwin.sampler - A sampler initialized based on the given arguments. - """ - # create a wrapper for the model - model = models.CallModel(model, opts.logpost_function) - - # Used to help paralleize over multiple cores / MPI - if opts.nprocesses > 1: - models._global_instance = model - model_call = models._call_global_model - else: - model_call = None - - sclass = sampler.samplers[opts.sampler] - - pool = choose_pool(mpi=opts.use_mpi, processes=opts.nprocesses) - - if pool is not None: - pool.count = opts.nprocesses - - return sclass.from_cli(opts, model, - pool=pool, model_call=model_call) - - # ----------------------------------------------------------------------------- # # Utilities for loading data @@ -299,226 +179,112 @@ def data_from_cli(opts): # ----------------------------------------------------------------------------- # -# Utilities for loading and plotting results +# Utilities for plotting results # # ----------------------------------------------------------------------------- -def add_inference_results_option_group(parser, include_parameters_group=True): - """Adds the options used to call gwin.results_from_cli function - to an argument parser. 
These are options releated to loading the results - from a run of pycbc_inference, for purposes of plotting and/or creating - tables. - Parameters - ---------- - parser : object - ArgumentParser instance. - include_parameters_group : bool - If true then include `--parameters-group` option. - """ +class ParseLabelArg(argparse.Action): + """Argparse action that will parse arguments that can accept labels. - results_reading_group = parser.add_argument_group( - "Arguments for loading inference results") - - # required options - results_reading_group.add_argument( - "--input-file", type=str, required=True, nargs="+", - help="Path to input HDF files.") - results_reading_group.add_argument( - "--parameters", type=str, nargs="+", metavar="PARAM[:LABEL]", - help="Name of parameters to load. If none provided will load all of " - "the model params in the input-file. If provided, the " - "parameters can be any of the model params or posteriors in " - "the input file, derived parameters from them, or any function " - "of them. Syntax for functions is python; any math functions in " - "the numpy libary may be used. Can optionally also specify a " - "label for each parameter. If no label is provided, will try to " - "retrieve a label from the input-file. If no label can be found " - "in the input-file, will try to get a label from " - "pycbc.waveform.parameters. If no label can be found in either " - "place, will just use the parameter.") - - # optionals - results_reading_group.add_argument( - "--thin-start", type=int, default=None, - help="Sample number to start collecting samples to plot. If none " - "provided, will start at the end of the burn-in.") - results_reading_group.add_argument( - "--thin-interval", type=int, default=None, - help="Interval to use for thinning samples. If none provided, will " - "use the auto-correlation length found in the file.") - results_reading_group.add_argument( - "--thin-end", type=int, default=None, - help="Sample number to stop collecting samples to plot. If none " - "provided, will stop at the last sample from the sampler.") - results_reading_group.add_argument( - "--iteration", type=int, default=None, - help="Only retrieve the given iteration. To load the last n-th sampe " - "use -n, e.g., -1 will load the last iteration. This overrides " - "the thin-start/interval/end options.") - if include_parameters_group: - results_reading_group.add_argument( - "--parameters-group", type=str, - default=InferenceFile.samples_group, - choices=[InferenceFile.samples_group, InferenceFile.stats_group], - help="Group in the HDF InferenceFile to look for parameters.") - - return results_reading_group - - -def parse_parameters_opt(parameters): - """Parses the --parameters opt in the results_reading_group. + This assumes that the values set on the command line for its assigned + argument are strings formatted like ``PARAM[:LABEL]``. When the arguments + are parsed, the ``LABEL`` bit is stripped off and added to a dictionary + mapping ``PARAM -> LABEL``. This dictionary is stored to the parsed + namespace called ``{dest}_labels``, where ``{dest}`` is the argument's + ``dest`` setting (by default, this is the same as the option string). + Likewise, the argument's ``dest`` in the parsed namespace is updated so + that it is just ``PARAM``. - Parameters - ---------- - parameters : list of str or None - The parameters to parse. - Returns - ------- - parameters : list of str - The parameters. - labels : dict - A dictionary mapping parameters for which labels were provide to those - labels. 
- """ - if parameters is None: - return None, {} - # load the labels - labels = {} - for ii, p in enumerate(parameters): - if len(p.split(':')) == 2: - p, label = p.split(':') - parameters[ii] = p - labels[p] = label - return parameters, labels - - -def results_from_cli(opts, load_samples=True, **kwargs): - """ - Loads an inference result file along with any labels associated with it - from the command line options. + If no ``LABEL`` is provided, then ``PARAM`` will be used for ``LABEL``. - Parameters - ---------- - opts : ArgumentParser options - The options from the command line. - load_samples : {True, bool} - Load samples from the results file using the parameters, thin_start, - and thin_interval specified in the options. The samples are returned - as a FieldArray instance. - - **kwargs : - All other keyword arguments are passed to the InferenceFile's - read_samples function. - - Returns - ------- - fp_all : pycbc.io.InferenceFile - The result file as an InferenceFile. If more than one input file, - then it returns a list. - parameters_all : list - List of the parameters to use, parsed from the parameters option. - If more than one input file, then it returns a list. - labels_all : list - List of labels to associate with the parameters. If more than one - input file, then it returns a list. - samples_all : {None, FieldArray} - If load_samples, the samples as a FieldArray; otherwise, None. - If more than one input file, then it returns a list. + This action can work on arguments that have ``nargs != 0`` and ``type`` set + to ``str``. """ - - # lists for files and samples from all input files - fp_all = [] - parameters_all = [] - labels_all = [] - samples_all = [] - - input_files = opts.input_file - if isinstance(input_files, str): - input_files = [input_files] - - # loop over all input files - for input_file in input_files: - logging.info("Reading input file %s", input_file) - - # read input file - fp = InferenceFile(input_file, "r") - - # get parameters and a dict of labels for each parameter - parameters = (fp.variable_params if opts.parameters is None - else opts.parameters) - parameters, ldict = parse_parameters_opt(parameters) - - # convert labels dict to list - labels = [] - for p in parameters: - try: - label = ldict[p] - except KeyError: - label = fp.read_label(p) - labels.append(label) - - # load the samples - if load_samples: - logging.info("Loading samples") - - # check if need extra parameters for a non-sampling parameter - file_parameters, ts = transforms.get_common_cbc_transforms( - parameters, fp.variable_params) - - # read samples from file - samples = fp.read_samples( - file_parameters, thin_start=opts.thin_start, - thin_interval=opts.thin_interval, thin_end=opts.thin_end, - iteration=opts.iteration, - samples_group=opts.parameters_group, **kwargs) - - # add parameters not included in file - samples = transforms.apply_transforms(samples, ts) - - # else do not read samples - else: - samples = None - - # add results to lists from all input files - if len(input_files) > 1: - fp_all.append(fp) - parameters_all.append(parameters) - labels_all.append(labels) - samples_all.append(samples) - - # else only one input file then do not return lists - else: - fp_all = fp - parameters_all = parameters - labels_all = labels - samples_all = samples - - return fp_all, parameters_all, labels_all, samples_all - - -def get_file_type(filename): - """ Returns I/O object to use for file. - - Parameters - ---------- - filename : str - Name of file. 
- - Returns - ------- - file_type : {InferenceFile, InferenceTXTFile} - The type of inference file object to use. + def __init__(self, type=str, nargs=None, **kwargs): + # check that type is string + if type != str: + raise ValueError("the type for this action must be a string") + if nargs == 0: + raise ValueError("nargs must not be 0 for this action") + super(ParseLabelArg, self).__init__(type=type, nargs=nargs, + **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + singlearg = isinstance(values, (str, unicode)) + if singlearg: + values = [values] + params = [] + labels = {} + for param in values: + psplit = param.split(':') + if len(psplit) == 2: + param, label = psplit + else: + label = param + labels[param] = label + params.append(param) + # update the namespace + if singlearg: + params = params[0] + setattr(namespace, self.dest, params) + setattr(namespace, '{}_labels'.format(self.dest), labels) + + +class ParseParametersArg(ParseLabelArg): + """Argparse action that will parse parameters and labels from an opton. + + Does the same as ``ParseLabelArg``, with the additional functionality that + if ``LABEL`` is a known parameter in ``pycbc.waveform.parameters``, then + the label attribute there will be used in the labels dictionary. + Otherwise, ``LABEL`` will be used. + + Examples + -------- + Create a parser and add two arguments that use this action (note that the + first argument accepts multiple inputs while the second only accepts a + single input): + + >>> import argparse + >>> parser = argparse.ArgumentParser() + >>> parser.add_argument('--parameters', type=str, nargs="+", + action=ParseParametersArg) + >>> parser.add_argument('--z-arg', type=str, action=ParseParametersArg) + + Parse a command line that uses these options: + + >>> import shlex + >>> cli = "--parameters 'mass1+mass2:mtotal' ra ni --z-arg foo:bar" + >>> opts = parser.parse_args(shlex.split(cli)) + >>> opts.parameters + ['mass1+mass2', 'ra', 'ni'] + >>> opts.parameters_labels + {'mass1+mass2': '$M~(\\mathrm{M}_\\odot)$', 'ni': 'ni', 'ra': '$\\alpha$'} + >>> opts.z_arg + 'foo' + >>> opts.z_arg_labels + {'foo': 'bar'} + + In the above, the first argument to ``--parameters`` was ``mtotal``. Since + this is a recognized parameter in ``pycbc.waveform.parameters``, the label + dictionary contains the latex string associated with the ``mtotal`` + parameter. A label was not provided for the second argument, and so ``ra`` + was used. Since ``ra`` is also a recognized parameter, its associated latex + string was used in the labels dictionary. Since ``ni`` and ``bar`` (the + label for ``z-arg``) are not recognized parameters, they were just used + as-is in the labels dictionaries. 
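A portability note on ``ParseLabelArg.__call__`` above (and the similar check in ``ParseTempsArg`` added later in this series): ``unicode`` only exists under Python 2, so the single-argument test would raise a ``NameError`` on Python 3. A hedged, version-agnostic alternative, not what the module currently does, is to build the tuple of string types once:

try:
    string_types = (str, unicode)   # Python 2
except NameError:
    string_types = (str,)           # Python 3

# ...inside __call__...
singlearg = isinstance(values, string_types)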
""" - txt_extensions = [".txt", ".dat", ".csv"] - hdf_extensions = [".hdf", ".h5"] - for ext in hdf_extensions: - if filename.endswith(ext): - return InferenceFile - for ext in txt_extensions: - if filename.endswith(ext): - return InferenceTXTFile - raise TypeError("Extension is not supported.") + def __call__(self, parser, namespace, values, option_string=None): + super(ParseParametersArg, self).__call__(parser, namespace, values, + option_string=option_string) + # try to replace the labels with a label from waveform.parameters + labels = getattr(namespace, '{}_labels'.format(self.dest)) + for param, label in labels.items(): + try: + label = getattr(waveform.parameters, label).label + labels[param] = label + except AttributeError: + pass def add_plot_posterior_option_group(parser): @@ -585,9 +351,6 @@ def add_plot_posterior_option_group(parser): "injection in the file to work. Any values " "specified by expected-parameters will override " "the values obtained for the injection.") - # FIXME: the following should be made an attribute of the results file - pgroup.add_argument("--injection-hdf-group", default="H1/injections", - help="HDF group that contains injection values.") return pgroup @@ -626,47 +389,6 @@ def plot_ranges_from_cli(opts): return mins, maxs -def injections_from_cli(opts): - """Gets injection parameters from the inference file(s). - - Parameters - ---------- - opts : argparser - Argparser object that has the command-line objects to parse. - - Returns - ------- - FieldArray - Array of the injection parameters from all of the input files given - by ``opts.input_file``. - """ - input_files = opts.input_file - if isinstance(input_files, str): - input_files = [input_files] - parameters, _ = parse_parameters_opt(opts.parameters) - if parameters is None: - with InferenceFile(input_files[0], 'r') as fp: - parameters = fp.variable_params - injections = None - # loop over all input files getting the injection files - for input_file in input_files: - # read injections from HDF input file as FieldArray - these_injs = inject.InjectionSet( - input_file, - hdf_group=opts.injection_hdf_group, - ).table.view(FieldArray) - if injections is None: - injections = these_injs - else: - injections = injections.append(these_injs) - # check if need extra parameters than parameters stored in injection file - _, ts = transforms.get_common_cbc_transforms(parameters, - injections.fieldnames) - # add parameters not included in injection file - injections = transforms.apply_transforms(injections, ts) - return injections - - def expected_parameters_from_cli(opts): """Parses the --expected-parameters arguments from the `plot_posterior` option group. @@ -705,7 +427,7 @@ def add_scatter_option_group(parser): "scatter plot.") scatter_group.add_argument( - '--z-arg', type=str, default=None, + '--z-arg', type=str, default=None, action=ParseParametersArg, help='What to color the scatter points by. 
Syntax is the same as the ' 'parameters option.') scatter_group.add_argument( diff --git a/gwin/results/scatter_histograms.py b/gwin/results/scatter_histograms.py index 740b85b..36d34b4 100644 --- a/gwin/results/scatter_histograms.py +++ b/gwin/results/scatter_histograms.py @@ -341,7 +341,7 @@ def create_marginalized_hist(ax, values, label, percentiles=None, else: orientation = 'vertical' ax.hist(values, bins=50, histtype=htype, orientation=orientation, - facecolor=fillcolor, edgecolor=color, lw=2, normed=True) + facecolor=fillcolor, edgecolor=color, lw=2, density=True) if percentiles is None: percentiles = [5., 50., 95.] values = numpy.percentile(values, percentiles) @@ -496,8 +496,9 @@ def create_multidim_plot(parameters, samples, labels=None, Names of the variables to be plotted. samples : FieldArray A field array of the samples to plot. - labels: {None, list}, optional - A list of names for the parameters. + labels: dict, optional + A dictionary mapping parameters to labels. If none provided, will just + use the parameter strings as the labels. mins : {None, dict}, optional Minimum value for the axis of each variable in `parameters`. If None, it will use the minimum of the corresponding variable in @@ -563,10 +564,7 @@ def create_multidim_plot(parameters, samples, labels=None, `{('param1', 'param2'): (pyplot.axes, row index, column index)}` """ if labels is None: - labels = [p for p in parameters] - # turn labels into a dict for easier access - labels = dict(zip(parameters, labels)) - + labels = {p: p for p in parameters} # set up the figure with a grid of axes # if only plotting 2 parameters, make the marginal plots smaller nparams = len(parameters) From b29dd389de7c348801a8007f754aab20013ea5bb Mon Sep 17 00:00:00 2001 From: Collin Capano Date: Tue, 25 Sep 2018 12:28:55 +0200 Subject: [PATCH 3/3] Update emcee pt (#73) * fix a bug in base_mcmc hdf and add docs * fix docs in emcee io * add global call method for logprior * add base_multitemper sampler methods and io * move some config loading to base_mcmc * remove unnecessary import * add support for multi tempered samplers to burn_in * update emcee_pt * add imports to module __init__s * remove note from executable * add emcee_pt to io * fix import errors * make sure stats are written out with the correct dtype * fix bugs * fix more bugs * create an action for parsing temps arg * make sure fields is a list * fix pep8 issues --- bin/gwin | 9 - gwin/burn_in.py | 52 +- gwin/io/__init__.py | 2 + gwin/io/base_hdf.py | 8 +- gwin/io/base_mcmc.py | 42 +- gwin/io/base_multitemper.py | 260 ++++++++++ gwin/io/emcee.py | 3 +- gwin/io/emcee_pt.py | 97 ++++ gwin/models/__init__.py | 9 + gwin/sampler/__init__.py | 4 +- gwin/sampler/base_mcmc.py | 152 ++++-- gwin/sampler/base_multitemper.py | 192 ++++++++ gwin/sampler/emcee.py | 39 +- gwin/sampler/emcee_pt.py | 822 +++++++------------------------ 14 files changed, 963 insertions(+), 728 deletions(-) create mode 100644 gwin/io/base_multitemper.py create mode 100644 gwin/io/emcee_pt.py create mode 100644 gwin/sampler/base_multitemper.py diff --git a/bin/gwin b/bin/gwin index cacded7..146a44e 100644 --- a/bin/gwin +++ b/bin/gwin @@ -195,15 +195,6 @@ with ctx: # Finalize the output sampler.finalize() - # FIXME: move to emcee_pt's finalize method - #with InferenceFile(checkpoint_file, 'a') as fp: - # try: - # lnz, dlnz = sampler.calculate_logevidence(fp) - # logging.info("Saving evidence") - # sampler.write_logevidence(fp, lnz, dlnz) - # except NotImplementedError: - # pass - # rename checkpoint to output 
and delete backup logging.info("Moving checkpoint to output") os.rename(sampler.checkpoint_file, opts.output_file) diff --git a/gwin/burn_in.py b/gwin/burn_in.py index d87bf69..c7eb348 100644 --- a/gwin/burn_in.py +++ b/gwin/burn_in.py @@ -215,6 +215,16 @@ def _getlogposts(self, filename): logposts = samples['loglikelihood'] + samples['logprior'] return logposts + def _getacls(self, filename, start_index): + """Convenience function for calculating acls for the given filename. + + Since we calculate the acls, this will also store it to the sampler. + """ + acls = self.sampler.compute_acl(filename, start_index=start_index) + # since we calculated it, save the acls to the sampler + self.sampler.acls = acls + return acls + def halfchain(self, filename): """Just uses half the chain as the burn-in iteration. """ @@ -279,7 +289,7 @@ def nacl(self, filename): """ niters = self._getniters(filename) kstart = int(niters / 2.) - acls = self.sampler.compute_acl(filename, start_index=kstart) + acls = self._getacls(filename, start_index=kstart) is_burned_in = {param: (self._nacls * acl) < kstart for (param, acl) in acls.items()} data = self.burn_in_data['nacl'] @@ -291,8 +301,6 @@ def nacl(self, filename): data['burn_in_iteration'] = NOT_BURNED_IN_ITER # additional information data['status_per_parameter'] = is_burned_in - # since we calculated it, save the acls to the sampler - self.sampler.acls = acls def ks_test(self, filename): """Applies ks burn-in test.""" @@ -371,3 +379,41 @@ def from_config(cls, cp, sampler): kwargs['min_iterations'] = int( cp.get_opt_tag(section, 'min-iterations', tag)) return cls(sampler, burn_in_test, **kwargs) + + +class MultiTemperedMCMCBurnInTests(MCMCBurnInTests): + """Adds support for multiple temperatures to the MCMCBurnInTests.""" + + def _getacls(self, filename, start_index): + """Convenience function for calculating acls for the given filename. + + This function is used by the ``n_acl`` burn-in test. That function + expects the returned ``acls`` dict to just report a single ACL for + each parameter. Since multi-tempered samplers return an array of ACLs + for each parameter instead, this takes the max over the array before + returning. + + Since we calculate the acls, this will also store it to the sampler. + """ + acls = super(MultiTemperedMCMCBurnInTests, self)._getacls( + filename, start_index) + # return the max for each parameter + return {param: vals.max() for (param, vals) in acls.items()} + + def _getlogposts(self, filename): + """Convenience function for retrieving log posteriors. + + This just gets the coldest temperature chain, and returns arrays with + shape nwalkers x niterations, so the parent class can run the same + ``posterior_step`` function. 
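The ``max()`` taken in ``_getacls`` above is the conservative choice: multi-tempered samplers report one ACL per temperature for each parameter, while the burn-in tests expect a single scalar per parameter, so the longest (worst) autocorrelation length is kept. A tiny numpy illustration with hypothetical values:

import numpy

# hypothetical ACLs: one value per temperature chain for each parameter
acls = {'mass1': numpy.array([12, 9, 30]),
        'mass2': numpy.array([15, 11, 22])}

# collapse to the worst (largest) ACL across temperatures
scalar_acls = {param: vals.max() for (param, vals) in acls.items()}
# {'mass1': 30, 'mass2': 22}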
+ """ + with self.sampler.io(filename, 'r') as fp: + samples = fp.read_raw_samples( + ['loglikelihood', 'logprior'], thin_start=0, thin_interval=1, + temps=0, flatten=False) + # reshape to drop the first dimension + for (stat, arr) in samples.items(): + _, nwalkers, niterations = arr.shape + samples[stat] = arr.reshape((nwalkers, niterations)) + logposts = samples['loglikelihood'] + samples['logprior'] + return logposts diff --git a/gwin/io/__init__.py b/gwin/io/__init__.py index 7c15d3b..23cb79c 100644 --- a/gwin/io/__init__.py +++ b/gwin/io/__init__.py @@ -34,10 +34,12 @@ from ..option_utils import (ParseLabelArg, ParseParametersArg) from .emcee import EmceeFile +from .emcee_pt import EmceePTFile from .txt import InferenceTXTFile filetypes = { EmceeFile.name: EmceeFile, + EmceePTFile.name: EmceePTFile } diff --git a/gwin/io/base_hdf.py b/gwin/io/base_hdf.py index cd66929..cc8df50 100644 --- a/gwin/io/base_hdf.py +++ b/gwin/io/base_hdf.py @@ -609,11 +609,17 @@ def get_slice(self, thin_start=None, thin_interval=None, thin_end=None): The slice needed. """ if thin_start is None: - thin_start = self.thin_start + thin_start = int(self.thin_start) + else: + thin_start = int(thin_start) if thin_interval is None: thin_interval = self.thin_interval + else: + thin_interval = int(numpy.ceil(thin_interval)) if thin_end is None: thin_end = self.thin_end + else: + thin_end = int(thin_end) return slice(thin_start, thin_end, thin_interval) def copy_metadata(self, other): diff --git a/gwin/io/base_mcmc.py b/gwin/io/base_mcmc.py index 0d60935..c6f7f73 100644 --- a/gwin/io/base_mcmc.py +++ b/gwin/io/base_mcmc.py @@ -47,12 +47,9 @@ def write_samples(self, samples, parameters=None, start_iteration=None, max_iterations=None): """Writes samples to the given file. - Results are written to: - - ``fp[samples_group/{vararg}]``, - - where ``{vararg}`` is the name of a model params. The samples are - written as an ``nwalkers x niterations`` array. + Results are written to ``samples_group/{vararg}``, where ``{vararg}`` + is the name of a model params. The samples are written as an + ``nwalkers x niterations`` array. Parameters ----------- @@ -117,6 +114,23 @@ def read_raw_samples(self, fields, fields : list The list of field names to retrieve. Must be names of datasets in the ``samples_group``. + thin_start : int, optional + Start reading from the given iteration. Default is to start from + the first iteration. + thin_interval : int, optional + Only read every ``thin_interval``th sample. Default is 1. + thin_end : int, optional + Stop reading at the given iteration. Default is to end at the last + iteration. + iteration : int, optional + Only read the given iteration. If this provided, it overrides + the ``thin_(start|interval|end)`` options. + walkers : int, optional + Only read from the given walkers. Default is to read all. + flatten : bool, optional + Flatten the samples to 1D arrays before returning. Otherwise, the + returned arrays will have shape (requested walkers x + requested iteration(s)). Default is True. 
Returns ------- @@ -127,13 +141,13 @@ def read_raw_samples(self, fields, fields = [fields] # walkers to load if walkers is not None: - widx = numpy.zeros(fp.nwalkers, dtype=bool) + widx = numpy.zeros(self.nwalkers, dtype=bool) widx[walkers] = True else: widx = slice(0, None) # get the slice to use if iteration is not None: - get_index = iteration + get_index = int(iteration) else: get_index = self.get_slice(thin_start=thin_start, thin_end=thin_end, @@ -242,6 +256,11 @@ def niterations(self): """Returns the number of iterations the sampler was run for.""" return self[self.sampler_group].attrs['niterations'] + @property + def nwalkers(self): + """Returns the number of walkers used by the sampler.""" + return self[self.sampler_group].attrs['nwalkers'] + def write_sampler_metadata(self, sampler): """Writes the sampler's metadata.""" self.attrs['sampler'] = sampler.name @@ -285,16 +304,11 @@ def write_acls(self, acls): self[self.sampler_group].attrs['acl'] = acl # set the default thin interval to be the acl (if it is finite) if numpy.isfinite(acl): - self.attrs['thin_interval'] = acl + self.attrs['thin_interval'] = int(numpy.ceil(acl)) def read_acls(self): """Reads the acls of all the parameters. - Parameters - ---------- - fp : InferenceFile - An open file handler to read the acls from. - Returns ------- dict diff --git a/gwin/io/base_multitemper.py b/gwin/io/base_multitemper.py new file mode 100644 index 0000000..e389809 --- /dev/null +++ b/gwin/io/base_multitemper.py @@ -0,0 +1,260 @@ +# Copyright (C) 2018 Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# self.option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""Provides I/O support for multi-tempered sampler. +""" + +from __future__ import absolute_import + +import argparse + +from .base_mcmc import MCMCIO + + +class ParseTempsArg(argparse.Action): + """Argparse action that will parse temps argument. + + If the provided argument is 'all', sets 'all' in the namespace dest. If a + a sequence of numbers are provided, converts those numbers to ints before + saving to the namespace. 
+ """ + def __init__(self, type=str, **kwargs): + # check that type is string + if type != str: + raise ValueError("the type for this action must be a string") + super(ParseTempsArg, self).__init__(type=type, **kwargs) + + def __call__(self, parser, namespace, values, option_string=None): + singlearg = isinstance(values, (str, unicode)) + if singlearg: + values = [values] + if values[0] == 'all': + # check that only a single value was provided + if len(values) > 1: + raise ValueError("if provide 'all', should not specify any " + "other temps") + temps = 'all' + else: + temps = [] + for val in values: + try: + val = int(val) + except ValueError: + pass + temps.append(val) + if singlearg: + temps = temps[0] + setattr(namespace, self.dest, temps) + + +class MultiTemperedMCMCIO(MCMCIO): + """Abstract base class for multi-tempered MCMC sampler IO. + """ + + @property + def ntemps(self): + """Returns the number of temperatures used by the sampler.""" + return self[self.sampler_group].attrs['ntemps'] + + def write_sampler_metadata(self, sampler): + """Adds writing ntemps to MCMCIO. + """ + super(MultiTemperedMCMCIO, self).write_sampler_metadata(sampler) + self[self.sampler_group].attrs["ntemps"] = sampler.ntemps + + def write_samples(self, samples, parameters=None, + start_iteration=None, max_iterations=None): + """Writes samples to the given file. + + Results are written to ``samples_group/{vararg}``, where ``{vararg}`` + is the name of a model params. The samples are written as an + ``ntemps x nwalkers x niterations`` array. + + Parameters + ----------- + samples : dict + The samples to write. Each array in the dictionary should have + shape nwalkers x niterations. + parameters : list, optional + Only write the specified parameters to the file. If None, will + write all of the keys in the ``samples`` dict. + start_iteration : int, optional + Write results to the file's datasets starting at the given + iteration. Default is to append after the last iteration in the + file. + max_iterations : int, optional + Set the maximum size that the arrays in the hdf file may be resized + to. Only applies if the samples have not previously been written + to file. The default (None) is to use the maximum size allowed by + h5py. 
+ """ + ntemps, nwalkers, niterations = samples.values()[0].shape + assert all(p.shape == (ntemps, nwalkers, niterations) + for p in samples.values()), ( + "all samples must have the same shape") + if max_iterations is not None and max_iterations < niterations: + raise IndexError("The provided max size is less than the " + "number of iterations") + group = self.samples_group + '/{name}' + if parameters is None: + parameters = samples.keys() + # loop over number of dimensions + for param in parameters: + dataset_name = group.format(name=param) + istart = start_iteration + try: + fp_niterations = self[dataset_name].shape[-1] + if istart is None: + istart = fp_niterations + istop = istart + niterations + if istop > fp_niterations: + # resize the dataset + self[dataset_name].resize(istop, axis=2) + except KeyError: + # dataset doesn't exist yet + if istart is not None and istart != 0: + raise ValueError("non-zero start_iteration provided, " + "but dataset doesn't exist yet") + istart = 0 + istop = istart + niterations + self.create_dataset(dataset_name, (ntemps, nwalkers, istop), + maxshape=(ntemps, nwalkers, + max_iterations), + dtype=samples[param].dtype, + fletcher32=True) + self[dataset_name][:, :, istart:istop] = samples[param] + + def read_raw_samples(self, fields, + thin_start=None, thin_interval=None, thin_end=None, + iteration=None, temps=None, walkers=None, + flatten=True): + """Base function for reading samples. + + Parameters + ----------- + fields : list + The list of field names to retrieve. Must be names of datasets in + the ``samples_group``. + thin_start : int, optional + Start reading from the given iteration. Default is to start from + the first iteration. + thin_interval : int, optional + Only read every ``thin_interval``th sample. Default is 1. + thin_end : int, optional + Stop reading at the given iteration. Default is to end at the last + iteration. + iteration : int, optional + Only read the given iteration. If this provided, it overrides + the ``thin_(start|interval|end)`` options. + temps : 'all' or (list of) int, optional + The temperature index (or list of indices) to retrieve. If None, + only samples from the coldest (= 0) temperature chain will be + retrieved. To retrieve all temperates pass 'all', or a list of + all of the temperatures. Default is to only load the coldest + temperature. + walkers : (list of) int, optional + Only read from the given walkers. Default is to read all. + flatten : bool, optional + Flatten the samples to 1D arrays before returning. Otherwise, the + returned arrays will have shape (requested temps x + requested walkers x requested iteration(s)). Default is True. + + Returns + ------- + array_class + An instance of the given array class populated with values + retrieved from the fields. 
+ """ + if isinstance(fields, (str, unicode)): + fields = [fields] + # walkers to load + if walkers is not None: + widx = numpy.zeros(self.nwalkers, dtype=bool) + widx[walkers] = True + nwalkers = widx.sum() + else: + widx = slice(None, None) + nwalkers = self.nwalkers + # temperatures to load + selecttemps = False + if temps is None: + tidx = 0 + ntemps = 1 + elif isinstance(temps, int): + tidx = temps + ntemps = 1 + else: + # temps is either 'all' or a list of temperatures; + # in either case, we'll get all of the temperatures from the file; + # if not 'all', then we'll pull out the ones we want + tidx = slice(None, None) + selecttemps = temps != 'all' + if selecttemps: + ntemps = len(temps) + else: + ntemps = self.ntemps + # get the slice to use + if iteration is not None: + get_index = int(iteration) + niterations = 1 + else: + get_index = self.get_slice(thin_start=thin_start, + thin_end=thin_end, + thin_interval=thin_interval) + # we'll just get the number of iterations from the returned shape + niterations = None + # load + group = self.samples_group + '/{name}' + arrays = {} + for name in fields: + arr = self[group.format(name=name)][tidx, widx, get_index] + if niterations is None: + niterations = arr.shape[-1] + # pull out the temperatures we need + if selecttemps: + arr = arr[temps, ...] + if flatten: + arr = arr.flatten() + else: + # ensure that the returned array is 3D + arr = arr.reshape((ntemps, nwalkers, niterations)) + arrays[name] = arr + return arrays + + @staticmethod + def extra_args_parser(parser=None, skip_args=None, **kwargs): + """Adds --temps to MCMCIO parser. + """ + if skip_args is None: + skip_args = [] + parser, actions = MCMCIO.extra_args_parser( + parser=parser, skip_args=skip_args, **kwargs) + if 'temps' not in skip_args: + act = parser.add_argument( + "--temps", nargs="+", default=None, action=ParseTempsArg, + help="Get the given temperatures. May provide either a " + "sequence of integers specifying the temperatures to " + "plot, or 'all' for all temperatures. Default is to only " + "plot the coldest (= 0) temperature chain.") + actions.append(act) + return parser, actions diff --git a/gwin/io/emcee.py b/gwin/io/emcee.py index 8331226..f792e72 100644 --- a/gwin/io/emcee.py +++ b/gwin/io/emcee.py @@ -38,7 +38,7 @@ def read_acceptance_fraction(self, walkers=None): Parameters ----------- - walkers : {None, (list of) int} + walkers : (list of) int, optional The walker index (or a list of indices) to retrieve. If None, samples from all walkers will be obtained. @@ -72,4 +72,5 @@ def write_acceptance_fraction(self, acceptance_fraction): self[group] = acceptance_fraction def write_posterior(self, filename, **kwargs): + """Write me.""" pass diff --git a/gwin/io/emcee_pt.py b/gwin/io/emcee_pt.py new file mode 100644 index 0000000..1948360 --- /dev/null +++ b/gwin/io/emcee_pt.py @@ -0,0 +1,97 @@ +# Copyright (C) 2018 Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# self.option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +"""Provides I/O support for emcee_pt. +""" + +from __future__ import absolute_import + +from .base_hdf import BaseInferenceFile +from .base_multitemper import MultiTemperedMCMCIO + + +class EmceePTFile(MultiTemperedMCMCIO, BaseInferenceFile): + """Class to handle file IO for the ``emcee`` sampler.""" + + name = 'emcee_pt_file' + + @property + def betas(self): + """The betas that were used.""" + return self[self.sampler_group].attrs["betas"] + + def write_sampler_metadata(self, sampler): + """Adds writing betas to MultiTemperedMCMCIO. + """ + super(EmceePTFile, self).write_sampler_metadata(sampler) + self[self.sampler_group].attrs["betas"] = sampler.betas + + def read_acceptance_fraction(self, temps=None, walkers=None): + """Reads the acceptance fraction. + + Parameters + ----------- + temps : (list of) int, optional + The temperature index (or a list of indices) to retrieve. If None, + acfs from all temperatures and all walkers will be retrieved. + walkers : (list of) int, optional + The walker index (or a list of indices) to retrieve. If None, + samples from all walkers will be obtained. + + Returns + ------- + array + Array of acceptance fractions with shape (requested temps, + requested walkers). + """ + group = self.sampler_group + '/acceptance_fraction' + if walkers is None: + wmask = numpy.ones(self.nwalkers, dtype=bool) + else: + wmask = numpy.zeros(self.nwalkers, dtype=bool) + wmask[walkers] = True + if temps is None: + tmask = numpy.ones(self.ntemps, dtype=bool) + else: + tmask = numpy.zeros(self.ntemps, dtype=bool) + tmask[temps] = True + return self[group][:][numpy.ix_(tmask, wmask)] + + def write_acceptance_fraction(self, acceptance_fraction): + """Write acceptance_fraction data to file. + + Results are written to ``[sampler_group]/acceptance_fraction``; the + resulting dataset has shape (ntemps, nwalkers). + + Parameters + ----------- + acceptance_fraction : numpy.ndarray + Array of acceptance fractions to write. Must have shape + ntemps x nwalkers. + """ + # check + assert acceptance_fraction.shape == (self.ntemps, self.nwalkers), ( + "acceptance fraction must have shape ntemps x nwalker") + group = self.sampler_group + '/acceptance_fraction' + try: + self[group][:] = acceptance_fraction + except KeyError: + # dataset doesn't exist yet, create it + self[group] = acceptance_fraction + + def write_posterior(self, filename, **kwargs): + """Write me.""" + pass diff --git a/gwin/models/__init__.py b/gwin/models/__init__.py index 48af52c..06f2eae 100644 --- a/gwin/models/__init__.py +++ b/gwin/models/__init__.py @@ -34,6 +34,15 @@ def _call_global_model(*args, **kwds): return _global_instance(*args, **kwds) # pylint:disable=not-callable +def _call_global_model_logprior(*args, **kwds): + """Private function for a calling global's logprior. + + This is needed for samplers that use a separate function for the logprior, + like ``emcee_pt``. + """ + return _global_instance(*args, callstat='logprior', **kwds) + + class CallModel(object): """Wrapper class for calling models from a sampler. 
diff --git a/gwin/sampler/__init__.py b/gwin/sampler/__init__.py index aa7cf3a..d52ce48 100644 --- a/gwin/sampler/__init__.py +++ b/gwin/sampler/__init__.py @@ -22,14 +22,14 @@ from .base import (initial_dist_from_config, create_new_output_file) # from .kombine import KombineSampler from .emcee import EmceeEnsembleSampler -# from .emcee_pt import EmceePTSampler +from .emcee_pt import EmceePTSampler # from .mcmc import MCMCSampler # list of available samplers samplers = {cls.name: cls for cls in ( # KombineSampler, EmceeEnsembleSampler, - # EmceePTSampler, + EmceePTSampler, # MCMCSampler, )} diff --git a/gwin/sampler/base_mcmc.py b/gwin/sampler/base_mcmc.py index d5afbc0..cba9a8b 100644 --- a/gwin/sampler/base_mcmc.py +++ b/gwin/sampler/base_mcmc.py @@ -28,6 +28,7 @@ from abc import (ABCMeta, abstractmethod, abstractproperty) import logging import numpy +from pycbc.workflow import ConfigParser from pycbc.filter import autocorrelation from ..io import validate_checkpoint_files @@ -76,36 +77,69 @@ def raw_samples_to_dict(sampler, raw_samples): return samples -def raw_stats_to_dict(sampler, raw_stats): - """Converts an ND array of model stats to a dict. +def blob_data_to_dict(stat_names, blobs): + """Converts list of "blobs" to a dictionary of model stats. - The ``raw_stats`` may either be a numpy array or a list. If the - former, the stats are assumed to have shape - ``[sampler.base_shape x] niterations x nstats, where nstats are the number - of stats returned by ``sampler.model.default_stats``. If the latter, the - list is cast to an array that is assumed to be the same shape as if an - array was given. + Samplers like ``emcee`` store the extra tuple returned by ``CallModel`` to + a list called blobs. This is a list of lists of tuples with shape + niterations x nwalkers x nstats, where nstats is the number of stats + returned by the model's ``default_stats``. This converts that list to a + dictionary of arrays keyed by the stat names. Parameters ---------- - sampler : sampler instance - An instance of an MCMC sampler. - raw_stats : array or list - The stats to convert. + stat_names : list of str + The list of the stat names. + blobs : list of list of tuples + The data to convert. Returns ------- dict : A dictionary mapping the model's ``default_stats`` to arrays of values. - Each array will have shape ``[sampler.base_shape x] niterations``. + Each array will have shape ``nwalkers x niterations``. """ - if not isinstance(raw_stats, numpy.ndarray): - # Assume list. Since the model returns a tuple of values, this should - # be a [sampler.base_shape x] x niterations list of tuples. We can - # therefore immediately convert this to a ND array. 
-        raw_stats = numpy.array(raw_stats)
-    return {stat: raw_stats[..., ii]
-            for (ii, stat) in enumerate(sampler.model.default_stats)}
+    # get the dtypes of each of the stats; we'll just take this from the
+    # first iteration and walker
+    dtypes = [type(val) for val in blobs[0][0]]
+    assert len(stat_names) == len(dtypes), (
+        "number of stat names must match length of tuples in the blobs")
+    # convert to an array; to ensure that we get the dtypes correct, we'll
+    # cast to a structured array
+    raw_stats = numpy.array(blobs, dtype=zip(stat_names, dtypes))
+    # transpose so that it has shape nwalkers x niterations
+    raw_stats = raw_stats.transpose()
+    # now return as a dictionary
+    return {stat: raw_stats[stat] for stat in stat_names}
+
+
+def get_optional_arg_from_config(cp, section, arg, dtype=str):
+    """Convenience function to retrieve an optional argument from a config
+    file.
+
+    Parameters
+    ----------
+    cp : ConfigParser
+        Open config parser to retrieve the argument from.
+    section : str
+        Name of the section to retrieve from.
+    arg : str
+        Name of the argument to retrieve.
+    dtype : datatype, optional
+        Cast the retrieved value (if it exists) to the given datatype. Default
+        is ``str``.
+
+    Returns
+    -------
+    val : None or str
+        If the argument is present, the value. Otherwise, None.
+    """
+    if cp.has_option(section, arg):
+        val = dtype(cp.get(section, arg))
+    else:
+        val = None
+    return val
+
 #
 # =============================================================================
@@ -436,6 +470,62 @@ def checkpoint(self):
             logging.info("Clearing samples from memory")
             self.clear_samples()
+    @staticmethod
+    def checkpoint_from_config(cp, section):
+        """Gets the checkpoint interval from the given config file.
+
+        This looks for 'checkpoint-interval' in the section.
+
+        Parameters
+        ----------
+        cp : ConfigParser
+            Open config parser to retrieve the argument from.
+        section : str
+            Name of the section to retrieve from.
+
+        Returns
+        -------
+        int or None :
+            The checkpoint interval, if it is in the section. Otherwise, None.
+        """
+        return get_optional_arg_from_config(cp, section, 'checkpoint-interval',
+                                            dtype=int)
+
+    def set_target_from_config(self, cp, section):
+        """Sets the target using the given config file.
+
+        This looks for 'niterations' to set the ``target_niterations``, and
+        'effective-nsamples' to set the ``target_eff_nsamples``.
+
+        Parameters
+        ----------
+        cp : ConfigParser
+            Open config parser to retrieve the argument from.
+        section : str
+            Name of the section to retrieve from.
+        """
+        if cp.has_option(section, "niterations"):
+            niterations = int(cp.get(section, "niterations"))
+        else:
+            niterations = None
+        if cp.has_option(section, "effective-nsamples"):
+            nsamples = int(cp.get(section, "effective-nsamples"))
+        else:
+            nsamples = None
+        self.set_target(niterations=niterations, eff_nsamples=nsamples)
+
+    def set_burn_in_from_config(self, cp):
+        """Sets the burn-in class from the given config file.
+
+        If no burn-in section exists in the file, then this just sets the
+        burn-in class to None.
+ """ + try: + bit = self.burn_in_class.from_config(cp, self) + except ConfigParser.Error: + bit = None + self.set_burn_in(bit) + @abstractmethod def compute_acf(cls, filename, **kwargs): """A method to compute the autocorrelation function of samples in the @@ -519,25 +609,31 @@ def compute_acf(cls, filename, start_index=None, end_index=None, return acfs @classmethod - def compute_acl(cls, filename, start_index=None, end_index=None): + def compute_acl(cls, filename, start_index=None, end_index=None, + min_nsamples=10): """Computes the autocorrleation length for all model params in the given file. Parameter values are averaged over all walkers at each iteration. - The ACL is then calculated over the averaged chain. If the returned ACL - is `inf`, will default to the number of current iterations. + The ACL is then calculated over the averaged chain. If an ACL cannot + be calculated because there are not enough samples, it will be set + to ``inf``. Parameters ----------- filename : str Name of a samples file to compute ACLs for. - start_index : {None, int} + start_index : int, optional The start index to compute the acl from. If None, will try to use the number of burn-in iterations in the file; otherwise, will start at the first sample. - end_index : {None, int} + end_index : int, optional The end index to compute the acl to. If None, will go to the end of the current iteration. + min_nsamples : int, optional + Require a minimum number of samples to compute an ACL. If the + number of samples per walker is less than this, will just set to + ``inf``. Default is 10. Returns ------- @@ -551,10 +647,8 @@ def compute_acl(cls, filename, start_index=None, end_index=None): param, thin_start=start_index, thin_interval=1, thin_end=end_index, flatten=False)[param] samples = samples.mean(axis=0) - # if < 10 samples, just set to inf - # Note: this should be done inside of pycbc's autocorrelation - # function - if samples.size < 10: + # if < min number of samples, just set to inf + if samples.size < min_nsamples: acl = numpy.inf else: acl = autocorrelation.calculate_acl(samples) diff --git a/gwin/sampler/base_multitemper.py b/gwin/sampler/base_multitemper.py new file mode 100644 index 0000000..13541cd --- /dev/null +++ b/gwin/sampler/base_multitemper.py @@ -0,0 +1,192 @@ +# Copyright (C) 2018 Collin Capano +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + +# +# ============================================================================= +# +# Preamble +# +# ============================================================================= +# +"""Provides constructor classes provide support for parallel tempered MCMC +samplers.""" + +from __future__ import absolute_import + +import numpy +from pycbc.filter import autocorrelation + + +class MultiTemperedSupport(object): + """Provides methods for supporting multi-tempered samplers. 
+ """ + _ntemps = None + + @property + def ntemps(self): + """The number of temeratures that are set.""" + return self._ntemps + + +class MultiTemperedAutocorrSupport(object): + """Provides class methods for calculating multi-tempered ACFs/ACLs. + """ + + @classmethod + def compute_acf(cls, filename, start_index=None, end_index=None, + per_walker=False, walkers=None, parameters=None, + temps=None): + """Computes the autocorrleation function of the model params in the + given file. + + By default, parameter values are averaged over all walkers at each + iteration. The ACF is then calculated over the averaged chain for each + temperature. An ACF per-walker will be returned instead if + ``per_walker=True``. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACFs for. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + per_walker : optional, bool + Return the ACF for each walker separately. Default is False. + walkers : optional, int or array + Calculate the ACF using only the given walkers. If None (the + default) all walkers will be used. + parameters : optional, str or array + Calculate the ACF for only the given parameters. If None (the + default) will calculate the ACF for all of the model params. + temps : optional, (list of) int or 'all' + The temperature index (or list of indices) to retrieve. If None + (the default), the ACF will only be computed for the coldest (= 0) + temperature chain. To compute an ACF for all temperates pass 'all', + or a list of all of the temperatures. + + Returns + ------- + dict : + Dictionary of arrays giving the ACFs for each parameter. If + ``per-walker`` is True, the arrays will have shape + ``ntemps x nwalkers x niterations``. Otherwise, the returned array + will have shape ``ntemps x niterations``. 
+ """ + acfs = {} + with cls._io(filename, 'r') as fp: + if parameters is None: + parameters = fp.variable_params + if isinstance(parameters, str) or isinstance(parameters, unicode): + parameters = [parameters] + if isinstance(temps, int): + temps = [temps] + elif temps == 'all': + temps = numpy.arange(fp.ntemps) + elif temps is None: + temps = [0] + for param in parameters: + subacfs = [] + for tk in temps: + if per_walker: + # just call myself with a single walker + if walkers is None: + walkers = numpy.arange(fp.nwalkers) + arrays = [cls.compute_acfs(filename, + start_index=start_index, + end_index=end_index, + per_walker=False, + walkers=ii, + parameters=param, + temps=tk)[param][0, :] + for ii in walkers] + # we'll stack all of the walker arrays to make a single + # nwalkers x niterations array; when these are stacked + # below, we'll get a ntemps x nwalkers x niterations + # array + subacfs.append(numpy.vstack(arrays)) + else: + samples = fp.read_raw_samples( + param, thin_start=start_index, + thin_interval=1, thin_end=end_index, + walkers=walkers, temps=tk, flatten=False)[param] + # contract the walker dimension using the mean, and + # flatten the (length 1) temp dimension + samples = samples.mean(axis=1)[0, :] + thisacf = autocorrelation.calculate_acf( + samples).numpy() + subacfs.append(thisacf) + # stack the temperatures + acfs[param] = numpy.stack(subacfs) + return acfs + + @classmethod + def compute_acl(cls, filename, start_index=None, end_index=None, + min_nsamples=10): + """Computes the autocorrleation length for all model params and + temperatures in the given file. + + Parameter values are averaged over all walkers at each iteration and + temperature. The ACL is then calculated over the averaged chain. + + Parameters + ----------- + filename : str + Name of a samples file to compute ACLs for. + start_index : {None, int} + The start index to compute the acl from. If None, will try to use + the number of burn-in iterations in the file; otherwise, will start + at the first sample. + end_index : {None, int} + The end index to compute the acl to. If None, will go to the end + of the current iteration. + min_nsamples : int, optional + Require a minimum number of samples to compute an ACL. If the + number of samples per walker is less than this, will just set to + ``inf``. Default is 10. + + Returns + ------- + dict + A dictionary of ntemps-long arrays of the ACLs of each parameter. 
+ """ + acls = {} + with cls._io(filename, 'r') as fp: + if end_index is None: + end_index = fp.niterations + tidx = numpy.arange(fp.ntemps) + for param in fp.variable_params: + these_acls = numpy.zeros(fp.ntemps) + for tk in tidx: + samples = fp.read_raw_samples( + param, thin_start=start_index, thin_interval=1, + thin_end=end_index, temps=tk, flatten=False)[param] + # contract the walker dimension using the mean, and flatten + # the (length 1) temp dimension + samples = samples.mean(axis=1)[0, :] + if samples.size < min_nsamples: + acl = numpy.inf + else: + acl = autocorrelation.calculate_acl(samples) + if acl <= 0: + acl = numpy.inf + these_acls[tk] = acl + acls[param] = these_acls + return acls diff --git a/gwin/sampler/emcee.py b/gwin/sampler/emcee.py index 443f89d..0cc3f60 100644 --- a/gwin/sampler/emcee.py +++ b/gwin/sampler/emcee.py @@ -31,11 +31,10 @@ import numpy import emcee from pycbc.pool import choose_pool -from pycbc.workflow import ConfigParser from .base import BaseSampler from .base_mcmc import (BaseMCMC, MCMCAutocorrSupport, raw_samples_to_dict, - raw_stats_to_dict) + blob_data_to_dict, get_optional_arg_from_config) from ..burn_in import MCMCBurnInTests from ..io import EmceeFile from .. import models @@ -123,11 +122,8 @@ def model_stats(self): The returned array has shape ``nwalkers x niterations``. """ - raw_stats = numpy.array(self._sampler.blobs) - # raw_stats has shape niterations x nwalkers x nstats; transpose - # so that it has shape nwalkers x niterations x nstats - raw_stats = raw_stats.transpose((1, 0, 2)) - return raw_stats_to_dict(self, raw_stats) + stats = self.model.default_stats + return blob_data_to_dict(stats, self._sampler.blobs) def clear_samples(self): """Clears the samples and stats from memory. @@ -202,31 +198,14 @@ def from_config(cls, cp, model, nprocesses=1, use_mpi=False): # get the number of walkers to use nwalkers = int(cp.get(section, "nwalkers")) # get the checkpoint interval, if it's specified - if cp.has_option(section, "checkpoint-interval"): - checkpoint_interval = int(cp.get(section, "checkpoint-interval")) - else: - checkpoint_interval = None - if cp.has_option(section, "logpost-function"): - lnpost = cp.get(section, "logpost-function") - else: - lnpost = None + checkpoint_interval = cls.checkpoint_from_config(cp, section) + # get the logpost function + lnpost = get_optional_arg_from_config(cp, section, 'logpost-function') obj = cls(model, nwalkers, checkpoint_interval=checkpoint_interval, logpost_function=lnpost, nprocesses=nprocesses, use_mpi=use_mpi) - # get target - if cp.has_option(section, "niterations"): - niterations = int(cp.get(section, "niterations")) - else: - niterations = None - if cp.has_option(section, "effective-nsamples"): - nsamples = int(cp.get(section, "effective-nsamples")) - else: - nsamples = None - obj.set_target(niterations=niterations, eff_nsamples=nsamples) + # set target + obj.set_target_from_config(cp, section) # add burn-in if it's specified - try: - bit = obj.burn_in_class.from_config(cp, obj) - except ConfigParser.Error: - bit = None - obj.set_burn_in(bit) + obj.set_burn_in_from_config(cp) return obj diff --git a/gwin/sampler/emcee_pt.py b/gwin/sampler/emcee_pt.py index cef83fd..19ab4d8 100644 --- a/gwin/sampler/emcee_pt.py +++ b/gwin/sampler/emcee_pt.py @@ -14,45 +14,30 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-# -# ============================================================================= -# -# Preamble -# -# ============================================================================= -# """ -This modules provides classes and functions for using the emcee sampler +This modules provides classes and functions for using the emcee_pt sampler packages for parameter estimation. """ -# The following two classes are needed for two reason -# 1) pools freeze state when created and so classes *cannot be updated* -# 2) methods cannot be pickled. - - -class _callprior(object): - """Calls the model's prior function, and ensures that no - metadata is returned.""" - def __init__(self, model_call): - self.callable = model_call - - def __call__(self, args): - prior = self.callable(args, callfunc='prior') - return prior if isinstance(prior, numpy.float64) else prior[0] - +from __future__ import absolute_import -class _callloglikelihood(object): - """Calls the model's loglikelihood function. - """ - def __init__(self, model_call): - self.callable = model_call +import numpy +import emcee +import logging +from pycbc.pool import choose_pool - def __call__(self, args): - return self.callable(args, callfunc='loglikelihood') +from .base import BaseSampler +from .base_mcmc import (BaseMCMC, raw_samples_to_dict, + get_optional_arg_from_config) +from .base_multitemper import (MultiTemperedSupport, + MultiTemperedAutocorrSupport) +from ..burn_in import MultiTemperedMCMCBurnInTests +from ..io import EmceePTFile +from .. import models -class EmceePTSampler(BaseMCMCSampler): +class EmceePTSampler(MultiTemperedAutocorrSupport, MultiTemperedSupport, + BaseMCMC, BaseSampler): """This class is used to construct a parallel-tempered MCMC sampler from the emcee package's PTSampler. @@ -70,647 +55,190 @@ class EmceePTSampler(BaseMCMCSampler): cores/nodes/etc. 
""" name = "emcee_pt" - - def __init__(self, model, ntemps, nwalkers, pool=None, - model_call=None): - - try: - import emcee - except ImportError: - raise ImportError("emcee is not installed.") - - if model_call is None: - model_call = model + _io = EmceePTFile + burn_in_class = MultiTemperedMCMCBurnInTests + + def __init__(self, model, ntemps, nwalkers, checkpoint_interval=None, + loglikelihood_function=None, nprocesses=1, use_mpi=False): + + self.model = model + + # create a wrapper for calling the model + if loglikelihood_function is None: + loglikelihood_function = 'loglikelihood' + # frustratingly, emcee_pt does not support blob data, so we have to + # turn it off + model_call = models.CallModel(model, loglikelihood_function, + return_all_stats=False) + + # Set up the pool + if nprocesses > 1: + # these are used to help paralleize over multiple cores / MPI + models._global_instance = model_call + model_call = models._call_global_model + prior_call = models._call_global_model_logprior + else: + prior_call = models.CallModel(model, 'logprior', + return_all_stats=False) + pool = choose_pool(mpi=use_mpi, processes=nprocesses) + if pool is not None: + pool.count = nprocesses # construct the sampler: PTSampler needs the likelihood and prior # functions separately ndim = len(model.variable_params) - sampler = emcee.PTSampler(ntemps, nwalkers, ndim, - _callloglikelihood(model_call), - _callprior(model_call), - pool=pool) - # initialize - super(EmceePTSampler, self).__init__( - sampler, model) + self._sampler = emcee.PTSampler(ntemps, nwalkers, ndim, + model_call, prior_call, pool=pool) self._nwalkers = nwalkers self._ntemps = ntemps + self._checkpoint_interval = checkpoint_interval - @classmethod - def from_cli(cls, opts, model, pool=None, - model_call=None): - """Create an instance of this sampler from the given command-line - options. - - Parameters - ---------- - opts : ArgumentParser options - The options to parse. - model : LikelihoodEvaluator - The model to use with the sampler. + @property + def io(self): + return self._io - Returns - ------- - EmceePTSampler - An emcee sampler initialized based on the given arguments. - """ - return cls(model, opts.ntemps, opts.nwalkers, - pool=pool, model_call=model_call) + @property + def base_shape(self): + return (self.ntemps, self.nwalkers,) @property - def ntemps(self): - return self._ntemps + def betas(self): + return self._sampler.betas + + @classmethod + def from_config(cls, cp, model, nprocesses=1, use_mpi=False): + """Loads the sampler from the given config file.""" + section = "sampler" + # check name + assert cp.get(section, "name") == cls.name, ( + "name in section [sampler] must match mine") + # get the number of walkers to use + nwalkers = int(cp.get(section, "nwalkers")) + # get the number of temps + ntemps = int(cp.get(section, "ntemps")) + # get the checkpoint interval, if it's specified + checkpoint_interval = cls.checkpoint_from_config(cp, section) + # get the loglikelihood function + logl = get_optional_arg_from_config(cp, section, 'logl-function') + obj = cls(model, ntemps, nwalkers, + checkpoint_interval=checkpoint_interval, + loglikelihood_function=logl, nprocesses=nprocesses, + use_mpi=use_mpi) + # set target + obj.set_target_from_config(cp, section) + # add burn-in if it's specified + obj.set_burn_in_from_config(cp) + return obj @property - def chain(self): - """Get all past samples as an ntemps x nwalker x niterations x ndim - array. 
- """ - # emcee returns the chain as ntemps x nwalker x niterations x ndim - return self._sampler.chain + def samples(self): + """A dict mapping ``variable_params`` to arrays of samples currently + in memory. - def clear_chain(self): - """Clears the chain and blobs from memory. + The arrays have shape ``ntemps x nwalkers x niterations``. """ - # store the iteration that the clear is occuring on - self.lastclear = self.niterations - # now clear the chain - self._sampler.reset() + # emcee stores samples to it's chain attribute as a + # nwalker x niterations x ndim array + raw_samples = self._sampler.chain + return raw_samples_to_dict(self, raw_samples) @property def model_stats(self): - """Returns the log likelihood ratio and log prior as a FieldArray. + """Returns the log likelihood ratio and log prior as a dict of arrays. + The returned array has shape ntemps x nwalkers x niterations. + + Unfortunately, because ``emcee_pt`` does not have blob support, this + will only return the loglikelihood, logprior, and logjacobian, + regardless of what stats the model can return. """ # likelihood has shape ntemps x nwalkers x niterations logl = self._sampler.lnlikelihood # get prior from posterior logp = self._sampler.lnprobability - logl - # compute the likelihood ratio - loglr = logl - self.model.lognl - kwargs = {'loglr': loglr, 'prior': logp} + logjacobian = numpy.zeros(logp.size) # if different coordinates were used for sampling, get the jacobian if self.model.sampling_transforms is not None: samples = self.samples - # convert to dict - d = {param: samples[param] for param in samples.fieldnames} - logj = self.model.logjacobian(**d) - kwargs['logjacobian'] = logj - return FieldArray.from_kwargs(**kwargs) - - @property - def lnpost(self): - """Get the natural logarithm of the likelihood + the prior as an - ntemps x nwalkers x niterations array. + flattened_samples = {param: arr.ravel() + for param, arr in samples.items()} + for ii in range(logp.size): + these_samples = {param: vals[ii] + for param, vals in flattened_samples.items()} + self.model.update(**these_samples) + logjacobian[ii] = self.model.logjacobian + logjacobian = logjacobian.reshape(logp.shape) + # put the logprior into the variable_params space + logp -= logjacobian + return {'loglikelihood': logl, 'logprior': logp, + 'logjacobian': logjacobian} + + def clear_samples(self): + """Clears the chain and blobs from memory. """ - # emcee returns ntemps x nwalkers x niterations - return self._sampler.lnprobability - - def set_p0(self, samples_file=None, prior=None): - """Sets the initial position of the walkers. - - Parameters - ---------- - samples_file : InferenceFile, optional - If provided, use the last iteration in the given file for the - starting positions. - prior : JointDistribution, optional - Use the given prior to set the initial positions rather than - ``model``'s prior. + # store the iteration that the clear is occuring on + self._lastclear = self.niterations + self._itercounter = 0 + # now clear the chain + self._sampler.reset() - Returns - ------- - p0 : array - An ntemps x nwalkers x ndim array of the initial positions that - were set. + def set_state_from_file(self, filename): + """Sets the state of the sampler back to the instance saved in a file. 
""" - # create a (nwalker, ndim) array for initial positions - ntemps = self.ntemps - nwalkers = self.nwalkers - ndim = len(self.variable_params) - p0 = numpy.ones((ntemps, nwalkers, ndim)) - # if samples are given then use those as initial positions - if samples_file is not None: - samples = self.read_samples(samples_file, self.variable_params, - iteration=-1, temps='all', - flatten=False)[..., 0] - # transform to sampling parameter space - samples = self.model.apply_sampling_transforms( - samples) - # draw random samples if samples are not provided - else: - samples = self.model.prior_rvs( - size=nwalkers*ntemps, prior=prior).reshape((ntemps, nwalkers)) - # convert to array - for i, param in enumerate(self.sampling_params): - p0[..., i] = samples[param] - self._p0 = p0 - return p0 - - def run(self, niterations, **kwargs): + with self.io(filename, 'r') as fp: + rstate = fp.read_random_state() + # set the numpy random state + numpy.random.set_state(rstate) + + def run_mcmc(self, niterations, **kwargs): """Advance the ensemble for a number of samples. Parameters ---------- niterations : int Number of samples to get from sampler. - - Returns - ------- - p : numpy.array - An array of current walker positions with shape (nwalkers, ndim). - lnpost : numpy.array - The list of log posterior probabilities for the walkers at - positions p, with shape (nwalkers, ndim). - rstate : - The current state of the random number generator. + \**kwargs : + All other keyword arguments are passed to the emcee sampler. """ pos = self._pos if pos is None: - pos = self.p0 + pos = self._p0 res = self._sampler.run_mcmc(pos, niterations, **kwargs) - p, lnpost, rstate = res[0], res[1], res[2] + p, _, _ = res[0], res[1], res[2] # update the positions self._pos = p - return p, lnpost, rstate - - # read/write functions - - # add ntemps and betas to metadata - def write_metadata(self, fp, **kwargs): - """Writes metadata about this sampler to the given file. Metadata is - written to the file's `attrs`. - - Parameters - ---------- - fp : InferenceFile - A file handler to an open inference file. - **kwargs : - All keyword arguments are saved as separate arguments in the - file attrs. If any keyword argument is a dictionary, the keyword - will point to the list of keys in the the file's ``attrs``. Each - key is then stored as a separate attr with its corresponding value. - """ - super(EmceePTSampler, self).write_metadata(fp, **kwargs) - fp.attrs["ntemps"] = self.ntemps - fp.attrs["betas"] = self._sampler.betas - - def write_acceptance_fraction(self, fp): - """Write acceptance_fraction data to file. Results are written to - `fp[acceptance_fraction/temp{k}]` where k is the temperature. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - """ - group = "acceptance_fraction/temp{tk}" - # acf has shape ntemps x nwalkers - acf = self.acceptance_fraction - for tk in range(fp.ntemps): - try: - fp[group.format(tk=tk)][:] = acf[tk, :] - except KeyError: - # dataset doesn't exist yet, create it - fp[group.format(tk=tk)] = acf[tk, :] - - @staticmethod - def read_acceptance_fraction(fp, temps=None, walkers=None): - """Reads the acceptance fraction from the given file. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - temps : {None, (list of) int} - The temperature index (or a list of indices) to retrieve. If None, - acfs from all temperatures and all walkers will be retrieved. 
- walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - - Returns - ------- - array - Array of acceptance fractions with shape (requested temps, - requested walkers). - """ - group = 'acceptance_fraction/temp{tk}' - if temps is None: - temps = numpy.arange(fp.ntemps) - if walkers is None: - wmask = numpy.ones(fp.nwalkers, dtype=bool) - else: - wmask = numpy.zeros(fp.nwalkers, dtype=bool) - wmask[walkers] = True - arrays = [] - for tk in temps: - arrays.extend(fp[group.format(tk=tk)][wmask]) - return arrays - - @staticmethod - def write_samples_group(fp, samples_group, parameters, samples, - start_iteration=None, max_iterations=None): - """Writes samples to the given file. - - Results are written to: - - ``fp[samples_group/{vararg}]``, - - where ``{vararg}`` is the name of a variable arg. The samples are - written as an ``ntemps x nwalkers x niterations`` array. - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - samples_group : str - Name of samples group to write. - parameters : list - The parameters to write to the file. - samples : FieldArray - The samples to write. Should be a FieldArray with fields containing - the samples to write and shape nwalkers x niterations. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. - """ - ntemps, nwalkers, niterations = samples.shape - if max_iterations is not None and max_iterations < niterations: - raise IndexError("The provided max size is less than the " - "number of iterations") - group = samples_group + '/{name}' - # loop over number of dimensions - for param in parameters: - dataset_name = group.format(name=param) - istart = start_iteration - try: - fp_niterations = fp[dataset_name].shape[-1] - if istart is None: - istart = fp_niterations - istop = istart + niterations - if istop > fp_niterations: - # resize the dataset - fp[dataset_name].resize(istop, axis=2) - except KeyError: - # dataset doesn't exist yet - if istart is not None and istart != 0: - raise ValueError("non-zero start_iteration provided, but " - "dataset doesn't exist yet") - istart = 0 - istop = istart + niterations - fp.create_dataset(dataset_name, (ntemps, nwalkers, istop), - maxshape=(ntemps, nwalkers, max_iterations), - dtype=float, fletcher32=True) - fp[dataset_name][:, :, istart:istop] = samples[param] - - def write_results(self, fp, start_iteration=None, max_iterations=None, - **metadata): - """Writes metadata, samples, model stats, and acceptance fraction - to the given file. See the write function for each of those for - details. - - Parameters - ----------- - fp : InferenceFile - A file handler to an open inference file. - start_iteration : int, optional - Write results to the file's datasets starting at the given - iteration. Default is to append after the last iteration in the - file. - max_iterations : int, optional - Set the maximum size that the arrays in the hdf file may be resized - to. Only applies if the samples have not previously been written - to file. The default (None) is to use the maximum size allowed by - h5py. 
- \**metadata : - All other keyword arguments are passed to ``write_metadata``. - """ - self.write_metadata(fp, **metadata) - self.write_chain(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_model_stats(fp, start_iteration=start_iteration, - max_iterations=max_iterations) - self.write_acceptance_fraction(fp) - self.write_state(fp) - - @staticmethod - def _read_fields(fp, fields_group, fields, array_class, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, temps=None, walkers=None, flatten=True): - """Base function for reading samples and model stats. See - `read_samples` and `read_model_stats` for details. + def write_results(self, filename): + """Writes samples, model stats, acceptance fraction, and random state + to the given file. Parameters ----------- - fp : InferenceFile - An open file handler to read the samples from. - fields_group : str - The name of the group to retrieve the desired fields. - fields : list - The list of field names to retrieve. Must be names of groups in - `fp[fields_group/]`. - array_class : FieldArray or similar - The type of array to return. Must have a `from_kwargs` attribute. - - For other details on keyword arguments, see `read_samples` and - `read_model_stats`. - - Returns - ------- - array_class - An instance of the given array class populated with values - retrieved from the fields. + filename : str + The file to write to. The file is opened using the ``io`` class + in an an append state. """ - # walkers to load - if walkers is not None: - widx = numpy.zeros(fp.nwalkers, dtype=bool) - widx[walkers] = True - nwalkers = widx.sum() - else: - widx = slice(None, None) - nwalkers = fp.nwalkers - # temperatures to load - selecttemps = False - if temps is None: - tidx = 0 - ntemps = 1 - elif isinstance(temps, int): - tidx = temps - ntemps = 1 - else: - # temps is either 'all' or a list of temperatures; - # in either case, we'll get all of the temperatures from the file; - # if not 'all', then we'll pull out the ones we want - tidx = slice(None, None) - selecttemps = temps != 'all' - if selecttemps: - ntemps = len(temps) - else: - ntemps = fp.ntemps - # get the slice to use - if iteration is not None: - get_index = iteration - niterations = 1 - else: - if thin_end is None: - # use the number of current iterations - thin_end = fp.niterations - get_index = fp.get_slice(thin_start=thin_start, thin_end=thin_end, - thin_interval=thin_interval) - # we'll just get the number of iterations from the returned shape - niterations = None - # load - arrays = {} - group = fields_group + '/{name}' - for name in fields: - arr = fp[group.format(name=name)][tidx, widx, get_index] - if niterations is None: - niterations = arr.shape[-1] - # pull out the temperatures we need - if selecttemps: - arr = arr[temps, ...] 
- if flatten: - arr = arr.flatten() - else: - # ensure that the returned array is 3D - arr = arr.reshape((ntemps, nwalkers, niterations)) - arrays[name] = arr - return array_class.from_kwargs(**arrays) + with self.io(filename, 'a') as fp: + # write samples + fp.write_samples(self.samples, self.model.variable_params) + # write stats + fp.write_samples(self.model_stats) + # write accpetance + fp.write_acceptance_fraction(self._sampler.acceptance_fraction) + # write random state + fp.write_random_state() @classmethod - def read_samples(cls, fp, parameters, - thin_start=None, thin_interval=None, thin_end=None, - iteration=None, temps=0, walkers=None, flatten=True, - samples_group=None, array_class=None): - """Reads samples for the given parameter(s). - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - parameters : (list of) strings - The parameter(s) to retrieve. A parameter can be the name of any - field in `fp[fp.samples_group]`, a virtual field or method of - `FieldArray` (as long as the file contains the necessary fields - to derive the virtual field or method), and/or a function of - these. - thin_start : int - Index of the sample to begin returning samples. Default is to read - samples after burn in. To start from the beginning set thin_start - to 0. - thin_interval : int - Interval to accept every i-th sample. Default is to use the - `fp.acl`. If `fp.acl` is not set, then use all samples - (set thin_interval to 1). - thin_end : int - Index of the last sample to read. If not given then - `fp.niterations` is used. - iteration : int - Get a single iteration. If provided, will override the - `thin_{start/interval/end}` arguments. - walkers : {None, (list of) int} - The walker index (or a list of indices) to retrieve. If None, - samples from all walkers will be obtained. - temps : {None, (list of) int, 'all'} - The temperature index (or list of indices) to retrieve. If None, - only samples from the coldest (= 0) temperature chain will be - retrieved. To retrieve all temperates pass 'all', or a list of - all of the temperatures. - flatten : {True, bool} - The returned array will be one dimensional, with all desired - samples from all desired walkers concatenated together. If False, - the returned array will have dimension requested temps x requested - walkers x requested iterations. - samples_group : {None, str} - The group in `fp` from which to retrieve the parameter fields. If - None, searches in `fp.samples_group`. - array_class : {None, array class} - The type of array to return. The class must have a `from_kwargs` - class method and a `parse_parameters` method. If None, will return - a FieldArray. - - Returns - ------- - array_class - Samples for the given parameters, as an instance of a the given - `array_class` (`FieldArray` if `array_class` is None). 
- """ - # get the group to load from - if samples_group is None: - samples_group = fp.samples_group - # get the type of array class to use - if array_class is None: - array_class = FieldArray - # get the names of fields needed for the given parameters - possible_fields = fp[samples_group].keys() - loadfields = array_class.parse_parameters(parameters, possible_fields) - return cls._read_fields( - fp, samples_group, loadfields, array_class, - thin_start=thin_start, thin_interval=thin_interval, - thin_end=thin_end, iteration=iteration, temps=temps, - walkers=walkers, flatten=flatten) - - @classmethod - def compute_acfs(cls, fp, start_index=None, end_index=None, - per_walker=False, walkers=None, parameters=None, - temps=None): - """Computes the autocorrleation function of the model params in the - given file. - - By default, parameter values are averaged over all walkers at each - iteration. The ACF is then calculated over the averaged chain for each - temperature. An ACF per-walker will be returned instead if - ``per_walker=True``. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - per_walker : optional, bool - Return the ACF for each walker separately. Default is False. - walkers : optional, int or array - Calculate the ACF using only the given walkers. If None (the - default) all walkers will be used. - parameters : optional, str or array - Calculate the ACF for only the given parameters. If None (the - default) will calculate the ACF for all of the model params. - temps : optional, (list of) int or 'all' - The temperature index (or list of indices) to retrieve. If None - (the default), the ACF will only be computed for the coldest (= 0) - temperature chain. To compute an ACF for all temperates pass 'all', - or a list of all of the temperatures. - - Returns - ------- - FieldArray - A ``FieldArray`` of the ACF vs iteration for each parameter. If - `per-walker` is True, the FieldArray will have shape - ``ntemps x nwalkers x niterations``. Otherwise, the returned - array will have shape ``ntemps x niterations``. 
- """ - acfs = {} - if parameters is None: - parameters = fp.variable_params - if isinstance(parameters, str) or isinstance(parameters, unicode): - parameters = [parameters] - if isinstance(temps, int): - temps = [temps] - elif temps == 'all': - temps = numpy.arange(fp.ntemps) - elif temps is None: - temps = [0] - for param in parameters: - subacfs = [] - for tk in temps: - if per_walker: - # just call myself with a single walker - if walkers is None: - walkers = numpy.arange(fp.nwalkers) - arrays = [cls.compute_acfs(fp, start_index=start_index, - end_index=end_index, - per_walker=False, walkers=ii, - parameters=param, - temps=tk)[param][0, :] - for ii in walkers] - # we'll stack all of the walker arrays to make a single - # nwalkers x niterations array; when these are stacked - # below, we'll get a ntemps x nwalkers x niterations array - subacfs.append(numpy.vstack(arrays)) - else: - samples = cls.read_samples(fp, param, - thin_start=start_index, - thin_interval=1, - thin_end=end_index, - walkers=walkers, temps=tk, - flatten=False)[param] - # contract the walker dimension using the mean, and flatten - # the (length 1) temp dimension - samples = samples.mean(axis=1)[0, :] - thisacf = autocorrelation.calculate_acf(samples).numpy() - subacfs.append(thisacf) - # stack the temperatures - # FIXME: the following if/else can be condensed to a single line - # using numpy.stack, once the version requirements are bumped to - # numpy >= 1.10 - if per_walker: - nw, ni = subacfs[0].shape - acfs[param] = numpy.zeros((len(temps), nw, ni), dtype=float) - for tk in range(len(temps)): - acfs[param][tk, ...] = subacfs[tk] - else: - acfs[param] = numpy.vstack(subacfs) - return FieldArray.from_kwargs(**acfs) - - @classmethod - def compute_acls(cls, fp, start_index=None, end_index=None): - """Computes the autocorrleation length for all model params and - temperatures in the given file. - - Parameter values are averaged over all walkers at each iteration and - temperature. The ACL is then calculated over the averaged chain. If - the returned ACL is `inf`, will default to the number of current - iterations. - - Parameters - ----------- - fp : InferenceFile - An open file handler to read the samples from. - start_index : {None, int} - The start index to compute the acl from. If None, will try to use - the number of burn-in iterations in the file; otherwise, will start - at the first sample. - end_index : {None, int} - The end index to compute the acl to. If None, will go to the end - of the current iteration. - - Returns - ------- - dict - A dictionary of ntemps-long arrays of the ACLs of each parameter. 
-        """
-        acls = {}
-        if end_index is None:
-            end_index = fp.niterations
-        tidx = numpy.arange(fp.ntemps)
-        for param in fp.variable_params:
-            these_acls = numpy.zeros(fp.ntemps, dtype=int)
-            for tk in tidx:
-                samples = cls.read_samples(fp, param, thin_start=start_index,
-                                           thin_interval=1, thin_end=end_index,
-                                           temps=tk, flatten=False)[param]
-                # contract the walker dimension using the mean, and flatten
-                # the (length 1) temp dimension
-                samples = samples.mean(axis=1)[0, :]
-                acl = autocorrelation.calculate_acl(samples)
-                if numpy.isinf(acl):
-                    acl = samples.size
-                these_acls[tk] = acl
-            acls[param] = these_acls
-        return acls
-
-    @classmethod
-    def calculate_logevidence(cls, fp, thin_start=None, thin_end=None,
+    def calculate_logevidence(cls, filename, thin_start=None, thin_end=None,
                               thin_interval=None):
-        """Calculates the log evidence from the given file using emcee's
+        """Calculates the log evidence from the given file using ``emcee_pt``'s
         thermodynamic integration.
 
         Parameters
         ----------
-        fp : InferenceFile
-            An open file handler to read the stats from.
+        filename : str
+            Name of the file to read the samples from. Should be an
+            ``EmceePTFile``.
         thin_start : int
             Index of the sample to begin returning stats. Default is to read
             stats after burn in. To start from the beginning set thin_start
@@ -730,27 +258,43 @@ def calculate_logevidence(cls, fp, thin_start=None, thin_end=None,
         dlnZ : float
             The error on the estimate.
         """
-        try:
-            import emcee
-        except ImportError:
-            raise ImportError("emcee is not installed.")
-
-        stats_group = fp.stats_group
-        parameters = fp[stats_group].keys()
-        logstats = cls.read_samples(fp, parameters, samples_group=stats_group,
-                                    thin_start=thin_start, thin_end=thin_end,
-                                    thin_interval=thin_interval,
-                                    temps='all', flatten=False)
-        # get the likelihoods
-        logls = logstats['loglr'] + fp.lognl
-        # we need the betas that were used
-        betas = fp.attrs['betas']
-        # annoyingly, theromdynaimc integration in PTSampler is an instance
-        # method, so we'll implement a dummy one
-        ntemps = fp.ntemps
-        nwalkers = fp.nwalkers
-        ndim = len(fp.variable_params)
+        with cls._io(filename, 'r') as fp:
+            logls = fp.read_raw_samples(['loglikelihood'],
+                                        thin_start=thin_start,
+                                        thin_interval=thin_interval,
+                                        thin_end=thin_end,
+                                        temps='all', flatten=False)
+            logls = logls['loglikelihood']
+            # we need the betas that were used
+            betas = fp.betas
+            # annoyingly, thermodynamic integration in PTSampler is an
+            # instance method, so we'll create a dummy sampler just to call it
+            ntemps = fp.ntemps
+            nwalkers = fp.nwalkers
+            ndim = len(fp.variable_params)
         dummy_sampler = emcee.PTSampler(ntemps, nwalkers, ndim, None,
                                         None, betas=betas)
         return dummy_sampler.thermodynamic_integration_log_evidence(
             logls=logls, fburnin=0.)
+
+    def finalize(self):
+        """Calculates the log evidence and writes to the checkpoint file.
+
+        The thin start/interval/end for calculating the log evidence are
+        retrieved from the checkpoint file's thinning attributes.
+ """ + logging.info("Calculating log evidence") + # get the thinning settings + with self.io(self.checkpoint_file, 'r') as fp: + thin_start = fp.thin_start + thin_interval = fp.thin_interval + thin_end = fp.thin_end + # calculate + logz, dlogz = self.calculate_logevidence( + self.checkpoint_file, thin_start=thin_start, thin_end=thin_end, + thin_interval=thin_interval) + logging.info("log Z, dlog Z: {}, {}".format(logz, dlogz)) + # write to both the checkpoint and backup + for fn in [self.checkpoint_file, self.backup_file]: + with self.io(fn, "a") as fp: + fp.write_logevidence(logz, dlogz)
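
For reference, the thermodynamic integration performed by the dummy PTSampler
estimates the evidence as ln Z ~ integral over beta from 0 to 1 of
<ln L>_beta d(beta), where <ln L>_beta is the log likelihood averaged over the
chain at inverse temperature beta. The snippet below is a minimal numpy sketch
of that integral, included only to illustrate the technique; the helper name
``thermodynamic_integration_logz`` is hypothetical and not part of this patch.
It assumes ``logls`` is an ntemps x nwalkers x niterations array (as read
above with ``temps='all', flatten=False``) and ``betas`` is the array stored
in the file, and it omits the error estimate (dlnZ) that
``thermodynamic_integration_log_evidence`` also returns.

    import numpy

    def thermodynamic_integration_logz(logls, betas):
        # average ln L over walkers and iterations for each temperature chain
        mean_logls = logls.reshape(len(betas), -1).mean(axis=1)
        # sort by beta so the trapezoid rule integrates from the hottest
        # chain (smallest beta) up to the coldest chain (beta = 1)
        order = numpy.argsort(betas)
        # ln Z ~ integral of <ln L>_beta d(beta)
        return numpy.trapz(mean_logls[order], betas[order])

With this in mind, ``finalize`` above only needs the checkpoint file name: it
reads the thinning attributes, passes the file to ``calculate_logevidence``,
and writes the resulting logz/dlogz to both the checkpoint and backup files.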