analysis2_manuscript.tex

\documentclass[hidelinks]{article}
\usepackage[letterpaper,margin=1.0in]{geometry}
\usepackage[utf8]{inputenc}
\pagenumbering{arabic}
\usepackage{authblk}
\usepackage{graphicx}
\usepackage[singlelinecheck=false]{caption} % singlelinecheck makes single line caption left aligned instead of centered
\usepackage{subcaption}
\usepackage{amsmath}
\usepackage{fancyhdr}
\usepackage{longtable}
\usepackage{booktabs}
% hyperlinks
\usepackage{hyperref}
\usepackage{wrapfig}
\usepackage{xspace}
\usepackage{mathrsfs}
\usepackage{graphicx}
\usepackage{lipsum}
\usepackage{makecell}


\pagestyle{fancy}
\fancyhead[R]{\textbf{\stdpopsim with selection}}

% for highlighting text
\usepackage{xcolor}
\usepackage{soul}

% bibliography
\usepackage[
    backend=biber,
    natbib=true,
    style=authoryear,
    ]{biblatex}
\addbibresource{references.bib}

%\usepackage[round]{natbib}   % omit 'round' option if you prefer square brackets
%\bibliographystyle{plainnat}


\newcommand{\Stdpopsim}{\texttt{Stdpopsim}\xspace}
\newcommand{\stdpopsim}{\texttt{stdpopsim}\xspace}

%commands to format figure and table references in the supplement
\newcommand{\beginsupplement}{%
        \fancyhead[L]{Supplemental Material}
        \setcounter{table}{0}
        \renewcommand{\thetable}{S\arabic{table}}%
        \setcounter{figure}{0}
        \renewcommand{\thefigure}{S\arabic{figure}}%
     }
\newcommand{\stopsupplement}{%
        \setcounter{table}{0}
        \renewcommand{\thetable}{\arabic{table}}%
        \setcounter{figure}{0}
        \renewcommand{\thefigure}{\arabic{figure}}%
     }

\makeatletter
\newcommand{\labelname}[1]{\def\@currentlabelname{#1}}
\makeatother

% Avoid pandoc bug when there are lists in the body.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}

\title{Adding selection to \stdpopsim: oh how the tables have turned}


% \author[1,+]{M. Elise Lauterbur}
% \author[3,*]{Ariella L. Gladstein}
 \author[4,*]{Graham Gower}
 \author[5*]{Nathaniel S. Pope}
 \author[5*]{Murillo F. Rodrigues}
\author[6]{Georgia Tsambos}
\author[34]{Linh N. Tran}
\author[2]{Maria Izabel A. Cavassim}
% \author[5,7]{Jeff Adrion}
% \author[5]{Saurabh Belsare}
% \author[8]{Arjun Biddanda}
% \author[5]{Victoria Caudill}
% \author[9]{Jean Cury}
% \author[10]{Ignacio Echevarria}
% \author[11]{Benjamin C. Haller}
% \author[12,13]{Ahmed R. Hasan}
\author[14,15]{Xin Huang}
% \author[16]{Leonardo Nicola Martin Iasi}
% \author[17]{Ekaterina Noskova}
% \author[18]{Jana Obšteter}
% \author[19]{Vitor Antonio Corrêa Pavinato}
% \author[20,21]{Alice Pearson}
% \author[22,23]{David Peede}
% \author[24]{Manolo F. Perez}
\author[5]{Chris C. R. Smith}
% \author[25]{Jeffrey P. Spence}
% \author[5]{Anastasia Teterina}
\author[5]{Silas Tittes}
\author[5]{Scott T. Small}
% \author[26]{Per Unneberg}
% \author[27]{Juan Manuel Vazquez}
% \author[28]{Ryan K. Waples}
% \author[29]{Anthony Wilder Wohns}
% \author[30]{Yan Wong}
% \author[31]{Franz Baumdicker}
% \author[32]{Reed A. Cartwright}
% \author[33]{Gregor Gorjanc}
\author[4]{Kirk E. Lohmueller}
\author[34]{Ryan N. Gutenkunst}
\author[30]{Jerome Kelleher}
\author[35]{Aaron P. Ragsdale}

\author[37]{Daniel R. Schrider}
\author[38]{Ilan Gronau}
\author[5,36]{Peter L. Ralph}
\author[5]{Andrew D. Kern}


 \affil[*]{\small{These authors contributed equally to the paper.}}
% \affil[+]{\small{Corresponding authors: lauterbur@gmail.com ; ilan.gronau@runi.ac.il.}}
% \affil[1]{\small{Department of Ecology and Evolutionary Biology, University of Arizona, Tucson AZ 85719, USA}}
% \affil[2]{\small{Department of Ecology and Evolutionary Biology, University of California, Los Angeles, Los Angeles CA, USA}}
% \affil[3]{\small{Embark Veterinary, Inc., Boston MA 02111, USA}}
 \affil[4]{\small{Section for Molecular Ecology and Evolution, Globe Institute, University of Copenhagen, Denmark}}
 \affil[5]{\small{Institute of Ecology and Evolution, University of Oregon, Eugene OR 97402, USA}}
% \affil[6]{\small{School of Mathematics and Statistics, University of Melbourne, Australia}}
% \affil[7]{\small{AncestryDNA, San Francisco CA 94107, USA}}
% \affil[8]{\small{54Gene, Inc., Washington DC 20005, USA}}
% \affil[9]{\small{Université Paris-Saclay, CNRS, INRIA, Laboratoire Interdisciplinaire des Sciences du Numérique, UMR 9015 Orsay, France}}
% \affil[10]{\small{School of Life Sciences, University of Glasgow, Glasgow, UK}}
% \affil[11]{\small{Department of Computational Biology, Cornell University, Ithaca NY, USA}}
% \affil[12]{\small{Department of Cell and Systems Biology, University of Toronto, Toronto ON, Canada}}
% \affil[13]{\small{Department of Biology, University of Toronto Mississauga, Mississauga ON, Canada}}
% \affil[14]{\small{Department of Evolutionary Anthropology, University of Vienna, Vienna, Austria}}
% \affil[15]{\small{Human Evolution and Archaeological Sciences (HEAS), University of Vienna, Vienna, Austria}}
% \affil[16]{\small{Department of Evolutionary Genetics, Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany}}
% \affil[17]{\small{Computer Technologies Laboratory, ITMO University, St Petersburg, Russia}}
% \affil[18]{\small{Agricultural Institute of Slovenia, Department of Animal Science, Ljubljana, Slovenia}}
% \affil[19]{\small{Entomology Department, The Ohio State University, Wooster OH, USA}}
% \affil[20]{\small{Department of Genetics, University of Cambridge, Cambridge, UK}}
% \affil[21]{\small{Department of Zoology, University of Cambridge, Cambridge, UK}}
% \affil[22]{\small{Department of Ecology, Evolution, and Organismal Biology, Brown University, Providence RI, USA}}
% \affil[23]{\small{Center for Computational Molecular Biology, Brown University, Providence RI, USA}}
% \affil[24]{\small{Department of Genetics and Evolution, Federal University of Sao Carlos, Sao Carlos 13565905, Brazil}}
% \affil[25]{\small{Department of Genetics, Stanford University School of Medicine, Stanford CA 94305, USA}}
% \affil[26]{\small{Department of Cell and Molecular Biology, National Bioinformatics Infrastructure Sweden, Science for Life Laboratory, Uppsala University, Husargatan 3, SE-752 37 Uppsala, Sweden}}
% \affil[27]{\small{Department of Integrative Biology, University of California, Berkeley, Berkeley CA, USA}}
% \affil[28]{\small{Department of Biostatistics, University of Washington, Seattle WA, USA}}
% \affil[29]{\small{Broad Institute of MIT and Harvard, Cambridge MA 02142, USA}}
\affil[30]{\small{Big Data Institute, Li Ka Shing Centre for Health Information and Discovery, University of Oxford, Oxford OX3 7LF, UK}}
% \affil[31]{\small{Cluster of Excellence - Controlling Microbes to Fight Infections, Eberhard Karls Universität Tübingen, Tübingen, Baden-Württemberg, Germany}}
% \affil[32]{\small{School of Life Sciences and The Biodesign Institute, Arizona State University, Tempe AZ, USA}}
% \affil[33]{\small{The Roslin Institute and Royal (Dick) School of Veterinary Studies, University of Edinburgh, Edinburgh EH25 9RG, UK}}
% \affil[34]{\small{Department of Molecular and Cellular Biology, University of Arizona, Tucson AZ 85721, USA}}
\affil[35]{\small{Department of Integrative Biology, University of Wisconsin-Madison, Madison WI, USA}}
\affil[36]{\small{Department of Mathematics, University of Oregon, Eugene OR 97402, USA}}
\affil[37]{\small{Department of Genetics, University of North Carolina at Chapel Hill, Chapel Hill NC 27599, USA}}
\affil[38]{\small{Efi Arazi School of Computer Science, Reichman University, Herzliya, Israel}}

\date{\small{\today{}}}

\begin{document}

\maketitle


\section*{Abstract}

\section*{Introduction}
    \label{introduction}
    % natural selection
    Natural selection is a fundamental force in evolution, shaping the
    genetic diversity of populations and driving the adaptation of
    species to their environments. The effects of natural selection
    on genetic variation are complex, and can be difficult to disentangle
    from other evolutionary processes such as mutation, recombination,
    and genetic drift \cite[e.g.,][]{gillespie1991causes}.
    For instance changes in population size can lead to fluctuations
    in genetic diversity across a recombining chromosome 
    that can mimic the effects of selection \citep{simonsen1995properties, barton1998effect},
    % DRS -- I'm not sure Barton reference is right for this?
    and lead to spurious inferences about the strength and targets of genetic adaptation
    \cite{simonsen1995properties,akey2004population,nielsen2005genomic}.
    In turn selection can confound our ability to infer demographic 
    history from allele frequencies \citep{ewing2016consequences,schrider2016effects} and
    estimates of inverse coalescent rate \citep{schrider2016effects, johri2021impact, cousins2024accurate}.
    Thus it is imperative to joingly account for the effects of selection
    and demography when inferring evolutionary history from genetic data \citep{sheehan2016deep,johri2020toward}.
    However this is a challenging task from a modeling perspective \citep{johri2022prospect}.

    % simulation
    % DRS: lots more refs will be needed in this paragraph. added a few and am happy to help add more later if needed
    To meet the growing need for the interpretation,
    analysis, and exploration of realistic and complex evolutionary models
    the field of population genetics has increasingly turned to simulation.
    Simulation in population genetics has a long history 
    including both backward-in-time coalescent simulations
    \citep{kingman1982genealogy,hudson1983testing, hudson1990gene}
    and forward-in-time simulations of complex demography and selection
    \citep[e.g.,][]{gillespie1984molecular,thornton2014c++, haller2019slim}.
    The development of simulation tools has been driven by the need to
    understand the effects of complex evolutionary processes on genetic
    variation \citep[e.g.,][]{galloway2020few}, to provide a null model for hypothesis testing
    \citep[e.g.,][]{hudson1992statistical,hudson1994evidence,sabeti2002detecting}, to
    to explore the power and limitations of statistical methods \cite[e.g.,][]{przeworski2002signature},
    and increasingly to provide a basis for machine learning and other
    simulation-based inference methods \citep[e.g.,][]{pavlidis2010searching,lin2011distinguishing,kern2018diplos,mughal2019localizing,sanchez2021deep,wang2021automatic}.
    While this is so, joint simulation of complex demography and selection
    is challenging, and requires a deep understanding of the underlying
    evolutionary processes, as well as a number of parameter choices including
    the strength of selection, the distribution of fitness effects, and the
    recombination rate, as well as a parameterized model of demography.
    This is a daunting task for many researchers, and can be a barrier to
    the adoption of simulation-based methods in population genetics.
    Furthermore, sharing and comparing simulation results among studies can 
    itself be challenging, as different simulation tools may use different structures
    and parameter scalins for their models, and may not be directly comparable.
    This is a major limitation for the field, as it can make it difficult
    to assess the robustness of results, and to compare the results of
    different studies.

    % stdpopsim
    % DRS: lots more refs will be needed in this paragraph. added a few and am happy to help add more later if needed
    A lingering challenge with simulation in population genetics has been
    reproducibility and the ability to share and compare results among 
    researchers \citep[e.g.,][]{ragsdale2020lessons}.
    This challenge has been addressed in part by the development
    of community resources for sharing and distributing simulation software
    via the \stdpopsim project \citep{adrion2020community}. \stdpopsim
    provides a standardized interface for accessing a wide range of
    population genetic models, and has begun to be be widely adopted by the community. %maybe add citations here?
    While that is so, the original version of \stdpopsim did not include
    models of selection, which is a major limitation for more empirical
    applications of population genetic simulation. In particular, modeling selection
    through simulation is critical for understanding processes such
    as adaptation, the effects of selective sweeps \citep[e.g.][]{braverman1995hitchhiking,fay2000hitchhiking,przeworski2002signature,przeworski2005signature,schrider2015soft}, and the impact of
    background selection on genetic diversity
    \citep[e.g.][]{charlesworth1993effect,charlesworth1995pattern,williamson2002genealogy,ewing2016consequences,torres2020temporal}.
    Ideally, one would like
    to have a single, unified framework for simulating both neutral and
    non-neutral evolutionary processes, and to be able to compare the
    results of these simulations to empirical data in a manner that is
    both accessible to a wide range of researchers and highly reproducible. 
    Further the framework should include the complex realities of 
    genomes, including heterogenous recombination rates, 
    variation in the size and density of functional elements, and
    non-equilibrium demographic histories. Ideally, one should be able
    to model all of these features after estimates/annotations obtained
    from a species of interest \citep[e.g.][]{schrider2020background}.

    % goals
    In this manuscript we provide an overview of a major new addition
    to \stdpopsim---the inclusion of models of selection.
    We begin by describing the models of selection that we have implemented,
    as well as the parameter choices that are available to the user.
    This includes models of background selection, selective sweeps, and
    models of the distribution of fitness effects (DFE).
    Further we describe how these models can be combined with genomic
    annotations that are specific to a given species available
    through the \stdpopsim API, and combined with a parameterized model of
    demography and recombination to provide a realistic simulation of
    genetic variation in a population.
    We then provide a series of examples of how these models can be used
    to benchmark and compare the performance of different methods for
    inferring demographic history, the targets of recent positive selection,
    and the distribution of fitness effects from genetic data. 

\section*{Implementing Selection in \stdpopsim}
    \label{selection}

    % implementing selection
    \stdpopsim previously provided a wide range of models of neutral
    demographic history from roughly two dozen species \citep{lauterbur2023expanding}.
    This was accomplished through the use of a standardized interface
    and data structure that allows for curated addition of new
    species and models, and can use either
    msprime \citep{Baumdicker2022} or SLiM \citep{haller2019slim}
    as the backend engine for efficient population genetic simulation.
    To extend \stdpopsim's functionality to include models of selection, 
    we introduce an important new class representing a specfic distribution of fitness effects
    to the \stdpopsim API: the \texttt{DFE} class. 
    This class provides an interface for specifying
    a distribution of the fitness effects of new mutations, and
    is added to a \texttt{Contig} object to provide a description
    of the fitness effects of mutations along a chromosome.
    A \texttt{DFE} object can apply to a specific interval set of a chromosome,
    and can be combined with other \texttt{DFE} objects to provide, 
    a rich model of how selection may differ along a chromosome. 
    Once specified a model of selection is then run in the same way
    but currently is limited to using SLiM simulation engine, which
    is perhaps the most flexible simulation engine for modeling selection currently available.
    
    As with other \stdpopsim models, when incorporating selection we aim to hew as closely as possible
    to realistic genomes, and provide public annotations of specices-specific functional
    elements that can be used along with a \texttt{DFE} object to
    readily provide realistic model of selection for a specific genome and set of
    populations. A schematic of the \stdpopsim catalog is shown in Figure \ref{fig:schematic}.
    From a users perspective, one can specify a species, a portion of the genome to simulate,
    a genetic map if available, a model of demography, and a model of selection that is
    composed of a set of annotations and a \texttt{DFE} object. 

    The vast majority of published DFE estimates are based on a limited number of species,
    so \stdpopsim currently has implemented published DFEs from
    four species: \textit{Arabidopsis thaleana} \citep{huber2018gene}, \textit{Drosophila melanogaster} \citep{ragsdale2016triallelic,huber2017determining},
    humans, and the vaquita porpoise \textit{Phocoena sinus} \citep{robinson2022critically}.
    While that is so, these DFEs can be mixed and matched with the other species
    in the catalog. For instance, one could simulate a butterfly species with a Gamma DFE originally
    estimated from humans, or a human population with a DFE estimated from \textit{Drosophila}.
    Furthermore the user can specify a custom DFE, and provide their own annotations
    of functional elements to simulate selection in a species for which we do not yet have 
    a published DFE included in the catalog. This flexibility allows for a wide range of
    models of selection to be simulated, and for the results of these simulations to be
    compared to empirical data or used as a null model for hypothesis testing.

    % sweep interface
    In addition to the \texttt{DFE} class, we have also implemented a class (\texttt{stdpopsim.ext.selective\_sweep})
    that enables selective sweeps to be simulated in \stdpopsim.
    This class augments a demographic model with an ``extended event''
    which conditions on the introduction of a selected mutation at a given time and position
    and with a given selection coefficient. Further the user can specify the minimum frequency
    of the selected allele at the time of sampling. As these extended events are implemented
    on top of the existing \stdpopsim API, they can be combined with other models of selection
    and demography to provide a rich model of the combined effects of multiple disparate evolutionary processes
    on genetic variation in a population.
    
    % DRS: This figure could really use some Python API and comand line examples like in Figs 1B,C
    % form the original stdpopsim paper
    \begin{wrapfigure}[37]{l}{0.4\textwidth}
        \vspace{-0.0cm}
        \includegraphics[width=\linewidth]{figures/schematics/catalog.pdf}
        \caption{\label{fig:schematic}
        A schematic of the \stdpopsim catalog for simulating genetic variation
        in a population. The user specifies a species, a portion of the genome to simulate,
        and optionally a genetic map, a model of demography, and a model of selection that 
        itself is composed of a distribution of fitness effects (DFE) and a set of functional
        annotations.}
    \end{wrapfigure}
    \hfill

    % current numbers of DFEs / species / etc
    At this release the \stdpopsim catalog comprises 24 species (i.e. genome representations)
    for which we have implemented XX demographic models, have XX genetic maps, and XX DFEs.
    Additions to the catalog are ongoing -- we point the reader to our previously published 
    report detailing this effort \citep{lauterbur2023expanding} and we welcome contributions from the
    community---we encourage those interested in contributing to visit https://popsim-consortium.github.io/stdpopsim-docs
    or contact one of the PopSim Consortium members for guidance.


\section*{Example Applications with Selection}
    \label{applications}
    In this section we provide a series of examples of how the new models of selection
    in \stdpopsim can be used to benchmark and compare the performance of different
    methods for population genetic inference.
    We focus on three main areas: demographic inference in the 
    single and multi-population settings, inference of the distribution of fitness effects,
    and the detection of selective sweeps, particularly in the context of background selection.

    \section*{Inference of $N_e(t)$ in the context of selection}
    One of the most common applications of population genetic inference is to estimate
    the effective population size over time, $N_e(t)$, from genetic data. This can be done
    using a variety of methods, including the pairwise sequentially Markovian coalescent
    (PSMC) \citep{li2011inference}, through use of the site frequency spectrum (SFS),
    as well as through identity by decent information. 
    While that is so, it has been well described that selection, because of its effects on
    local and more global patterns of genetic variation, can drive estimates of $N_e(t)$
    further from true census population sizes simply because selection increases the 
    rate of coalescence in the genome. 

    Using \stdpopsim we can simulate genetic data under a model of selection and then 
    examine the performance of different methods for inferring $N_e(t)$ from these simulated 
    data. In Figure \ref{fig:1pop-human-demography} we show the results of such a benchmark
    where we have simulated genomes under a model of human out of Africa (OOA) demography
    with and without background selection on exons. For this simulation we have used
    a genetic map from the HapMap Project \citep{international2007second} along with a
    DFE for deleterious alleles (estimated by \cite{huber2017determining})
    that acts on exonic positions defined from the HAVANA group release 104 for the human genome. 
    These simulations were then used to estimate $N_e(t)$ using four methods: 1) MSMC2 \citep{Schiffels2020}, 
    2)the stairway plot method \citep{liu2020stairway}, 3) GONE \citep{santiago2020recent}, and 4) SMC++ \citep{terhorst2017robust}.
    In this and what follows we have performed 3 replicate whole genome simulations, so uncertainty in estimates might
    be roughly assessed, however we are mindful of wasted compute as thorough benchmarking
    isn't the main purpose of this manuscript.
    
    In Figure \ref{fig:1pop-human-demography} we show six panels, each of which shows the estimated $N_e(t)$
    from each independent sample in a five-population model of human out-of-Africa demography
    from Ragsdale \textit{et al.} (2019)
    with samples taken from three modern geographic groups (CEU, CHB, and YRI from left to right).
    The top row shows the results of simulations without selection, while the bottom row shows the results
    of simulations with a Gamma distributed DFE for the deleterious effects of mutations occurring exons. In each panel we show the true $N_e(t)$ in black, and the estimated $N_e(t)$
    in various colors for the four methods. Dotted lines on the bottom row are to visually reinforce that inference is done
    from a model with selection. We see that these methods generally perform reasonably well in the absence of selection, 
    although GONE seems to overestimate $N_e(t)$ in the more recent past for the CEU and CHB populations.
    Presumably this could be due to patterns of linkage disequilibrium that result from the combination
    of bottleneck and growth in these populations.
    In the presence of selection, different methods behave differently. 
    For instance stairwaiplot which uses the SFS to infer $N_e(t)$ seems to slightly underestimate $N_e(t)$
    but is reasonably close to the true $N_e(t)$ in the absence of selection.
    Likewise SMC++ seems to be more robust to the presence of selection,
    but there is some added noise to the returned estimates. 
    MSMC2 seems to be the most affected by the presence of selection, 
    estimates of $N_e(t)$ varying quite a bit from the true $N_e(t)$.
    % DRS: I don't agree with the interpretation in the previous two sentences (starting with "Likewise").
    % MSMC is already barfing a little bit even without selection, and it is not clear to me from these
    % plots that selection makes things any worse. Also, SMC++ is doing quite well without selection so maybe
    % the minor drop in accuracy is meaningful in relative terms.


    \begin{figure}[t]
        \centering
        \includegraphics[width=\textwidth]{figures/HomSap/OOA/estimated_Ne_t_final}
        \caption{
        \label{fig:1pop-human-demography}
        Performance of methods to infer $N_e(t)$ from a human out-of-Africa model \citep{ragsdale2019models}
        with and without background selection on exons. The top row shows estimates of $N_e(t)$ from simulations
        without selection, while the bottom row shows estimates of $N_e(t)$ from simulations with a Gamma distributed   
        DFE acting on exons. In each panel we show the true $N_e(t)$ in black, and the estimated $N_e(t)$ from four methods:    
        MSMC2 \citep{Schiffels2020}, stairway plot \citep{liu2020stairway}, GONE \citep{santiago2020recent}, and SMC++ \citep{terhorst2017robust}.  
        }
    \end{figure}

    To highlight the ease of comparisons between different species and model enabled by \stdpopsim,
    we have also performed a similar benchmark using the vaquita porpoise \textit{Phocoena sinus}.
    In Figure \ref{fig:1pop-vaquita-demography} we show the results of $N_e(t)$ inference
    where we have simulated genomes under a model of vaquita demography from \textcite{robinson2022critically}.
    For these simulations we have used the genome structure and exon annotations from the vaquita genome assembly
    and a DFE from \textcite{robinson2022critically} that acts on exonic positions, however no genetic map 
    is currently available, so recombination rates are assumed to be constant across the genome.
    The demographic model is a simple two-epoch model where population size has declined in the recent past. 
    In this case we see that all methods perform well both in the absence and presence of selection,
    with a close match between the true $N_e(t)$ and the estimated $N_e(t)$ in all cases.


\begin{figure}[t]
    \centering
    \includegraphics[width=\textwidth]{figures/PhoSin/Vaquita2Epoch_1R22/estimated_Ne_t_final}
    \caption{
    \label{fig:1pop-vaquita-demography}
    Performance of methods to infer $N_e(t)$ from simulations of the vaquita porpoise genome under a single population
    model of declining population size \citep{robinson2022critically} with and without background selection on exons. 
    The top panel shows estimates of $N_e(t)$ from simulations
    without selection, while the bottom panel shows estimates of $N_e(t)$ from simulations with a Gamma distributed   
    DFE acting on exons. In each panel we show the true $N_e(t)$ in black, and the estimated $N_e(t)$ from four methods:    
    MSMC2 \citep{Schiffels2020}, stairway plot \citep{liu2020stairway}, GONE \citep{santiago2020recent}, and SMC++ \citep{terhorst2017robust}.  
    }
\end{figure}

\section*{Estimation of the Distribution of Fitness effects}
    \label{dfe}
    Another common application of population genetic inference is to estimate the distribution of fitness effects (DFE)
    from genetic data. Here we show how the new models of selection in \stdpopsim can be used to benchmark and compare
    the performance of different methods for inferring the DFE from genetic data. Here we have performed simulations using the 
    human and vaquita porpoise models described above, with complex demographic histories and models of selection acting on exons.
    To remind the reader, the model of selection in each case is a Gamma distributed DFE for deleterious mutations.

    \begin{figure}[htbp]
        \centering
        \includegraphics[width=\linewidth]{figures/HomSap/OOA/HomSap_OutOfAfricaArchaicAdmixture_5R19_Gamma_K17_ensembl_havana_104_exons_DFE_plot}
        \caption{A comparison of methods for inferring the distribution of fitness effects (DFE) from genetic data.
        Simulations were performed using a human out-of-Africa model \citep{ragsdale2019models} with a Gamma distributed DFE
        acting on exons parameterized by a mean selection coefficient and a shape parameter. Estimates of the 
        parameters of the DFE are shown in the left ($\lvert E(s) \rvert $) and right hand panels (shape) respectively.
        The human model has three extant populations (CEU, CHB, and YRI), and parameter estimates from each
        sample are shown in different colors.
        Estimates from three different methods are shown: 1) GRAPES \cite{galtier2016adaptive}, 2) polyDFE \citep{tataru2020polydfe},
        and 3) dadi \citep{gutenkunst2009inferring}.}
        \label{fig:homsap-dfe.ooa}
    \end{figure}

    In Figures \ref{fig:homsap-dfe.constant} and \ref{fig:homsap-dfe.ooa} we show a comparison of estimates
    from three software packages for inferring the DFE from genetic data-- 
    dadi \citep{gutenkunst2009inferring}, polyDFE \citep{tataru2020polydfe}, 
    and GRAPES \citep{galtier2016adaptive}. 
    Figure \ref{fig:homsap-dfe.constant} shows estimates from a single, constant size population of humans, 
    while Figure \ref{fig:homsap-dfe.ooa} shows estimates from a model of human out-of-Africa demography.
    In a constant size population, all methods perform reasonably well, although dadi seems to
    perform slightly worse than the other methods examined, at least for the small number of replicates we simulated here.
    Notably only polyDFE overlapped the true mean selection coefficient, whereas 
    the other methods slightly underestimated the strength of selection. 
    For the shape parameter in constant size simulations GRAPES and polyDFE are roughly
    equivalent in their performance whereas dadi seems to yield overestimates. 
    
    In the out-of-Africa model (fig \ref{fig:homsap-dfe.ooa}) the performance of the methods is more mixed. 
    In particular variation in accuracy of the parameters estimated in each
    sample population depends on the methods used, and each method seems to 
    have its own biases with respect to the way in which population history 
    affects parameter estimates. For instance for dadi we see that estimates of 
    the shape parameter are most accurate in the YRI sample, while for polyDFE
    estimates are consistently least accurate in the YRI sample. 
    With respect to the strength of selection, as in constant size populations,
    all methods are performing reasonably well, although dadi seems to
    trail in terms of accuracy. 


    %add PhoSin DFE figure here
    \begin{figure}
        \centering
        \includegraphics[width=\textwidth]{figures/PhoSin/Vaquita2Epoch_1R22/PhoSin_Vaquita2Epoch_1R22_Gamma_R22_Phocoena_sinus.mPhoSin1.pri.110_exons_DFE_plot.pdf}
        \caption{
        \label{fig:vaquita-dfe}
        A comparison of methods for inferring the distribution of fitness effects (DFE) from genetic data.
        Simulations were performed using a model of vaquita porpoise demography \citep{robinson2022critically} with a Gamma distributed DFE
        acting on exons parameterized by a mean selection coefficient and a shape parameter. Estimates of the 
        parameters of the DFE are shown in the left ($\lvert E(s) \rvert $) and right hand panels (shape) respectively.
        This vaquita model has a single extant population, labelled pop 0, and parameter estimates from each
        of the three different methods are shown: 1) GRAPES \cite{galtier2016adaptive}, 2) polyDFE \citep{tataru2020polydfe},
        and 3) dadi \citep{gutenkunst2009inferring}.}
    \end{figure}
    
    In Figures \ref{fig:vaquita-dfe.constant} and \ref{fig:vaquita-dfe} we show a comparison of estimates
    from the same three software packages for inferring the DFE from genetic data, but for simulations of the
    vaquita porpoise genome. In the constant size model (fig \ref{fig:vaquita-dfe.constant}) we see that all methods
    perform uniformly worse that in the human genome simulations, which consistent underestimation of the mean selection coefficient    
    and overestimation of the shape parameter. 
    In context of our model of vaquita porpoise demography \citep{robinson2022critically}, which models a population
    decline towards the present, we see that methods to infer the DFE perform no better-- again each method 
    signficiantly underestimates the mean selection coefficient and overestimates the shape parameter.
    %ADK: add why we think this is the case-- is this a bug? or a feature of the genome?

\section*{Detection of Selective Sweeps}
    \label{sweeps}
    We next highlight how the new models of selection in \stdpopsim can be used to benchmark and compare
    the performance of different methods for detecting selective sweeps from genetic data. 
    Here we have performed simulations of human chromosome 1 under the three population out-of-Africa model from \cite{gutenkunst2009inferring}
    with purifying selection acting on exons under the Gamma-distributed DFE described above, the HapMap II genetic map \citep{international2007second},    
    but overlay on top of this a model of selective sweeps. In particular we have conditioned simulations on the introduction of a beneficial mutation
    at a given location (to be varied) in the genome at a given time, and with a moderately strong selection coefficient ($s = 0.03$; $2Ns \sim 600$).
    For each sweep location we performed 200 replicate simulations, sampling from them only if the selected allele reached a frequency of
    0.95 or greater. Rather than simulate the entire chromosome arm, replicate simulations were performed on 5cM regions of the chromosome
    at each of 100 different locations. As recombination rate varies across the genome, the size of the region in base pairs simulated varies,
    however the recombination distance, which is critical for the linked effects of a sweep, is held constant.
  

    %sweeep power figure
    \begin{figure}
        \centering
        \includegraphics[width=0.8 \textwidth]{figures/sweeps/chr1_power.pdf}
        \caption{
        Power to detect selective sweeps at 100 locations along human chromosome 1 under a three population out-of-Africa model \citep{gutenkunst2009inferring}  
        with and without purifying selection acting on exons under the Gamma-distributed DFE described above. 
        The HapMap II genetic map \citep{international2007second} was used in each simulation. 
        Single beneficial mutations were introduced at each location with a selection coefficient of $s = 0.03$,
        and only sampled if they reached a terminal frequency of 0.95 or greater. 
        Three methods for detecting sweeps were used: 1) sweepfinder2 \citep{degiorgio2016sweepfinder2} (top row), 
        2) diploshic \citep{kern2018diplos} (middle row), and 3) an empirical cutoff of $\pi$ to find regions of reduced diversity (bottom row).
        True positive rate is shown for these methods for samples from the CEU and YRI samples (left and right respectively). 
        Under each method we show as a heat map exon density along the chromosome, along with the local recombination rate in cM/Mb.
        Power is shown as the proportion of replicates in which a sweep was detected at each location. 
        Each panel shows the results of simulations with purifying selection acting on exons (red; labelled BGS)
        and without purifying selection acting on exons (blue; labelled Neutral).
        }
        \label{fig:chr1_power}
    \end{figure}
    
    In Figure \ref{fig:chr1_power} we show the power to detect selective sweeps at 100 locations along human chromosome 1 from
    the simulations described above. We show the results of three methods for detecting sweeps: 1) sweepfinder2 \citep{degiorgio2016sweepfinder2},
    2) diploshic \citep{kern2018diplos}, and 3) an empirical cutoff of $\pi$ to find regions of reduced diversity.
    A few things are immediately apparent from this figure. First, the power to detect sweeps is generally higher in the YRI sample
    than in the CEU sample, which is consistent with the fact that the YRI sample has a larger effective population size and thus
    more variation to detect a sweep. Further the population size of YRI has been relatively constant over the last 100,000 years,
    thus the out-of-Africa bottleneck has does not mute the signal of a sweep as much as in the CEU sample. This is consistent with
    what we know about the effects of demography on the power to detect sweeps \citep[e.g.,][]{simonsen1995properties}.
    Second, the power to detect sweeps is slightly lower in the presence of purifying selection acting on exons but the effect is not
    as strong as one might expect. This is likely because the strength of selection acting on exons is relatively weak, and the
    sweep detection methods we have used are relatively robust to the effects of background selection. 
    Finally and most importantly, the power to detect sweeps varies considerably among regions of the genome. 
    Indeed variation in power along the chromosome is much greater than the variation in power between the CEU and YRI samples or between
    models with or without purifying selection. This is likely due to the effects of recombination rate heterogeniety as well
    as the density of functional elements along the chromosome.

    %sweep power vs recombination rate
    \begin{figure}
        \centering
        \includegraphics[width=0.8 \textwidth]{figures/sweeps/relationship_power_cM.pdf}
        \caption{
        Power to detect selective sweeps at 100 locations along human chromosome 1 under a three population out-of-Africa model \citep{gutenkunst2009inferring}  
        with and without purifying selection as a function of local recombination rate.
        Simulations are as described in Figure \ref{fig:chr1_power}.
        Each panel shows the results of simulations with purifying selection acting on exons (red; labelled BGS)
        and without purifying selection acting on exons (blue; labelled Neutral).
        True positive rate is shown for these methods for samples from the CEU and YRI samples (left and right respectively).
        Points represent the power at each location and trend lines along with 95\% confidence intervals are shown fit to those points.
        }
        \label{fig:power-recomb}
    \end{figure}

    To further explore the effects of recombination rate and exon density on the power to detect sweeps, we show in Figure \ref{fig:power-recomb}
    the relationship between power and local recombination rate for our sweep simulations. We see that power to detect sweeps is generally
    a decreasing function of local recombination rate, which is consistent with sweeps having a larger genomic footprint in regions of low
    recombination. We also see that the presence of purifying selection acting on exons has a relatively small effect on the power to detect sweeps,    
    but one that is present across the entire range of recombinations rates observed in human chromosome 1. 
    We also note that the different methods for detecting sweeps that we have explored here respond differently to the effects of recombination rate.
    Unsurprisingly, the empirical cutoff of $\pi$ to detect sweeps is the less robust to the effects of recombination rate than the other two methods,
    that are specifically designed to find selective sweeps. Whereas sweepfinder2 has a clear decline in power with increasing recombination rate,
    diploshics behavior is more variable, with a slight increase in power at intermediate recombination rates in the CEU sample and a decline
    in power at high recombination rates in the YRI sample.

    In Figure \ref{fig:power-exon} we show the relationship between power and local exon density for our sweep simulations.
    Here there is little relationship between power and exon density, which is somewhat surprising given that the power to detect sweeps
    is generally lower in the presence of purifying selection acting on exons. However while this is the case we see that 
    generally all methods have more power to detect sweeps in regions of lower exon density, which is consistent with the idea that
    background selection acting on nearby exons can reduce the signature of a sweep. However this is far from a simple pattern:
    for instance sweepfinder2 seems to have minimum power in regions of intermediate exon density, while diploshic is roughly constant
    or perhaps has a slight increase in power at intermediate exon densities. Noise is the estimates of power with respect to exon density
    are prohibitively high for the empirical cutoff of $\pi$ to detect sweeps, so we it's not clear what conclusions can be drawn.

\lipsum[20-25]

% \section*{outline}
%     \label{outline}
%     \begin{enumerate}
%         \item Single-population demographic inference methods
%         \begin{enumerate}
%             \item mscm2
%             \item stairwaiplot
%             \item GONE
%             \item SMC++
%         \end{enumerate}
%         \item DFE inference methods
%         \begin{enumerate}
%             \item dadi
%             \item polyDFE
%             \item DFE-alpha
%             \item GRAPES
%         \end{enumerate} 
%         \item sweeps
%         \begin{enumerate}
%             \item compare effect of BGS on sweep detection
%             \item compare power in different pops
%             \item compare methods
%         \end{enumerate}
%     \end{enumerate}

    
\section*{Conclusion}
    \label{conclusion}

\section*{Data availability}\label{data_availability}


\section*{Acknowledgments}\label{acknowledgements}

\section*{Funding}
    \label{funding}

\printbibliography

%%%%%%%% supplementary material
\clearpage
\beginsupplement

\section*{Supplementary Material}

% constant size DFE figure
\begin{figure}[h]
    \centering
    \includegraphics[width=\textwidth]{figures/HomSap/Constant/HomSap_Constant_Gamma_K17_ensembl_havana_104_exons_DFE_plot}
    \caption{
    Performance of methods to infer the distribution of fitness effects (DFE) from genetic data.
    Simulations were performed using a human constant size model with a Gamma distributed DFE
    acting on exons parameterized by a mean selection coefficient and a shape parameter. On each row we show the true parameter 
    as a red line, and the estimates as boxplots from replicate simulations. The human model has three extant populations (CEU, CHB, and YRI).  
    Each row of the figure shows estimates from a different method: 1) dadi \citep{gutenkunst2009inferring}, 2) polyDFE \citep{tataru2020polydfe},  
    3) DFE-alpha \citep{eyre2009estimating}, and 4) GRAPES \citep{galtier2016adaptive}.  
    }
    \label{fig:homsap-dfe.constant}    
\end{figure}

\begin{figure}
    \centering
    \includegraphics[width=\textwidth]{figures/PhoSin/Constant/PhoSin_Constant_Gamma_R22_Phocoena_sinus.mPhoSin1.pri.110_exons_DFE_plot.pdf}
    \caption{
    \label{fig:vaquita-dfe.constant}
    A comparison of methods for inferring the distribution of fitness effects (DFE) from genetic data.
    Simulations were performed using a model of vaquita porpoise genome under constant population size with a Gamma distributed DFE
    acting on exons parameterized by a mean selection coefficient and a shape parameter. Estimates of the 
    parameters of the DFE are shown in the left ($\lvert E(s) \rvert $) and right hand panels (shape) respectively.
    This vaquita model has a single extant population, labelled pop 0, and parameter estimates from each
    of the three different methods are shown: 1) GRAPES \cite{galtier2016adaptive}, 2) polyDFE \citep{tataru2020polydfe},
    and 3) dadi \citep{gutenkunst2009inferring}.}
\end{figure}


% constant all site size DFE figure
\begin{figure}[h]
    \centering
    \includegraphics[width=\textwidth]{figures/HomSap/Constant/HomSap_Constant_Gamma_K17_all_sites_DFE_plot}
    \caption{
    \label{fig:homsap-dfe.constant.all_sites}
    Performance of methods to infer the distribution of fitness effects (DFE) from genetic data
    where all sites along the simulated chromosome can experience selected and neutral mutation
    as opposed to exonic regions only.
    }
\end{figure}


\begin{table}[ht]
\centering
\small
\caption{\bf{Performance of DFE methods across the simulation scenarios}. 
Bold rows show the lowest mean absolute error (MAE) for the two DFE parameters
for each species.}
\begin{tabular}{lllrrrrrr}
\toprule
species ID & demography & annotation & \makecell{MAE \\ $E|s|$ \\ dadi} & \makecell{MAE \\ $E|s|$ \\ grapes} & \makecell{MAE \\ $E|s|$ \\ polyDFE} & \makecell{MAE \\ shape \\ dadi} & \makecell{MAE \\ shape \\ grapes} & \makecell{MAE \\ shape \\ polyDFE} \\
\midrule
HomSap & Constant & all sites & 0.0027 & 0.0049 & \bf{0.0018} & \bf{0.00071} & 0.029 & 0.014 \\
HomSap & Constant & exons & 0.0012 & \bf{0.00081} & 0.0023 & 0.030 & 0.0086 & \bf{0.0068} \\
HomSap & OOA 5R19 & all sites & 0.012 & 0.0079 & \bf{0.0074} & \bf{0.027} & 0.055 & 0.035 \\
HomSap & OOA 5R19 & exons & 0.014 & \bf{0.0049} & 0.0072 & 0.031 & 0.051 & \bf{0.024} \\
PhoSin & Constant & all sites & 0.024 & \bf{0.024} & 0.024 & 0.21 & 0.24 & \bf{0.19} \\
PhoSin & Constant & exons & 0.024 & \bf{0.024} & 0.024 & \bf{0.23} & 0.25 & 0.23 \\
PhoSin & Vaquita2Epoch 1R22 & all sites & 0.024 & \bf{0.023} & 0.024 & 0.20 & 0.23 & \bf{0.18} \\
PhoSin & Vaquita2Epoch 1R22 & exons & 0.024 & \bf{0.023} & 0.024 & \bf{0.18} & 0.23 & 0.21 \\
\bottomrule
\end{tabular}
\label{tab:dfe_table}
\end{table}


%sweep power vs exon density
\begin{figure}
    \centering
    \includegraphics[width=0.8 \textwidth]{figures/sweeps/relationship_power_exon.pdf}
    \caption{
    Power to detect selective sweeps at 100 locations along human chromosome 1 under a three population out-of-Africa model \citep{gutenkunst2009inferring}  
    with and without purifying selection as a function of local exon density.
    Simulations are as described in Figure \ref{fig:chr1_power}.
    Each panel shows the results of simulations with purifying selection acting on exons (red; labelled BGS)
    and without purifying selection acting on exons (blue; labelled Neutral).
    True positive rate is shown for these methods for samples from the CEU and YRI samples (left and right respectively).
    Points represent the power at each location and trend lines along with 95\% confidence intervals are shown fit to those points.
    }
    \label{fig:power-exon}
\end{figure}


\stopsupplement
\end{document}