Copyright (C) 2015 Graham Jones www.indriid.com This software is called BiSSE-Tests. It is part of the supplementary material for the paper "A RESPONSE TO MAYROSE ET AL. (2014) AND THE FATE OF POLYPLOID LINEAGES" by Soltis et al. ############################################################################## ############################################################################## ############################################################################## BiSSE-Tests is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. BiSSE-Tests is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with BiSSE-Tests; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ############################################################################## ############################################################################## ############################################################################## This file contains several R scripts joined together. They should be extracted into separate files before running. bisse.r is the script used to simulate trees, run the MCMC and produce Figure 1. bissebig-seed-fnum.r is similar, but more general. lookat-bisse.r, lookat-bissebig.r, and lookat-bisse-manymayrose.r analyze and plot results. simple.analogy.r shows a simpler situation where a similar statistical issure arises. ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################# bisse.r ######################################## # This R code bisse.r makes the histogram bisse-hist.pdf. It takes about 40mins. # The code simulates trees for the situation where the root has state 0 (diploid), # state 0 can become state 1 (autopolyploid), but not vice-versa. The parameters are # chosen to make trees of similar sizes as the real data, and to make # about 1/3 of the tips state 1. There is no difference between speciation # or extinction rates between state 0 and state 1. The histogram is like # Fig 1A in the Mayrose paper, and shows posterior probabilities of # state 0 having higher diversification rate. The bias towards 1 is # pure artefact. library(diversitree) seed <- 148641 outdir <- paste0("C:/Users/Work/AAA/Programming/ProgramOutput/HybNetSimR/seed", seed) dir.create(outdir) pars <- c(0.2, 0.2, 0.1, 0.1, 0.02, 0) # lambda_dip, lambda_tet, mu_dip, mu_tet, q_dt # make trees targetnumtrees <- 63 thetrees <- NULL attempt <- 1 while (length(thetrees) < targetnumtrees) { phy <- tree.bisse(pars, max.t=26, x0=0) ok <- TRUE if (is.null(phy)) { cat("Tree attempt", attempt, "died\n") ok <- FALSE } if (ok) { states <- phy$tip.state n0 <- length(which(states==0)) n1 <- length(which(states==1)) if (n0==0 || n1==0) { cat("Tree attempt", attempt, "only has one state at tips\n") ok <- FALSE } } if (ok) { if (n0+n1<10) { cat("Tree attempt", attempt, "too small with", n0+n1, "tips\n") ok <- FALSE } } if (ok) { cat("Tree attempt ", attempt, "has", n0, "+", n1, "=", n0+n1, "tips.\n") thetrees <- c(thetrees, list(phy)) } attempt <- attempt + 1 } # summary info about collection of trees numtrees <- length(thetrees) totaln0 <- 0 totaln1 <- 0 for (i in 1:numtrees) { tr <- thetrees[[i]] states <- tr$tip.state n0 <- length(which(states==0)) n1 <- length(which(states==1)) totaln0 <- totaln0 + n0 totaln1 <- totaln1 + n1 htr <- history.from.sim.discrete(tr, 0:1) num01s <- 0 for (j in 1:nrow(tr$edge)) { if (nrow(htr$history[[j]]) > 1) { num01s <- num01s + 1 } } cat("Tree", i, "has", n0, "+", n1, "=", n0+n1, "tips. ") cat("There are", num01s, "state transitions\n") } totaln0 totaln1 # run MCMC to estimate params prior <- make.prior.exponential(1 / (2 * .1)) nmcmcsteps <- 500 burnin <- 300 probs <- rep(-1, numtrees) for (i in 1:numtrees) { tr <- thetrees[[i]] states <- tr$tip.state lik <- make.bisse(tr, states) lik.l <- constrain(lik, q10 ~ 0) cat("MCMC on tree", i, "\n") allsamples <- mcmc(lik.l, pars[1:5], nsteps=nmcmcsteps, prior=prior, lower=0, w=c(.25,.25,.25,.25,.25), print.every=10) samples <- allsamples[ burnin : nrow(allsamples), ] write.table(samples, file=paste0(outdir, "/bisse-mcmc", "_", i, ".txt"), quote=FALSE, row.names=FALSE) divs0 <- samples[,"lambda0"] - samples[,"mu0"] divs1 <- samples[,"lambda1"] - samples[,"mu1"] probs[i] <- length(which(divs0 > divs1)) / length(divs0) cat("prob", probs[i], "\n") } # write out posterior probabilities that diploids diversify faster as text and PDF write.table(probs, file=paste0(outdir, "/bisse-probs.txt"), quote=FALSE, row.names=FALSE) pdf(paste0(outdir, "/bisse-hist.pdf"), height=5, width=5) par(mar=c(5,5,1,1)+.1) hist(probs, breaks=seq(0.0,1.0,length.out=21), col="gray", ylab="Number of trees", xlab="Prob(diploids diversify faster)", main="") dev.off() ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ######################## bissebig-seed-fnum.r ############################### # This is similar to bisse.r, but more general. # It writes results to file to be analyzed later. # It can be run from the command line with two parameters, # a random seed, and a number to be added to an index # which is then used to name output files. With care, # it allows several runs to be done simultaneously. library(diversitree) Args <- commandArgs(TRUE) print(Args) seed <- as.integer(Args[1]) addtofilename <- as.integer(Args[2]) set.seed(seed) outdir <- paste0("C:/Users/Work/AAA/Programming/ProgramOutput/HybNetSimR/multirun") if (!file.exists(outdir)) { dir.create(outdir) } pars <- c(0.2, 0.2, 0.1, 0.1, 0.02, 0) # lambda_dip, lambda_tet, mu_dip, mu_tet, q_dt # make trees targetnumtrees <- 50 thetrees <- NULL attempt <- 1 while (length(thetrees) < targetnumtrees) { #maxt <- sample(21:60, size=1) phy <- tree.bisse(pars, max.t=26, x0=0) ok <- TRUE if (is.null(phy)) { cat("Tree attempt", attempt, "died\n") ok <- FALSE } if (ok) { states <- phy$tip.state n0 <- length(which(states==0)) n1 <- length(which(states==1)) if (n0==0 || n1==0) { cat("Tree attempt", attempt, "only has one state at tips\n") ok <- FALSE } } if (ok) { if (n0 + n1 > 1000) { cat("Tree attempt", attempt, "too big with", n0+n1, "tips\n") ok <- FALSE } if (n0 + n1 < 10) { cat("Tree attempt", attempt, "too small with", n0+n1, "tips\n") ok <- FALSE } } if (ok) { cat("Tree attempt ", attempt, "has", n0, "+", n1, "=", n0+n1, "tips.\n") thetrees <- c(thetrees, list(phy)) } attempt <- attempt + 1 } # summary info about collection of trees numtrees <- length(thetrees) totaln0 <- 0 totaln1 <- 0 ntips0 <- rep(-1, numtrees) ntips1 <- rep(-1, numtrees) for (i in 1:numtrees) { tr <- thetrees[[i]] states <- tr$tip.state n0 <- length(which(states==0)) n1 <- length(which(states==1)) ntips0[i] <- n0 ntips1[i] <- n1 totaln0 <- totaln0 + n0 totaln1 <- totaln1 + n1 htr <- history.from.sim.discrete(tr, 0:1) num01s <- 0 for (j in 1:nrow(tr$edge)) { if (nrow(htr$history[[j]]) > 1) { num01s <- num01s + 1 } } cat("Tree", i, "has", n0, "+", n1, "=", n0+n1, "tips. ") cat("There are", num01s, "state transitions\n") } cat("Total state 0 tips:", totaln0, "\n") cat("Total state 1 tips:", totaln1, "\n") write.table(cbind(ntips0,ntips1), file=paste0(outdir, "/tipcounts", seed, ".txt"), quote=FALSE, row.names=FALSE) plot(sqrt(sort(ntips0+ntips1))) # run MCMC and use find.mle to estimate params mle.pars <- matrix(0, nrow=numtrees, ncol=5) colnames(mle.pars) <- c("lambda0", "lambda1", "mu0", "mu1", "q01") prior <- make.prior.exponential(1 / (2 * .1)) nmcmcsteps <- 500 burnin <- 100 for (i in 1:numtrees) { tr <- thetrees[[i]] states <- tr$tip.state lik <- make.bisse(tr, states) lik.l <- constrain(lik, q10 ~ 0) cat("MCMC on tree", i, "\n") allsamples <- mcmc(lik.l, pars[1:5], nsteps=nmcmcsteps, prior=prior, lower=0, w=c(.25,.25,.25,.25,.25), print.every=100) samples <- allsamples[ burnin : nrow(allsamples), ] write.table(samples, file=paste0(outdir, "/bisse-mcmc", "_", addtofilename+i, ".txt"), quote=FALSE, row.names=FALSE) fit.l <- find.mle(func=lik.l, x.init=pars[1:5]) cat("MLE done, code = ", fit.l$convergence, "\n") mle.pars[i,] <- fit.l$par[1:5] } write.table(mle.pars, file=paste0(outdir, "/bisse-mles", seed, ".txt"), quote=FALSE, row.names=FALSE) ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ########################## lookat-bisse.r #################################### # For Fig 1 in paper, It reads results written by bisse.r outdir <- "C:/Users/Work/AAA/Programming/ProgramOutput/HybNetSimR/seed148641/" numtrees <- 63 probs <- rep(0, numtrees) for (i in 1:numtrees) { x <- read.table(file=paste0(outdir, "bisse-mcmc_", i, ".txt"), header=TRUE) lambda0s <- x[,"lambda0"] lambda1s <- x[,"lambda1"] mu0s <- x[,"mu0"] mu1s <- x[,"mu1"] div0s <- lambda0s - mu0s div1s <- lambda1s - mu1s probs[i] <- length(which(div0s > div1s)) / length(div0s) } # AJB. Prepare figures at the final size desired: 1 column (8.9 cm [3.5 in]), # 1.5 column (12.7-15.3 cm [5-6 in]), or 2 columns (18.4 cm [7.25 in]) # wide and less than the length of the page (23 cm [9 in]). pdf(paste0(outdir, "/bisse-hist.pdf"), height=2.5, width=3.5) par(mar=c(5,5,1,1)+.1) hist(probs, breaks=seq(0.0,1.0,length.out=21), col="gray", ylab="Number of trees", xlab="Prob(diploids diversify faster)", main="") dev.off() ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ########################## lookat-bissebig.r ################################## # Makes graphs to demonstrate behaviour on larger trees outdir <- "C:/Users/Work/AAA/Programming/ProgramOutput/HybNetSimR/multirunBIG/" numtrees <- 280 true.q01 <- 0.01 plot.param <- function(param, trueval, ylab, main) { plot(tiptotals, param, xlab="Number of tips", ylab=ylab, main=main) abline(trueval, 0) } tipcounts <- read.table(file=paste0(outdir, "tipcounts.txt"), header=TRUE) lambda0.means <- rep(0, numtrees) lambda1.means <- rep(0, numtrees) mu0.means <- rep(0, numtrees) mu1.means <- rep(0, numtrees) q01.means <- rep(0, numtrees) probs <- rep(0, numtrees) for (i in 1:numtrees) { x <- read.table(file=paste0(outdir, "bisse-mcmc_", i, ".txt"), header=TRUE) lambda0s <- x[,"lambda0"] lambda1s <- x[,"lambda1"] mu0s <- x[,"mu0"] mu1s <- x[,"mu1"] q01s <- x[,"q01"] div0s <- lambda0s - mu0s div1s <- lambda1s - mu1s lambda0.means[i] <- mean(lambda0s) lambda1.means[i] <- mean(lambda1s) mu0.means[i] <- mean(mu0s) mu1.means[i] <- mean(mu1s) q01.means[i] <- mean(q01s) probs[i] <- length(which(div0s > div1s)) / length(div0s) } tiptotals <- tipcounts[,"ntips0"] + tipcounts[,"ntips1"] difr.means <- lambda0.means - mu0.means - lambda1.means + mu1.means pdf(file=paste0(outdir, "bigpostmeans.pdf"), width=7, height=10) op <- par(mfrow=c(3,2)) plot.param(lambda0.means, 0.2, expression(paste(lambda[D], " means")), expression(paste("Posterior means of ", lambda[D]))) plot.param(lambda1.means, 0.2, expression(paste(lambda[P], " means")), expression(paste("Posterior means of ", lambda[P]))) plot.param(mu0.means, 0.1, expression(paste(mu[D], " means")), expression(paste("Posterior means of ", mu[D]))) plot.param(mu1.means, 0.1, expression(paste(mu[P], " means")), expression(paste("Posterior means of ", mu[P]))) plot.param(q01.means,true.q01, expression(paste(q[DP], " means")), expression(paste("Posterior means of ", q[DP]))) plot.param(difr.means, 0.0, expression(paste("(", lambda[D]-mu[D]-lambda[P]+mu[P], ") means")), "Posterior means of differences\n between divergence rates") par(op) dev.off() ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ########################## lookat-bisse-manymayrose.r ######################## # Makes several graphs and some Latex using a larger number of "Mayrose-size" trees outdir <- "C:/Users/Work/AAA/Programming/ProgramOutput/HybNetSimR/multirunMayrose/" numtrees <- 200 true.q01 <- 0.02 plot.param <- function(param, trueval, ylab, main) { plot(tiptotals, param, xlab="number of tips", ylab=ylab, main=main) abline(trueval, 0) abline(mean(param), 0, lty="34") abline(median(param), 0, lty="11") } make.latex.table.row <- function(words, x) { x <- round(x, digits=4) minx <- round(min(x), digits=4) meanx <- round(mean(x), digits=4) sdx <- round(sd(x), digits=4) medianx <- round(median(x), digits=4) maxx <- round(max(x), digits=4) cat(words, " & ", minx, " & ", meanx, " & ", sdx, " & ", medianx, " & ", maxx, "\\\\\n") } tipcounts <- read.table(file=paste0(outdir, "tipcounts.txt"), header=TRUE) mles <- read.table(file=paste0(outdir, "bisse-mles.txt"), header=TRUE) lambda0.means <- rep(0, numtrees) lambda1.means <- rep(0, numtrees) mu0.means <- rep(0, numtrees) mu1.means <- rep(0, numtrees) q01.means <- rep(0, numtrees) p.means <- rep(0, numtrees) probs <- rep(0, numtrees) div0.means <- rep(0, numtrees) div1.means <- rep(0, numtrees) reldeath0.means <- rep(0, numtrees) reldeath1.means <- rep(0, numtrees) all.lambda0s <- NULL all.lambda1s <- NULL all.mu0s <- NULL all.mu1s <- NULL all.q01s <- NULL all.ps <- NULL for (i in 1:numtrees) { x <- read.table(file=paste0(outdir, "bisse-mcmc_", i, ".txt"), header=TRUE) lambda0s <- x[,"lambda0"] lambda1s <- x[,"lambda1"] mu0s <- x[,"mu0"] mu1s <- x[,"mu1"] q01s <- x[,"q01"] postps <- x[,"p"] div0s <- lambda0s - mu0s div1s <- lambda1s - mu1s reldeath0s <- mu0s / lambda0s reldeath1s <- mu1s / lambda1s all.lambda0s <- c(all.lambda0s, lambda0s) all.lambda1s <- c(all.lambda1s, lambda1s) all.mu0s <- c(all.mu0s, mu0s) all.mu1s <- c(all.mu1s, mu1s) all.q01s <- c(all.q01s, q01s) all.ps <- c(all.ps, postps) lambda0.means[i] <- mean(lambda0s) lambda1.means[i] <- mean(lambda1s) mu0.means[i] <- mean(mu0s) mu1.means[i] <- mean(mu1s) q01.means[i] <- mean(q01s) p.means[i] <- mean(postps) div0.means[i] <- mean(div0s) div1.means[i] <- mean(div1s) reldeath0.means[i] <- mean(reldeath0s) reldeath1.means[i] <- mean(reldeath1s) probs[i] <- length(which(div0s > div1s)) / length(div0s) } tiptotals <- tipcounts[,"ntips0"] + tipcounts[,"ntips1"] difr.means <- lambda0.means - mu0.means - lambda1.means + mu1.means difr.mles <- mles[,"lambda0"] - mles[,"mu0"] - mles[,"lambda1"] + mles[,"mu1"] pdf(file=paste0(outdir, "postmeans.pdf"), width=7, height=10) op <- par(mfrow=c(3,2)) plot.param(lambda0.means, 0.2, expression(paste(lambda[D], " means")), expression(paste("Posterior means of ", lambda[D]))) plot.param(lambda1.means, 0.2, expression(paste(lambda[P], " means")), expression(paste("Posterior means of ", lambda[P]))) plot.param(mu0.means, 0.1, expression(paste(mu[D], " means")), expression(paste("Posterior means of ", mu[D]))) plot.param(mu1.means, 0.1, expression(paste(mu[P], " means")), expression(paste("Posterior means of ", mu[P]))) plot.param(q01.means,true.q01, expression(paste(q[DP], " means")), expression(paste("Posterior means of ", q[DP]))) plot.param(difr.means, 0.0, expression(paste("(", lambda[D]-mu[D]-lambda[P]+mu[P], ") means")), "Posterior means of differences\n between divergence rates") par(op) dev.off() pdf(file=paste0(outdir, "MLEs.pdf"), width=7, height=10) op <- par(mfrow=c(3,2), mar=c(5,5,4,1)) plot.param(mles[,"lambda0"], 0.2, expression(hat(lambda[D])), expression(paste("MLEs of ", lambda[D]))) plot.param(mles[,"lambda1"], 0.2, expression(hat(lambda[P])), expression(paste("MLEs of ", lambda[P]))) plot.param(mles[,"mu0"], 0.1, expression(hat(mu[D])), expression(paste("MLEs of ", mu[D]))) plot.param(mles[,"mu1"], 0.1, expression(hat(mu[P])), expression(paste("MLEs of ", mu[P]))) plot.param(mles[,"q01"], true.q01, expression(hat(q[DP])), expression(paste("MLEs of ", q[DP]))) plot.param(difr.mles, 0.0, expression(paste(hat(lambda[D]), "-", hat(mu[D]), "-", hat(lambda[P]), "+", hat(mu[P]))), "MLEs of differences\n between divergence rates") par(op) dev.off() sink(paste0(outdir, "summaries.txt")) make.latex.table.row("posterior mean of $\\lambda_D$", lambda0.means) make.latex.table.row("MLE of $\\lambda_D$", mles[,"lambda0"]) make.latex.table.row("posterior mean of $\\lambda_P$", lambda1.means) make.latex.table.row("MLE of $\\lambda_P$", mles[,"lambda1"]) make.latex.table.row("posterior mean of $\\mu_D$", mu0.means) make.latex.table.row("MLE of $\\mu_D$", mles[,"mu0"]) make.latex.table.row("posterior mean of $\\mu_P$", mu1.means) make.latex.table.row("MLE of $\\mu_P$", mles[,"mu1"]) make.latex.table.row("posterior mean of $q_{DP}$", q01.means) make.latex.table.row("MLE of $q_{DP}$", mles[,"q01"]) make.latex.table.row("posterior mean of $r_D-r_P$", difr.means) make.latex.table.row("MLE of $r_D-r_P$", difr.mles) sink(NULL) ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ############################################################################## ######################### simple-analogy.r #################################### # To demonstrate a simpler but similar example of bias X.ssize <- 10 Y.ssize <- 20 u <- .2 lhood <- function(x, n, smean) { (1-x)^n * exp(-n*(1-x)*smean) } lhood.x <- function(x, n, smean) { x * lhood(x, n, smean) } posterior.mean <- function(n, smean) { top <- integrate(lhood.x, lower=0, upper=1, n=n, smean=smean) bot <- integrate(lhood, lower=0, upper=1, n=n, smean=smean) top$value / bot$value } sample.pms <- function() { X.bar <- mean(rexp(X.ssize, rate=1-u)) Y.bar <- mean(rexp(Y.ssize, rate=1-u)) list(mX=posterior.mean(X.ssize, X.bar), nY=posterior.mean(Y.ssize, Y.bar)) } M <- 100 # number of trials in which a p-value is found N <- 63 # number of times (X,Y) is sampled per trial pvals <- rep(0,M) for (m in 1:M) { pms <- matrix(0, nrow=N,ncol=2) for (i in 1:N) { spm <- sample.pms() pms[i,] <- c(spm$mX, spm$nY) } diff <- pms[,1]-pms[,2] ttest <- t.test(diff, mu=0, alternative="greater") pvals[m] <- ttest$p.value } plot(pvals, log="y") abline(h=.05) abline(h=.01) abline(h=.001) length(which(pvals<0.05))