7. Raymond Pearl's Famous Gaff

# Recreating Raymond Pearl's famous mistake

# First we will take aggregate population counts for the United States
# 1790-1930, taken from the decennial census

usa1930 <- c(3.929214, 5.308483, 7.239881, 9.638453, 12.866020,
17.866020, 23.191876, 31.443321, 38.558371, 50.189209, 62.979766,
76.212168, 92.228496, 106.021537, 123.202624)

# in millions

# We can use R's expression() function to create a formula for the
# solution to the logistic growth equation:

logistic.int <- expression(n0 * exp(p[1] * t)/(1 + n0 * (exp(p[1] * t) - 1)/p[2]))

# given an initial population size n0, a 2-element vector p=(r,K), and
# a time since the starting time, t, logistic.int will calculate the
# solution to the logistic equation. It will work on vector values of
# t, of course.

# Now we will try to find the best-fitting parameters (r,K) for the
# USA population series, 1790-1930. To do this we will use R's
# optimization routine, optim(). We need to pass optim() a function
# that we want it to optimize. We'll use a least-squares minimization
# criterion provided by the following short function:

fit.logistic <- function(p,y){

# p is a 2-element vector holding the parameters (r,K)
# y holds our actual data points

n0 <- y[1]

# need to pass t to our function expression "logistic.int"
# the data span the years 1790-1930 in 10-year intervals
# this is 140 years so:

t <- seq(0,140,10)

# calculate the sum of squared deviations

sumsq <- sum((y - eval(logistic.int))^2)


}

# To use optim(), we need to provide a starting guess to the
# parameters. I will use the difference in the log population sizes
# in 1930 and 1790, divided by the number of intervening years (140)
# as an estimate of r. For K, I will use the population size in 1930.
# Try different things. See how sensitive the routine is.

r.guess <- (log(usa1930[15])-log(usa1930[1]))/140
k.guess <- usa1930[15]
par <- c(r.guess,k.guess)

# Now find the best-fitting parameters!

usa1930.fit <- optim(par,fit.logistic,y=usa1930)


# Here's what I get:
# $par
# [1] 0.03126604 198.55566623
#
# $value
# [1] 4.830206
#
# $counts
# function gradient
# 115 NA
#
# $convergence
# [1] 0
#
# $message
# NULL

# This means that the best-fitting r=0.031 and K=198.6. Don't worry
# about the other stuff (as long as convergence=0!). Check out ?optim
# if you're curious.

# Let's see how the fit looks visually. First plot the data

year <- seq(1790,1930,by=10)
plot(year,usa1930, col="blue", xlab="Year", ylab="Population Size")

# Now draw the fitted curve. Need a couple things first: (1) convert
# calendar years to 0,10,20,...,140.

t <- year - 1790

# (2) use our best-fit estimates for the vector p which we pass to
# logistic.int

p <- usa1930.fit$par

# define n0

n0 <- usa1930[1]

# Now plot the best-fit line on the same plot using lines() (click on the image for a larger version):

lines(year,eval(logistic.int),col="magenta")
usa-logistic1930.jpg


# Perfect! Population growth in the USA *must* be governed by a
# logistic growth law, right?

# Let's plot the USA population series for a nice round 200 years,
# 1790-1990:

usa <- c(usa1930, 132.164569, 151.325798, 179.323175, 203.302031,
226.542199, 248.709873)

# new years:
long.year <- seq(1790,1990,by=10)
plot(long.year,usa, col="blue", xlab="Year", ylab="Population Size")

# need to re-do t

t <- long.year-1790

# fit the same line as before, just now for the full 200 years

lines(long.year,eval(logistic.int),col="magenta")

usa-logistic1990.jpg

# OK, so maybe not...

Posted by jhj1 at August 26, 2005 08:42 AM

Comments

Post a comment

Thanks for signing in, . Now you can comment. (sign out)

(If you haven't left a comment here before, you may need to be approved by the site owner before your comment will appear. Until then, it won't appear on the entry. Thanks for waiting.)


Remember me?