7. Raymond Pearl's Famous Gaff
# Recreating Raymond Pearl's famous mistake
# First we will take aggregate population counts for the United States
# 1790-1930, taken from the decennial census
usa1930 <- c(3.929214, 5.308483, 7.239881, 9.638453, 12.866020,
17.866020, 23.191876, 31.443321, 38.558371, 50.189209, 62.979766,
76.212168, 92.228496, 106.021537, 123.202624)
# in millions
# We can use R's expression() function to create a formula for the
# solution to the logistic growth equation:
logistic.int <- expression(n0 * exp(p[1] * t)/(1 + n0 * (exp(p[1] * t) - 1)/p[2]))
# given an initial population size n0, a 2-element vector p=(r,K), and
# a time since the starting time, t, logistic.int will calculate the
# solution to the logistic equation. It will work on vector values of
# t, of course.
# Now we will try to find the best-fitting parameters (r,K) for the
# USA population series, 1790-1930. To do this we will use R's
# optimization routine, optim(). We need to pass optim() a function
# that we want it to optimize. We'll use a least-squares minimization
# criterion provided by the following short function:
fit.logistic <- function(p,y){
# p is a 2-element vector holding the parameters (r,K)
# y holds our actual data points
n0 <- y[1]
# need to pass t to our function expression "logistic.int"
# the data span the years 1790-1930 in 10-year intervals
# this is 140 years so:t <- seq(0,140,10)
# calculate the sum of squared deviationssumsq <- sum((y - eval(logistic.int))^2)
}
# To use optim(), we need to provide a starting guess to the
# parameters. I will use the difference in the log population sizes
# in 1930 and 1790, divided by the number of intervening years (140)
# as an estimate of r. For K, I will use the population size in 1930.
# Try different things. See how sensitive the routine is.
r.guess <- (log(usa1930[15])-log(usa1930[1]))/140
k.guess <- usa1930[15]
par <- c(r.guess,k.guess)
# Now find the best-fitting parameters!
usa1930.fit <- optim(par,fit.logistic,y=usa1930)
# Here's what I get:
# $par
# [1] 0.03126604 198.55566623
#
# $value
# [1] 4.830206
#
# $counts
# function gradient
# 115 NA
#
# $convergence
# [1] 0
#
# $message
# NULL
# This means that the best-fitting r=0.031 and K=198.6. Don't worry
# about the other stuff (as long as convergence=0!). Check out ?optim
# if you're curious.
# Let's see how the fit looks visually. First plot the data
year <- seq(1790,1930,by=10)
plot(year,usa1930, col="blue", xlab="Year", ylab="Population Size")
# Now draw the fitted curve. Need a couple things first: (1) convert
# calendar years to 0,10,20,...,140.
t <- year - 1790
# (2) use our best-fit estimates for the vector p which we pass to
# logistic.int
p <- usa1930.fit$par
# define n0
n0 <- usa1930[1]
# Now plot the best-fit line on the same plot using lines() (click on the image for a larger version):
lines(year,eval(logistic.int),col="magenta")
![]()
# Perfect! Population growth in the USA *must* be governed by a
# logistic growth law, right?
# Let's plot the USA population series for a nice round 200 years,
# 1790-1990:
usa <- c(usa1930, 132.164569, 151.325798, 179.323175, 203.302031,
226.542199, 248.709873)
# new years:
long.year <- seq(1790,1990,by=10)
plot(long.year,usa, col="blue", xlab="Year", ylab="Population Size")
# need to re-do t
t <- long.year-1790
# fit the same line as before, just now for the full 200 years
lines(long.year,eval(logistic.int),col="magenta")
# OK, so maybe not...
Posted by jhj1 at August 26, 2005 08:42 AM
Comments
Post a comment
Thanks for signing in, . Now you can comment. (sign out)
(If you haven't left a comment here before, you may need to be approved by the site owner before your comment will appear. Until then, it won't appear on the entry. Thanks for waiting.)