## Example of a function to compute the probability distribution of correct ## picks when there are n numbers and k selected without replacement lotto.nk <- function(n, k) { ## Create 3x(k+1) matrix to hold results ## rows are the # of matching numbers ## columns are: the matching numbers (y=0:k), probability: p(y), cdf:F(y) lotto.probs <- matrix(rep(0, 3*(k+1)), ncol=3) ## Obtain the total number of possible k number selections possible <- factorial(n) / (factorial(k) * factorial(n-k)) ## Obtain y, p(y), F(y) for y=0:k and save to matrix of results ## Although y=0:k, rows of matrix range from 1:(k+1) for (i in 0:k) { lotto.probs[i+1, 1] <- i lotto.probs[i+1, 2] <- (factorial(k) / (factorial(i) * factorial(k-i))) * (factorial(n-k) / (factorial(n-2*k+i) * factorial(k-i))) / possible if (i == 0) lotto.probs[i+1, 3] <- lotto.probs[i+1, 2] else lotto.probs[i+1, 3] <- lotto.probs[i, 3] + lotto.probs[i+1, 2] } return(lotto.probs) ## return the matrix of results } lotto.nk(53, 6) # run function with specific n and k ## Print out results in nicer manner lotto.out <- lotto.nk(53, 6) colnames(lotto.out) <- c("Winning #s", "prob", "cum prob") round(lotto.out, 10) ## Theoretical mean/covariance of order stats & Simulating lottery results ## Distribution of ordered winning numbers in a lottery with n #s, k selected lotto.nk.ord <- function(n, k) { true.mean <- matrix(rep(0, k), ncol=1) true.COV <- matrix(rep(0, k^2), ncol=k) for (i1 in 1:k) { true.mean[i1] <- i1 * (n+1) / (k+1) for (i2 in 1:k) { true.COV[i1,i2] <- (n-k)*min(i1,i2)*(n+1)*(k+1-max(i1,i2)) / ((k+1)^2*(k+2)) } } mean.COV <- list("mean"=true.mean, "COV"=true.COV) return(mean.COV) } n <- 53; k <- 6 meanCOV <- lotto.nk.ord(n, k) true.mean <- meanCOV$mean true.COV <- meanCOV$COV A <- matrix(c(rep(1/k, k), c(-1, rep(0, k-2), 1)), byrow=T, ncol=k) A %*% true.mean A %*% true.COV %*% t(A) ## Simulate 100000 lotto drawings set.seed(109238) n <- 53; k <- 6 num.sim <- 100000 ## Of drawings num.draw <- matrix(rep(0,num.sim*k), ncol=k) ## Save the numbers drawn for (i in 1:num.sim) { sample.num <- sample(n, k) ## Sample k from 1:n w/out replace num.draw[i, ] <- sort(sample.num) ## Save sorted k #s from drawing } sim.mean <- colMeans(num.draw) sim.COV <- cov(num.draw) A %*% sim.mean A %*% sim.COV %*% t(A) par(mfrow=c(2,3)) for (i in 1:6) { hist(num.draw[,i], breaks=seq(-0.5,53.5, 1), ylim=c(0,12000), main="") title(main=paste(i,"th Ordered number")) }