# The front cover of the book "{a history of} pi" by shows # the number pi with 322 digits. Here they are digits.pi = c( 1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5,0,2,8,8,4,1,9,7,1,6,9,3,9,9,3,7,5,1,0,5,8,2,0,9,7,4,9,4,4,5,9,2,3,0,7,8,1,6,4,0,6,2,8,6,2,0,8,9,9,8,6,2,8,0,3,4,8,2,5,3,4,2,1,1,7,0,6,7,9,8,2,1,4,8,0,8,6,5,1,3,2,8,2,3,0,6,6,4,7,0,9,3,8,4,4,6,0,9,5,5,0,5,8,2,2,3,1,7,2,5,3,5,9,4,0,8,1,2,8,4,8,1,1,1,7,4,5,0,2,8,4,1,0,2,7,0,1,9,3,8,5,2,1,1,0,5,5,5,9,6,4,4,6,2,2,9,4,8,9,5,4,9,3,0,3,8,1,9,6,4,4,2,8,8,1,0,9,7,5,6,6,5,9,3,3,4,4,6,1,2,8,4,7,5,6,4,8,2,3,3,7,8,6,7,8,3,1,6,5,2,7,1,2,0,1,9,0,9,1,4,5,6,4,8,5,6,6,9,2,3,4,6,0,3,4,8,6,1,0,4,5,4,3,2,6,6,4,8,2,1,3,3,9,3,6,0,7,2,6,0,2,4,9,1,4,1,2,7,3,7,2,4,5,8,7,0,0,6,6,0,6,3,1,5,5,8,8,1,7,4,8) n = length(digits.pi) prop = matrix(0,n,10) for (i in 0:9) prop[,i+1] = cumsum(digits.pi==i)/1:n lessthan5 = apply(prop[,1:5],1,sum) par(mfrow=c(1,1)) ts.plot(prop,col=c(1:5,1:5),lty=c(rep(1,5),rep(2,5)),lwd=2,ylim=c(0,0.3), xlab="Digit of pi",ylab="Relative frequency") legend("topright",legend=0:9,bty="n",col=c(1:5,1:5),lty=c(rep(1,5),rep(2,5)),lwd=2) abline(h=0.1,col=6,lwd=3) ts.plot(lessthan5,lwd=2,ylim=c(0.25,0.75), xlab="Digit of pi",ylab="Relative frequency") title("Digit of pi is less than 5") abline(h=0.5,col=6,lwd=2) digits.pi.tenK = scan("https://hedibert.org/wp-content/uploads/2026/05/digits-of-pi.txt") n1 = length(digits.pi.tenK) prop.tenK = matrix(0,n1,10) for (i in 0:9) prop.tenK[,i+1] = cumsum(digits.pi.tenK==i)/1:n1 lessthan5.tenK = apply(prop.tenK[,1:5],1,sum) ts.plot(prop.tenK,col=c(1:5,1:5),lty=c(rep(1,5),rep(2,5)),ylim=c(0.05,0.15), xlab="Digit of pi",ylab="Relative frequency") legend("topright",legend=0:9,bty="n",col=c(1:5,1:5),lty=c(rep(1,5),rep(2,5)),lwd=2) abline(h=0.1,col=6,lwd=3) ts.plot(lessthan5.tenK,lwd=2,ylim=c(0.475,0.525), xlab="Digit of pi",ylab="Relative frequency") title("Digit of pi is less than 5") abline(h=0.5,col=6,lwd=2) # If less than 5 you win $1, but if greater than or equal to 5 you loose $1 par(mfrow=c(1,3)) ind = 1:(365*10) plot(ind,cumsum(ifelse(lessthan5.tenK<0.5,1,-1))[ind],ylim=c(-1000,7000),type="l",xlab="Day",ylab="Cumulative reward") ind = 1:(365*20) abline(h=0,lty=3) plot(ind,cumsum(ifelse(lessthan5.tenK<0.5,1,-1))[ind],ylim=c(-1000,7000),type="l",xlab="Day",ylab="Cumulative reward") abline(v=365*10,lty=2) abline(h=0,lty=3) ind = 1:(365*30) plot(ind,cumsum(ifelse(lessthan5.tenK<0.5,1,-1))[ind],ylim=c(-1000,7000),type="l", xlab="Day",ylab="Cumulative reward") abline(v=365*10,lty=2) abline(v=365*20,lty=2) abline(h=0,lty=3) # Sequential learning in Bayesian inference # Let us focus on the event A="digit is less than 5". par(mfrow=c(1,3)) # Initially, my prior is theta=Pr(A) is U(0,1) = Beta(1,1) # Then we observe the frequencies based on the cover of the book (n=322) # The likelihood is the kernel of a count|theta ~ binomial(n,theta) # evaluated at count = 168 qne n=322 # Therefore, the posterior is Beta(168+1,322-168+1)=Beta(169,155) theta = seq(0.4,0.65,length=1000) post = dbeta(theta,169,155) plot(theta,post,type="l",xlab="Probability digit is less than 5") abline(v=0.5,lty=2) title(paste("n=322 digits\n Pr(theta>0.5)=",round(1-pbeta(0.5,169,155),3),sep="")) # Now, my prior is the above posterior. # The likelihood is the kernel of a count|theta ~ binomial(n,theta) # evaluated at count = 5001 and n=9999 # Therefore, the posterior is Beta(169+5001,155+4998)=Beta(5170,5153) theta = seq(0.48,0.52,length=1000) prior = post post = dbeta(theta,5170,5153) plot(theta,post,type="l",xlab="Probability digit is less than 5") abline(v=0.5,lty=2) title(paste("n=9999 digits\n Pr(theta>0.5)=",round(1-pbeta(0.5,5170,5153),3),sep="")) # Finally, my new prior is the above posterior. # Then we observe the frequencies based on the first 1,000,001 digits of pi (n=1000001). # I was not willing to download one million digits myself! # I asked Google AI to compute the frequencies for me. # It gave back the following counts for digits 0,1,...,9: # count = c(99959, 99758, 100026, 100230, 100230, 100359, 99548, 99800, 99985, 100106) # The likelihood is the kernel of a count|theta ~ binomial(n,theta) # evaluated at count = 500203 and n=1000001 # Therefore, the posterior is Beta(5170+500203,5153+499798)=Beta(505373,504951) theta = seq(0.4975,0.5025,length=1000) prior = post post = dbeta(theta,505373,504951) plot(theta,post,type="l",xlab="Probability digit is less than 5") abline(v=0.5,lty=2) title(paste("n=1000001 digits\n Pr(theta>0.5)=",round(1-pbeta(0.5,505373,504951),3),sep=""))