# Probability Distributions
# May 8, 2012
# Stat560

## Gaussian random variable: density (pdf) and distribution (cdf) curves
# Evaluation grid on [-1, 1] in steps of 0.001
x <- seq(from = -1, to = 1, by = 0.001)

# dnorm()/pnorm() take the mean and the standard deviation
pdf_n <- dnorm(x, mean = 0, sd = 1)
cdf_n <- pnorm(x, mean = 0, sd = 1)

# Default plot type: one point per grid value
plot(x, pdf_n)

# The same data drawn as a connected line, followed by the cdf
plot(x, pdf_n, type = "l")
plot(x, cdf_n, type = "l")

# To view several plots together, use par():
# mfrow = c(1, 2) lays subsequent plots out in 1 row and 2 columns
par(mfrow = c(1, 2))
plot(x, pdf_n, type = "l")
plot(x, cdf_n, type = "l")


## Poisson distribution
# Support points 0, 1, ..., 20
x <- 0:20

# dpois()/ppois() take the rate parameter lambda (here 4)
pmf_p <- dpois(x, lambda = 4)
cmf_p <- ppois(x, lambda = 4)

# pmf on the left, cumulative distribution on the right, each with a grid
par(mfrow = c(1, 2))
plot(x, pmf_p, type = "l")
grid()
plot(x, cmf_p, type = "l")
grid()

## Binomial distribution
# Possible numbers of successes: 0, 1, ..., 20
x <- 0:20

# dbinom()/pbinom() take the number of trials n and the chance of success p
pmf_b <- dbinom(x, size = 20, prob = 0.2)
cmf_b <- pbinom(x, size = 20, prob = 0.2)

# pmf on the left, cumulative distribution on the right, each with a grid
par(mfrow = c(1, 2))
plot(x, pmf_b, type = "l")
grid()
plot(x, cmf_b, type = "l")
grid()

# How does the pmf differ between Poisson and Binomial?
# Return to a single-panel layout first: the 1x2 layout set for the earlier
# side-by-side plots would otherwise squeeze this comparison into the left
# half of the plotting device.
par(mfrow = c(1, 1))

# Red: Poisson pmf (pmf_p); blue: Binomial pmf (pmf_b), on shared fixed axes
plot(x, pmf_p, type = "l", col = "red", xlim = c(0, 20), ylim = c(0, 0.3))
lines(x, pmf_b, col = "blue")
grid()

# The Poisson distribution approximates the Binomial when n is very large
# and p is very small.
# Let's try a smaller n while keeping the same lambda (n * p = 4):
# n = 10, p = 0.4
pmf_b2 <- dbinom(x, 10, 0.4)
lines(x, pmf_b2, col = "green")


## Hypergeometric distribution
x <- seq(0, 20)

# Urn with 10 blue balls and 30 white balls; 20 balls are taken out.
# dhyper()/phyper() give the pmf / cumulative probability of drawing x blue.
pmf_h <- dhyper(x, 10, 30, 20)
cmf_h <- phyper(x, 10, 30, 20)

# Plot the hypergeometric vectors computed above (pmf left, cdf right).
# The binomial vectors pmf_b/cmf_b belong to a different section and must not
# be plotted here.
par(mfrow = c(1, 2))
plot(x, pmf_h, "l")
grid()
plot(x, cmf_h, "l")
grid()

## How does the binomial distribution compare?
# Let's say n = 20, p = 0.25 (the fraction of blue balls in the urn: 10 / 40)
x <- seq(0, 20)
pmf_b2 <- dbinom(x, 20, 0.25)

# Use a single-panel layout so the overlay fills the whole device instead of
# landing in the left half of the 1x2 layout set for the previous plots.
par(mfrow = c(1, 1))

# Red: hypergeometric pmf (pmf_h); blue: binomial pmf with n = 20, p = 0.25
plot(x, pmf_h, type = "l", col = "red")
lines(x, pmf_b2, col = "blue")
grid()