Homework 6

Open Libraries

library(ggplot2) # for graphics

## Warning: package 'ggplot2' was built under R version 4.1.2

library(MASS) # for maximum likelihood estimation

My Own Data

z <- read.table("Cleaned2021VegetationData.csv",header=TRUE,sep=",")
str(z)

## 'data.frame':    288 obs. of  6 variables:
##  $ Date             : chr  "10/4/2021" "10/4/2021" "10/4/2021" "10/4/2021" ...
##  $ PlotID           : chr  "1A" "1A" "1A" "1A" ...
##  $ Plant            : chr  "Milkweed" "Milkweed" "Coneflower" "Coneflower" ...
##  $ VegID            : chr  "M1" "M2" "C1" "C2" ...
##  $ Height_cm        : num  16.5 24 12.5 19 64 36.5 21.5 23 21 17 ...
##  $ PercentGreenCover: num  5.91 NA NA NA NA NA 4.49 NA NA NA ...

summary(z)

##      Date              PlotID             Plant              VegID          
##  Length:288         Length:288         Length:288         Length:288        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Height_cm    PercentGreenCover
##  Min.   : 8.0   Min.   : 4.49    
##  1st Qu.:20.5   1st Qu.:10.82    
##  Median :27.0   Median :20.59    
##  Mean   :35.0   Mean   :23.35    
##  3rd Qu.:44.0   3rd Qu.:34.40    
##  Max.   :85.0   Max.   :52.13    
##                 NA's   :241

Read in Data Vector

# quick and dirty, a truncated normal distribution to work on the solution set

#z <- rnorm(n=3000,mean=0.2)
#z <- data.frame(1:3000,z)
#names(z) <- list("ID","myVar")
#z <- z[z$myVar>0,]
#str(z)
#summary(z$myVar)

Plot Histogram of Data

p1 <- ggplot(data=z, aes(x=Height_cm, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Add Emperical Density Curve

p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Get Maximum Likelihood Parameters for Normal

normPars <- fitdistr(z$Height_cm,"normal")
print(normPars)

##       mean          sd    
##   34.9982639   19.3204653 
##  ( 1.1384693) ( 0.8050194)

str(normPars)

## List of 5
##  $ estimate: Named num [1:2] 35 19.3
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 1.138 0.805
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 1.296 0 0 0.648
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 288
##  $ loglik  : num -1261
##  - attr(*, "class")= chr "fitdistr"

normPars$estimate["mean"] # note structure of getting a named attribute

##     mean 
## 34.99826

Plot `Normal` Probability Density

meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$Height_cm),len=length(z$Height_cm))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Height_cm), args = list(mean = meanML, sd = sdML))
p1 + stat

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plot `Exponential` Probability Density

expoPars <- fitdistr(z$Height_cm,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Height_cm), args = list(rate=rateML))
p1 + stat + stat2

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plot `Uniform` Probability Density

stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Height_cm), args = list(min=min(z$myVar), max=max(z$myVar)))

## Warning in min(z$myVar): no non-missing arguments to min; returning Inf

## Warning in max(z$myVar): no non-missing arguments to max; returning -Inf

p1 + stat + stat2 + stat3

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning in (function (x, min = 0, max = 1, log = FALSE) : NaNs produced

## Warning: Removed 288 row(s) containing missing values (geom_path).

Plot `Gamma` Probability Density

gammaPars <- fitdistr(z$Height_cm,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Height_cm), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning in (function (x, min = 0, max = 1, log = FALSE) : NaNs produced

## Warning: Removed 288 row(s) containing missing values (geom_path).

Plot `Beta` Probability Density

pSpecial <- ggplot(data=z, aes(x=Height_cm/(max(Height_cm + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=z$Height_cm/max(z$Height_cm + 0.1),start=list(shape1=1,shape2=2),"beta")

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$Height_cm), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Warning: Removed 2 rows containing missing values (geom_bar).

Gamma is the best fitting distribution.

Simulate New Data Set

#generate data using gamma
z <- rgamma(n=288,
            shape=3.568224649,
            rate=0.101954357)
z <- data.frame(1:288,z)
names(z) <- list("ID","Height_cm")
z <- z[z$Height_cm>0,]
str(z)

## 'data.frame':    288 obs. of  2 variables:
##  $ ID       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Height_cm: num  16.1 35.1 30.1 23.3 74 ...

summary(z$Height_cm)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   7.245  22.132  33.393  36.396  45.732 115.216

#plot histogram
p1 <- ggplot(data=z, aes(x=Height_cm, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#add probability density curve
p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Histogram of Original Data

z <- read.table("Cleaned2021VegetationData.csv",header=TRUE,sep=",")
p1 <- ggplot(data=z, aes(x=Height_cm, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Questions

How do the two histogram profiles compare? Do you think the model is doing a good job of simulating realistic data that match your original measurements? Why or why not?

The actual data has more of a bimodal distribution so it deviates from the simulated one which is unimodal skewed right, but for the left hand side of the plot it is similar.

Homework 6

Sam Brewer

2/23/2022

Open Libraries

My Own Data

Read in Data Vector

Plot Histogram of Data

Add Emperical Density Curve

Get Maximum Likelihood Parameters for Normal

Plot `Normal` Probability Density

Plot `Exponential` Probability Density

Plot `Uniform` Probability Density

Plot `Gamma` Probability Density

Plot `Beta` Probability Density

Simulate New Data Set

Questions

Homework 6

Sam Brewer

2/23/2022

Open Libraries

My Own Data

Read in Data Vector

Plot Histogram of Data

Add Emperical Density Curve

Get Maximum Likelihood Parameters for Normal

Plot Normal Probability Density

Plot Exponential Probability Density

Plot Uniform Probability Density

Plot Gamma Probability Density

Plot Beta Probability Density

Simulate New Data Set

Questions

Plot `Normal` Probability Density

Plot `Exponential` Probability Density

Plot `Uniform` Probability Density

Plot `Gamma` Probability Density

Plot `Beta` Probability Density