# Homework 12

#preliminaries
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(patchwork)

# Load the 2021 vegetation data.
# The file is comma-separated with a header row; anything after a "#" in the
# file is treated as a comment and skipped.
data <- read.table(file="Cleaned2021VegetationData.csv",
                   header=TRUE,
                   sep=",",
                   comment.char="#")
str(data)
## 'data.frame':    288 obs. of  6 variables:
##  $ Date             : chr  "10/4/2021" "10/4/2021" "10/4/2021" "10/4/2021" ...
##  $ PlotID           : chr  "1A" "1A" "1A" "1A" ...
##  $ Plant            : chr  "Milkweed" "Milkweed" "Coneflower" "Coneflower" ...
##  $ VegID            : chr  "M1" "M2" "C1" "C2" ...
##  $ Height_cm        : num  16.5 24 12.5 19 64 36.5 21.5 23 21 17 ...
##  $ PercentGreenCover: num  5.91 NA NA NA NA NA 4.49 NA NA NA ...
# Basic histogram of percent green cover for all measurements.
# qplot() is deprecated (ggplot2 3.4.0), so the histograms are built with
# ggplot() + geom_histogram(). Mapping the column by name also labels the
# x axis "PercentGreenCover" instead of "data$PercentGreenCover".
# Rows where PercentGreenCover is NA are left out by ggplot2 with a warning.
ggplot(data, aes(x = PercentGreenCover)) +
  geom_histogram()

# Same histogram with a green fill and black bar outlines. The colours are
# constants, so they are set on the layer rather than mapped in aes().
ggplot(data, aes(x = PercentGreenCover)) +
  geom_histogram(fill = "olivedrab3", color = "black")

# Store Date as a factor whose levels are listed in calendar order, so that
# anything grouped or plotted by Date runs chronologically.
data[["Date"]] <- factor(
  data[["Date"]],
  levels = c("7/13/2021", "8/4/2021", "8/27/2021", "10/4/2021")
)

# Keep only the measurements taken on Oct 4 2021.
OctFourthPCG <- data %>%
  filter(Date == "10/4/2021")

# Scatter of percent green cover (x) against plot ID (y) for that date.
# The solarized theme is applied only to the printed copy; pgc4 itself keeps
# the default theme.
pgc4 <- ggplot(OctFourthPCG, aes(x = PercentGreenCover, y = PlotID)) +
  geom_point()
print(pgc4 + theme_solarized())

# Coneflower heights over the 4 measurement dates for the three replicate
# plots of treatment 1 (1A, 1B, 1C): one bar chart per replicate, then all
# three stacked in a single figure.

# Builds a bar chart of mean coneflower height per measurement date.
#   plotData  - data frame for one plot's coneflowers; the Date and Height_cm
#               columns are used
#   plotLabel - title drawn above the panel
#   barFill   - fill colour of the bars
# Returns a plain ggplot object.
plotConeflowerHeights <- function(plotData, plotLabel, barFill) {
  ggplot(data = plotData, mapping = aes(x = Date, y = Height_cm)) +
    # Fill and outline are constants, so they are set on the layer instead of
    # being wrapped in I() inside aes(). No legend is produced, so no
    # theme(legend.position = "none") is needed. (It was being discarded
    # anyway, because the complete theme below was added after it.)
    geom_bar(stat = "summary", fun = "mean", fill = barFill, color = "black") +
    theme_classic(base_size = 10, base_family = "times") +
    # The panel title is set with labs(), not plot_annotation(). patchwork
    # annotations only apply to the top-level composition, so per-panel
    # titles set that way are lost once the panels are combined.
    labs(title = plotLabel, y = "Height (in cm)")
}

# Plot 1A
OneAData <- filter(data, PlotID == "1A")
ConeflowerOneA <- filter(OneAData, Plant == "Coneflower")
str(ConeflowerOneA)
## 'data.frame':    8 obs. of  6 variables:
##  $ Date             : Factor w/ 4 levels "7/13/2021","8/4/2021",..: 4 4 3 3 2 2 1 1
##  $ PlotID           : chr  "1A" "1A" "1A" "1A" ...
##  $ Plant            : chr  "Coneflower" "Coneflower" "Coneflower" "Coneflower" ...
##  $ VegID            : chr  "C1" "C2" "C1" "C2" ...
##  $ Height_cm        : num  12.5 19 15.5 20 15.5 20 11 16.5
##  $ PercentGreenCover: num  NA NA NA NA NA NA NA NA
g1AC <- plotConeflowerHeights(ConeflowerOneA, "Plot 1A", "orchid4")
print(g1AC)

# Plot 1B
OneBData <- filter(data, PlotID == "1B")
ConeflowerOneB <- filter(OneBData, Plant == "Coneflower")
str(ConeflowerOneB)
## 'data.frame':    8 obs. of  6 variables:
##  $ Date             : Factor w/ 4 levels "7/13/2021","8/4/2021",..: 4 4 3 3 2 2 1 1
##  $ PlotID           : chr  "1B" "1B" "1B" "1B" ...
##  $ Plant            : chr  "Coneflower" "Coneflower" "Coneflower" "Coneflower" ...
##  $ VegID            : chr  "C1" "C2" "C1" "C2" ...
##  $ Height_cm        : num  21 17 23 17.5 22 18 17 10
##  $ PercentGreenCover: num  NA NA NA NA NA NA NA NA
g1BC <- plotConeflowerHeights(ConeflowerOneB, "Plot 1B", "orchid3")
print(g1BC)

# Plot 1C
OneCData <- filter(data, PlotID == "1C")
ConeflowerOneC <- filter(OneCData, Plant == "Coneflower")
str(ConeflowerOneC)
## 'data.frame':    8 obs. of  6 variables:
##  $ Date             : Factor w/ 4 levels "7/13/2021","8/4/2021",..: 4 4 3 3 2 2 1 1
##  $ PlotID           : chr  "1C" "1C" "1C" "1C" ...
##  $ Plant            : chr  "Coneflower" "Coneflower" "Coneflower" "Coneflower" ...
##  $ VegID            : chr  "C1" "C2" "C1" "C2" ...
##  $ Height_cm        : num  20 22.5 22 19.5 20 21.5 12.5 13.5
##  $ PercentGreenCover: num  NA NA NA NA NA NA NA NA
g1CC <- plotConeflowerHeights(ConeflowerOneC, "Plot 1C", "orchid2")
print(g1CC)

# Stack the three replicate panels (A, B and C) of treatment 1 in one column.
# Each panel keeps its own title; plot_annotation() adds the overall title.
g1AC + g1BC + g1CC +
  plot_layout(ncol = 1) +
  plot_annotation(title = "Coneflower Heights Treatment 1")

####################################################

# Load the winter water sample data.
# NOTE: this reuses the name `data`, replacing the vegetation data frame that
# was loaded under the same name earlier in the script.
data <- read.table(file="CondensedWinterData.csv",
                   header=TRUE,
                   sep=",",
                   comment.char="#")

# shows data summary
glimpse(data)
## Rows: 48
## Columns: 5
## $ Date      <chr> "2/23/2022", "2/23/2022", "2/23/2022", "2/23/2022", "2/23/20~
## $ Treatment <int> 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, ~
## $ Bin       <int> 2, 4, 5, 3, 7, 1, 9, 11, 12, 6, 10, 8, 2, 4, 5, 3, 7, 1, 9, ~
## $ Nitrate   <dbl> 0.15400, 0.14700, 0.08610, NA, 0.10600, 0.12900, 0.43100, 0.~
## $ Phosphate <dbl> 17.00, 10.80, 10.70, NA, 8.26, 7.37, 63.70, 17.40, NA, 3.96,~
# Drop every row that has an NA in any column.
dataClean <- data[complete.cases(data),]

# Histograms of the cleaned nitrate and phosphate values.
# qplot() is deprecated (ggplot2 3.4.0), so these use ggplot() +
# geom_histogram(). The constant colours are set on the layer, and the x axis
# is labelled with the column name rather than "dataClean$...".
#N
ggplot(dataClean, aes(x = Nitrate)) +
  geom_histogram(fill = "olivedrab3", color = "black")

#P
ggplot(dataClean, aes(x = Phosphate)) +
  geom_histogram(fill = "olivedrab1", color = "black")

# Keep only the rows whose value is strictly above the detection limit.
# NOTE(review): a value exactly equal to the limit is dropped too; confirm
# that is intended.
# Nitrate: detection limit 0.14 mg/L
NDataClean <- dataClean %>%
  filter(Nitrate > 0.14)
str(NDataClean)
## 'data.frame':    4 obs. of  5 variables:
##  $ Date     : chr  "2/23/2022" "2/23/2022" "2/23/2022" "2/23/2022"
##  $ Treatment: int  1 1 3 3
##  $ Bin      : int  2 4 9 11
##  $ Nitrate  : num  0.154 0.147 0.431 0.186
##  $ Phosphate: num  17 10.8 63.7 17.4
# Phosphate: detection limit 7 microg/L
PDataClean <- dataClean %>%
  filter(Phosphate > 7)
str(PDataClean)
## 'data.frame':    38 obs. of  5 variables:
##  $ Date     : chr  "2/23/2022" "2/23/2022" "2/23/2022" "2/23/2022" ...
##  $ Treatment: int  1 1 1 2 2 3 3 4 4 1 ...
##  $ Bin      : int  2 4 5 7 1 9 11 10 8 2 ...
##  $ Nitrate  : num  0.154 0.147 0.0861 0.106 0.129 0.431 0.186 0.072 0.00744 0.00674 ...
##  $ Phosphate: num  17 10.8 10.7 8.26 7.37 63.7 17.4 22.4 9.7 8.82 ...
# plots
# Bar charts of the mean concentration per treatment, drawn from the rows
# that passed the detection-limit filters.
#
# Fill and outline colours are constants, so they are set on the layer
# instead of being wrapped in I() inside aes(). No legend is generated, so
# theme(legend.position = "none") is not needed. (It had no effect before
# either: theme_classic() is a complete theme and, added afterwards, replaced
# that setting.)

# N
plot1 <- ggplot(data = NDataClean,
                mapping = aes(x = Treatment, y = Nitrate)) +
  geom_bar(stat = "summary", fun = "mean",
           fill = "orchid4", color = "black") +
  theme_classic(base_size = 15, base_family = "times")
print(plot1)

# P
plot2 <- ggplot(data = PDataClean,
                mapping = aes(x = Treatment, y = Phosphate)) +
  geom_bar(stat = "summary", fun = "mean",
           fill = "orchid1", color = "black") +
  theme_classic(base_size = 15, base_family = "times")
print(plot2)

# multiple bars in plot over all dates
# P
# Date is stored as "m/d/yyyy" text, which a discrete axis would sort
# alphabetically. It is converted (for this plot only) to a factor whose
# levels follow calendar order; any date that fails to parse sorts last.
# The bar outline keeps ggplot2's default width of 0.5, so the deprecated
# `size` argument is simply omitted.
plot3 <- PDataClean %>%
  mutate(Date = factor(
    Date,
    levels = unique(Date[order(as.Date(Date, format = "%m/%d/%Y"))])
  )) %>%
  ggplot(aes(x = Date, y = Phosphate, fill = Treatment)) +
  geom_col(position = "dodge2", color = "black") +
  # Treatment is an integer column, so it gets a continuous colour scale
  scale_fill_viridis_c(option = "plasma")
print(plot3)