# Description --------------------------------------
# Homework 9, reorganizing homework 8 data evaluation into the structured programming format
# 23 Mar 2022
# SKB

# Initialize ---------------------------------------
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load Functions -----------------------------------

# functions are included in this script

# Global Variables ---------------------------------

beta1 <- 53.74 # slope
beta0 <- 47.04 # intercept

# Program Body -------------------------------------

Functions:

####################################################
# FUNCTION: p_capacity
# purpose: create the simulated data
# input: n, mean, and sd
# output: data frame of x and y values
# ----------------------------------
p_capacity <- function(size=100,avg=0.5,stdev=0.050) {
  noise <- rnorm(n=size, mean=0, sd=0.05) # adds variation to the data set
  x_var <- rnorm(n=size,
             mean=avg,
             sd=stdev)
  y_var <- beta0 + beta1*x_var + noise
  df <- data.frame(x_var, y_var)
  return(df)
}

####################################################
# FUNCTION: linear_reg
# purpose: to determine the similarity of the simulated data to known modeled equation and plot linear regression
# input: x and y from d_frame
# output: regression plot
# ----------------------------------
linear_reg <- function(df=df1) {
  reg_model <- lm(y_var~x_var, data=df)

  slope <- summary(reg_model)$coefficients[1,1]
  intercept <- summary(reg_model)$coefficients[1,2]
  message("The slope for the sim data is the same as model equation ")
  if(slope == beta0) {
  message("The intercept for the sim data is the same as model equation ")
    }
  if(intercept == beta1) {
  message("The intercept for the sim data is the same as model equation ")
    }

  reg_plot <- ggplot(data=df,aes(x=x_var, y=y_var)) +
                    geom_point() +
                    stat_smooth(method=lm,se=0.95)
  return(reg_plot)
}

Run Functions:

p_capacity()
##         x_var    y_var
## 1   0.5180358 74.85814
## 2   0.5546184 76.74505
## 3   0.5175869 74.84154
## 4   0.5778567 78.14019
## 5   0.4967273 73.75845
## 6   0.4838592 73.01425
## 7   0.4982206 73.85466
## 8   0.4527397 71.43010
## 9   0.5278278 75.28295
## 10  0.5668546 77.47461
## 11  0.4750359 72.61588
## 12  0.4424004 70.80157
## 13  0.5029828 74.09084
## 14  0.4998361 73.92454
## 15  0.5274419 75.40307
## 16  0.4111149 69.14972
## 17  0.4736078 72.51138
## 18  0.4905487 73.36323
## 19  0.5843464 78.51280
## 20  0.5940664 78.90991
## 21  0.5447219 76.34467
## 22  0.5786857 78.11794
## 23  0.4464327 70.99690
## 24  0.5113240 74.51717
## 25  0.5137701 74.64603
## 26  0.4940946 73.52933
## 27  0.5429625 76.26666
## 28  0.4505466 71.32776
## 29  0.5450253 76.35632
## 30  0.4547097 71.40877
## 31  0.4941172 73.61929
## 32  0.5403782 75.98855
## 33  0.4342423 70.33925
## 34  0.4885985 73.24127
## 35  0.4992013 73.85917
## 36  0.5742341 77.89740
## 37  0.4918787 73.32461
## 38  0.5095274 74.36830
## 39  0.4511689 71.36023
## 40  0.3911684 68.06978
## 41  0.5051197 74.21889
## 42  0.5178607 74.95116
## 43  0.5168864 74.80238
## 44  0.5123867 74.50099
## 45  0.6037886 79.43787
## 46  0.5294004 75.47456
## 47  0.6807898 83.55731
## 48  0.4791323 72.71144
## 49  0.5454933 76.34365
## 50  0.4857347 73.21437
## 51  0.5564816 76.89368
## 52  0.4866903 73.13059
## 53  0.4202535 69.59948
## 54  0.4387944 70.58058
## 55  0.5557714 77.01581
## 56  0.4015286 68.68317
## 57  0.4687620 72.31885
## 58  0.4994510 73.93739
## 59  0.4662940 72.14908
## 60  0.4988470 73.91748
## 61  0.5309544 75.52479
## 62  0.4086611 69.00477
## 63  0.5818163 78.30540
## 64  0.4463129 71.00734
## 65  0.4833092 72.97570
## 66  0.5268007 75.26292
## 67  0.5145786 74.80947
## 68  0.6359902 81.19279
## 69  0.4623221 71.78581
## 70  0.5300537 75.57068
## 71  0.5287231 75.43994
## 72  0.5091072 74.45384
## 73  0.5418225 76.19338
## 74  0.4650593 72.03910
## 75  0.5042449 74.15404
## 76  0.5650611 77.47120
## 77  0.5462920 76.41122
## 78  0.4651059 72.11650
## 79  0.5654006 77.43171
## 80  0.5191665 74.90877
## 81  0.5094724 74.41746
## 82  0.5372213 75.91281
## 83  0.5012034 73.86513
## 84  0.6354265 81.20483
## 85  0.5243351 75.22560
## 86  0.5743814 77.91545
## 87  0.4596294 71.79115
## 88  0.4999557 73.81889
## 89  0.4857341 73.11661
## 90  0.5887414 78.62688
## 91  0.5235513 75.13788
## 92  0.4693508 72.31210
## 93  0.4776389 72.73302
## 94  0.5164440 74.79103
## 95  0.4479119 71.05076
## 96  0.4776953 72.64293
## 97  0.4458958 70.98352
## 98  0.4392288 70.63667
## 99  0.4312879 70.20084
## 100 0.5062293 74.20304
df1 <- p_capacity()
linear_reg()
## The slope for the sim data is the same as model equation
## `geom_smooth()` using formula 'y ~ x'

Additional Input:

####################################################
# FUNCTION: gen_mean
# purpose: generate a random mean value for phosphorus capacity
# input: NULL
# output: mean
# ----------------------------------
gen_mean <- function() {
  
  avg <- runif(1)
  if(avg < 0.5000) {message("mean is below half phosphorus capacity")}
    else {message("mean is above half phosphorus capacity")}
  return(avg)
}

####################################################
# FUNCTION: gen_sd
# purpose: generate a random standard deviation value
# input: "high" or "low"
# output: sd
# ----------------------------------
gen_sd <- function(variation=NULL) {
  if(variation == "high") {
  stdev <- runif(1, min=0.0501, max=0.200)
  }
  if(variation == "low") {
    stdev <- runif(1, min=0.005, max=0.050)
    }
  return(stdev)
}
gen_mean()
## mean is above half phosphorus capacity
## [1] 0.9362598
gen_sd(variation="low")
## [1] 0.03823578
p_capacity()
##         x_var    y_var
## 1   0.5269262 75.28735
## 2   0.6060153 79.67563
## 3   0.5332728 75.62907
## 4   0.5367909 75.87698
## 5   0.4375825 70.51126
## 6   0.5751160 77.96914
## 7   0.4977993 73.78619
## 8   0.5300781 75.55194
## 9   0.5405697 76.09849
## 10  0.5068473 74.27207
## 11  0.4827984 72.96870
## 12  0.4583734 71.67462
## 13  0.4766115 72.52521
## 14  0.4824793 73.04230
## 15  0.4763685 72.70802
## 16  0.4523210 71.38654
## 17  0.5627036 77.30020
## 18  0.5852808 78.59037
## 19  0.4246904 69.86142
## 20  0.4192467 69.58575
## 21  0.5359192 75.77128
## 22  0.5972933 79.15611
## 23  0.4855501 73.13136
## 24  0.5452722 76.25074
## 25  0.4830711 73.04774
## 26  0.4622416 71.87034
## 27  0.5094622 74.42604
## 28  0.3893717 67.95196
## 29  0.5005856 73.91068
## 30  0.4984344 73.84219
## 31  0.3869885 67.85742
## 32  0.5349107 75.78537
## 33  0.4779968 72.66689
## 34  0.3954283 68.15055
## 35  0.5256242 75.30428
## 36  0.5240379 75.21865
## 37  0.4249895 69.88823
## 38  0.4970734 73.77878
## 39  0.5115621 74.55372
## 40  0.4804858 72.89225
## 41  0.5012121 74.00644
## 42  0.5555222 76.89398
## 43  0.4669135 72.11258
## 44  0.5306840 75.53202
## 45  0.3677030 66.77039
## 46  0.6296929 80.89289
## 47  0.5164926 74.74675
## 48  0.4938682 73.55850
## 49  0.5004570 73.94891
## 50  0.5023363 74.00113
## 51  0.4786452 72.75112
## 52  0.4828756 72.94990
## 53  0.6009469 79.36787
## 54  0.5610787 77.13309
## 55  0.5302698 75.57750
## 56  0.5024608 74.03708
## 57  0.5557391 76.82715
## 58  0.5281576 75.52431
## 59  0.5144752 74.76358
## 60  0.4867256 73.28302
## 61  0.5161923 74.81089
## 62  0.5039333 74.17722
## 63  0.4461430 71.12416
## 64  0.6137303 79.98604
## 65  0.5401536 76.04194
## 66  0.4899739 73.38951
## 67  0.4760801 72.71685
## 68  0.5008595 73.90842
## 69  0.5828743 78.38020
## 70  0.4464035 71.06490
## 71  0.4990957 73.88361
## 72  0.4825169 72.91973
## 73  0.4971882 73.78825
## 74  0.4402168 70.74613
## 75  0.4956488 73.68846
## 76  0.5013553 73.93977
## 77  0.5023023 74.08855
## 78  0.5017791 73.91822
## 79  0.6109330 79.89892
## 80  0.5423321 76.15299
## 81  0.4600774 71.80110
## 82  0.5114504 74.49291
## 83  0.5292165 75.49252
## 84  0.3933579 68.22838
## 85  0.4912990 73.46799
## 86  0.5973788 79.16429
## 87  0.5014950 73.98623
## 88  0.4292708 70.07301
## 89  0.5332926 75.65853
## 90  0.5338488 75.69218
## 91  0.4762337 72.63328
## 92  0.4480906 71.15952
## 93  0.5819036 78.36884
## 94  0.4488410 71.18580
## 95  0.4618006 71.92496
## 96  0.5078937 74.34153
## 97  0.4671268 72.19793
## 98  0.4905029 73.36865
## 99  0.5303353 75.50542
## 100 0.5290212 75.49404
df1 <- p_capacity()
linear_reg()
## The slope for the sim data is the same as model equation
## `geom_smooth()` using formula 'y ~ x'