# Description --------------------------------------
# Homework 9, reorganizing homework 8 data evaluation into the structured programming format
# 23 Mar 2022
# SKB
# Initialize ---------------------------------------
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load Functions -----------------------------------
# functions are included in this script
# Global Variables ---------------------------------
beta1 <- 53.74 # slope
beta0 <- 47.04 # intercept
# Program Body -------------------------------------
Functions:
####################################################
# FUNCTION: p_capacity
# purpose: create the simulated data
# input: n, mean, and sd
# output: data frame of x and y values
# ----------------------------------
p_capacity <- function(size=100,avg=0.5,stdev=0.050) {
noise <- rnorm(n=size, mean=0, sd=0.05) # adds variation to the data set
x_var <- rnorm(n=size,
mean=avg,
sd=stdev)
y_var <- beta0 + beta1*x_var + noise
df <- data.frame(x_var, y_var)
return(df)
}
####################################################
# FUNCTION: linear_reg
# purpose: to determine the similarity of the simulated data to known modeled equation and plot linear regression
# input: x and y from d_frame
# output: regression plot
# ----------------------------------
linear_reg <- function(df=df1) {
reg_model <- lm(y_var~x_var, data=df)
slope <- summary(reg_model)$coefficients[1,1]
intercept <- summary(reg_model)$coefficients[1,2]
message("The slope for the sim data is the same as model equation ")
if(slope == beta0) {
message("The intercept for the sim data is the same as model equation ")
}
if(intercept == beta1) {
message("The intercept for the sim data is the same as model equation ")
}
reg_plot <- ggplot(data=df,aes(x=x_var, y=y_var)) +
geom_point() +
stat_smooth(method=lm,se=0.95)
return(reg_plot)
}
Run Functions:
p_capacity()
## x_var y_var
## 1 0.5180358 74.85814
## 2 0.5546184 76.74505
## 3 0.5175869 74.84154
## 4 0.5778567 78.14019
## 5 0.4967273 73.75845
## 6 0.4838592 73.01425
## 7 0.4982206 73.85466
## 8 0.4527397 71.43010
## 9 0.5278278 75.28295
## 10 0.5668546 77.47461
## 11 0.4750359 72.61588
## 12 0.4424004 70.80157
## 13 0.5029828 74.09084
## 14 0.4998361 73.92454
## 15 0.5274419 75.40307
## 16 0.4111149 69.14972
## 17 0.4736078 72.51138
## 18 0.4905487 73.36323
## 19 0.5843464 78.51280
## 20 0.5940664 78.90991
## 21 0.5447219 76.34467
## 22 0.5786857 78.11794
## 23 0.4464327 70.99690
## 24 0.5113240 74.51717
## 25 0.5137701 74.64603
## 26 0.4940946 73.52933
## 27 0.5429625 76.26666
## 28 0.4505466 71.32776
## 29 0.5450253 76.35632
## 30 0.4547097 71.40877
## 31 0.4941172 73.61929
## 32 0.5403782 75.98855
## 33 0.4342423 70.33925
## 34 0.4885985 73.24127
## 35 0.4992013 73.85917
## 36 0.5742341 77.89740
## 37 0.4918787 73.32461
## 38 0.5095274 74.36830
## 39 0.4511689 71.36023
## 40 0.3911684 68.06978
## 41 0.5051197 74.21889
## 42 0.5178607 74.95116
## 43 0.5168864 74.80238
## 44 0.5123867 74.50099
## 45 0.6037886 79.43787
## 46 0.5294004 75.47456
## 47 0.6807898 83.55731
## 48 0.4791323 72.71144
## 49 0.5454933 76.34365
## 50 0.4857347 73.21437
## 51 0.5564816 76.89368
## 52 0.4866903 73.13059
## 53 0.4202535 69.59948
## 54 0.4387944 70.58058
## 55 0.5557714 77.01581
## 56 0.4015286 68.68317
## 57 0.4687620 72.31885
## 58 0.4994510 73.93739
## 59 0.4662940 72.14908
## 60 0.4988470 73.91748
## 61 0.5309544 75.52479
## 62 0.4086611 69.00477
## 63 0.5818163 78.30540
## 64 0.4463129 71.00734
## 65 0.4833092 72.97570
## 66 0.5268007 75.26292
## 67 0.5145786 74.80947
## 68 0.6359902 81.19279
## 69 0.4623221 71.78581
## 70 0.5300537 75.57068
## 71 0.5287231 75.43994
## 72 0.5091072 74.45384
## 73 0.5418225 76.19338
## 74 0.4650593 72.03910
## 75 0.5042449 74.15404
## 76 0.5650611 77.47120
## 77 0.5462920 76.41122
## 78 0.4651059 72.11650
## 79 0.5654006 77.43171
## 80 0.5191665 74.90877
## 81 0.5094724 74.41746
## 82 0.5372213 75.91281
## 83 0.5012034 73.86513
## 84 0.6354265 81.20483
## 85 0.5243351 75.22560
## 86 0.5743814 77.91545
## 87 0.4596294 71.79115
## 88 0.4999557 73.81889
## 89 0.4857341 73.11661
## 90 0.5887414 78.62688
## 91 0.5235513 75.13788
## 92 0.4693508 72.31210
## 93 0.4776389 72.73302
## 94 0.5164440 74.79103
## 95 0.4479119 71.05076
## 96 0.4776953 72.64293
## 97 0.4458958 70.98352
## 98 0.4392288 70.63667
## 99 0.4312879 70.20084
## 100 0.5062293 74.20304
df1 <- p_capacity()
linear_reg()
## The slope for the sim data is the same as model equation
## `geom_smooth()` using formula 'y ~ x'
Additional Input:
####################################################
# FUNCTION: gen_mean
# purpose: generate a random mean value for phosphorus capacity
# input: NULL
# output: mean
# ----------------------------------
gen_mean <- function() {
avg <- runif(1)
if(avg < 0.5000) {message("mean is below half phosphorus capacity")}
else {message("mean is above half phosphorus capacity")}
return(avg)
}
####################################################
# FUNCTION: gen_sd
# purpose: generate a random standard deviation value
# input: "high" or "low"
# output: sd
# ----------------------------------
gen_sd <- function(variation=NULL) {
if(variation == "high") {
stdev <- runif(1, min=0.0501, max=0.200)
}
if(variation == "low") {
stdev <- runif(1, min=0.005, max=0.050)
}
return(stdev)
}
gen_mean()
## mean is above half phosphorus capacity
## [1] 0.9362598
gen_sd(variation="low")
## [1] 0.03823578
p_capacity()
## x_var y_var
## 1 0.5269262 75.28735
## 2 0.6060153 79.67563
## 3 0.5332728 75.62907
## 4 0.5367909 75.87698
## 5 0.4375825 70.51126
## 6 0.5751160 77.96914
## 7 0.4977993 73.78619
## 8 0.5300781 75.55194
## 9 0.5405697 76.09849
## 10 0.5068473 74.27207
## 11 0.4827984 72.96870
## 12 0.4583734 71.67462
## 13 0.4766115 72.52521
## 14 0.4824793 73.04230
## 15 0.4763685 72.70802
## 16 0.4523210 71.38654
## 17 0.5627036 77.30020
## 18 0.5852808 78.59037
## 19 0.4246904 69.86142
## 20 0.4192467 69.58575
## 21 0.5359192 75.77128
## 22 0.5972933 79.15611
## 23 0.4855501 73.13136
## 24 0.5452722 76.25074
## 25 0.4830711 73.04774
## 26 0.4622416 71.87034
## 27 0.5094622 74.42604
## 28 0.3893717 67.95196
## 29 0.5005856 73.91068
## 30 0.4984344 73.84219
## 31 0.3869885 67.85742
## 32 0.5349107 75.78537
## 33 0.4779968 72.66689
## 34 0.3954283 68.15055
## 35 0.5256242 75.30428
## 36 0.5240379 75.21865
## 37 0.4249895 69.88823
## 38 0.4970734 73.77878
## 39 0.5115621 74.55372
## 40 0.4804858 72.89225
## 41 0.5012121 74.00644
## 42 0.5555222 76.89398
## 43 0.4669135 72.11258
## 44 0.5306840 75.53202
## 45 0.3677030 66.77039
## 46 0.6296929 80.89289
## 47 0.5164926 74.74675
## 48 0.4938682 73.55850
## 49 0.5004570 73.94891
## 50 0.5023363 74.00113
## 51 0.4786452 72.75112
## 52 0.4828756 72.94990
## 53 0.6009469 79.36787
## 54 0.5610787 77.13309
## 55 0.5302698 75.57750
## 56 0.5024608 74.03708
## 57 0.5557391 76.82715
## 58 0.5281576 75.52431
## 59 0.5144752 74.76358
## 60 0.4867256 73.28302
## 61 0.5161923 74.81089
## 62 0.5039333 74.17722
## 63 0.4461430 71.12416
## 64 0.6137303 79.98604
## 65 0.5401536 76.04194
## 66 0.4899739 73.38951
## 67 0.4760801 72.71685
## 68 0.5008595 73.90842
## 69 0.5828743 78.38020
## 70 0.4464035 71.06490
## 71 0.4990957 73.88361
## 72 0.4825169 72.91973
## 73 0.4971882 73.78825
## 74 0.4402168 70.74613
## 75 0.4956488 73.68846
## 76 0.5013553 73.93977
## 77 0.5023023 74.08855
## 78 0.5017791 73.91822
## 79 0.6109330 79.89892
## 80 0.5423321 76.15299
## 81 0.4600774 71.80110
## 82 0.5114504 74.49291
## 83 0.5292165 75.49252
## 84 0.3933579 68.22838
## 85 0.4912990 73.46799
## 86 0.5973788 79.16429
## 87 0.5014950 73.98623
## 88 0.4292708 70.07301
## 89 0.5332926 75.65853
## 90 0.5338488 75.69218
## 91 0.4762337 72.63328
## 92 0.4480906 71.15952
## 93 0.5819036 78.36884
## 94 0.4488410 71.18580
## 95 0.4618006 71.92496
## 96 0.5078937 74.34153
## 97 0.4671268 72.19793
## 98 0.4905029 73.36865
## 99 0.5303353 75.50542
## 100 0.5290212 75.49404
df1 <- p_capacity()
linear_reg()
## The slope for the sim data is the same as model equation
## `geom_smooth()` using formula 'y ~ x'