3/19/25
1: Run sample code
a: Read in/generate data vector
library(ggplot2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
#z <- rnorm(n=3000, mean=0.2)
#z <- data.frame(1:3000, z)
#names(z) <- list("ID", "myVar")
#z <- z[z$myVar>0,]
#str(z)
#summary(z$myVar)
b: Plot histogram
#p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
#geom_histogram(color="grey60",fill="cornsilk",size=0.2)
#print(p1)
c: Add empirical density curve
#p1 <- p1 + geom_density(linetype="dotted",size=0.75)
#print(p1)
d: Get maximum likelihood parameters for normal
#normPars <- fitdistr(z$myVar,"normal")
#print(normPars)
#str(normPars)
#normPars$estimate["mean"]
e: Plot normal probability density
#meanML <- normPars$estimate["mean"]
#sdML <- normPars$estimate["sd"]
#xval <- seq(0,max(z$myVar),len=length(z$myVar))
# stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
#p1 + stat
f: Plot exponential probability density
#expoPars <- fitdistr(z$myVar,"exponential")
#rateML <- expoPars$estimate["rate"]
#stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
#p1 + stat + stat2
g: Plot uniform probability density
#stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
#p1 + stat + stat2 + stat3
h: Plot gamma probability density
#gammaPars <- fitdistr(z$myVar,"gamma")
#shapeML <- gammaPars$estimate["shape"]
#rateML <- gammaPars$estimate["rate"]
#stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
# p1 + stat + stat2 + stat3 + stat4
g: Plot beta probability density
#pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
# geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
# xlim(c(0,1)) +
# geom_density(size=0.75,linetype="dotted")
#betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
#shape1ML <- betaPars$estimate["shape1"]
#shape2ML <- betaPars$estimate["shape2"]
#statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
#pSpecial + statSpecial
2: Sample dataset
a:Read in data
z <- read.table("antcountydata.csv", header=TRUE, sep=",")
str(z)
## 'data.frame': 67 obs. of 25 variables:
## $ n.species : int 15 50 62 36 63 42 59 32 10 31 ...
## $ n.samples : int 53 192 354 106 454 284 975 99 16 109 ...
## $ lat.centroid : num 41.3 41.8 41.8 41.5 41.4 ...
## $ long.centroid : num -73.4 -72.7 -73.2 -72.5 -72.9 ...
## $ elev.centroid.m : num 176.9 42.8 376.9 69.9 140.6 ...
## $ area.km2 : int 1621 1906 2383 956 1570 1725 1062 1329 1287 17687 ...
## $ mean.ann.temp : num 9.6 9.8 7.8 9.8 9.7 9.8 8.3 8.9 7.1 3.2 ...
## $ mean.diurnal.temp.range: num 10.8 11.8 11.5 10.6 10.7 11.1 11.6 12.1 11.3 12.5 ...
## $ isothermality : num 0.3 0.31 0.3 0.3 0.3 0.31 0.31 0.32 0.28 0.28 ...
## $ temp.seasonality.sd : num 86.9 90.7 90.2 86.5 87.3 ...
## $ max.temp : num 27.4 29.1 26.3 27.7 27.7 27.6 26.8 27.6 26.7 24.6 ...
## $ min.temp : num -8 -8.6 -10.8 -7.5 -7.6 -7.8 -10.1 -9.8 -12.7 -19.4 ...
## $ temp.range : num 35.4 37.7 37.1 35.2 35.3 35.4 36.9 37.4 39.4 44 ...
## $ mean.temp.wettest.Q : num 8.3 14.4 12.4 5.7 8.3 1.2 4.1 4.6 2.6 15.2 ...
## $ mean.temp.driest.Q : num -1.9 -1.1 -3.1 20.7 -0.7 20.5 -2.4 -1.8 18.4 -9.9 ...
## $ mean.temp.warmest.Q : num 20.5 21.3 19 20.7 20.8 20.5 19.4 19.9 19.3 16.4 ...
## $ mean.temp.coldest.Q : num -1.9 -2.3 -4.3 -1.6 -1.8 -1.4 -3.5 -2.9 -5.7 -11.3 ...
## $ ann.precip : int 1250 1167 1246 1239 1222 1229 1233 1218 1134 966 ...
## $ precip.wettest.month : int 117 106 113 118 113 121 116 117 120 103 ...
## $ precipldriest.month : int 88 81 88 91 88 87 87 89 83 53 ...
## $ precip.cv : int 8 7 7 7 7 9 7 7 11 19 ...
## $ precip.wettest.Q : int 338 309 328 331 328 337 326 329 325 296 ...
## $ precip.driest.Q : int 290 266 280 292 287 279 284 290 253 179 ...
## $ precip.warmest.Q : int 304 290 323 292 293 279 306 291 260 292 ...
## $ precip.coldest.Q : int 290 270 284 302 289 310 289 297 279 195 ...
names(z) <- list("ID", "myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame': 67 obs. of 25 variables:
## $ ID : int 15 50 62 36 63 42 59 32 10 31 ...
## $ myVar: int 53 192 354 106 454 284 975 99 16 109 ...
## $ NA : num 41.3 41.8 41.8 41.5 41.4 ...
## $ NA : num -73.4 -72.7 -73.2 -72.5 -72.9 ...
## $ NA : num 176.9 42.8 376.9 69.9 140.6 ...
## $ NA : int 1621 1906 2383 956 1570 1725 1062 1329 1287 17687 ...
## $ NA : num 9.6 9.8 7.8 9.8 9.7 9.8 8.3 8.9 7.1 3.2 ...
## $ NA : num 10.8 11.8 11.5 10.6 10.7 11.1 11.6 12.1 11.3 12.5 ...
## $ NA : num 0.3 0.31 0.3 0.3 0.3 0.31 0.31 0.32 0.28 0.28 ...
## $ NA : num 86.9 90.7 90.2 86.5 87.3 ...
## $ NA : num 27.4 29.1 26.3 27.7 27.7 27.6 26.8 27.6 26.7 24.6 ...
## $ NA : num -8 -8.6 -10.8 -7.5 -7.6 -7.8 -10.1 -9.8 -12.7 -19.4 ...
## $ NA : num 35.4 37.7 37.1 35.2 35.3 35.4 36.9 37.4 39.4 44 ...
## $ NA : num 8.3 14.4 12.4 5.7 8.3 1.2 4.1 4.6 2.6 15.2 ...
## $ NA : num -1.9 -1.1 -3.1 20.7 -0.7 20.5 -2.4 -1.8 18.4 -9.9 ...
## $ NA : num 20.5 21.3 19 20.7 20.8 20.5 19.4 19.9 19.3 16.4 ...
## $ NA : num -1.9 -2.3 -4.3 -1.6 -1.8 -1.4 -3.5 -2.9 -5.7 -11.3 ...
## $ NA : int 1250 1167 1246 1239 1222 1229 1233 1218 1134 966 ...
## $ NA : int 117 106 113 118 113 121 116 117 120 103 ...
## $ NA : int 88 81 88 91 88 87 87 89 83 53 ...
## $ NA : int 8 7 7 7 7 9 7 7 11 19 ...
## $ NA : int 338 309 328 331 328 337 326 329 325 296 ...
## $ NA : int 290 266 280 292 287 279 284 290 253 179 ...
## $ NA : int 304 290 323 292 293 279 306 291 260 292 ...
## $ NA : int 290 270 284 302 289 310 289 297 279 195 ...
summary(z$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 54.0 112.0 365.2 453.5 3498.0
print(z)
## ID myVar NA NA NA NA NA NA NA NA NA NA
## 1 15 53 41.27037 -73.38868 176.92 1621 9.6 10.8 0.30 86.88 27.4 -8.0
## 2 50 192 41.80604 -72.73271 42.76 1906 9.8 11.8 0.31 90.70 29.1 -8.6
## 3 62 354 41.79207 -73.24551 376.85 2383 7.8 11.5 0.30 90.18 26.3 -10.8
## 4 36 106 41.46371 -72.53628 69.94 956 9.8 10.6 0.30 86.46 27.7 -7.5
## 5 63 454 41.41133 -72.93209 140.57 1570 9.7 10.7 0.30 87.34 27.7 -7.6
## 6 42 284 41.48932 -72.10095 63.75 1725 9.8 11.1 0.31 84.95 27.6 -7.8
## 7 59 975 41.85441 -72.33728 227.79 1062 8.3 11.6 0.31 88.35 26.8 -10.1
## 8 32 99 41.82970 -71.98752 105.35 1329 8.9 12.1 0.32 88.13 27.6 -9.8
## 9 10 16 44.16497 -70.20523 79.43 1287 7.1 11.3 0.28 96.23 26.7 -12.7
## 10 31 109 46.65539 -68.59930 243.43 17687 3.2 12.5 0.28 107.02 24.6 -19.4
## 11 36 88 43.84401 -70.39729 96.06 3152 7.1 11.5 0.29 93.89 26.3 -12.6
## 12 13 26 44.97659 -70.43745 770.43 4517 3.0 11.4 0.28 99.72 22.7 -17.7
## 13 66 658 44.66924 -68.35556 44.26 6089 6.5 11.2 0.29 93.41 25.7 -12.8
## 14 47 231 44.40802 -69.76055 83.26 2463 7.0 11.8 0.29 97.08 27.0 -13.2
## 15 20 93 44.14914 -69.17305 49.65 2958 7.4 11.1 0.29 91.52 26.2 -11.5
## 16 11 23 44.07397 -69.54331 48.33 1813 7.5 11.0 0.29 92.30 26.3 -11.5
## 17 36 75 44.49497 -70.75481 279.09 5633 5.3 12.5 0.30 99.79 25.7 -15.9
## 18 48 140 45.39688 -68.64795 71.43 9210 5.3 12.5 0.29 101.07 26.0 -16.2
## 19 26 55 45.83410 -69.28309 292.78 11336 3.8 12.0 0.28 104.03 24.5 -17.8
## 20 10 12 43.97070 -69.86159 0.00 958 7.7 10.8 0.28 92.63 26.3 -11.4
## 21 31 60 45.51495 -69.95762 511.85 10606 3.2 11.8 0.28 103.03 23.5 -18.3
## 22 33 314 44.50350 -69.14654 114.31 2209 6.7 11.6 0.29 95.27 26.2 -13.2
## 23 54 556 45.02797 -67.63465 70.62 8430 5.8 11.5 0.29 94.05 25.2 -13.8
## 24 60 3498 43.47897 -70.71433 69.59 3292 7.7 12.7 0.31 93.29 27.7 -12.0
## 25 63 482 41.72768 -70.29937 3.14 1026 9.7 8.7 0.27 79.81 25.6 -5.9
## 26 44 703 42.37096 -73.20682 553.36 2411 6.0 10.8 0.29 91.59 24.6 -12.4
## 27 18 66 41.80235 -71.11502 2.74 1440 9.9 10.6 0.30 85.14 27.8 -7.1
## 28 77 512 41.39729 -70.65680 20.00 269 10.1 8.7 0.27 79.52 25.8 -5.3
## 29 73 438 42.67141 -70.95485 18.76 1290 9.2 11.2 0.30 88.46 27.9 -8.8
## 30 47 475 42.58620 -72.59739 71.66 1818 8.0 13.1 0.32 94.57 28.2 -12.3
## 31 57 648 42.13508 -72.63105 20.21 1601 9.5 12.4 0.31 93.06 29.4 -9.6
## 32 46 535 42.33992 -72.66962 99.05 1370 8.4 12.7 0.32 93.39 28.3 -11.2
## 33 82 453 42.48592 -71.39202 53.30 2134 8.9 11.9 0.31 90.14 27.9 -9.7
## 34 62 2874 41.27764 -70.07367 13.93 124 9.7 7.6 0.27 73.51 23.8 -4.3
## 35 58 460 42.15997 -71.21388 38.17 1036 9.6 11.4 0.31 87.45 28.3 -8.3
## 36 84 1700 41.95324 -70.81294 32.97 1712 9.9 10.3 0.29 84.61 27.5 -7.1
## 37 66 2526 42.33051 -71.07642 5.36 150 9.8 10.8 0.29 87.38 28.3 -7.8
## 38 69 1283 42.35115 -71.90534 276.89 3919 7.7 11.5 0.30 91.25 26.5 -11.1
## 39 17 17 43.50802 -71.42890 292.02 1039 6.0 12.8 0.31 94.74 26.2 -14.3
## 40 28 45 43.88417 -71.20140 168.53 2419 6.1 13.5 0.32 96.68 26.8 -15.1
## 41 31 81 42.91974 -72.25241 208.70 1834 6.8 13.1 0.32 93.97 27.1 -13.4
## 42 34 144 44.68964 -71.30268 538.61 4665 3.7 12.2 0.29 99.83 23.9 -17.3
## 43 39 64 43.93915 -71.81918 543.50 4439 4.7 11.9 0.29 96.24 24.4 -15.4
## 44 36 85 42.91463 -71.71639 232.39 2269 7.4 12.1 0.31 93.66 27.0 -12.0
## 45 36 66 43.29706 -71.67808 132.51 2419 7.1 13.0 0.31 95.65 27.6 -13.5
## 46 50 111 42.98557 -71.12877 54.44 1800 8.3 12.4 0.31 91.74 28.0 -10.9
## 47 53 200 43.29834 -71.03009 115.91 956 7.5 12.8 0.32 92.57 27.6 -12.2
## 48 23 46 43.36073 -72.22246 210.27 1391 6.7 12.6 0.30 96.63 27.1 -13.8
## 49 4 11 41.71513 -71.28277 9.67 117 10.0 10.2 0.29 85.07 27.6 -6.8
## 50 7 7 41.67121 -71.59372 75.70 487 9.4 10.7 0.30 85.09 27.1 -7.8
## 51 2 4 41.55612 -71.23806 7.62 813 10.2 9.4 0.28 82.90 27.0 -5.9
## 52 14 21 41.87276 -71.58127 108.33 1129 9.3 11.4 0.31 87.34 27.7 -8.7
## 53 42 216 41.47244 -71.62310 28.67 1458 9.8 10.1 0.30 81.90 26.6 -6.7
## 54 18 109 44.02813 -73.12987 115.68 1995 7.0 12.1 0.29 99.94 27.5 -13.7
## 55 33 80 43.03342 -73.09395 585.98 1751 5.6 11.6 0.29 94.47 25.0 -13.8
## 56 12 148 44.46520 -72.10204 291.32 1686 5.1 12.1 0.28 101.87 25.8 -16.0
## 57 45 613 44.45956 -73.05299 160.25 1396 6.6 11.1 0.27 101.30 26.8 -13.9
## 58 15 152 44.72892 -71.73763 502.39 1722 3.8 12.1 0.29 101.22 24.1 -17.3
## 59 15 118 44.85492 -72.89071 199.25 1650 5.8 11.1 0.27 102.04 25.8 -15.1
## 60 5 8 44.82464 -73.29546 39.41 215 6.8 11.1 0.27 102.72 26.8 -14.1
## 61 23 131 44.60564 -72.64163 176.09 1194 5.9 11.6 0.28 101.51 26.3 -14.9
## 62 2 2 44.00610 -72.37720 557.57 1785 4.3 11.7 0.29 96.31 24.0 -15.6
## 63 6 11 44.82645 -72.24683 311.37 1805 5.1 11.7 0.27 103.17 25.7 -16.1
## 64 13 29 43.57756 -73.03166 167.01 2414 7.2 12.6 0.30 98.60 27.9 -13.4
## 65 23 168 44.27138 -72.61659 212.38 1787 5.7 11.8 0.28 98.94 26.0 -14.8
## 66 7 45 42.99163 -72.71206 404.96 2044 6.3 12.4 0.31 94.93 26.2 -13.7
## 67 25 112 43.57914 -72.58602 481.86 2515 5.2 12.3 0.30 96.18 25.2 -15.1
## NA NA NA NA NA NA NA NA NA NA NA NA NA
## 1 35.4 8.3 -1.9 20.5 -1.9 1250 117 88 8 338 290 304 290
## 2 37.7 14.4 -1.1 21.3 -2.3 1167 106 81 7 309 266 290 270
## 3 37.1 12.4 -3.1 19.0 -4.3 1246 113 88 7 328 280 323 284
## 4 35.2 5.7 20.7 20.7 -1.6 1239 118 91 7 331 292 292 302
## 5 35.3 8.3 -0.7 20.8 -1.8 1222 113 88 7 328 287 293 289
## 6 35.4 1.2 20.5 20.5 -1.4 1229 121 87 9 337 279 279 310
## 7 36.9 4.1 -2.4 19.4 -3.5 1233 116 87 7 326 284 306 289
## 8 37.4 4.6 -1.8 19.9 -2.9 1218 117 89 7 329 290 291 297
## 9 39.4 2.6 18.4 19.3 -5.7 1134 120 83 11 325 253 260 279
## 10 44.0 15.2 -9.9 16.4 -11.3 966 103 53 19 296 179 292 195
## 11 38.9 2.7 18.2 19.0 -5.4 1162 127 83 13 340 251 258 290
## 12 40.4 15.4 -9.2 15.4 -10.4 1122 112 68 14 317 227 317 240
## 13 38.5 2.4 18.2 18.2 -6.0 1162 130 79 15 349 245 245 305
## 14 40.2 2.4 -4.8 19.1 -6.0 1088 113 73 11 308 244 263 250
## 15 37.7 3.3 18.9 18.9 -4.8 1181 131 80 15 355 249 249 297
## 16 37.8 3.3 19.1 19.1 -4.8 1145 124 79 13 336 249 249 284
## 17 41.6 0.6 -6.8 17.7 -8.2 1079 111 68 12 298 224 292 235
## 18 42.2 0.6 -7.0 17.9 -8.4 1043 114 72 13 300 219 272 244
## 19 42.3 16.7 -9.0 16.7 -10.2 1015 103 59 16 301 193 301 209
## 20 37.7 3.4 18.6 19.3 -4.7 1126 120 77 13 327 242 245 284
## 21 41.8 16.0 -9.4 16.0 -10.7 1034 103 59 16 304 196 304 214
## 22 39.4 2.2 -4.8 18.6 -6.1 1151 126 80 13 341 259 259 278
## 23 39.0 1.7 17.6 17.6 -6.8 1165 129 82 14 348 250 250 304
## 24 39.7 3.2 18.6 19.5 -4.7 1166 131 84 14 346 254 259 290
## 25 31.5 2.2 17.1 19.8 -0.6 1131 116 74 12 319 242 246 297
## 26 37.0 15.4 -5.1 17.5 -6.2 1218 114 84 9 333 266 331 270
## 27 34.9 1.4 18.3 20.8 -1.1 1192 118 85 10 334 264 272 312
## 28 31.1 2.6 17.4 20.2 -0.1 1157 115 75 12 326 250 253 306
## 29 36.7 5.1 20.5 20.5 -2.4 1147 121 82 11 326 256 256 293
## 30 40.5 13.1 -3.4 19.8 -4.8 1121 103 78 8 299 250 294 258
## 31 39.0 14.3 -1.7 21.3 -2.9 1104 100 77 7 292 249 284 254
## 32 39.5 13.4 -2.8 20.1 -4.1 1131 103 80 7 300 254 294 260
## 33 37.6 4.5 20.3 20.3 -3.1 1117 115 85 9 310 257 257 283
## 34 28.1 3.1 16.0 19.1 0.4 1094 112 67 13 317 225 248 305
## 35 36.6 0.7 18.4 20.7 -1.9 1166 118 84 9 326 259 265 303
## 36 34.6 1.5 18.2 20.7 -1.1 1229 123 85 10 346 267 272 325
## 37 36.1 1.0 18.6 21.0 -1.6 1122 113 79 10 315 250 252 295
## 38 37.6 3.3 -3.3 19.2 -4.5 1179 112 85 6 312 276 294 279
## 39 40.5 1.3 -5.4 17.8 -6.8 1153 118 83 10 319 253 295 267
## 40 41.9 1.4 -5.5 18.2 -7.0 1221 125 89 10 344 275 303 295
## 41 40.5 18.5 -4.6 18.5 -5.9 1058 100 73 9 289 233 289 239
## 42 41.2 16.1 -8.5 16.1 -9.7 1123 120 67 16 336 216 336 234
## 43 39.8 16.6 -7.0 16.6 -8.3 1151 115 74 13 327 231 327 245
## 44 39.0 2.8 -3.8 19.2 -5.1 1130 114 85 8 313 262 278 273
## 45 41.1 2.4 -4.4 19.1 -5.8 1037 103 73 9 285 227 267 241
## 46 38.9 3.8 -2.7 19.9 -4.0 1098 117 81 10 313 253 258 266
## 47 39.8 3.0 -3.6 19.2 -4.9 1127 123 84 11 326 259 263 272
## 48 40.9 18.7 -5.0 18.7 -6.3 1001 96 68 11 273 211 273 221
## 49 34.4 1.5 18.3 20.9 -1.0 1181 118 82 11 332 258 265 310
## 50 34.9 0.9 17.8 20.2 -1.7 1213 121 85 10 337 269 272 312
## 51 32.9 2.2 18.2 20.8 -0.4 1159 116 78 12 329 251 255 308
## 52 36.4 0.4 18.0 20.4 -2.2 1209 120 88 9 332 274 279 307
## 53 33.3 1.7 17.6 20.2 -0.9 1184 119 80 12 331 257 261 306
## 54 41.2 18.3 -5.2 19.4 -6.5 924 105 50 20 282 164 281 173
## 55 38.8 17.4 -5.9 17.4 -7.0 1271 122 84 11 345 271 345 280
## 56 41.8 17.7 -7.3 17.7 -8.7 998 111 56 19 312 185 312 202
## 57 40.7 19.2 -5.8 19.2 -7.1 928 107 47 22 294 158 294 164
## 58 41.4 16.3 -8.5 16.3 -9.9 1102 125 62 20 349 202 349 221
## 59 40.9 18.3 -6.8 18.3 -8.1 1015 114 52 21 316 177 316 186
## 60 40.9 19.4 -5.9 19.4 -7.2 834 97 43 23 265 143 265 149
## 61 41.2 18.5 -6.5 18.5 -7.8 991 111 53 20 309 178 309 189
## 62 39.6 16.2 -7.4 16.2 -8.6 1142 118 71 14 327 230 327 243
## 63 41.8 17.8 -7.5 17.8 -8.9 1050 117 59 19 328 196 328 212
## 64 41.3 19.5 -4.8 19.5 -6.1 1010 111 58 17 299 192 299 200
## 65 40.8 17.9 -6.4 17.9 -7.7 976 108 56 18 296 184 296 195
## 66 39.9 11.4 -5.2 18.1 -6.5 1217 112 86 8 320 273 319 283
## 67 40.3 17.2 -6.5 17.2 -7.7 1166 114 76 11 316 245 316 257
summary(z)
## ID myVar NA NA
## Min. : 2.00 Min. : 2.0 Min. :41.27 Min. :-73.39
## 1st Qu.:16.00 1st Qu.: 54.0 1st Qu.:41.91 1st Qu.:-72.59
## Median :34.00 Median : 112.0 Median :43.30 Median :-71.59
## Mean :35.75 Mean : 365.2 Mean :43.27 Mean :-71.42
## 3rd Qu.:51.50 3rd Qu.: 453.5 3rd Qu.:44.43 3rd Qu.:-70.69
## Max. :84.00 Max. :3498.0 Max. :46.66 Max. :-67.63
## NA NA NA NA
## Min. : 0.00 Min. : 117 Min. : 3.000 Min. : 7.60
## 1st Qu.: 48.99 1st Qu.: 1288 1st Qu.: 5.850 1st Qu.:11.10
## Median :114.31 Median : 1785 Median : 7.200 Median :11.60
## Mean :181.65 Mean : 2622 Mean : 7.281 Mean :11.53
## 3rd Qu.:260.16 3rd Qu.: 2419 3rd Qu.: 9.350 3rd Qu.:12.25
## Max. :770.43 Max. :17687 Max. :10.200 Max. :13.50
## NA NA NA NA
## Min. :0.2700 Min. : 73.51 Min. :22.70 Min. :-19.40
## 1st Qu.:0.2850 1st Qu.: 88.24 1st Qu.:25.75 1st Qu.:-14.55
## Median :0.2900 Median : 93.66 Median :26.70 Median :-12.40
## Mean :0.2949 Mean : 93.20 Mean :26.50 Mean :-11.98
## 3rd Qu.:0.3100 3rd Qu.: 97.84 3rd Qu.:27.60 3rd Qu.: -8.75
## Max. :0.3200 Max. :107.02 Max. :29.40 Max. : -4.30
## NA NA NA NA
## Min. :28.10 Min. : 0.400 Min. :-9.90 Min. :15.40
## 1st Qu.:36.80 1st Qu.: 2.300 1st Qu.:-5.90 1st Qu.:17.90
## Median :39.00 Median : 4.500 Median :-3.30 Median :19.20
## Mean :38.48 Mean : 8.488 Mean : 3.21 Mean :18.96
## 3rd Qu.:40.85 3rd Qu.:16.150 3rd Qu.:18.10 3rd Qu.:20.20
## Max. :44.00 Max. :19.500 Max. :20.70 Max. :21.30
## NA NA NA NA
## Min. :-11.30 Min. : 834 Min. : 96.0 Min. :43.00
## 1st Qu.: -7.15 1st Qu.:1084 1st Qu.:111.0 1st Qu.:68.00
## Median : -5.40 Median :1142 Median :115.0 Median :79.00
## Mean : -5.16 Mean :1124 Mean :114.9 Mean :75.24
## 3rd Qu.: -2.35 3rd Qu.:1181 3rd Qu.:120.0 3rd Qu.:84.00
## Max. : 0.40 Max. :1271 Max. :131.0 Max. :91.00
## NA NA NA NA
## Min. : 6.00 Min. :265.0 Min. :143.0 Min. :245.0
## 1st Qu.: 9.00 1st Qu.:308.5 1st Qu.:224.5 1st Qu.:260.5
## Median :11.00 Median :326.0 Median :250.0 Median :284.0
## Mean :12.21 Mean :319.6 Mean :240.2 Mean :284.9
## 3rd Qu.:14.00 3rd Qu.:332.5 3rd Qu.:263.0 3rd Qu.:303.5
## Max. :23.00 Max. :355.0 Max. :292.0 Max. :349.0
## NA
## Min. :149.0
## 1st Qu.:239.5
## Median :278.0
## Mean :263.0
## 3rd Qu.:296.0
## Max. :325.0
b: Plot histogram
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
## Warning: Using `size` aesthetic for lines was
## deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8
## hours.
## Call `lifecycle::last_lifecycle_warnings()`
## to see where this warning was generated.
print(p1)
## Warning: The dot-dot notation (`..density..`) was
## deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8
## hours.
## Call `lifecycle::last_lifecycle_warnings()`
## to see where this warning was generated.
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
c: Add empirical density curve
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
d: Get maximum likelihood parameters for normal
normPars <- fitdistr(z$myVar,"normal")
print(normPars)
## mean sd
## 365.22388 647.02810
## ( 79.04706) ( 55.89471)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 365 647
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 79 55.9
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 6248 0 0 3124
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 67
## $ loglik : num -529
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
## mean
## 365.2239
e: Plot normal probability density
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$myVar),len=length(z$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
f: Plot exponential probability density
expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
g: Plot uniform probability density
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
h: Plot gamma probability density
gammaPars <- fitdistr(z$myVar,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
g: Plot beta probability density
pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
## Warning: Removed 2 rows containing missing values or
## values outside the scale range
## (`geom_bar()`).
3: Best-fitting distribution
The gamma density distribution is the most fitting for this sample data.
4: New data set
library(ggplot2)
library(MASS)
z <- rnorm(n=1718, mean=0.2)
z <- data.frame(1:1718, z)
names(z) <- list("ID", "myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame': 984 obs. of 2 variables:
## $ ID : int 1 2 4 8 9 11 14 15 16 17 ...
## $ myVar: num 0.915 0.347 0.45 1.077 0.494 ...
summary(z$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0005 0.3417 0.7392 0.8571 1.2572 3.5104
b: Plot histogram
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
d: Get maximum likelihood parameters for normal
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$myVar),len=length(z$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
h: Plot gamma probability density
gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
data <- data.frame(x = rnorm(1718))
p <- ggplot(data, aes(x = x)) +
geom_histogram(aes(y = ..density..), binwidth = 0.2, color = "grey60", fill = "cornsilk", size = 0.2) +
stat_function(fun = dnorm, args = list(mean = mean(data$x), sd = sd(data$x)), colour = "red")
print(p)
Original data histogram plot
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
Original data probability curve
gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better
## value with `binwidth`.
The two histogram profiles differ, the original being skewed right, and the similated one representing a normal distribution. Therefore, it doesn’t seem like the model is accurately simulating realistic data that matches that of the data set I imported.