R Markdown Notebooks
This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code. Code can be placed in code chunks and run independently.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Scatterplot of the two columns of the built-in `cars` data: speed (x) vs dist (y)
with(cars, plot(speed, dist))
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
Note: comments start with a #. It’s a great idea to comment your # code.
# Simple R code chunk: store two numbers, then print their sum
x <- 2
y <- 5
sum(x, y)
[1] 7
# Load the packages this notebook needs
# (most of the data we will use exists inside these packages)
library("alr4")
library("ISLR")
library("ggplot2")
# Keep the Heights data (mother-daughter height data) in the global variable myH
myH <- Heights
# Note: for the UN problem on the homework, the data file is UN11
# Summary statistics
# var() returns the variance of its argument
# `$` pulls a single variable/column out of the data
var(x = myH$dheight)
[1] 6.760274
# mean() returns the arithmetic mean of its argument
mean(x = myH$mheight)
[1] 62.4528
# Summary graph of the response against the predictor.
# plot() takes the x-axis variable first, so the predictor (mother's height,
# mheight) is passed first and the response (daughter's height, dheight)
# second; the original call had the two axes swapped.
plot(myH$mheight, myH$dheight)
Now, let’s shift our attention to the fuel2001 data.
# Keep a working copy of fuel2001 in the global variable myfuel
myfuel <- fuel2001
# Scatterplots of one column against another
# Size: first with the default axis limits, then with the x-axis set to 0..40000
plot(x = myfuel$Income, y = myfuel$Drivers)
plot(x = myfuel$Income, y = myfuel$Drivers, xlim = c(0, 40000))
# Transformations: Miles on its original scale, then on the log scale
plot(x = myfuel$Income, y = myfuel$Miles)
plot(x = myfuel$Income, y = log(myfuel$Miles))
# Scatterplot matrix: calling plot() on the whole data frame
plot(myfuel)
# New variables computed from the existing predictors can be added to the
# data as extra columns. These are called regressors.
# FuelPerP is 1000 * FuelC / Pop, evaluated row by row.
myfuel$FuelPerP <- with(myfuel, 1000 * FuelC / Pop)
# Note: we will not be using FuelC or Pop to predict/model Fuel
# --- Here is a good place to stop by Monday
# which() is one of the most powerful functions; open its help page
help("which")
# Indices of every row whose Pop is greater than 5,000,000 (kept in pop1)
pop1 <- which(myfuel$Pop > 5e6)
# Note: 5e6 is scientific notation for 5000000 (5 million)
# Plot Tax against Pop for just those rows
plot(x = myfuel$Pop[pop1], y = myfuel$Tax[pop1])
# More advanced plotting with ggplot2: Tax against Pop, one point per row.
# The label aesthetic holds the row names for the optional geom_text() layer
# below, which is currently commented out along with the y-axis limits.
ggplot(
  data = myfuel,
  mapping = aes(x = Pop, y = Tax, label = rownames(myfuel))
) +
  geom_point() # +
  # geom_text() +
  # ylim(c(20, 30))
LS0tCnRpdGxlOiAiQXBwbGllZCBMaW5lYXIgU3RhdGlzdGljYWwgTW9kZWxzIgpzdWJ0aXRsZTogIjAxIC0gSW50cm9kdWN0aW9uIHRvIFIgYW5kIFJTdHVkaW8iCmF1dGhvcjogPGI+IDxhIGhyZWY9Imh0dHA6Ly93d3cubWJnbWF0aC5jb20iPiBNYXJpbyBCYW51ZWxvcyA8L2E+IDwvYj4KI2RhdGU6IG1iYW51ZWxvczIyQGNzdWZyZXNuby5lZHUKb3V0cHV0OiAKICAjIHVuY29tbWVudCB0byB0b2dnbGUgYmV0d2VlbiBwcmVzZW50YXRpb24gYW5kIGh0bWxfbm90ZWJvb2sKICBodG1sX25vdGVib29rCiAgI2lvc2xpZGVzX3ByZXNlbnRhdGlvbgotLS0KCjwhLS0Kc29tZSB1c2VmdWwgY29sb3JzIGhleCB2YWx1ZXM6CiAgZGFyayBzb2xhcml6ZWQ6ICMwMDJiMzYKICBvcmFuZ2U6ICNmZmMyMGEKICBzb2xhcml6ZWQgcmVkOiAjZGMzMjJmCi0tPgoKIyMgV2hhdCBpcyBSPwpSIGlzIGEgZnJlZSwgb3Blbi1zb3VyY2Ugc29mdHdhcmUgYW5kIHByb2dyYW1taW5nIGxhbmd1YWdlIGRldmVsb3BlZCBpbiAxOTk1IGF0IHRoZSBVbml2ZXJzaXR5IG9mIEF1Y2tsYW5kIGFzIGFuIGVudmlyb25tZW50IGZvciBzdGF0aXN0aWNhbCBjb21wdXRpbmcgYW5kIGdyYXBoaWNzIDxhIGhyZWY9Imh0dHBzOi8vd3d3LnN0YXQuYXVja2xhbmQuYWMubnovfmloYWthL2Rvd25sb2Fkcy9SLXBhcGVyLnBkZiI+IChJa2FoYSBhbmQgR2VudGxlbWFuLCAxOTk2KSA8L2E+LiBTaW5jZSB0aGVuLCBSIGhhcyBiZWNvbWUgYSBwb3B1bGFyIHNvZnR3YXJlIGVudmlyb25tZW50IGZvciBkYXRhIGFuYWx5c2lzIGluIGEgdmFyaWV0eSBvZiBkaXNjaXBsaW5lcy4KCiMjIyBXaHkgU2hvdWxkIEkgTGVhcm4gUj8KCiogSXQgaXMgZnJlZSBhbmQgaXQgaXMgYSB3aWRlbHkgdXNlZCBzdGF0aXN0aWNhbCBwcm9ncmFtbWluZyBsYW5ndWFnZSEKKiBSZXByb2R1Y2libGUgUmVzZWFyY2gKKiA8YSBocmVmPSJodHRwczovL2NyYW4uci1wcm9qZWN0Lm9yZy93ZWIvdmlld3MvIj4gQ29tbXVuaXR5IFJlc291cmNlcyA8L2E+CiogTGVhcm5pbmcgUmVzb3VyY2VzCiAgKiBJU0xSIFRleHRib29rCiAgKiA8YSBocmVmPSJodHRwczovL3JzdHVkaW8uY29tL3Jlc291cmNlcy9jaGVhdHNoZWV0cy8iPiBSU3R1ZGlvIENoZWF0IFNoZWV0cyA8L2E+CiAgKiA8YSBocmVmPSJodHRwczovL3d3dy51ZGVteS5jb20vY291cnNlL3ItYmFzaWNzLyI+IFIgQmFzaWNzIDwvYT4KICAqIDxhIGhyZWY9Imh0dHBzOi8vZ2dwbG90Mi50aWR5dmVyc2Uub3JnL2luZGV4Lmh0bWwiPiBnZ3Bsb3QyIDwvYT4KCiMjIFJTdHVkaW8KClJTdHVkaW8gaGFzIDQgbWFpbiB3aW5kb3cgcGFuZXMgdGhhdCBhbGxvd3MgZm9yIAoKICogKlNjcmlwdCAvIEVkaXRpbmcgUGFuZSoKICAgICogVGhpcyBhcmVhIGFsbG93cyB5b3UgdG8gd3JpdGUgc2NyaXB0cyBvciBlbnRpcmUgUiBOb3RlYm9va3MKICogKkNvbnNvbGUqCiAgICAqIFRoaXMgaW50ZXJhY3RpdmUgYXJlYSBleGVjdXRlcyBjb2RlIChsaW5lLWJ5LWxpbmUpIGFz
IHlvdSB0eXBlIGl0CiAqICpGaWxlIE5hdmlnYXRvciAvIFBsb3QgVmlld2VyIC8gSGVscCBQYW5lKgogICAgKiBTaG93cyB5b3VyIGN1cnJlbnQgd29ya2luZyBmb2xkZXIgYW5kIGRpc3BsYXlzIHBsb3RzICsgaGVscAogKiAqRW52aXJvbm1lbnQgUGFuZSoKICAgICogU2hvd3MgYW55IGdsb2JhbCB2YXJpYWJsZXMgdGhhdCB5b3UgY2FuIGFjY2VzcyAvIG1hbmlwdWxhdGUKICAgIAogICAgCiMjIyBSIC0gQ29uc29sZQpXZSB3aWxsIHdyaXRlIHNvbWUgY29kZSB0byBleGVjdXRlIGluIHRoZSBSIENvbnNvbGUsIGNvdmVyaW5nOgoKICAqIFNpbXBsZSBBcml0aG1ldGljCiAgKiBWYXJpYWJsZXMgYW5kIFZhcmlhYmxlIEFzc2lnbm1lbnQKICAqIEZ1bmN0aW9ucywgUGxvdHRpbmcsIGFuZCBIZWxwCiAgKiBJbmRleGluZyBhbmQgdGhlIEdsb2JhbCBFbnZpcm9ubWVudAoKLS0tCgoqKlByYWN0aWNlOioqIFVzZSBgUmAgKGluIHRoZSBjb25zb2xlKSB0byBjb21wbGV0ZSB0aGUgZm9sbG93aW5nOgoKICAxLiBDcmVhdGUgYSB2YXJpYWJsZSBgeGAgYW5kIHZhcmlhYmxlIGB5YCBhbmQgYWRkIHRoZW0gdG9nZXRoZXIuIFNhdmUgdGhlIHJlc3VsdCB0byBhIG5ldyB2YXJpYWJsZSBgemAKICAyLiBDcmVhdGUgYSAqc3VtbWFyeSBncmFwaCogb2YgeW91ciBHUEEgdnMgIyBvZiBVbml0cyB5b3UgaGF2ZSB0YWtlbiBhcyBhIHN0dWRlbnQuCgojIyMgUiBTY3JpcHRzCkxldCdzIGNvbWJpbmUgYWxsIHdlIGNvdmVyZWQgaW50byBhIHNjcmlwdCAoc28gdGhhdCB3ZSBjYW4gcnVuIGl0IGxhdGVyKS4gV2UgY2FuIGNob29zZSB0byBydW4gc2NyaXB0czoKCiogbGluZSBieSBsaW5lCiogaW4gc2VjdGlvbnMKKiBlbnRpcmVseQoKIyMgUiBNYXJrZG93biBOb3RlYm9va3MKClRoaXMgaXMgYW4gW1IgTWFya2Rvd25dKGh0dHA6Ly9ybWFya2Rvd24ucnN0dWRpby5jb20pIE5vdGVib29rLiBXaGVuIHlvdSBleGVjdXRlIGNvZGUgd2l0aGluIHRoZSBub3RlYm9vaywgdGhlIHJlc3VsdHMgYXBwZWFyIGJlbmVhdGggdGhlIGNvZGUuIENvZGUgY2FuIGJlIHBsYWNlZCBpbiBjb2RlIGNodW5rcyBhbmQgcnVuIGluZGVwZWRlbnRseS4KClRyeSBleGVjdXRpbmcgdGhpcyBjaHVuayBieSBjbGlja2luZyB0aGUgKlJ1biogYnV0dG9uIHdpdGhpbiB0aGUgY2h1bmsgb3IgYnkgcGxhY2luZyB5b3VyIGN1cnNvciBpbnNpZGUgaXQgYW5kIHByZXNzaW5nICpDbWQrU2hpZnQrRW50ZXIqLiAKCmBgYHtyfQpwbG90KGNhcnMpCmBgYAoKQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkNtZCtPcHRpb24rSSouCgpXaGVuIHlvdSBzYXZlIHRoZSBub3RlYm9vaywgYW4gSFRNTCBmaWxlIGNvbnRhaW5pbmcgdGhlIGNvZGUgYW5kIG91dHB1dCB3aWxsIGJlIHNhdmVkIGFsb25nc2lkZSBpdCAoY2xpY2sgdGhlICpQcmV2aWV3KiBidXR0b24gb3IgcHJlc3MgKkNtZCtTaGlmdCtL
KiB0byBwcmV2aWV3IHRoZSBIVE1MIGZpbGUpLiAKClRoZSBwcmV2aWV3IHNob3dzIHlvdSBhIHJlbmRlcmVkIEhUTUwgY29weSBvZiB0aGUgY29udGVudHMgb2YgdGhlIGVkaXRvci4gQ29uc2VxdWVudGx5LCB1bmxpa2UgKktuaXQqLCAqUHJldmlldyogZG9lcyBub3QgcnVuIGFueSBSIGNvZGUgY2h1bmtzLiBJbnN0ZWFkLCB0aGUgb3V0cHV0IG9mIHRoZSBjaHVuayB3aGVuIGl0IHdhcyBsYXN0IHJ1biBpbiB0aGUgZWRpdG9yIGlzIGRpc3BsYXllZC4KCk5vdGU6IGNvbW1lbnRzIHN0YXJ0IHdpdGggYSAjLiBJdCdzIGEgZ3JlYXQgaWRlYSB0byBjb21tZW50IHlvdXIgIyBjb2RlLgoKYGBge3J9CiMgc2ltcGxlIFIgY29kZSBjaHVuawp4IDwtIDIKeSA8LSA1CnggKyB5CmBgYAoKCmBgYHtyIG1lc3NhZ2U9RkFMU0V9CiMgTG9hZCBuZWNlc3NhcnkgbGlicmFyaWVzCiMgbW9zdCBvZiB0aGUgZGF0YSB3ZSB3aWxsIHVzZSBleGlzdHMgaW5zaWRlIHRoZXNlIDIgbGlicmFyaWVzCmxpYnJhcnkoJ2FscjQnKQpsaWJyYXJ5KCdJU0xSJykKbGlicmFyeSgnZ2dwbG90MicpCgojIHN0b3JlIG15aGVpZ2h0IChtb3RoZXItZGF1Z2hlciBoZWlnaHQgZGF0YSkgYXMgYSBnbG9iYWwgdmFyaWFibGUKbXlIIDwtIEhlaWdodHMKCiMgTm90ZTogZm9yIHRoZSBVTiBwcm9ibGVtIG9uIHRoZSBob21ld29yaywgdGhlIGRhdGEgZmlsZSBpcyBVTjExCgojIFN1bW1hcnkgU3RhdGlzdGljcwojIHZhcigpIGNhbGN1bGF0ZXMgdGhlIHZhcmlhbmNlCiMgJCBhY2Nlc3NlcyB2YXJpYWJsZXMvY29sdW1ucyBvZiBkYXRhCnZhcihteUgkZGhlaWdodCkgCgojIG1lYW4oKSBjYWxjdWxhdGVzIHRoZSBtZWFuCm1lYW4obXlIJG1oZWlnaHQpCgojIGNyZWF0aW5nIGEgc3VtbWFyeSBncmFwaCAocHJlZGljdG9yIHZzIHJlc3BvbnNlKQpwbG90KG15SCRkaGVpZ2h0LCBteUgkbWhlaWdodCkKYGBgCgpOb3csIGxldCdzIHNoaWZ0IG91ciBhdHRlbnRpb24gdG8gYGZ1ZWwyMDAxYCBkYXRhLgoKYGBge3IsIG1lc3NhZ2UgPSBGQUxTRX0KIyBzYXZlIG15ZnVlbCBhcyBnbG9iYWwgdmFyaWFibGUKbXlmdWVsIDwtIGZ1ZWwyMDAxCgojIFBsb3R0aW5nIHByZWRpY3RvciB2cyByZXNwb25zZQojIFNpemUKcGxvdChteWZ1ZWwkSW5jb21lLCBteWZ1ZWwkRHJpdmVycykgIyBkZWZhdWx0IHBsb3QKcGxvdChteWZ1ZWwkSW5jb21lLCBteWZ1ZWwkRHJpdmVycywgeGxpbT1jKDAsNDAwMDApKSAKCiMgVHJhbnNmb3JtYXRpb25zCnBsb3QobXlmdWVsJEluY29tZSwgbXlmdWVsJE1pbGVzICkKcGxvdChteWZ1ZWwkSW5jb21lLCBsb2cobXlmdWVsJE1pbGVzKSApCgojIFNjYXR0ZXJwbG90IE1hdHJpY2VzCnBsb3QobXlmdWVsKQojIEFkZGluZyBuZXcgdmFyaWFibGVzIChiYXNlZCBvbiB0aGUgcHJlZGljdG9ycykKIyB0byB0aGUgZGF0YS4gVGhlc2UgYXJlIGNhbGxlZCByZWdyZXNzb3JzLgpteWZ1ZWwkRnVlbFBlclAgPC0gMTAwMCAqIG15ZnVlbCRGdWVsQyAvIG15ZnVlbCRQb3AKCiMg
Tm90ZTogd2Ugd2lsbCBub3QgYmUgdXNpbmcgRnVlbEMgb3IgUG9wIHRvIHByZWRpY3QvbW9kZWwgRnVlbAoKIyAtLS0gSGVyZSBpcyBhIGdvb2QgcGxhY2UgdG8gc3RvcCBieSBNb25kYXkKCiMgT25lIG9mIHRoZSBtb3N0IHBvd2VyZnVsIGZ1bmN0aW9ucwo/d2hpY2gKCiMgRmluZCBhbGwgKHBvcDEgc3RvcmVzIHRoZXNlIGluZGljZXMpCnBvcDEgPC0gd2hpY2gobXlmdWVsJFBvcCA+IDUwMDAwMDApIAojIE5vdGU6ICBtaWxsaW9uIGNvdWxkIGFsc28gYmUgd3JpdHRlbiBhcyA1ZTYKCiMgcGxvdCAKcGxvdChteWZ1ZWwkUG9wW3BvcDFdLCBteWZ1ZWwkVGF4W3BvcDFdKQoKIyBtb3JlIGFkdmFuY2VkIHBsb3R0aW5nCmdncGxvdChkYXRhID0gbXlmdWVsLCAKICAgICAgIGFlcyh4ID0gUG9wLCB5ID0gVGF4LCBsYWJlbD1yb3duYW1lcyhteWZ1ZWwpKSkgKwogIGdlb21fcG9pbnQoKSAjKwogI2dlb21fdGV4dCgpICsKICN5bGltKGMoMjAsIDMwKSkKYGBgCgo=