## ----load-data, eval = TRUE----------------------------------------------
# Read the remote cdc.csv file into the data frame `cdc`; every later chunk
# works on this object. Requires network access.
cdc <- read.csv("http://www.jkarreth.net/files/cdc.csv")

## ----names, eval = TRUE--------------------------------------------------
# Column names of the data frame.
names(cdc)

## ----head, eval = TRUE---------------------------------------------------
# First rows of the data frame.
head(cdc)

## ----tail, eval = TRUE---------------------------------------------------
# Last rows of the data frame.
tail(cdc)

## ----summary-weight, eval = TRUE-----------------------------------------
# Numerical summary of the weight column.
summary(cdc$weight)

## ----weight-range-arith, eval = TRUE-------------------------------------
# Plain arithmetic on two literals.
# NOTE(review): presumably these are two values read off the summary()
# output above -- confirm against the accompanying text.
190 - 140

## ----weight-mean-var-median, eval = TRUE---------------------------------
# Mean, variance and median of weight, each printed separately.
mean(cdc$weight)
var(cdc$weight)
median(cdc$weight)

## ----table-smoke, eval = TRUE--------------------------------------------
# Counts of each distinct value of smoke100.
table(cdc$smoke100)

## ----table-smoke-prop, eval = TRUE---------------------------------------
# The same counts divided by the number of rows in the data frame.
table(cdc$smoke100) / nrow(cdc)

## ----table-smoke-barplot, eval = TRUE------------------------------------
# Bar plot of the smoke100 counts, built in a single nested call.
barplot(table(cdc$smoke100))

## ----table-smoke-barplot-twosteps, eval = TRUE---------------------------
# Same bar plot, but with the table stored in `smoke` first.
smoke <- table(cdc$smoke100)
barplot(smoke)

## ----table-smoke-gender, eval = TRUE-------------------------------------
# Two-way table: gender in the rows, smoke100 in the columns.
table(cdc$gender, cdc$smoke100)

## ----mosaic-smoke-gender, eval = TRUE------------------------------------
# Mosaic plot of the same two-way table.
mosaicplot(table(cdc$gender, cdc$smoke100))

## ----proptable-smoke-gender, eval = TRUE---------------------------------
# margin = 1 computes proportions within each row (here: within each gender).
prop.table(table(cdc$gender, cdc$smoke100), margin = 1)

## ----dim, eval = TRUE----------------------------------------------------
# Number of rows and columns.
dim(cdc)

## ----cdc-row567-column6, eval = TRUE-------------------------------------
# Single cell: row 567, column 6.
cdc[567, 6]

## ----names-again, eval = TRUE--------------------------------------------
# Column names again, to see which variable sits in which column position.
names(cdc)

## ----first-10-rows-sixth-column, eval = TRUE-----------------------------
# Rows 1 through 10 of column 6.
cdc[1:10, 6]

## ----numbers-1to10, eval = TRUE------------------------------------------
# The integer sequence used as the row index above.
1:10

## ----first-10-rows, eval = TRUE------------------------------------------
# Rows 1 through 10, all columns (empty column index).
cdc[1:10, ]

## ----6th-column, eval = FALSE--------------------------------------------
# Not evaluated in the source document (eval = FALSE); kept commented out.
## cdc[ ,6]

## ----weight, eval = FALSE------------------------------------------------
# Not evaluated in the source document (eval = FALSE); kept commented out.
## cdc$weight

## ----weight-567, eval = TRUE---------------------------------------------
# Element 567 of the weight column.
cdc$weight[567]

## ----weight-first10, eval = TRUE-----------------------------------------
# Elements 1 through 10 of the weight column.
cdc$weight[1:10]

## ----true-male, eval = FALSE---------------------------------------------
# Not evaluated in the source document (eval = FALSE); kept commented out.
## cdc$gender == "m"

## ----true-over30, eval = FALSE-------------------------------------------
# Not evaluated in the source document (eval = FALSE); kept commented out.
## cdc$age > 30

## ----males, eval = TRUE--------------------------------------------------
# Rows where gender is "m". subset() evaluates the condition inside `cdc`,
# so the bare column name is enough (same form as the two calls below).
mdata <- subset(cdc, gender == "m")

## ----head-males, eval = TRUE---------------------------------------------
# First rows of the subset.
head(mdata)

## ----males-and-over30, eval = TRUE---------------------------------------
# Rows satisfying both conditions (elementwise AND).
m_and_over30 <- subset(cdc, gender == "m" & age > 30)

## ----males-or-over30, eval = TRUE----------------------------------------
# Rows satisfying at least one of the conditions (elementwise OR).
m_or_over30 <- subset(cdc, gender == "m" | age > 30)

## ----boxplot-height, eval = TRUE-----------------------------------------
# Box plot of height for all rows.
boxplot(cdc$height)

## ----summary-height, eval = TRUE-----------------------------------------
# Numerical summary of height, to compare with the box plot.
summary(cdc$height)

## ----boxplot-height-gender, eval = TRUE----------------------------------
# One box of height per level of gender (formula interface).
boxplot(cdc$height ~ cdc$gender)

## ----boxplot-bmi, eval = TRUE--------------------------------------------
# Adds a new column `bmi` to `cdc`, then plots it by genhlth category.
# NOTE(review): 703 is the usual BMI factor for pounds and inches --
# presumably the units of weight and height here; confirm against the
# data documentation.
cdc$bmi <- (cdc$weight / cdc$height^2) * 703
boxplot(cdc$bmi ~ cdc$genhlth)

## ----hist-age, eval = TRUE-----------------------------------------------
# Histogram of age with default bins.
hist(cdc$age)

## ----hist-bmi, eval = TRUE-----------------------------------------------
# Histogram of bmi with default bins, then with breaks = 50.
hist(cdc$bmi)
hist(cdc$bmi, breaks = 50)

## ----hgt-wgt_base, eval = TRUE-------------------------------------------
# Base-graphics scatter plot: plot() opens the figure with the "m" rows in
# blue, then points() adds the "f" rows in red to that same figure.
with(cdc[cdc$gender == "m", ], plot(x = height, y = weight, col = "blue"))
with(cdc[cdc$gender == "f", ], points(x = height, y = weight, col = "red"))

## ----hgt-wgt_gg, eval = TRUE---------------------------------------------
# ggplot2 version of the height/weight scatter plot, colored by gender.
# The two statements must sit on separate lines (or be separated by `;`);
# on a single line they do not parse.
library(ggplot2)
ggplot(data = cdc, aes(x = height, y = weight, color = gender)) +
  geom_point()