# Exploratory analysis of the flights13 data: part (1) builds summary tables
# with data.table, part (2) plots the 24 December flights to LAX.

# Attach every package before its first use: fread() needs data.table and the
# plots need ggplot2.
library(data.table)
library(ggplot2)
library(dplyr)
library(ggrepel)

flights13 <- fread("flights13.csv")

# (1) ----

# (a) Drop the first column.
flights13 <- flights13[, -1]

# (b) Keep only rows without missing values.
flights13 <- na.omit(flights13)

# (c) Percentage of flights whose arrival + departure delay is negative.
ans <- 100 * flights13[, sum((arr_delay + dep_delay) < 0)] / nrow(flights13)
ans

# (d) Percentage of all flights per origin.
ans <- flights13[, .(perc_trips = 100 * .N / nrow(flights13)), by = origin]
ans

# (e) Percentage of all flights per origin and month (keyby sorts the groups).
ans <- flights13[
  ,
  .(perc_trips = 100 * .N / nrow(flights13)),
  keyby = .(origin, month)
]
ans

# (f) Same table, largest share first.
ans[order(perc_trips, decreasing = TRUE)]

# (g) Mean total delay per carrier, largest first.
ans <- flights13[, .(mean.delay = mean(arr_delay + dep_delay)), by = carrier]
ans[order(mean.delay, decreasing = TRUE)]

# (h) Mean arrival and departure delay of carrier "AS" per origin, destination
# and month, ordered by month.
ans <- flights13[
  carrier == "AS",
  .(mean.arr.delay = mean(arr_delay), mean.dep.delay = mean(dep_delay)),
  by = .(origin, dest, month)
][order(month)]
ans

# (i) Mean arrival and departure delay of carrier "UA" per origin and month,
# ordered by arrival delay, then departure delay.
ans <- flights13[
  carrier == "UA",
  lapply(.SD, mean),
  by = .(origin, month),
  .SDcols = c("arr_delay", "dep_delay")
][order(arr_delay, dep_delay)]
ans

# (j) Flights to LAX on 24 December scheduled to depart before 1800, with
# their total delay appended, ordered by total delay.
ans1 <- flights13[
  month == 12 & day == 24 & dest == "LAX" & sched_dep_time < 1800
]
# Derive the delay column from the already-filtered rows instead of running
# the same filter a second time.
ans2 <- ans1[, .(total_delay = arr_delay + dep_delay)]
ans <- cbind(ans1, ans2)
ans <- ans[order(total_delay)]
ans

# (2) ----

# (a) Departure vs. arrival delay with one linear fit per carrier.
ggplot(ans, aes(x = arr_delay, y = dep_delay, color = carrier)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# (b) Same relationship, one panel per carrier.
ggplot(ans, aes(x = arr_delay, y = dep_delay)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(~carrier)

# (c) Split total delay into "< 0" and ">=0". right = FALSE makes the
# intervals [-Inf, 0) and [0, Inf), so a delay of exactly 0 lands in ">=0"
# as the labels say (the default right = TRUE would put it in "< 0").
ans[, total_delay_bin := cut(
  total_delay,
  breaks = c(-Inf, 0, Inf),
  labels = c("< 0", ">=0"),
  right = FALSE
)]

ggplot(ans, aes(x = carrier, fill = total_delay_bin)) +
  geom_bar(position = "fill")

ggplot(ans, aes(x = carrier, fill = total_delay_bin)) +
  geom_bar(position = "dodge")

# (d) Label, for each carrier, the flight with the smallest total delay.
best_in_carrier <- ans %>%
  group_by(carrier) %>%
  filter(row_number(total_delay) == 1) %>%
  ungroup()

ggplot(ans, aes(x = arr_delay, y = dep_delay)) +
  geom_point(aes(color = carrier)) +
  geom_text_repel(data = best_in_carrier, aes(label = flight))

# (e) Jittered points coloured by carrier and sized by total delay.
ggplot(ans, aes(x = arr_delay, y = dep_delay)) +
  geom_jitter(aes(color = carrier, size = total_delay))

# (f)
# NOTE(review): this part relies on data.table and ggplot2 being attached and
# on `flights13` being a data.table with arr_delay, dep_delay, dest, carrier,
# origin and month columns -- confirm against the start of the script.

# Total delay (arrival + departure) for every flight, added by reference.
flights13[, total_delay := arr_delay + dep_delay]

# The LAX subset feeds the next three plots, so compute it once.
lax_flights <- flights13[dest == "LAX"]

# Total delay per carrier; box widths vary with the number of flights.
ggplot(lax_flights, aes(carrier, total_delay)) +
  geom_boxplot(varwidth = TRUE, fill = "plum")

# (g) Total delay per carrier, split by origin.
ggplot(lax_flights, aes(carrier, total_delay)) +
  geom_boxplot(aes(fill = factor(origin)))

# (h) Pie chart of the number of LAX flights per carrier (a single stacked bar
# drawn in polar coordinates).
ggplot(lax_flights, aes(x = "", fill = factor(carrier))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y", start = 0)

# (i) Mean total delay of LAX flights per month.
new <- flights13[dest == "LAX", .(mean.delay = mean(total_delay)), by = month]

# month is numeric, so use a continuous axis with one break per month rather
# than a discrete scale with numeric limits.
ggplot(new, aes(x = month)) +
  geom_line(aes(y = mean.delay)) +
  scale_x_continuous(name = "Month", breaks = 1:12, limits = c(1, 12))

# (j) Mean total delay of LAX flights per month and carrier.
new2 <- flights13[
  dest == "LAX",
  .(mean.delay = mean(total_delay)),
  by = .(month, carrier)
]

ggplot(new2, aes(x = month)) +
  geom_line(aes(y = mean.delay, color = carrier)) +
  scale_x_continuous(name = "Month", breaks = 1:12, limits = c(1, 12))