#!/usr/bin/Rscript
# Load the microarray processing data. The first column of the file supplies
# the row names; after transposing, those names become the column names
# (the script below relies on columns named Arrays and Costs).
data <- data.frame(
  t(read.table("ex43_microarray_processing.data", header = TRUE, row.names = 1))
)

# Fit Costs as a linear function of Arrays. Passing the data frame through
# `data =` keeps the coefficient labels clean and lets predict() accept new data.
fm <- lm(Costs ~ Arrays, data = data)

# Scatter plot with Arrays on the x axis and Costs on the y axis, stated
# explicitly so the axes always agree with the fitted line drawn on top,
# regardless of the column order in the input file.
plot(Costs ~ Arrays, data = data, pch = 19)
abline(fm, col = "red")
print(summary(fm))

# Part a): print the fitted regression equation (intercept + n * slope).
cat("a) Use linear regression to estimate the cost of processing a single array.\n\n ",
    "cost(n) =", coef(fm)[1], "+", "n *", coef(fm)[2], "\n\n")

# Part b): print the question together with the written interpretation.
# The text is a single multi-line string literal and is emitted verbatim.
cat("b) Interpret each component of the regression equation. What does the y-intercept mean in the
context of this problem? What does the slope mean in the context of this problem? How can you
use this information to get a more complete picture of the cost of microarry processing?
intercept: fixed costs, regardless of the amount of processed arrays
slope: slope is lower than 1, so if more arrays get processed then it is less expensive for each array.
")
# Estimated processing cost for `n` arrays, read off the fitted regression
# line: intercept + n * slope. Uses the coefficients of the global model `fm`.
estimated_cost <- function(n) {
  beta <- coef(fm)
  beta[1] + n * beta[2]
}
# Part c): print the predicted cost for 643 arrays, rounded to a whole number,
# together with the rounded standard deviation of the model residuals as the
# quoted error margin.
cat(
  "c) How much will it cost to process 643 arrays in one month? What error do you expect\n",
  " for your prediction?\n\n ",
  "cost(643) =",
  round(estimated_cost(643)),
  "( +/-",
  round(sd(residuals(fm))),
  "[= standard deviation])\n\n"
)