Cross-Validation is a data partitioning method that can be used to:

Model the correct functional form (choose among candidate regression models).

# Load the DAAG package and expose the ironslag data (chemical and magnetic
# measurements of iron content), then summarize both variables.
# NOTE(review): attach() is discouraged (the masking message below shows why);
# prefer with(ironslag, ...) or ironslag$chemical. Left as-is because the rest
# of this script references chemical/magnetic as bare names.
library(DAAG);attach(ironslag);summary(chemical);summary(magnetic)
The following objects are masked from ironslag (pos = 5):

    chemical, magnetic
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   10.0    16.0    21.0    21.1    25.0    32.0 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   10.0    16.0    20.0    20.8    23.0    40.0 
a <- seq(10, 40, .1)    # grid of chemical values at which to draw the fitted curves

# Fit four candidate functional forms for magnetic ~ chemical.
L1 <- lm(magnetic ~ chemical)                    # linear
L2 <- lm(magnetic ~ chemical + I(chemical^2))    # quadratic
L3 <- lm(log(magnetic) ~ chemical)               # exponential (log response)
L4 <- lm(log(magnetic) ~ log(chemical))          # log-log

# Evaluate each fit on the grid. predict() replaces the manual
# coefficient arithmetic; the values are numerically identical.
newd <- data.frame(chemical = a)
yhat1 <- predict(L1, newd)
yhat2 <- predict(L2, newd)
yhat3 <- exp(predict(L3, newd))    # back-transform from the log scale
yhat4 <- predict(L4, newd)         # kept on the log scale for the log-log panel

par(mfrow = c(2, 2))    # 2 x 2 panel layout
plot(chemical, magnetic, main = "Linear", pch = 16)
lines(a, yhat1, lwd = 2)
plot(chemical, magnetic, main = "Quadratic", pch = 16)
lines(a, yhat2, lwd = 2)
plot(chemical, magnetic, main = "Exponential", pch = 16)
lines(a, yhat3, lwd = 2)
plot(log(chemical), log(magnetic), main = "Log-Log", pch = 16)
lines(log(a), yhat4, lwd = 2)
par(mfrow = c(1, 1))    # restore single-panel display

Model selection (functional form): leave-one-out cross-validation (n-fold CV, written by hand)

    ## Leave-one-out (n-fold) cross-validation: each observation is held out
    ## once, the four candidate models are refit on the remaining n-1 points,
    ## and the prediction error on the held-out point is recorded.
    n <- length(magnetic)   # in DAAG ironslag
    e1 <- e2 <- e3 <- e4 <- numeric(n)
    for (k in seq_len(n)) {
        y <- magnetic[-k]
        x <- chemical[-k]
        xk <- chemical[k]   # held-out predictor value
        yk <- magnetic[k]   # held-out response value

        J1 <- lm(y ~ x)
        e1[k] <- yk - (J1$coef[1] + J1$coef[2] * xk)

        J2 <- lm(y ~ x + I(x^2))
        e2[k] <- yk - (J2$coef[1] + J2$coef[2] * xk + J2$coef[3] * xk^2)

        J3 <- lm(log(y) ~ x)
        e3[k] <- yk - exp(J3$coef[1] + J3$coef[2] * xk)

        J4 <- lm(log(y) ~ log(x))
        e4[k] <- yk - exp(J4$coef[1] + J4$coef[2] * log(xk))
    }

Best model based on mean squared prediction error (MSE): the quadratic Model 2

    # LOOCV mean squared prediction error for each of the four models
    vapply(list(e1, e2, e3, e4), function(e) mean(e^2), numeric(1))
[1] 19.56 17.85 18.44 20.45

Model selection (functional form): Leave-One-Out Cross-Validation (LOOCV, via cv.glm from the boot package)

# Load the Auto data (ISLR) and boot, which provides cv.glm() for CV.
library(ISLR)
library(boot)

# Baseline: simple linear regression of mpg on horsepower.
# glm() with its default gaussian family is equivalent to lm() here, but the
# resulting fit can be passed to boot::cv.glm() below.
glm.fit <- glm(mpg ~ horsepower, data = Auto)
summary(glm.fit)

Call:
glm(formula = mpg ~ horsepower, data = Auto)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-13.571   -3.259   -0.344    2.763   16.924  

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 39.93586    0.71750    55.7   <2e-16 ***
horsepower  -0.15784    0.00645   -24.5   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for gaussian family taken to be 24.07)

    Null deviance: 23819.0  on 391  degrees of freedom
Residual deviance:  9385.9  on 390  degrees of freedom
AIC: 2363

Number of Fisher Scoring iterations: 2
# LOOCV (cv.glm's default K = n); delta holds the raw and the
# bias-adjusted cross-validation estimates of prediction error.
cv.glm(data = Auto, glmfit = glm.fit)$delta
[1] 24.23 24.23
## Fast LOOCV for least-squares fits via the hat-matrix shortcut:
## for an lm/gaussian glm, the leave-one-out residual equals the ordinary
## residual divided by (1 - h_i), where h_i is the i-th leverage, so no
## refitting is needed (this is the PRESS statistic divided by n).
##
## fit: an lm, or a glm with gaussian family.
## Returns the LOOCV estimate of the mean squared prediction error.
locv <- function(fit) {
  h <- lm.influence(fit)$h   # leverages (diagonal of the hat matrix)
  mean((residuals(fit) / (1 - h))^2)
}
##Now we try it out
cv.error=rep(0,4)
degree=1:4
for(d in degree){
  glm.fit=glm(mpg~poly(horsepower,d), data=Auto)
  cv.error[d]=locv(glm.fit)
}
plot(degree,cv.error,type="b",col="blue")

cv.error
[1] 24.23 19.25 19.33 19.42

Model selection (functional form): k-fold cross-validation with K = 10

## 10-fold CV over the same polynomial degrees via boot::cv.glm(K = 10).
## (Fold assignment is random, so results vary slightly run to run.)
degree <- 1:4
cv.error10 <- vapply(degree, function(d) {
  fit.d <- glm(mpg ~ poly(horsepower, d), data = Auto)
  cv.glm(Auto, fit.d, K = 10)$delta[1]   # delta[1]: raw CV estimate
}, numeric(1))
cv.error10
[1] 24.27 19.25 19.41 19.66
# Overlay the two curves: blue = LOOCV errors, red = 10-fold CV errors.
plot(degree, cv.error, type = "b", col = "blue")
lines(degree, cv.error10, type = "b", col = "red")