# PART 1 - Dataset for OSSC hazard ratio ----
# Inline, whitespace-separated table with one header row. Columns:
# Age, Treat, Dist, Survival, Time, Hazard.
Input <- ("
Age Treat Dist Survival Time Hazard
59 1 1 0 48 0.2019
62 1 1 0 132 0.1895
66 1 2 1 13 0.3341
53 1 1 0 132 0.2292
60 1 1 0 120 0.1977
30 1 1 1 8 0.3729
63 1 1 0 72 0.1855
63 0 1 1 2 0.5061
71 0 1 1 1 0.4273
81 0 1 1 12 0.3459
43 0 1 0 24 0.7726
71 1 1 0 24 0.1566
69 1 1 0 24 0.1634
57 1 1 1 6 0.2106
78 1 1 0 24 0.1351
")
# read.table(text = ...) parses the string directly, so no text connection is
# left open afterwards.
data <- read.table(text = Input, header = TRUE)
print(data)

# PART 2 - Performing the bootstrap procedure ----
# Draw 1000 row indices with replacement from the observed rows and build the
# resampled data set from them. The draw is random: call set.seed() before
# this point if reproducible output is needed.
mydata <- data
iboot <- sample(seq_len(nrow(mydata)), size = 1000, replace = TRUE)
bootdata <- mydata[iboot, ]
print(bootdata)

# PART 3 - Randomly split the bootstrap data 70:30 ----
# 70 percent of the bootstrap rows go to the training set and the remaining
# 30 percent to the test set. `index` holds row positions within `bootdata`.
# NOTE(review): the split happens after resampling, so copies of the same
# original row can end up in both sets; the test error is likely optimistic.
index <- sample(seq_len(nrow(bootdata)), round(0.70 * nrow(bootdata)))
train_data <- bootdata[index, ]
test_data <- bootdata[-index, ]

# PART 4 - Build the model on the training data ----
# Linear model of Hazard on Age, Treat and Dist, followed by the prediction
# MSE of that model on the held-out test rows.
Model3 <- lm(Hazard ~ Age + Treat + Dist, data = train_data)
print(summary(Model3))
# `index` refers to rows of `bootdata`, so the held-out rows are `test_data`;
# applying `-index` to the 15-row `data` would select the wrong rows.
test <- test_data
predict_lm <- predict(Model3, newdata = test)
MSE.lm <- mean((predict_lm - test$Hazard)^2)
print(MSE.lm)

# PART 5 - Model evaluation ----
# Predict once on the test rows and reuse the result everywhere it is needed.
preds <- predict(Model3, newdata = test_data)
distPred <- preds
test_data$PredictedHazard <- preds
# Side-by-side table of observed and predicted hazard for the test rows.
modelEval <- cbind(test_data$Hazard, preds)
colnames(modelEval) <- c("Actual", "Predicted")
modelEval <- as.data.frame(modelEval)
# `max` caps the number of printed entries (cells), not the number of rows.
print(modelEval, max = 15)