Setting

# Start from an empty workspace (hidden objects included) and make the
# project folder the working directory for the relative paths used later.
rm(list = ls(all.names = TRUE))
setwd("C:/Users/sitdo/Documents/GitHub/IBD-EDA/paper1/")

Loading Data

library(dplyr)

载入程辑包:‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Read the preprocessed table and discard its first column.
data <- read.csv(file = "./data_preprocessed/data.csv") %>%
  select(-1)

Loading Packages

library(e1071)

Method I: Splitting Data

# Reproducible 70/30 hold-out split of `data` into train_data / test_data.
set.seed(123)
splitting_ratio <- 0.7

# seq_len() instead of 1:nrow(data): the colon form yields c(1, 0) when the
# table is empty.
indices <- seq_len(nrow(data))
shuffled_indices <- sample(indices)
train_size <- floor(splitting_ratio * length(indices))

# Partition by position in the shuffled order. A logical mask keeps the two
# sets disjoint even when train_size is 0, where 1:train_size and
# (train_size + 1):length(indices) would count backwards and overlap.
in_train <- seq_along(shuffled_indices) <= train_size
train_indices <- shuffled_indices[in_train]
test_indices <- shuffled_indices[!in_train]

train_data <- data[train_indices, ]
test_data <- data[test_indices, ]

Building Model

# Fit a linear-kernel SVM of `dod` on every other column of the training set.
linear_svm_model <- svm(
  dod ~ .,
  data = train_data,
  kernel = "linear"
)

# Score the held-out rows with the fitted model.
predictions <- predict(object = linear_svm_model, newdata = test_data)

Performance

Confusion Matrix

# Cross-tabulate the observed outcome (rows) against the prediction
# thresholded at 0.5 (columns). Both sides are factors with fixed levels 0/1
# so the table is always 2 x 2: a bare table() drops a class that never
# occurs, and the [2, 2] lookups below would then fail with
# "subscript out of bounds".
confusion_matrix <- table(
  factor(as.numeric(test_data$dod), levels = c(0, 1)),
  factor(as.numeric(predictions > 0.5), levels = c(0, 1))
)

# Row/column 1 is class 0, so this naming treats class 0 as the "positive"
# class and class 1 as the "negative" class.
# NOTE(review): the ROC step uses case = 1, so recall/specificity computed
# from these cells are relative to class 0 -- confirm this is intended.
TP <- confusion_matrix[1, 1]
TN <- confusion_matrix[2, 2]
FP <- confusion_matrix[2, 1]
FN <- confusion_matrix[1, 2]

## Accuracy: correctly classified rows over all rows
accuracy <- sum(TP, TN) / sum(TP, FP, TN, FN)
cat("Accuracy:", accuracy, "\n")
Accuracy: 0.8595041 
## Recall: true positives over all actual positives
recall <- TP / sum(TP, FN)
cat("Recall:", recall, "\n")
Recall: 0.9641694 
## Precision: true positives over all predicted positives
precision <- TP / sum(TP, FP)
cat("Precision:", precision, "\n")
Precision: 0.8809524 
## Specificity: true negatives over all actual negatives
specificity <- TN / sum(TN, FP)
cat("Specificity:", specificity, "\n")
Specificity: 0.2857143 
## F1 score: harmonic mean of precision and recall
f1_score <- 2 * precision * recall / (precision + recall)
cat("F1 Score:", f1_score, "\n")
F1 Score: 0.9206843 

ROC Curve

library(pROC)
Type 'citation("pROC")' for a citation.

载入程辑包:‘pROC’

The following objects are masked from ‘package:stats’:

    cov, smooth, var
# Build the ROC object from the observed outcome (response) and the raw,
# un-thresholded SVM output (predictor).
roc_obj <- roc(as.numeric(test_data$dod), predictions)
Setting levels: control = 0, case = 1
Setting direction: controls < cases
# Draw the ROC curve, printing the AUC and a threshold on the plot, over a
# background grid spaced 0.2 apart in both directions.
plot(
  roc_obj,
  main = "ROC Curve - Linear SVM",
  col = "blue",
  print.auc = TRUE,
  print.thres = TRUE,
  legacy.axes = TRUE,
  grid = c(0.2, 0.2),
  grid.col = c("green", "orange")
)

Method II: Cross Validation

# Perform 10-fold cross-validation
num_folds <- 10

# Fold ids are assigned to contiguous runs of rows, in the order the rows
# appear in `data`.
# NOTE(review): rows are not shuffled before folding; if the input file is
# sorted (e.g. by outcome or date) the folds will differ systematically --
# confirm the row order is arbitrary.
folds <- cut(seq_len(nrow(data)), breaks = num_folds, labels = FALSE)

# One slot per fold, flattened after the loop. This avoids re-copying a
# growing vector on every iteration.
fold_predictions <- vector("list", num_folds)
fold_actuals <- vector("list", num_folds)

for (i in seq_len(num_folds)) {
  # Hold out fold i; train on the remaining folds
  train_data <- data[folds != i, ]
  test_data <- data[folds == i, ]

  # Train a Linear SVM model
  linear_svm_model <- svm(dod ~ ., data = train_data, kernel = "linear")

  # Make predictions for the held-out fold
  predictions <- predict(linear_svm_model, newdata = test_data)

  fold_predictions[[i]] <- predictions
  # The outcome is read by name, matching the `dod ~ .` formula, rather than
  # by column position.
  fold_actuals[[i]] <- test_data$dod
}

# Out-of-fold predictions and matching observed outcomes, in fold order
all_predictions <- unlist(fold_predictions)
all_actuals <- unlist(fold_actuals)

Performance

Confusion Matrix

# Cross-tabulate the pooled observed outcomes (rows) against the pooled
# predictions thresholded at 0.5 (columns). Both sides are factors with fixed
# levels 0/1 so the table is always 2 x 2: a bare table() drops a class that
# never occurs, and the [2, 2] lookups below would then fail with
# "subscript out of bounds".
confusion_matrix <- table(
  factor(as.numeric(all_actuals), levels = c(0, 1)),
  factor(as.numeric(all_predictions > 0.5), levels = c(0, 1))
)

# Row/column 1 is class 0, so this naming treats class 0 as the "positive"
# class and class 1 as the "negative" class.
# NOTE(review): the ROC step uses case = 1, so recall/specificity computed
# from these cells are relative to class 0 -- confirm this is intended.
TP <- confusion_matrix[1, 1]
TN <- confusion_matrix[2, 2]
FP <- confusion_matrix[2, 1]
FN <- confusion_matrix[1, 2]

## Accuracy: correctly classified rows over all rows
accuracy <- sum(TP, TN) / sum(TP, FP, TN, FN)
cat("Accuracy:", accuracy, "\n")
Accuracy: 0.8465039 
## Recall: true positives over all actual positives
recall <- TP / sum(TP, FN)
cat("Recall:", recall, "\n")
Recall: 0.9567992 
## Precision: true positives over all predicted positives
precision <- TP / sum(TP, FP)
cat("Precision:", precision, "\n")
Precision: 0.8732079 
## Specificity: true negatives over all actual negatives
specificity <- TN / sum(TN, FP)
cat("Specificity:", specificity, "\n")
Specificity: 0.2552632 
## F1 score: harmonic mean of precision and recall
f1_score <- 2 * precision * recall / (precision + recall)
cat("F1 Score:", f1_score, "\n")
F1 Score: 0.9130944 

ROC Curve

# Build the ROC object from the pooled observed outcomes (response) and the
# pooled raw, un-thresholded SVM outputs (predictor).
roc_obj <- roc(as.numeric(all_actuals), all_predictions)
Setting levels: control = 0, case = 1
Setting direction: controls < cases
# Draw the cross-validated ROC curve, printing the AUC and a threshold on the
# plot, over a background grid spaced 0.2 apart in both directions.
plot(
  roc_obj,
  main = "ROC Curve - Linear SVM (Cross Validation)",
  col = "blue",
  print.auc = TRUE,
  print.thres = TRUE,
  legacy.axes = TRUE,
  grid = c(0.2, 0.2),
  grid.col = c("green", "orange")
)

LS0tDQp0aXRsZTogIkxpbmVhclNWTSINCm91dHB1dDogDQogIGh0bWxfbm90ZWJvb2s6IA0KICAgIHRvYzogdHJ1ZQ0KICAgIHRoZW1lOiBjb3Ntbw0KLS0tDQoNCiMgU2V0dGluZw0KDQpgYGB7cn0NCnJtKGxpc3Q9bHMoYWxsPVRSVUUpKQ0Kc2V0d2QoJ0M6L1VzZXJzL3NpdGRvL0RvY3VtZW50cy9HaXRIdWIvSUJELUVEQS9wYXBlcjEvJykNCmBgYA0KDQojIExvYWRpbmcgRGF0YQ0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpDQoNCmRhdGEgPC0gcmVhZC5jc3YoIi4vZGF0YV9wcmVwcm9jZXNzZWQvZGF0YS5jc3YiKSAlPiUgc2VsZWN0KC0xKQ0KYGBgDQoNCiMgSW5zdGFsbGluZyBQYWNrYWdlcw0KDQpgYGB7cn0NCmxpYnJhcnkoZTEwNzEpDQpgYGANCg0KIyBNZXRob2QgSTogU3BsaXR0aW5nIERhdGENCg0KYGBge3J9DQpzZXQuc2VlZCgxMjMpDQpzcGxpdHRpbmdfcmF0aW8gPC0gMC43DQoNCmluZGljZXMgPC0gMTpucm93KGRhdGEpDQpzaHVmZmxlZF9pbmRpY2VzIDwtIHNhbXBsZShpbmRpY2VzKSANCnRyYWluX3NpemUgPC0gZmxvb3Ioc3BsaXR0aW5nX3JhdGlvICogbGVuZ3RoKGluZGljZXMpKQ0KDQp0cmFpbl9pbmRpY2VzIDwtIHNodWZmbGVkX2luZGljZXNbMTp0cmFpbl9zaXplXQ0KdGVzdF9pbmRpY2VzIDwtIHNodWZmbGVkX2luZGljZXNbKHRyYWluX3NpemUgKyAxKTpsZW5ndGgoaW5kaWNlcyldDQoNCnRyYWluX2RhdGEgPC0gZGF0YVt0cmFpbl9pbmRpY2VzLCBdDQp0ZXN0X2RhdGEgPC0gZGF0YVt0ZXN0X2luZGljZXMsIF0NCmBgYA0KDQojIyBCdWlsZGluZyBNb2RlbA0KDQpgYGB7cn0NCmxpbmVhcl9zdm1fbW9kZWwgPC0gc3ZtKGRvZCB+IC4sIGRhdGEgPSB0cmFpbl9kYXRhLCBrZXJuZWwgPSAibGluZWFyIikNCg0KcHJlZGljdGlvbnMgPC0gcHJlZGljdChsaW5lYXJfc3ZtX21vZGVsLCBuZXdkYXRhID0gdGVzdF9kYXRhKQ0KYGBgDQoNCiMjIFBlcmZvcm1hbmNlDQoNCiMjIyBDb25mdXNpb24gTWF0cml4DQoNCmBgYHtyfQ0KY29uZnVzaW9uX21hdHJpeCA8LSB0YWJsZSgNCiAgYXMubnVtZXJpYyh0ZXN0X2RhdGEkZG9kKSwgYXMubnVtZXJpYyhpZmVsc2UocHJlZGljdGlvbnMgPiAwLjUsIDEsIDApKQ0KKQ0KDQpUUCA8LSBjb25mdXNpb25fbWF0cml4WzEsIDFdDQpUTiA8LSBjb25mdXNpb25fbWF0cml4WzIsIDJdDQpGUCA8LSBjb25mdXNpb25fbWF0cml4WzIsIDFdDQpGTiA8LSBjb25mdXNpb25fbWF0cml4WzEsIDJdDQoNCiMjIENhbGN1bGF0ZSBBY2N1cmFjeQ0KYWNjdXJhY3kgPC0gKFRQICsgVE4pIC8gKFRQICsgRlAgKyBUTiArIEZOKQ0KY2F0KCJBY2N1cmFjeToiLCBhY2N1cmFjeSwgIlxuIikNCg0KIyMgQ2FsY3VsYXRlIFJlY2FsbA0KcmVjYWxsIDwtIFRQIC8gKFRQICsgRk4pDQpjYXQoIlJlY2FsbDoiLCByZWNhbGwsICJcbiIpDQoNCiMjIENhbGN1bGF0ZSBQcmVjaXNpb24NCnByZWNpc2lvbiA8LSBUUCAvIChUUCArIEZQKQ0KY2F0KCJQcmVjaXNpb246IiwgcHJlY2lzaW9uLCAiXG4iKQ0KDQoj
IyBDYWxjdWxhdGUgU3BlY2lmaWNpdHkNCnNwZWNpZmljaXR5IDwtIFROIC8gKFROICsgRlApDQpjYXQoIlNwZWNpZmljaXR5OiIsIHNwZWNpZmljaXR5LCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgRjEgU2NvcmUNCmYxX3Njb3JlIDwtIDIgKiAocHJlY2lzaW9uICogcmVjYWxsKSAvIChwcmVjaXNpb24gKyByZWNhbGwpDQpjYXQoIkYxIFNjb3JlOiIsIGYxX3Njb3JlLCAiXG4iKQ0KYGBgDQoNCiMjIyBST0MgQ3VydmUNCg0KYGBge3J9DQpsaWJyYXJ5KHBST0MpDQojIENhbGN1bGF0ZSBST0MgY3VydmUgdXNpbmcgdGhlIGFjdHVhbCB2YWx1ZXMgYW5kIHByZWRpY3Rpb25zDQpyb2Nfb2JqIDwtIHJvYygNCiAgYXMubnVtZXJpYyh0ZXN0X2RhdGEkZG9kKSwgcHJlZGljdGlvbnMNCikNCg0KIyBQbG90IHRoZSBST0MgY3VydmUNCnBsb3QoDQogIHJvY19vYmosDQogIGNvbCA9ICJibHVlIiwNCiAgbWFpbiA9ICJST0MgQ3VydmUgLSBMaW5lYXIgU1ZNIiwNCiAgbGVnYWN5LmF4ZXMgPSBUUlVFLA0KICBwcmludC5hdWMgPSBUUlVFLA0KICBwcmludC50aHJlcyA9IFRSVUUsDQogIGdyaWQgPSBjKDAuMiwgMC4yKSwNCiAgZ3JpZC5jb2wgPSBjKCJncmVlbiIsICJvcmFuZ2UiKQ0KKQ0KYGBgDQoNCiMgTWV0aG9kIElJOiBDcm9zcyBWYWxpZGF0aW9uDQoNCmBgYHtyfQ0KIyBQZXJmb3JtIDEwLWZvbGQgY3Jvc3MtdmFsaWRhdGlvbg0KbnVtX2ZvbGRzIDwtIDEwDQpmb2xkcyA8LSBjdXQoc2VxKDEsIG5yb3coZGF0YSkpLCBicmVha3MgPSBudW1fZm9sZHMsIGxhYmVscyA9IEZBTFNFKQ0KDQojIENyZWF0ZSBlbXB0eSB2ZWN0b3JzIHRvIHN0b3JlIHRoZSBwcmVkaWN0aW9ucyBhbmQgYWN0dWFsIHZhbHVlcw0KYWxsX3ByZWRpY3Rpb25zIDwtIHZlY3RvcigpDQphbGxfYWN0dWFscyA8LSB2ZWN0b3IoKQ0KDQpmb3IgKGkgaW4gMTpudW1fZm9sZHMpIHsNCiAgIyBTcGxpdCB0aGUgZGF0YSBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0IHNldHMgZm9yIHRoZSBjdXJyZW50IGZvbGQNCiAgdHJhaW5fZGF0YSA8LSBkYXRhW2ZvbGRzICE9IGksIF0NCiAgdGVzdF9kYXRhIDwtIGRhdGFbZm9sZHMgPT0gaSwgXQ0KICANCiAgdHJhaW5fWCA8LSBhcy5tYXRyaXgodHJhaW5fZGF0YVssIC0xXSkNCiAgdHJhaW5feSA8LSB0cmFpbl9kYXRhWywgMV0NCiAgdGVzdF9YIDwtIGFzLm1hdHJpeCh0ZXN0X2RhdGFbLCAtMV0pDQogIHRlc3RfeSA8LSB0ZXN0X2RhdGFbLCAxXQ0KDQogICMgVHJhaW4gYSBMaW5lYXIgU1ZNIG1vZGVsDQogIGxpbmVhcl9zdm1fbW9kZWwgPC0gc3ZtKGRvZCB+IC4sIGRhdGEgPSB0cmFpbl9kYXRhLCBrZXJuZWwgPSAibGluZWFyIikNCiAgDQogICMgTWFrZSBwcmVkaWN0aW9ucyB1c2luZyB0aGUgdHJhaW5lZCBtb2RlbHMNCiAgcHJlZGljdGlvbnMgPC0gcHJlZGljdChsaW5lYXJfc3ZtX21vZGVsLCBuZXdkYXRhID0gdGVzdF9kYXRhKQ0KICANCiAgIyBBcHBlbmQgdGhlIHByZWRpY3Rpb25zIGFuZCBhY3R1YWwgdmFsdWVzIHRvIHRoZSB2ZWN0
b3JzDQogIGFsbF9wcmVkaWN0aW9ucyA8LSBjKGFsbF9wcmVkaWN0aW9ucywgcHJlZGljdGlvbnMpDQogIGFsbF9hY3R1YWxzIDwtIGMoYWxsX2FjdHVhbHMsIHRlc3RfeSkNCn0NCg0KYGBgDQoNCiMjIFBlcmZvcm1hbmNlDQoNCiMjIyBDb25mdXNpb24gTWF0cml4DQoNCmBgYHtyfQ0KY29uZnVzaW9uX21hdHJpeCA8LSB0YWJsZSgNCiAgYXMubnVtZXJpYyhhbGxfYWN0dWFscyksIA0KICBhcy5udW1lcmljKGlmZWxzZShhbGxfcHJlZGljdGlvbnMgPiAwLjUsIDEsIDApKQ0KKQ0KDQpUUCA8LSBjb25mdXNpb25fbWF0cml4WzEsIDFdDQpUTiA8LSBjb25mdXNpb25fbWF0cml4WzIsIDJdDQpGUCA8LSBjb25mdXNpb25fbWF0cml4WzIsIDFdDQpGTiA8LSBjb25mdXNpb25fbWF0cml4WzEsIDJdDQoNCiMjIENhbGN1bGF0ZSBBY2N1cmFjeQ0KYWNjdXJhY3kgPC0gKFRQICsgVE4pIC8gKFRQICsgRlAgKyBUTiArIEZOKQ0KY2F0KCJBY2N1cmFjeToiLCBhY2N1cmFjeSwgIlxuIikNCg0KIyMgQ2FsY3VsYXRlIFJlY2FsbA0KcmVjYWxsIDwtIFRQIC8gKFRQICsgRk4pDQpjYXQoIlJlY2FsbDoiLCByZWNhbGwsICJcbiIpDQoNCiMjIENhbGN1bGF0ZSBQcmVjaXNpb24NCnByZWNpc2lvbiA8LSBUUCAvIChUUCArIEZQKQ0KY2F0KCJQcmVjaXNpb246IiwgcHJlY2lzaW9uLCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgU3BlY2lmaWNpdHkNCnNwZWNpZmljaXR5IDwtIFROIC8gKFROICsgRlApDQpjYXQoIlNwZWNpZmljaXR5OiIsIHNwZWNpZmljaXR5LCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgRjEgU2NvcmUNCmYxX3Njb3JlIDwtIDIgKiAocHJlY2lzaW9uICogcmVjYWxsKSAvIChwcmVjaXNpb24gKyByZWNhbGwpDQpjYXQoIkYxIFNjb3JlOiIsIGYxX3Njb3JlLCAiXG4iKQ0KDQpgYGANCg0KIyMjIFJPQyBDdXJ2ZQ0KDQpgYGB7cn0NCiMgQ2FsY3VsYXRlIFJPQyBjdXJ2ZSB1c2luZyB0aGUgYWN0dWFsIHZhbHVlcyBhbmQgcHJlZGljdGlvbnMNCnJvY19vYmogPC0gcm9jKA0KICBhcy5udW1lcmljKGFsbF9hY3R1YWxzKSwgYWxsX3ByZWRpY3Rpb25zDQopDQoNCiMgUGxvdCB0aGUgUk9DIGN1cnZlDQpwbG90KA0KICByb2Nfb2JqLA0KICBjb2wgPSAiYmx1ZSIsDQogIG1haW4gPSAiUk9DIEN1cnZlIC0gTGluZWFyIFNWTSAoQ3Jvc3MgVmFsaWRhdGlvbikiLA0KICBsZWdhY3kuYXhlcyA9IFRSVUUsDQogIHByaW50LmF1YyA9IFRSVUUsDQogIHByaW50LnRocmVzID0gVFJVRSwNCiAgZ3JpZCA9IGMoMC4yLCAwLjIpLA0KICBncmlkLmNvbCA9IGMoImdyZWVuIiwgIm9yYW5nZSIpDQopDQpgYGANCg==