Setting
# Start from an empty global environment: remove every object, including
# those whose names begin with a dot.
rm(list = ls(all.names = TRUE))
# Make the project folder the working directory so the relative data path
# used later resolves.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
setwd("C:/Users/sitdo/Documents/GitHub/IBD-EDA/paper1/")
Loading Data
library(dplyr)
载入程辑包:‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Read the preprocessed table and drop its first column.
# NOTE(review): the first column is presumably a saved row index; confirm.
data <- select(read.csv("./data_preprocessed/data.csv"), -1)
Loading Packages
library(randomForest)
randomForest 4.7-1.1
Type rfNews() to see new features/changes/bug fixes.
载入程辑包:‘randomForest’
The following object is masked from ‘package:dplyr’:
combine
Method I: Splitting Data
# Hold-out split: shuffle the rows once, train on the first 70 % and test on
# the remainder.
set.seed(123)
splitting_ratio <- 0.7

# seq_len() yields an empty sequence for a zero-row table, whereas 1:0 would
# count backwards and produce c(1, 0).
indices <- seq_len(nrow(data))
shuffled_indices <- sample(indices)
train_size <- floor(splitting_ratio * length(indices))

# head()/tail() keep the two index sets disjoint even when one of them is
# empty; `(train_size + 1):length(indices)` would run backwards in that case
# and put a training row into the test set.
train_indices <- head(shuffled_indices, train_size)
test_indices <- tail(shuffled_indices, length(indices) - train_size)

train_data <- data[train_indices, ]
test_data <- data[test_indices, ]

# Column 1 is used as the response; every other column is a predictor.
# drop = FALSE keeps the predictors a named matrix even if only one remains.
train_X <- as.matrix(train_data[, -1, drop = FALSE])
train_y <- train_data[, 1]
test_X <- as.matrix(test_data[, -1, drop = FALSE])
test_y <- test_data[, 1]
Building Model
# Fit 100 bagged trees. mtry is set to the full number of predictors so every
# split considers all of them and the trees differ only through bootstrap
# resampling; with the default mtry only a random subset of predictors is
# tried at each split, which is a random forest rather than bagging.
# train_y is numeric, so randomForest fits in regression mode.
bagged_tree <- randomForest(
  train_X, train_y,
  ntree = 100,
  mtry = ncol(train_X)
)
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
# Score the held-out rows with the fitted ensemble.
predictions <- predict(bagged_tree, newdata = test_X)
Method II: Cross Validation
# Perform 10-fold cross-validation and pool the out-of-fold predictions.
# Re-seed so this chunk gives the same folds and trees when run on its own.
set.seed(123)
num_folds <- 10

# Give every row a fold label 1..num_folds in near-equal blocks, then permute
# the labels so fold membership does not follow the row order of the file
# (the hold-out split of Method I shuffles the rows as well).
folds <- cut(seq_len(nrow(data)), breaks = num_folds, labels = FALSE)
folds <- folds[sample.int(length(folds))]

# One slot per fold; filling preallocated lists avoids re-copying a growing
# vector on every iteration.
fold_predictions <- vector("list", num_folds)
fold_actuals <- vector("list", num_folds)

for (i in seq_len(num_folds)) {
  # Split the data into training and test sets for the current fold
  train_data <- data[folds != i, ]
  test_data <- data[folds == i, ]

  # Column 1 is used as the response; every other column is a predictor.
  # drop = FALSE keeps the predictors a matrix even if only one remains.
  train_X <- as.matrix(train_data[, -1, drop = FALSE])
  train_y <- train_data[, 1]
  test_X <- as.matrix(test_data[, -1, drop = FALSE])
  test_y <- test_data[, 1]

  # Train the Bagged Tree model: mtry equal to the number of predictors makes
  # every split consider all of them, so the trees differ only through
  # bootstrap resampling (the default mtry would fit a random forest).
  bagged_tree <- randomForest(
    train_X, train_y,
    ntree = 100,
    mtry = ncol(train_X)
  )

  # Make predictions on the test set
  predictions <- predict(bagged_tree, test_X)

  # Store this fold's predictions and actual values
  fold_predictions[[i]] <- predictions
  fold_actuals[[i]] <- test_y
}

# Flatten the per-fold results, in fold order, into the pooled vectors.
all_predictions <- unlist(fold_predictions)
all_actuals <- unlist(fold_actuals)
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
Warning: The response has five or fewer unique values. Are you sure you want to do regression?
LS0tDQp0aXRsZTogIkJhZ2dlZFRyZWUiDQpvdXRwdXQ6IA0KICBodG1sX25vdGVib29rOiANCiAgICB0b2M6IHRydWUNCnRoZW1lOiBjb3Ntbw0KLS0tDQogIA0KIyBTZXR0aW5nDQoNCmBgYHtyfQ0Kcm0obGlzdD1scyhhbGw9VFJVRSkpDQpzZXR3ZCgnQzovVXNlcnMvc2l0ZG8vRG9jdW1lbnRzL0dpdEh1Yi9JQkQtRURBL3BhcGVyMS8nKQ0KYGBgDQoNCiMgTG9hZGluZyBEYXRhDQoNCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCg0KZGF0YSA8LSByZWFkLmNzdigiLi9kYXRhX3ByZXByb2Nlc3NlZC9kYXRhLmNzdiIpICU+JSBzZWxlY3QoLTEpDQpgYGANCg0KIyBJbnN0YWxsaW5nIFBhY2thZ2VzDQoNCmBgYHtyfQ0KbGlicmFyeShyYW5kb21Gb3Jlc3QpDQpgYGANCg0KIyBNZXRob2QgSTogU3BsaXR0aW5nIERhdGENCg0KYGBge3J9DQpzZXQuc2VlZCgxMjMpDQpzcGxpdHRpbmdfcmF0aW8gPC0gMC43DQoNCmluZGljZXMgPC0gMTpucm93KGRhdGEpDQpzaHVmZmxlZF9pbmRpY2VzIDwtIHNhbXBsZShpbmRpY2VzKSANCnRyYWluX3NpemUgPC0gZmxvb3Ioc3BsaXR0aW5nX3JhdGlvICogbGVuZ3RoKGluZGljZXMpKQ0KDQp0cmFpbl9pbmRpY2VzIDwtIHNodWZmbGVkX2luZGljZXNbMTp0cmFpbl9zaXplXQ0KdGVzdF9pbmRpY2VzIDwtIHNodWZmbGVkX2luZGljZXNbKHRyYWluX3NpemUgKyAxKTpsZW5ndGgoaW5kaWNlcyldDQoNCnRyYWluX2RhdGEgPC0gZGF0YVt0cmFpbl9pbmRpY2VzLCBdDQp0ZXN0X2RhdGEgPC0gZGF0YVt0ZXN0X2luZGljZXMsIF0NCmBgYA0KDQoNCmBgYHtyfQ0KdHJhaW5fWCA8LSBhcy5tYXRyaXgodHJhaW5fZGF0YVssIC0xXSkNCnRyYWluX3kgPC0gdHJhaW5fZGF0YVssIDFdDQp0ZXN0X1ggPC0gYXMubWF0cml4KHRlc3RfZGF0YVssIC0xXSkNCnRlc3RfeSA8LSB0ZXN0X2RhdGFbLCAxXQ0KYGBgDQoNCiMjIEJ1aWxkaW5nIE1vZGVsDQoNCmBgYHtyfQ0KYmFnZ2VkX3RyZWUgPC0gcmFuZG9tRm9yZXN0KHRyYWluX1gsIHRyYWluX3ksIG50cmVlID0gMTAwKQ0KDQpwcmVkaWN0aW9ucyA8LSBwcmVkaWN0KGJhZ2dlZF90cmVlLCB0ZXN0X1gpDQpgYGANCg0KIyMgUGVyZm9ybWFuY2UNCg0KIyMjIENvbmZ1c2lvbiBNYXRyaXgNCg0KYGBge3J9DQpjb25mdXNpb25fbWF0cml4IDwtIHRhYmxlKA0KICBhcy5udW1lcmljKHRlc3RfeSksIA0KICBhcy5udW1lcmljKGlmZWxzZShwcmVkaWN0aW9ucyA+IDAuNSwgMSwgMCkpDQopDQoNClRQIDwtIGNvbmZ1c2lvbl9tYXRyaXhbMSwgMV0NClROIDwtIGNvbmZ1c2lvbl9tYXRyaXhbMiwgMl0NCkZQIDwtIGNvbmZ1c2lvbl9tYXRyaXhbMiwgMV0NCkZOIDwtIGNvbmZ1c2lvbl9tYXRyaXhbMSwgMl0NCg0KIyMgQ2FsY3VsYXRlIEFjY3VyYWN5DQphY2N1cmFjeSA8LSAoVFAgKyBUTikgLyAoVFAgKyBGUCArIFROICsgRk4pDQpjYXQoIkFjY3VyYWN5OiIsIGFjY3VyYWN5LCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgUmVjYWxsDQpyZWNhbGwgPC0gVFAgLyAoVFAgKyBGTikNCmNh
dCgiUmVjYWxsOiIsIHJlY2FsbCwgIlxuIikNCg0KIyMgQ2FsY3VsYXRlIFByZWNpc2lvbg0KcHJlY2lzaW9uIDwtIFRQIC8gKFRQICsgRlApDQpjYXQoIlByZWNpc2lvbjoiLCBwcmVjaXNpb24sICJcbiIpDQoNCiMjIENhbGN1bGF0ZSBTcGVjaWZpY2l0eQ0Kc3BlY2lmaWNpdHkgPC0gVE4gLyAoVE4gKyBGUCkNCmNhdCgiU3BlY2lmaWNpdHk6Iiwgc3BlY2lmaWNpdHksICJcbiIpDQoNCiMjIENhbGN1bGF0ZSBGMSBTY29yZQ0KZjFfc2NvcmUgPC0gMiAqIChwcmVjaXNpb24gKiByZWNhbGwpIC8gKHByZWNpc2lvbiArIHJlY2FsbCkNCmNhdCgiRjEgU2NvcmU6IiwgZjFfc2NvcmUsICJcbiIpDQpgYGANCg0KIyMjIFJPQyBDdXJ2ZQ0KDQpgYGB7cn0NCmxpYnJhcnkocFJPQykNCiMgQ2FsY3VsYXRlIFJPQyBjdXJ2ZSB1c2luZyB0aGUgYWN0dWFsIHZhbHVlcyBhbmQgcHJlZGljdGlvbnMNCnJvY19vYmogPC0gcm9jKA0KICBhcy5udW1lcmljKHRlc3RfZGF0YSRkb2QpLCBwcmVkaWN0aW9ucw0KKQ0KDQojIFBsb3QgdGhlIFJPQyBjdXJ2ZQ0KcGxvdCgNCiAgcm9jX29iaiwNCiAgY29sID0gImJsdWUiLA0KICBtYWluID0gIlJPQyBDdXJ2ZSAtIEJhZ2dlZCBUcmVlIiwNCiAgbGVnYWN5LmF4ZXMgPSBUUlVFLA0KICBwcmludC5hdWMgPSBUUlVFLA0KICBwcmludC50aHJlcyA9IFRSVUUsDQogIGdyaWQgPSBjKDAuMiwgMC4yKSwNCiAgZ3JpZC5jb2wgPSBjKCJncmVlbiIsICJvcmFuZ2UiKQ0KKQ0KYGBgDQoNCiMgTWV0aG9kIElJOiBDcm9zcyBWYWxpZGF0aW9uDQoNCmBgYHtyfQ0KIyBQZXJmb3JtIDEwLWZvbGQgY3Jvc3MtdmFsaWRhdGlvbg0KbnVtX2ZvbGRzIDwtIDEwDQpmb2xkcyA8LSBjdXQoc2VxKDEsIG5yb3coZGF0YSkpLCBicmVha3MgPSBudW1fZm9sZHMsIGxhYmVscyA9IEZBTFNFKQ0KDQojIENyZWF0ZSBlbXB0eSB2ZWN0b3JzIHRvIHN0b3JlIHRoZSBwcmVkaWN0aW9ucyBhbmQgYWN0dWFsIHZhbHVlcw0KYWxsX3ByZWRpY3Rpb25zIDwtIHZlY3RvcigpDQphbGxfYWN0dWFscyA8LSB2ZWN0b3IoKQ0KDQpmb3IgKGkgaW4gMTpudW1fZm9sZHMpIHsNCiAgIyBTcGxpdCB0aGUgZGF0YSBpbnRvIHRyYWluaW5nIGFuZCB0ZXN0IHNldHMgZm9yIHRoZSBjdXJyZW50IGZvbGQNCiAgdHJhaW5fZGF0YSA8LSBkYXRhW2ZvbGRzICE9IGksIF0NCiAgdGVzdF9kYXRhIDwtIGRhdGFbZm9sZHMgPT0gaSwgXQ0KICANCiAgdHJhaW5fWCA8LSBhcy5tYXRyaXgodHJhaW5fZGF0YVssIC0xXSkNCiAgdHJhaW5feSA8LSB0cmFpbl9kYXRhWywgMV0NCiAgdGVzdF9YIDwtIGFzLm1hdHJpeCh0ZXN0X2RhdGFbLCAtMV0pDQogIHRlc3RfeSA8LSB0ZXN0X2RhdGFbLCAxXQ0KDQogICMgVHJhaW4gdGhlIEJhZ2dlZCBUcmVlIG1vZGVsDQogIGJhZ2dlZF90cmVlIDwtIHJhbmRvbUZvcmVzdCh0cmFpbl9YLCB0cmFpbl95LCBudHJlZSA9IDEwMCkNCiAgDQogICMgTWFrZSBwcmVkaWN0aW9ucyBvbiB0aGUgdGVzdCBzZXQNCiAgcHJlZGljdGlvbnMgPC0gcHJl
ZGljdChiYWdnZWRfdHJlZSwgdGVzdF9YKQ0KICANCiAgIyBBcHBlbmQgdGhlIHByZWRpY3Rpb25zIGFuZCBhY3R1YWwgdmFsdWVzIHRvIHRoZSB2ZWN0b3JzDQogIGFsbF9wcmVkaWN0aW9ucyA8LSBjKGFsbF9wcmVkaWN0aW9ucywgcHJlZGljdGlvbnMpDQogIGFsbF9hY3R1YWxzIDwtIGMoYWxsX2FjdHVhbHMsIHRlc3RfeSkNCn0NCg0KYGBgDQoNCiMjIFBlcmZvcm1hbmNlDQoNCiMjIyBDb25mdXNpb24gTWF0cml4DQoNCmBgYHtyfQ0KY29uZnVzaW9uX21hdHJpeCA8LSB0YWJsZSgNCiAgYXMubnVtZXJpYyhhbGxfYWN0dWFscyksIA0KICBhcy5udW1lcmljKGlmZWxzZShhbGxfcHJlZGljdGlvbnMgPiAwLjUsIDEsIDApKQ0KKQ0KDQpUUCA8LSBjb25mdXNpb25fbWF0cml4WzEsIDFdDQpUTiA8LSBjb25mdXNpb25fbWF0cml4WzIsIDJdDQpGUCA8LSBjb25mdXNpb25fbWF0cml4WzIsIDFdDQpGTiA8LSBjb25mdXNpb25fbWF0cml4WzEsIDJdDQoNCiMjIENhbGN1bGF0ZSBBY2N1cmFjeQ0KYWNjdXJhY3kgPC0gKFRQICsgVE4pIC8gKFRQICsgRlAgKyBUTiArIEZOKQ0KY2F0KCJBY2N1cmFjeToiLCBhY2N1cmFjeSwgIlxuIikNCg0KIyMgQ2FsY3VsYXRlIFJlY2FsbA0KcmVjYWxsIDwtIFRQIC8gKFRQICsgRk4pDQpjYXQoIlJlY2FsbDoiLCByZWNhbGwsICJcbiIpDQoNCiMjIENhbGN1bGF0ZSBQcmVjaXNpb24NCnByZWNpc2lvbiA8LSBUUCAvIChUUCArIEZQKQ0KY2F0KCJQcmVjaXNpb246IiwgcHJlY2lzaW9uLCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgU3BlY2lmaWNpdHkNCnNwZWNpZmljaXR5IDwtIFROIC8gKFROICsgRlApDQpjYXQoIlNwZWNpZmljaXR5OiIsIHNwZWNpZmljaXR5LCAiXG4iKQ0KDQojIyBDYWxjdWxhdGUgRjEgU2NvcmUNCmYxX3Njb3JlIDwtIDIgKiAocHJlY2lzaW9uICogcmVjYWxsKSAvIChwcmVjaXNpb24gKyByZWNhbGwpDQpjYXQoIkYxIFNjb3JlOiIsIGYxX3Njb3JlLCAiXG4iKQ0KDQpgYGANCg0KIyMjIFJPQyBDdXJ2ZQ0KDQpgYGB7cn0NCiMgQ2FsY3VsYXRlIFJPQyBjdXJ2ZSB1c2luZyB0aGUgYWN0dWFsIHZhbHVlcyBhbmQgcHJlZGljdGlvbnMNCnJvY19vYmogPC0gcm9jKA0KICBhcy5udW1lcmljKGFsbF9hY3R1YWxzKSwgYWxsX3ByZWRpY3Rpb25zDQopDQoNCiMgUGxvdCB0aGUgUk9DIGN1cnZlDQpwbG90KA0KICByb2Nfb2JqLA0KICBjb2wgPSAiYmx1ZSIsDQogIG1haW4gPSAiUk9DIEN1cnZlIC0gQmFnZ2VkIFRyZWUgKENyb3NzIFZhbGlkYXRpb24pIiwNCiAgbGVnYWN5LmF4ZXMgPSBUUlVFLA0KICBwcmludC5hdWMgPSBUUlVFLA0KICBwcmludC50aHJlcyA9IFRSVUUsDQogIGdyaWQgPSBjKDAuMiwgMC4yKSwNCiAgZ3JpZC5jb2wgPSBjKCJncmVlbiIsICJvcmFuZ2UiKQ0KKQ0KYGBgDQo=