# Load the prostate cancer data set.
# stringsAsFactors = FALSE keeps character columns as plain character vectors
# instead of converting them to factors automatically.
prc <- read.csv("Prostate_Cancer.csv", stringsAsFactors = FALSE)
str(prc) # Inspect the structure (column names and types) of the loaded data.
prc <- prc[-1] # Drop the first column (the id) from the data set.
table(prc$diagnosis_result) # Number of patients per diagnosis code.
# Add a factor version of the diagnosis with readable labels:
# "B" -> "Benign", "M" -> "Malignant".
prc$diagnosis <- factor(
  prc$diagnosis_result,
  levels = c("B", "M"),
  labels = c("Benign", "Malignant")
)
# Percentage of patients in each diagnosis class, rounded to one decimal.
# NOTE(review): this line was truncated after `prc$` in the source; the column
# name and the `* 100, digits = 1` part are a reconstruction -- confirm.
round(prop.table(table(prc$diagnosis)) * 100, digits = 1)
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x, where the smallest value
# maps to 0 and the largest to 1. If x contains NA, every element of the
# result is NA (min/max propagate NA).
normalize <- function(x) {
  bounds <- range(x)
  span <- bounds[2] - bounds[1]
  # A constant (or single-element) vector has zero spread; dividing would give
  # 0 / 0 = NaN, so map every element to 0 instead.
  if (!is.na(span) && span == 0) {
    return(numeric(length(x)))
  }
  (x - bounds[1]) / span
}
# Rescale columns 2 through 9 of prc with normalize() and collect the results
# into a new data frame.
prc_n <- as.data.frame(lapply(prc[, 2:9], normalize))
# Spot-check the rescaled radius column.
summary(prc_n[["radius"]])
# Rows 1-65 form the training set; rows 66-100 form the test set.
prc_train <- prc_n[seq_len(65), ]
prc_test <- prc_n[seq(66, 100), ]
# Class labels for the same row ranges, taken from column 1 of prc. These are
# plain vectors, not data frames.
# NOTE(review): column 1 is presumably diagnosis_result (the raw "B"/"M"
# codes); the diagnosis factor is appended as the last column -- confirm
# against the CSV layout.
prc_train_labels <- prc[seq_len(65), 1]
prc_test_labels <- prc[seq(66, 100), 1]
library(class)
# Classify every test row with k-nearest neighbours, using k = 10.
prc_test_pred <- knn(
  train = prc_train,
  test = prc_test,
  cl = prc_train_labels,
  k = 10
)
# install.packages("gmodels")
library(gmodels)
# Cross-tabulate the actual test labels against the predicted ones.
# NOTE(review): this call was truncated after `x=prc_test_labels,` in the
# source; `y = prc_test_pred, prop.chisq = FALSE` is a reconstruction --
# confirm.
CrossTable(x = prc_test_labels, y = prc_test_pred, prop.chisq = FALSE)
# Run for K value from 1 to 10, to find the best K value.
KVALUE <- seq(from = 1, to = 10, by = 1)
for (K in seq_along(KVALUE)) {
  prc_test_pred <- knn(
    train = prc_train,
    test = prc_test,
    cl = prc_train_labels,
    k = KVALUE[K]
  )
  # Label the output so each printed table can be matched to its k.
  cat("k =", KVALUE[K], "\n")
  # Prediction check: actual test labels against predicted labels.
  # NOTE(review): this call was truncated after `x=prc_test_labels,` in the
  # source; `y = prc_test_pred, prop.chisq = FALSE` is a reconstruction --
  # confirm.
  CrossTable(x = prc_test_labels, y = prc_test_pred, prop.chisq = FALSE)
}
# No comments:
# Post a Comment