Edit Template

Predicting Breast Cancer Using Support Vector Machine in R



Breast Cancer Prediction





Installing required package

Importing Libraries

library(devtools)
## Loading required package: usethis
library(readr)
library(knitr)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(naniar)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.4     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggcorrplot) # finding the correlation with variables 
library(caTools)# splitting data into training set test set 

Importing Data

# Read the breast cancer data from the working directory and preview the
# first six rows.
data_cancer <- read.csv(file = "breastcancer.csv")
head(data_cancer, n = 6L)
##         id diagnosis radius_mean texture_mean perimeter_mean area_mean
## 1   842302         M       17.99        10.38         122.80    1001.0
## 2   842517         M       20.57        17.77         132.90    1326.0
## 3 84300903         M       19.69        21.25         130.00    1203.0
## 4 84348301         M       11.42        20.38          77.58     386.1
## 5 84358402         M       20.29        14.34         135.10    1297.0
## 6   843786         M       12.45        15.70          82.57     477.1
##   smoothness_mean compactness_mean concavity_mean concave.points_mean
## 1         0.11840          0.27760         0.3001             0.14710
## 2         0.08474          0.07864         0.0869             0.07017
## 3         0.10960          0.15990         0.1974             0.12790
## 4         0.14250          0.28390         0.2414             0.10520
## 5         0.10030          0.13280         0.1980             0.10430
## 6         0.12780          0.17000         0.1578             0.08089
##   symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se
## 1        0.2419                0.07871    1.0950     0.9053        8.589
## 2        0.1812                0.05667    0.5435     0.7339        3.398
## 3        0.2069                0.05999    0.7456     0.7869        4.585
## 4        0.2597                0.09744    0.4956     1.1560        3.445
## 5        0.1809                0.05883    0.7572     0.7813        5.438
## 6        0.2087                0.07613    0.3345     0.8902        2.217
##   area_se smoothness_se compactness_se concavity_se concave.points_se
## 1  153.40      0.006399        0.04904      0.05373           0.01587
## 2   74.08      0.005225        0.01308      0.01860           0.01340
## 3   94.03      0.006150        0.04006      0.03832           0.02058
## 4   27.23      0.009110        0.07458      0.05661           0.01867
## 5   94.44      0.011490        0.02461      0.05688           0.01885
## 6   27.19      0.007510        0.03345      0.03672           0.01137
##   symmetry_se fractal_dimension_se radius_worst texture_worst perimeter_worst
## 1     0.03003             0.006193        25.38         17.33          184.60
## 2     0.01389             0.003532        24.99         23.41          158.80
## 3     0.02250             0.004571        23.57         25.53          152.50
## 4     0.05963             0.009208        14.91         26.50           98.87
## 5     0.01756             0.005115        22.54         16.67          152.20
## 6     0.02165             0.005082        15.47         23.75          103.40
##   area_worst smoothness_worst compactness_worst concavity_worst
## 1     2019.0           0.1622            0.6656          0.7119
## 2     1956.0           0.1238            0.1866          0.2416
## 3     1709.0           0.1444            0.4245          0.4504
## 4      567.7           0.2098            0.8663          0.6869
## 5     1575.0           0.1374            0.2050          0.4000
## 6      741.6           0.1791            0.5249          0.5355
##   concave.points_worst symmetry_worst fractal_dimension_worst
## 1               0.2654         0.4601                 0.11890
## 2               0.1860         0.2750                 0.08902
## 3               0.2430         0.3613                 0.08758
## 4               0.2575         0.6638                 0.17300
## 5               0.1625         0.2364                 0.07678
## 6               0.1741         0.3985                 0.12440
# Show each column's type and first values to check what was read in.
str(data_cancer)
## 'data.frame':    569 obs. of  32 variables:
##  $ id                     : int  842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
##  $ diagnosis              : chr  "M" "M" "M" "M" ...
##  $ radius_mean            : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num  0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave.points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave.points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave.points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...

Visualizing all the numeric variables in the data frame

# Reshape every numeric column (this includes `id`) into long format:
# one row per (variable, value) pair, which is the shape facet_wrap() needs.
# pivot_longer()/select(where()) replace the superseded gather()/select_if();
# the row order differs from gather(), which does not affect a density plot.
data_1 <- data_cancer %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "value"
  )

# One density panel per variable. The variables span very different ranges
# (compare area_mean with smoothness_se), so each panel gets its own axes;
# with the default shared axes most panels collapse into a single spike.
ggplot(data_1, aes(value)) +
  geom_density() +
  facet_wrap(~variable, scales = "free")

# This visualization shows which variables require feature scaling: concave points, fractal dimension, smoothness se, ...

All the data is in numeric form except diagnosis, which takes the values M and B

Let's convert diagnosis to numeric as well

# Recode diagnosis as a factor whose levels "M" and "B" carry the labels
# "0" and "1" respectively.
data_cancer[["diagnosis"]] <- factor(
  data_cancer[["diagnosis"]],
  levels = c("M", "B"),
  labels = c("0", "1")
)

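Now convert the factor to character and then the character to numeric. Converting the factor directly to numeric would not

raise an error, but it would silently return the internal level codes (1 and 2) instead of the labels 0 and 1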

# Turn the diagnosis labels into numbers. Going through character first keeps
# the label values; as.numeric() applied straight to a factor would return
# its internal level codes instead.
data_cancer[["diagnosis"]] <- as.numeric(
  as.character(data_cancer[["diagnosis"]])
)

# Confirm that diagnosis is now stored as a numeric column.
str(data_cancer)
## 'data.frame':    569 obs. of  32 variables:
##  $ id                     : int  842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
##  $ diagnosis              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ radius_mean            : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num  0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave.points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave.points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave.points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...
# Open the data frame in the interactive data viewer for manual inspection.
# NOTE(review): lowercase view() is presumably tibble's wrapper around
# utils::View(), attached through library(tidyverse) — confirm.
view(data_cancer)

Changing the position of the dependent variable, i.e. diagnosis, to the extreme right of the data to avoid confusion

We will do this using the tidyverse (dplyr) function relocate() with its .after and .before arguments, which are very handy when changing

the position of columns. Here we need to shift the diagnosis column after fractal_dimension_worst

# Move the target column `diagnosis` so it sits directly after
# `fractal_dimension_worst`, then re-check the column order.
data_cancer <- relocate(
  data_cancer,
  diagnosis,
  .after = fractal_dimension_worst
)

str(data_cancer)
## 'data.frame':    569 obs. of  32 variables:
##  $ id                     : int  842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 ...
##  $ radius_mean            : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num  0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave.points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave.points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave.points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...
##  $ diagnosis              : num  0 0 0 0 0 0 0 0 0 0 ...
# Compact summary of the diagnosis column on its own.
str(data_cancer[["diagnosis"]])
##  num [1:569] 0 0 0 0 0 0 0 0 0 0 ...
# Print every diagnosis value to eyeball the 0/1 coding.
data_cancer[["diagnosis"]]
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [38] 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
##  [75] 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
## [112] 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
## [149] 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
## [186] 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
## [223] 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
## [297] 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1
## [334] 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 0
## [371] 0 1 0 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1
## [408] 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 1
## [445] 0 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
## [482] 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 0 1 1 0 1 0 1 0 0
## [519] 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [556] 1 1 1 1 1 1 1 0 0 0 0 0 0 1

Visualising the correlation between the variables

# Pairwise correlations across all columns of data_cancer, computed only on
# rows without missing values, then displayed rounded to two decimals.
# NOTE(review): the `id` column is included in the matrix; it looks like a
# record identifier rather than a feature — consider excluding it.
r <- cor(x = data_cancer, use = "complete.obs")
round(r, digits = 2)
##                            id radius_mean texture_mean perimeter_mean area_mean
## id                       1.00        0.07         0.10           0.07      0.10
## radius_mean              0.07        1.00         0.32           1.00      0.99
## texture_mean             0.10        0.32         1.00           0.33      0.32
## perimeter_mean           0.07        1.00         0.33           1.00      0.99
## area_mean                0.10        0.99         0.32           0.99      1.00
## smoothness_mean         -0.01        0.17        -0.02           0.21      0.18
## compactness_mean         0.00        0.51         0.24           0.56      0.50
## concavity_mean           0.05        0.68         0.30           0.72      0.69
## concave.points_mean      0.04        0.82         0.29           0.85      0.82
## symmetry_mean           -0.02        0.15         0.07           0.18      0.15
## fractal_dimension_mean  -0.05       -0.31        -0.08          -0.26     -0.28
## radius_se                0.14        0.68         0.28           0.69      0.73
## texture_se              -0.01       -0.10         0.39          -0.09     -0.07
## perimeter_se             0.14        0.67         0.28           0.69      0.73
## area_se                  0.18        0.74         0.26           0.74      0.80
## smoothness_se            0.10       -0.22         0.01          -0.20     -0.17
## compactness_se           0.03        0.21         0.19           0.25      0.21
## concavity_se             0.06        0.19         0.14           0.23      0.21
## concave.points_se        0.08        0.38         0.16           0.41      0.37
## symmetry_se             -0.02       -0.10         0.01          -0.08     -0.07
## fractal_dimension_se     0.03       -0.04         0.05          -0.01     -0.02
## radius_worst             0.08        0.97         0.35           0.97      0.96
## texture_worst            0.06        0.30         0.91           0.30      0.29
## perimeter_worst          0.08        0.97         0.36           0.97      0.96
## area_worst               0.11        0.94         0.34           0.94      0.96
## smoothness_worst         0.01        0.12         0.08           0.15      0.12
## compactness_worst        0.00        0.41         0.28           0.46      0.39
## concavity_worst          0.02        0.53         0.30           0.56      0.51
## concave.points_worst     0.04        0.74         0.30           0.77      0.72
## symmetry_worst          -0.04        0.16         0.11           0.19      0.14
## fractal_dimension_worst -0.03        0.01         0.12           0.05      0.00
## diagnosis               -0.04       -0.73        -0.42          -0.74     -0.71
##                         smoothness_mean compactness_mean concavity_mean
## id                                -0.01             0.00           0.05
## radius_mean                        0.17             0.51           0.68
## texture_mean                      -0.02             0.24           0.30
## perimeter_mean                     0.21             0.56           0.72
## area_mean                          0.18             0.50           0.69
## smoothness_mean                    1.00             0.66           0.52
## compactness_mean                   0.66             1.00           0.88
## concavity_mean                     0.52             0.88           1.00
## concave.points_mean                0.55             0.83           0.92
## symmetry_mean                      0.56             0.60           0.50
## fractal_dimension_mean             0.58             0.57           0.34
## radius_se                          0.30             0.50           0.63
## texture_se                         0.07             0.05           0.08
## perimeter_se                       0.30             0.55           0.66
## area_se                            0.25             0.46           0.62
## smoothness_se                      0.33             0.14           0.10
## compactness_se                     0.32             0.74           0.67
## concavity_se                       0.25             0.57           0.69
## concave.points_se                  0.38             0.64           0.68
## symmetry_se                        0.20             0.23           0.18
## fractal_dimension_se               0.28             0.51           0.45
## radius_worst                       0.21             0.54           0.69
## texture_worst                      0.04             0.25           0.30
## perimeter_worst                    0.24             0.59           0.73
## area_worst                         0.21             0.51           0.68
## smoothness_worst                   0.81             0.57           0.45
## compactness_worst                  0.47             0.87           0.75
## concavity_worst                    0.43             0.82           0.88
## concave.points_worst               0.50             0.82           0.86
## symmetry_worst                     0.39             0.51           0.41
## fractal_dimension_worst            0.50             0.69           0.51
## diagnosis                         -0.36            -0.60          -0.70
##                         concave.points_mean symmetry_mean
## id                                     0.04         -0.02
## radius_mean                            0.82          0.15
## texture_mean                           0.29          0.07
## perimeter_mean                         0.85          0.18
## area_mean                              0.82          0.15
## smoothness_mean                        0.55          0.56
## compactness_mean                       0.83          0.60
## concavity_mean                         0.92          0.50
## concave.points_mean                    1.00          0.46
## symmetry_mean                          0.46          1.00
## fractal_dimension_mean                 0.17          0.48
## radius_se                              0.70          0.30
## texture_se                             0.02          0.13
## perimeter_se                           0.71          0.31
## area_se                                0.69          0.22
## smoothness_se                          0.03          0.19
## compactness_se                         0.49          0.42
## concavity_se                           0.44          0.34
## concave.points_se                      0.62          0.39
## symmetry_se                            0.10          0.45
## fractal_dimension_se                   0.26          0.33
## radius_worst                           0.83          0.19
## texture_worst                          0.29          0.09
## perimeter_worst                        0.86          0.22
## area_worst                             0.81          0.18
## smoothness_worst                       0.45          0.43
## compactness_worst                      0.67          0.47
## concavity_worst                        0.75          0.43
## concave.points_worst                   0.91          0.43
## symmetry_worst                         0.38          0.70
## fractal_dimension_worst                0.37          0.44
## diagnosis                             -0.78         -0.33
##                         fractal_dimension_mean radius_se texture_se
## id                                       -0.05      0.14      -0.01
## radius_mean                              -0.31      0.68      -0.10
## texture_mean                             -0.08      0.28       0.39
## perimeter_mean                           -0.26      0.69      -0.09
## area_mean                                -0.28      0.73      -0.07
## smoothness_mean                           0.58      0.30       0.07
## compactness_mean                          0.57      0.50       0.05
## concavity_mean                            0.34      0.63       0.08
## concave.points_mean                       0.17      0.70       0.02
## symmetry_mean                             0.48      0.30       0.13
## fractal_dimension_mean                    1.00      0.00       0.16
## radius_se                                 0.00      1.00       0.21
## texture_se                                0.16      0.21       1.00
## perimeter_se                              0.04      0.97       0.22
## area_se                                  -0.09      0.95       0.11
## smoothness_se                             0.40      0.16       0.40
## compactness_se                            0.56      0.36       0.23
## concavity_se                              0.45      0.33       0.19
## concave.points_se                         0.34      0.51       0.23
## symmetry_se                               0.35      0.24       0.41
## fractal_dimension_se                      0.69      0.23       0.28
## radius_worst                             -0.25      0.72      -0.11
## texture_worst                            -0.05      0.19       0.41
## perimeter_worst                          -0.21      0.72      -0.10
## area_worst                               -0.23      0.75      -0.08
## smoothness_worst                          0.50      0.14      -0.07
## compactness_worst                         0.46      0.29      -0.09
## concavity_worst                           0.35      0.38      -0.07
## concave.points_worst                      0.18      0.53      -0.12
## symmetry_worst                            0.33      0.09      -0.13
## fractal_dimension_worst                   0.77      0.05      -0.05
## diagnosis                                 0.01     -0.57       0.01
##                         perimeter_se area_se smoothness_se compactness_se
## id                              0.14    0.18          0.10           0.03
## radius_mean                     0.67    0.74         -0.22           0.21
## texture_mean                    0.28    0.26          0.01           0.19
## perimeter_mean                  0.69    0.74         -0.20           0.25
## area_mean                       0.73    0.80         -0.17           0.21
## smoothness_mean                 0.30    0.25          0.33           0.32
## compactness_mean                0.55    0.46          0.14           0.74
## concavity_mean                  0.66    0.62          0.10           0.67
## concave.points_mean             0.71    0.69          0.03           0.49
## symmetry_mean                   0.31    0.22          0.19           0.42
## fractal_dimension_mean          0.04   -0.09          0.40           0.56
## radius_se                       0.97    0.95          0.16           0.36
## texture_se                      0.22    0.11          0.40           0.23
## perimeter_se                    1.00    0.94          0.15           0.42
## area_se                         0.94    1.00          0.08           0.28
## smoothness_se                   0.15    0.08          1.00           0.34
## compactness_se                  0.42    0.28          0.34           1.00
## concavity_se                    0.36    0.27          0.27           0.80
## concave.points_se               0.56    0.42          0.33           0.74
## symmetry_se                     0.27    0.13          0.41           0.39
## fractal_dimension_se            0.24    0.13          0.43           0.80
## radius_worst                    0.70    0.76         -0.23           0.20
## texture_worst                   0.20    0.20         -0.07           0.14
## perimeter_worst                 0.72    0.76         -0.22           0.26
## area_worst                      0.73    0.81         -0.18           0.20
## smoothness_worst                0.13    0.13          0.31           0.23
## compactness_worst               0.34    0.28         -0.06           0.68
## concavity_worst                 0.42    0.39         -0.06           0.64
## concave.points_worst            0.55    0.54         -0.10           0.48
## symmetry_worst                  0.11    0.07         -0.11           0.28
## fractal_dimension_worst         0.09    0.02          0.10           0.59
## diagnosis                      -0.56   -0.55          0.07          -0.29
##                         concavity_se concave.points_se symmetry_se
## id                              0.06              0.08       -0.02
## radius_mean                     0.19              0.38       -0.10
## texture_mean                    0.14              0.16        0.01
## perimeter_mean                  0.23              0.41       -0.08
## area_mean                       0.21              0.37       -0.07
## smoothness_mean                 0.25              0.38        0.20
## compactness_mean                0.57              0.64        0.23
## concavity_mean                  0.69              0.68        0.18
## concave.points_mean             0.44              0.62        0.10
## symmetry_mean                   0.34              0.39        0.45
## fractal_dimension_mean          0.45              0.34        0.35
## radius_se                       0.33              0.51        0.24
## texture_se                      0.19              0.23        0.41
## perimeter_se                    0.36              0.56        0.27
## area_se                         0.27              0.42        0.13
## smoothness_se                   0.27              0.33        0.41
## compactness_se                  0.80              0.74        0.39
## concavity_se                    1.00              0.77        0.31
## concave.points_se               0.77              1.00        0.31
## symmetry_se                     0.31              0.31        1.00
## fractal_dimension_se            0.73              0.61        0.37
## radius_worst                    0.19              0.36       -0.13
## texture_worst                   0.10              0.09       -0.08
## perimeter_worst                 0.23              0.39       -0.10
## area_worst                      0.19              0.34       -0.11
## smoothness_worst                0.17              0.22       -0.01
## compactness_worst               0.48              0.45        0.06
## concavity_worst                 0.66              0.55        0.04
## concave.points_worst            0.44              0.60       -0.03
## symmetry_worst                  0.20              0.14        0.39
## fractal_dimension_worst         0.44              0.31        0.08
## diagnosis                      -0.25             -0.41        0.01
##                         fractal_dimension_se radius_worst texture_worst
## id                                      0.03         0.08          0.06
## radius_mean                            -0.04         0.97          0.30
## texture_mean                            0.05         0.35          0.91
## perimeter_mean                         -0.01         0.97          0.30
## area_mean                              -0.02         0.96          0.29
## smoothness_mean                         0.28         0.21          0.04
## compactness_mean                        0.51         0.54          0.25
## concavity_mean                          0.45         0.69          0.30
## concave.points_mean                     0.26         0.83          0.29
## symmetry_mean                           0.33         0.19          0.09
## fractal_dimension_mean                  0.69        -0.25         -0.05
## radius_se                               0.23         0.72          0.19
## texture_se                              0.28        -0.11          0.41
## perimeter_se                            0.24         0.70          0.20
## area_se                                 0.13         0.76          0.20
## smoothness_se                           0.43        -0.23         -0.07
## compactness_se                          0.80         0.20          0.14
## concavity_se                            0.73         0.19          0.10
## concave.points_se                       0.61         0.36          0.09
## symmetry_se                             0.37        -0.13         -0.08
## fractal_dimension_se                    1.00        -0.04          0.00
## radius_worst                           -0.04         1.00          0.36
## texture_worst                           0.00         0.36          1.00
## perimeter_worst                         0.00         0.99          0.37
## area_worst                             -0.02         0.98          0.35
## smoothness_worst                        0.17         0.22          0.23
## compactness_worst                       0.39         0.48          0.36
## concavity_worst                         0.38         0.57          0.37
## concave.points_worst                    0.22         0.79          0.36
## symmetry_worst                          0.11         0.24          0.23
## fractal_dimension_worst                 0.59         0.09          0.22
## diagnosis                              -0.08        -0.78         -0.46
##                         perimeter_worst area_worst smoothness_worst
## id                                 0.08       0.11             0.01
## radius_mean                        0.97       0.94             0.12
## texture_mean                       0.36       0.34             0.08
## perimeter_mean                     0.97       0.94             0.15
## area_mean                          0.96       0.96             0.12
## smoothness_mean                    0.24       0.21             0.81
## compactness_mean                   0.59       0.51             0.57
## concavity_mean                     0.73       0.68             0.45
## concave.points_mean                0.86       0.81             0.45
## symmetry_mean                      0.22       0.18             0.43
## fractal_dimension_mean            -0.21      -0.23             0.50
## radius_se                          0.72       0.75             0.14
## texture_se                        -0.10      -0.08            -0.07
## perimeter_se                       0.72       0.73             0.13
## area_se                            0.76       0.81             0.13
## smoothness_se                     -0.22      -0.18             0.31
## compactness_se                     0.26       0.20             0.23
## concavity_se                       0.23       0.19             0.17
## concave.points_se                  0.39       0.34             0.22
## symmetry_se                       -0.10      -0.11            -0.01
## fractal_dimension_se               0.00      -0.02             0.17
## radius_worst                       0.99       0.98             0.22
## texture_worst                      0.37       0.35             0.23
## perimeter_worst                    1.00       0.98             0.24
## area_worst                         0.98       1.00             0.21
## smoothness_worst                   0.24       0.21             1.00
## compactness_worst                  0.53       0.44             0.57
## concavity_worst                    0.62       0.54             0.52
## concave.points_worst               0.82       0.75             0.55
## symmetry_worst                     0.27       0.21             0.49
## fractal_dimension_worst            0.14       0.08             0.62
## diagnosis                         -0.78      -0.73            -0.42
##                         compactness_worst concavity_worst concave.points_worst
## id                                   0.00            0.02                 0.04
## radius_mean                          0.41            0.53                 0.74
## texture_mean                         0.28            0.30                 0.30
## perimeter_mean                       0.46            0.56                 0.77
## area_mean                            0.39            0.51                 0.72
## smoothness_mean                      0.47            0.43                 0.50
## compactness_mean                     0.87            0.82                 0.82
## concavity_mean                       0.75            0.88                 0.86
## concave.points_mean                  0.67            0.75                 0.91
## symmetry_mean                        0.47            0.43                 0.43
## fractal_dimension_mean               0.46            0.35                 0.18
## radius_se                            0.29            0.38                 0.53
## texture_se                          -0.09           -0.07                -0.12
## perimeter_se                         0.34            0.42                 0.55
## area_se                              0.28            0.39                 0.54
## smoothness_se                       -0.06           -0.06                -0.10
## compactness_se                       0.68            0.64                 0.48
## concavity_se                         0.48            0.66                 0.44
## concave.points_se                    0.45            0.55                 0.60
## symmetry_se                          0.06            0.04                -0.03
## fractal_dimension_se                 0.39            0.38                 0.22
## radius_worst                         0.48            0.57                 0.79
## texture_worst                        0.36            0.37                 0.36
## perimeter_worst                      0.53            0.62                 0.82
## area_worst                           0.44            0.54                 0.75
## smoothness_worst                     0.57            0.52                 0.55
## compactness_worst                    1.00            0.89                 0.80
## concavity_worst                      0.89            1.00                 0.86
## concave.points_worst                 0.80            0.86                 1.00
## symmetry_worst                       0.61            0.53                 0.50
## fractal_dimension_worst              0.81            0.69                 0.51
## diagnosis                           -0.59           -0.66                -0.79
##                         symmetry_worst fractal_dimension_worst diagnosis
## id                               -0.04                   -0.03     -0.04
## radius_mean                       0.16                    0.01     -0.73
## texture_mean                      0.11                    0.12     -0.42
## perimeter_mean                    0.19                    0.05     -0.74
## area_mean                         0.14                    0.00     -0.71
## smoothness_mean                   0.39                    0.50     -0.36
## compactness_mean                  0.51                    0.69     -0.60
## concavity_mean                    0.41                    0.51     -0.70
## concave.points_mean               0.38                    0.37     -0.78
## symmetry_mean                     0.70                    0.44     -0.33
## fractal_dimension_mean            0.33                    0.77      0.01
## radius_se                         0.09                    0.05     -0.57
## texture_se                       -0.13                   -0.05      0.01
## perimeter_se                      0.11                    0.09     -0.56
## area_se                           0.07                    0.02     -0.55
## smoothness_se                    -0.11                    0.10      0.07
## compactness_se                    0.28                    0.59     -0.29
## concavity_se                      0.20                    0.44     -0.25
## concave.points_se                 0.14                    0.31     -0.41
## symmetry_se                       0.39                    0.08      0.01
## fractal_dimension_se              0.11                    0.59     -0.08
## radius_worst                      0.24                    0.09     -0.78
## texture_worst                     0.23                    0.22     -0.46
## perimeter_worst                   0.27                    0.14     -0.78
## area_worst                        0.21                    0.08     -0.73
## smoothness_worst                  0.49                    0.62     -0.42
## compactness_worst                 0.61                    0.81     -0.59
## concavity_worst                   0.53                    0.69     -0.66
## concave.points_worst              0.50                    0.51     -0.79
## symmetry_worst                    1.00                    0.54     -0.42
## fractal_dimension_worst           0.54                    1.00     -0.32
## diagnosis                        -0.42                   -0.32      1.00

It provides a solution for reordering the correlation matrix and displays the significance level on the correlogram.

# It also includes a function for computing a matrix of correlation p-values.

# Plot the correlation matrix `r` with ggcorrplot's default settings.
ggcorrplot(corr = r)

# Lower-triangle version of the same plot: variables are reordered by
# hierarchical clustering, tiles get a white outline, and a three-colour
# (low / mid / high) scale is used on a grey ggplot2 theme.
ggcorrplot(
  corr = r,
  type = "lower",
  hc.order = TRUE,
  colors = c("#6D9EC1", "white", "#E46726"),
  outline.color = "white",
  ggtheme = ggplot2::theme_gray
)

Visualising the missing values in the data using naniar

# Plot the missing-value pattern of the data set with naniar.
naniar::vis_miss(data_cancer)

# The plot shows no missing values; cross-check by counting the NA cells
# over the whole data frame.
sum(is.na(data_cancer))
## [1] 0

Let's check that no column has any missing values


# Count the NA entries in each column. vapply() pins the result to exactly one
# integer per column, so the return type cannot silently change the way
# sapply()'s can (for example on a data frame with zero columns).
vapply(data_cancer, function(column) sum(is.na(column)), integer(1))

Using the three methods above, we have confirmed that the data has no missing values

Splitting data into training set and test set

# Fix the RNG seed so the random train/test partition, and every metric
# computed from it, is reproducible from run to run.
set.seed(123)

# sample.split() returns a logical vector with TRUE for the rows assigned to
# the training set (75% of the rows). Passing `diagnosis` makes the split
# preserve the relative ratio of the two labels in both subsets.
split <- sample.split(data_cancer$diagnosis, SplitRatio = 0.75)

train_set <- subset(data_cancer, split == TRUE)
test_set <- subset(data_cancer, split == FALSE)

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is run or knitted non-interactively.
if (interactive()) {
  View(train_set)
}

Feature scaling on a few columns: column 2 to column 5

# Standardise columns 2-5 (radius_mean, texture_mean, perimeter_mean,
# area_mean). The centre and scale are estimated on the TRAINING set only and
# then reused for the test set: a model fitted on the training data expects
# new data on the training scale, so the test set must not be standardised
# with its own mean and standard deviation.
scaled_train <- scale(train_set[, 2:5])
test_set[, 2:5] <- scale(
  test_set[, 2:5],
  center = attr(scaled_train, "scaled:center"),
  scale = attr(scaled_train, "scaled:scale")
)
train_set[, 2:5] <- scaled_train
view(train_set)

# List the column names as a one-column data frame; the printed row numbers
# give each column's positional index.
data.frame(colnames(data_cancer)) # to find the index number of each column
##      colnames.data_cancer.
## 1                       id
## 2              radius_mean
## 3             texture_mean
## 4           perimeter_mean
## 5                area_mean
## 6          smoothness_mean
## 7         compactness_mean
## 8           concavity_mean
## 9      concave.points_mean
## 10           symmetry_mean
## 11  fractal_dimension_mean
## 12               radius_se
## 13              texture_se
## 14            perimeter_se
## 15                 area_se
## 16           smoothness_se
## 17          compactness_se
## 18            concavity_se
## 19       concave.points_se
## 20             symmetry_se
## 21    fractal_dimension_se
## 22            radius_worst
## 23           texture_worst
## 24         perimeter_worst
## 25              area_worst
## 26        smoothness_worst
## 27       compactness_worst
## 28         concavity_worst
## 29    concave.points_worst
## 30          symmetry_worst
## 31 fractal_dimension_worst
## 32               diagnosis

Feature scaling on a few columns: column 14 to column 15

# Standardise columns 14-15 (perimeter_se, area_se). Each statement sits on
# its own line: R cannot parse several assignments run together on one line.
# The centre and scale are estimated on the TRAINING set only and reused for
# the test set, so both subsets end up on the same scale.
scaled_train <- scale(train_set[, 14:15])
test_set[, 14:15] <- scale(
  test_set[, 14:15],
  center = attr(scaled_train, "scaled:center"),
  scale = attr(scaled_train, "scaled:scale")
)
train_set[, 14:15] <- scaled_train
view(train_set)

Feature scaling on a few columns: column 22 to column 25

# Standardise columns 22-25 (radius_worst, texture_worst, perimeter_worst,
# area_worst). The centre and scale are estimated on the TRAINING set only and
# reused for the test set, so both subsets end up on the same scale instead of
# the test set being standardised with its own statistics.
scaled_train <- scale(train_set[, 22:25])
test_set[, 22:25] <- scale(
  test_set[, 22:25],
  center = attr(scaled_train, "scaled:center"),
  scale = attr(scaled_train, "scaled:scale")
)
train_set[, 22:25] <- scaled_train
view(train_set)

view(test_set)

Multiple regression model:

# Linear model of `diagnosis` on every other column of the training set.
regressor <- lm(formula = diagnosis ~ ., data = train_set)

#The visreg package provides tools for visualizing these conditional relationships.

#The visreg function takes (1) the model and (2) the variable of interest and plots the conditional relationship, controlling for the other variables. The option gg = TRUE is used to produce a ggplot2 graph.

Conditional plot of diagnosis vs. texture mean. We can compare diagnosis with the other variables in the data to check their relationships.

Logistic Regression Model

# Logistic regression of `diagnosis` on every other column.
# The model is fitted on the training subset only. Fitting on the full
# `data_cancer` would train on the very rows later used for evaluation, and on
# unscaled features, whereas `test_set` has been standardised; predictions on
# the test set would then be made on a different feature scale than the one
# the model was fitted on.
regressor_lr <- glm(
  formula = diagnosis ~ .,
  family = binomial,
  data = train_set
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Predicting the test set results

# Fitted probabilities for the test rows. Column 32 (`diagnosis`, the
# response) is dropped from the data handed to predict().
prob_pred <- predict(regressor_lr, newdata = test_set[-32], type = "response")

# Threshold the probabilities at 0.5: above it is labelled 1, otherwise 0.
y_pred <- ifelse(prob_pred > 0.5, 1, 0)

Making confusion matrix

# Cross-tabulate the actual labels in column 32 (rows) against the
# logistic-regression predictions (columns).
cm <- table(test_set[[32]], y_pred)
cm
##    y_pred
##      0
##   0 53
##   1 89

SVM Model

library(e1071)

# Linear-kernel support vector classifier of `diagnosis` on every other
# column of the training set.
regressor_svm <- svm(
  formula = diagnosis ~ .,
  data = train_set,
  kernel = "linear",
  type = "C-classification"
)

Predicting the test set results

# Predicted class for each test row. Column 32 (`diagnosis`, the response)
# is dropped from the data handed to predict().
y_pred1 <- predict(object = regressor_svm, newdata = test_set[-32])

Making confusion matrix

# Cross-tabulate the actual labels in column 32 (rows) against the SVM
# predictions (columns).
cm <- table(test_set[[32]], y_pred1)
cm
##    y_pred1
##      0  1
##   0 51  2
##   1  2 87



Company

Our ebook website brings you the convenience of instant access to a diverse range of titles, spanning genres from fiction and non-fiction to self-help, business.

Features

Most Recent Posts

eBook App for FREE

Lorem Ipsum is simply dummy text of the printing typesetting industry lorem.

Hot

Category

Our ebook website brings you the convenience of instant access.

Help

Privacy Policy

Mailing List

© 2023 Created with Royal Elementor Addons