AQUARIUS APRIL 2022
PISCES ASCENDANT HOROSCOPE APRIL 2022
PISCES ASCENDANT APRIL 2022
Mesh Lagan Rashifal May 2022
MESH LAGAN RASHIFAL MAY 2022
Predicting Breast Cancer Using Support Vector Machine in R
Breast Cancer Prediction Breast Cancer Prediction Sachin Sharma 10/17/2021 Installing required package Importing Libraries library(devtools) ## Loading required package: usethis library(readr) library(knitr) library(ggplot2) library(plotly) ## ## Attaching package: 'plotly' ## The following object is masked from 'package:ggplot2': ## ## last_plot ## The following object is masked from 'package:stats': ## ## filter ## The following object is masked from 'package:graphics': ## ## layout library(dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union library(naniar) library(tidyverse) ## — Attaching packages ————————————— tidyverse 1.3.1 — ## v tibble 3.1.4 v stringr 1.4.0 ## v tidyr 1.1.3 v forcats 0.5.1 ## v purrr 0.3.4 ## — Conflicts —————————————— tidyverse_conflicts() — ## x dplyr::filter() masks plotly::filter(), stats::filter() ## x dplyr::lag() masks stats::lag() library(ggcorrplot) # finding the correlation with variables library(caTools)# splitting data into training set test set Importing Data data_cancer <- read.csv("breastcancer.csv") head(data_cancer) ## id diagnosis radius_mean texture_mean perimeter_mean area_mean ## 1 842302 M 17.99 10.38 122.80 1001.0 ## 2 842517 M 20.57 17.77 132.90 1326.0 ## 3 84300903 M 19.69 21.25 130.00 1203.0 ## 4 84348301 M 11.42 20.38 77.58 386.1 ## 5 84358402 M 20.29 14.34 135.10 1297.0 ## 6 843786 M 12.45 15.70 82.57 477.1 ## smoothness_mean compactness_mean concavity_mean concave.points_mean ## 1 0.11840 0.27760 0.3001 0.14710 ## 2 0.08474 0.07864 0.0869 0.07017 ## 3 0.10960 0.15990 0.1974 0.12790 ## 4 0.14250 0.28390 0.2414 0.10520 ## 5 0.10030 0.13280 0.1980 0.10430 ## 6 0.12780 0.17000 0.1578 0.08089 ## symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se ## 1 0.2419 0.07871 1.0950 0.9053 8.589 ## 2 0.1812 0.05667 0.5435 0.7339 3.398 ## 3 0.2069 
0.05999 0.7456 0.7869 4.585 ## 4 0.2597 0.09744 0.4956 1.1560 3.445 ## 5 0.1809 0.05883 0.7572 0.7813 5.438 ## 6 0.2087 0.07613 0.3345 0.8902 2.217 ## area_se smoothness_se compactness_se concavity_se concave.points_se ## 1 153.40 0.006399 0.04904 0.05373 0.01587 ## 2 74.08 0.005225 0.01308 0.01860 0.01340 ## 3 94.03 0.006150 0.04006 0.03832 0.02058 ## 4 27.23 0.009110 0.07458 0.05661 0.01867 ## 5 94.44 0.011490 0.02461 0.05688 0.01885 ## 6 27.19 0.007510 0.03345 0.03672 0.01137 ## symmetry_se fractal_dimension_se radius_worst texture_worst perimeter_worst ## 1 0.03003 0.006193 25.38 17.33 184.60 ## 2 0.01389 0.003532 24.99 23.41 158.80 ## 3 0.02250 0.004571 23.57 25.53 152.50 ## 4 0.05963 0.009208 14.91 26.50 98.87 ## 5 0.01756 0.005115 22.54 16.67 152.20 ## 6 0.02165 0.005082 15.47 23.75 103.40 ## area_worst smoothness_worst compactness_worst concavity_worst ## 1 2019.0 0.1622 0.6656 0.7119 ## 2 1956.0 0.1238 0.1866 0.2416 ## 3 1709.0 0.1444 0.4245 0.4504 ## 4 567.7 0.2098 0.8663 0.6869 ## 5 1575.0 0.1374 0.2050 0.4000 ## 6 741.6 0.1791 0.5249 0.5355 ## concave.points_worst symmetry_worst fractal_dimension_worst ## 1 0.2654 0.4601 0.11890 ## 2 0.1860 0.2750 0.08902 ## 3 0.2430 0.3613 0.08758 ## 4 0.2575 0.6638 0.17300 ## 5 0.1625 0.2364 0.07678 ## 6 0.1741 0.3985 0.12440 str(data_cancer) ## 'data.frame': 569 obs. 
of 32 variables: ## $ id : int 842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 … ## $ diagnosis : chr "M" "M" "M" "M" … ## $ radius_mean : num 18 20.6 19.7 11.4 20.3 … ## $ texture_mean : num 10.4 17.8 21.2 20.4 14.3 … ## $ perimeter_mean : num 122.8 132.9 130 77.6 135.1 … ## $ area_mean : num 1001 1326 1203 386 1297 … ## $ smoothness_mean : num 0.1184 0.0847 0.1096 0.1425 0.1003 … ## $ compactness_mean : num 0.2776 0.0786 0.1599 0.2839 0.1328 … ## $ concavity_mean : num 0.3001 0.0869 0.1974 0.2414 0.198 … ## $ concave.points_mean : num 0.1471 0.0702 0.1279 0.1052 0.1043 … ## $ symmetry_mean : num 0.242 0.181 0.207 0.26 0.181 … ## $ fractal_dimension_mean : num 0.0787 0.0567 0.06 0.0974 0.0588 … ## $ radius_se : num 1.095 0.543 0.746 0.496 0.757 … ## $ texture_se : num 0.905 0.734 0.787 1.156 0.781 … ## $ perimeter_se : num 8.59 3.4 4.58 3.44 5.44 … ## $ area_se : num 153.4 74.1 94 27.2 94.4 … ## $ smoothness_se : num 0.0064 0.00522 0.00615 0.00911 0.01149 … ## $ compactness_se : num 0.049 0.0131 0.0401 0.0746 0.0246 … ## $ concavity_se : num 0.0537 0.0186 0.0383 0.0566 0.0569 … ## $ concave.points_se : num 0.0159 0.0134 0.0206 0.0187 0.0188 … ## $ symmetry_se : num 0.03 0.0139 0.0225 0.0596 0.0176 … ## $ fractal_dimension_se : num 0.00619 0.00353 0.00457 0.00921 0.00511 … ## $ radius_worst : num 25.4 25 23.6 14.9 22.5 … ## $ texture_worst : num 17.3 23.4 25.5 26.5 16.7 … ## $ perimeter_worst : num 184.6 158.8 152.5 98.9 152.2 … ## $ area_worst : num 2019 1956 1709 568 1575 … ## $ smoothness_worst : num 0.162 0.124 0.144 0.21 0.137 … ## $ compactness_worst : num 0.666 0.187 0.424 0.866 0.205 … ## $ concavity_worst : num 0.712 0.242 0.45 0.687 0.4 … ## $ concave.points_worst : num 0.265 0.186 0.243 0.258 0.163 … ## $ symmetry_worst : num 0.46 0.275 0.361 0.664 0.236 … ## $ fractal_dimension_worst: num 0.1189 0.089 0.0876 0.173 0.0768 … To visualize all the variable in the data frame data_1 <- data_cancer %>% as.data.frame() %>% 
select_if(is.numeric) %>% gather(key = "variable", value = "value") ggplot(data_1, aes(value)) + geom_density() + facet_wrap(~variable) # This visualization represents which data require feature scaling : concave points, fractal dimension, #smoothness se, We have all the data in the numeric form, except diagnosis which is M and B. Let's convert this into numeric only data_cancer$diagnosis <- factor(data_cancer$diagnosis, levels = c("M","B"), labels = c(0,1)) Now converting factors to character and then character to numeric; if we convert this directly to numeric it will give errors data_cancer$diagnosis <- as.character(data_cancer$diagnosis) data_cancer$diagnosis <- as.numeric(data_cancer$diagnosis) str(data_cancer) ## 'data.frame': 569 obs. of 32 variables: ## $ id : int 842302 842517 84300903 84348301 84358402 843786 844359 84458202 844981 84501001 … ## $ diagnosis : num 0 0 0 0 0 0 0 0 0 0 … ## $ radius_mean : num 18 20.6 19.7 11.4 20.3 … ## $ texture_mean : num
Regression Model in R Programming
Regression Model In R Programming Regression Model In R Programming Sachin Sharma 10/7/2021 library(tidyverse) ## — Attaching packages ————————————— tidyverse 1.3.1 — ## v ggplot2 3.3.5 v purrr 0.3.4 ## v tibble 3.1.4 v dplyr 1.0.7 ## v tidyr 1.1.3 v stringr 1.4.0 ## v readr 2.0.1 v forcats 0.5.1 ## — Conflicts —————————————— tidyverse_conflicts() — ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag() library(ggplot2) library(naniar) library(dplyr) library(datasets) library(tinytex) library(DT) data("mtcars") head(mtcars) ## mpg cyl disp hp drat wt qsec vs am gear carb ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 Transform certain variables into factors mtcars$cyl <- factor(mtcars$cyl) mtcars$am <- factor(mtcars$am,labels=c("Automatic","Manual")) mtcars$vs <- factor(mtcars$vs) mtcars$gear <- factor(mtcars$gear) mtcars$carb <- factor(mtcars$carb) boxplot(mpg ~ am, data = mtcars, col = (c("purple","red")), ylab = "Miles Per Gallon", xlab = "Type of Transmission", main = "MPG Vs AM") aggregate(mpg~am, data = mtcars, mean) ## am mpg ## 1 Automatic 17.14737 ## 2 Manual 24.39231 Difference of MPG between Automatic and Manual 24.39231 – 17.14737 ## [1] 7.24494 Therefore, we can see that the Manual cars have an MPG of 7.245 (approx.) 
more than automatic cars We can now use a t-test here automatic_car <- mtcars[mtcars$am == "Automatic",] manual_car <- mtcars[mtcars$am == "Manual",] t.test(automatic_car$mpg, manual_car$mpg) ## ## Welch Two Sample t-test ## ## data: automatic_car$mpg and manual_car$mpg ## t = -3.7671, df = 18.332, p-value = 0.001374 ## alternative hypothesis: true difference in means is not equal to 0 ## 95 percent confidence interval: ## -11.280194 -3.209684 ## sample estimates: ## mean of x mean of y ## 17.14737 24.39231 We can see that the p-value is 0.001374, thus we can state this is a significant difference. Now to quantify this, we can use the following code : model_1 <- lm(mpg ~ am, data = mtcars) summary(model_1) ## ## Call: ## lm(formula = mpg ~ am, data = mtcars) ## ## Residuals: ## Min 1Q Median 3Q Max ## -9.3923 -3.0923 -0.2974 3.2439 9.5077 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 17.147 1.125 15.247 1.13e-15 *** ## amManual 7.245 1.764 4.106 0.000285 *** ## — ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 4.902 on 30 degrees of freedom ## Multiple R-squared: 0.3598, Adjusted R-squared: 0.3385 ## F-statistic: 16.86 on 1 and 30 DF, p-value: 0.000285 Lets see with the help of corrplot , to check the correlation among the variables with mpg. Before plotting the corrplot, we will check the structure of the data ; df_1 <- subset(mtcars, select = c(mpg,cyl,disp,hp,drat,wt,qsec,vs)) head(df_1) ## mpg cyl disp hp drat wt qsec vs ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 str(df_1) ## 'data.frame': 32 obs. 
of 8 variables: ## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 … ## $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 … ## $ disp: num 160 160 108 258 360 … ## $ hp : num 110 110 93 110 175 105 245 62 95 123 … ## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 … ## $ wt : num 2.62 2.88 2.32 3.21 3.44 … ## $ qsec: num 16.5 17 18.6 19.4 17 … ## $ vs : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 2 2 2 … Here we can see that, cyl and vs columns are in factor, we will now convert this into numeric to plot corrplot and check the correlation. df_1$cyl <- as.character(df_1$cyl) df_1$cyl <- as.numeric(df_1$cyl) df_1$vs <- as.character(df_1$vs) df_1$vs <- as.numeric(df_1$vs) # Now we can check the structure of the data again str(df_1) ## 'data.frame': 32 obs. of 8 variables: ## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 … ## $ cyl : num 6 6 4 6 8 6 8 4 4 6 … ## $ disp: num 160 160 108 258 360 … ## $ hp : num 110 110 93 110 175 105 245 62 95 123 … ## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 … ## $ wt : num 2.62 2.88 2.32 3.21 3.44 … ## $ qsec: num 16.5 17 18.6 19.4 17 … ## $ vs : num 0 0 1 1 0 1 0 1 1 1 … Now we can see that all the columns are in numeric, now we can plot wit the help of ggcorrplot and corrplot to check the correlation : library(ggcorrplot) r <- cor(df_1) ggcorrplot(r,method = "circle", type = c("upper"), legend.title = "Corrplot MTCARS") ## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = ## "none")` instead. library(corrplot) ## corrplot 0.90 loaded r <- cor(df_1) corrplot(r, method = "circle") model_2 <- lm(mpg~am + cyl + disp + hp + wt, data = mtcars) anova(model_1, model_2) ## Analysis of Variance Table ## ## Model 1: mpg ~ am ## Model 2: mpg ~ am + cyl + disp + hp + wt ## Res.Df RSS Df Sum of Sq F Pr(>F) ## 1 30 720.90 ## 2
DPLYR Tutorial – Data Manipulation using DPLYR Package in R Programming
DPLYR-TUTORIAL—Data-Manipulation-with-DPLYR-in-R- DPLYR-TUTORIAL—Data-Manipulation-with-DPLYR-in-R- Sachin Sharma August 31, 2021 Why to use dplyr ? It is really useful for data exploration and transformation Fast while dealing with data frames Functionality of dplyr It is useful while dealing with : ‘select’,‘filter’, ‘mutate’ , ’ arrange’ , ‘summarise’, which can be use as five basic verbs Can be very useful in handling inner joins, left join, semi-join, anti-join # loading packages library(dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union library(datasets) #install.packages("hflights") library(hflights) # Lets explore data data("hflights") head(hflights) ## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum ## 5424 2011 1 1 6 1400 1500 AA 428 ## 5425 2011 1 2 7 1401 1501 AA 428 ## 5426 2011 1 3 1 1352 1502 AA 428 ## 5427 2011 1 4 2 1403 1513 AA 428 ## 5428 2011 1 5 3 1405 1507 AA 428 ## 5429 2011 1 6 4 1359 1503 AA 428 ## TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin Dest Distance ## 5424 N576AA 60 40 -10 0 IAH DFW 224 ## 5425 N557AA 60 45 -9 1 IAH DFW 224 ## 5426 N541AA 70 48 -8 -8 IAH DFW 224 ## 5427 N403AA 70 39 3 3 IAH DFW 224 ## 5428 N492AA 62 44 -3 5 IAH DFW 224 ## 5429 N262AA 64 45 -7 -1 IAH DFW 224 ## TaxiIn TaxiOut Cancelled CancellationCode Diverted ## 5424 7 13 0 0 ## 5425 6 9 0 0 ## 5426 5 17 0 0 ## 5427 9 22 0 0 ## 5428 9 9 0 0 ## 5429 6 13 0 0 ‘as_tibble’ creates a “a local data frame” Tibble data frame will print first ten rows and total columns which fit to the screen in a systematic manner in comparison to raw data # Convert to tibble flights <- as_tibble(hflights) flights ## # A tibble: 227,496 x 21 ## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum ## <int> <int> <int> <int> <int> <int> <chr> <int> ## 1 2011 1 1 6 1400 1500 
AA 428 ## 2 2011 1 2 7 1401 1501 AA 428 ## 3 2011 1 3 1 1352 1502 AA 428 ## 4 2011 1 4 2 1403 1513 AA 428 ## 5 2011 1 5 3 1405 1507 AA 428 ## 6 2011 1 6 4 1359 1503 AA 428 ## 7 2011 1 7 5 1359 1509 AA 428 ## 8 2011 1 8 6 1355 1454 AA 428 ## 9 2011 1 9 7 1443 1554 AA 428 ## 10 2011 1 10 1 1443 1553 AA 428 ## # … with 227,486 more rows, and 13 more variables: TailNum <chr>, ## # ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>, DepDelay <int>, ## # Origin <chr>, Dest <chr>, Distance <int>, TaxiIn <int>, TaxiOut <int>, ## # Cancelled <int>, CancellationCode <chr>, Diverted <int> Lets use filter to understand it , If we want to view all flights on February 1 we can use the following two methods METHOD – I flights[flights$Month ==2 & flights$DayofMonth ==1, ] ## # A tibble: 577 x 21 ## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum ## <int> <int> <int> <int> <int> <int> <chr> <int> ## 1 2011 2 1 2 1401 1539 AA 428 ## 2 2011 2 1 2 NA NA AA 460 ## 3 2011 2 1 2 NA NA AA 533 ## 4 2011 2 1 2 NA NA AA 1121 ## 5 2011 2 1 2 1746 2109 AA 1294 ## 6 2011 2 1 2 NA NA AA 1436 ## 7 2011 2 1 2 1032 1358 AA 1700 ## 8 2011 2 1 2 NA NA AA 1820 ## 9 2011 2 1 2 558 912 AA 1994 ## 10 2011 2 1 2 1820 2112 AS 731 ## # … with 567 more rows, and 13 more variables: TailNum <chr>, ## # ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>, DepDelay <int>, ## # Origin <chr>, Dest <chr>, Distance <int>, TaxiIn <int>, TaxiOut <int>, ## # Cancelled <int>, CancellationCode <chr>, Diverted <int> METHOD – II Using Filter filter(flights, flights$Month ==2 , flights$DayofMonth ==1) ## # A tibble: 577 x 21 ## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum ## <int> <int> <int> <int> <int> <int> <chr> <int> ## 1 2011 2 1 2 1401 1539 AA 428 ## 2 2011 2 1 2 NA NA AA 460 ## 3 2011 2 1 2 NA NA AA 533 ## 4 2011 2 1 2 NA NA AA 1121 ## 5 2011 2 1 2 1746 2109 AA 1294 ## 6 2011 2 1 2 NA NA AA 1436 ## 7 2011 2 1 2 1032 1358 AA 1700 ## 8 2011 2 1 2 NA NA AA 1820 
## 9 2011 2 1 2 558 912 AA 1994 ## 10 2011 2 1 2 1820 2112 AS 731 ## # … with 567 more rows, and 13 more variables: TailNum <chr>, ## # ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>, DepDelay <int>, ## # Origin <chr>, Dest <chr>, Distance <int>, TaxiIn <int>, TaxiOut <int>, ## # Cancelled <int>, CancellationCode <chr>, Diverted <int> If we want to use OR, that means keeping rows that match either of two values of an attribute filter(flights, UniqueCarrier =="AA" | UniqueCarrier == "UA" ) ## # A tibble: 5,316 x 21 ## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum ## <int> <int> <int> <int> <int> <int> <chr> <int> ## 1 2011 1 1 6 1400 1500 AA 428 ## 2 2011 1 2 7 1401 1501 AA 428 ## 3 2011 1 3 1 1352 1502 AA 428 ## 4 2011 1 4 2 1403 1513 AA 428 ## 5 2011 1 5 3 1405 1507 AA 428 ## 6 2011 1 6 4 1359 1503 AA 428 ## 7 2011 1 7 5 1359 1509 AA 428 ## 8 2011 1 8 6 1355
MID TERM ICSE CLASS X
[watupro 45]
CLASS XI – TERM 1 – TEST -2 (CBSE)
[watupro 44]
CLASS XI – MID TERM – MOCK TEST – I (CBSE)
[watupro 43]
Class X – Arithmetic Progression
[watupro 42]