# ds ----
# Outlier treatment for the bike data in day.csv: values of `hum` and
# `windspeed` lying more than two standard deviations from the column mean
# are replaced by the column median, with a boxplot drawn before and after
# each step.
#
# The original script ran this pipeline twice back to back; the second pass
# re-read day.csv and therefore discarded the first, so it is run once here.
# The section label above is kept as a comment because a bare `ds` line is
# evaluated as an (undefined) object and stops the script.

# Directory holding the data sets. The file is read through file.path() so
# the session's working directory is left untouched.
data_dir <- "D:\\TYCS\\B1\\DS_16\\DataSets"

# Columns drawn in every boxplot.
plot_cols <- c("temp", "atemp", "hum", "windspeed")

# Replace every value of `x` lying more than `k` standard deviations from
# mean(x) with median(x); all other values are returned unchanged.
# All statistics are computed on `x` exactly as passed in (no NA removal).
replace_outliers_with_median <- function(x, k = 2) {
  centre <- mean(x)
  spread <- sd(x)
  upper <- centre + k * spread
  lower <- centre - k * spread
  ifelse(x > upper | x < lower, median(x), x)
}

bike_data <- read.csv(file.path(data_dir, "day.csv"), header = TRUE)
boxplot(bike_data[, plot_cols])  # before any treatment

bike_data$hum <- replace_outliers_with_median(bike_data$hum)
boxplot(bike_data[, plot_cols])  # after treating hum

sd(bike_data$windspeed)  # spread of windspeed before treatment
bike_data$windspeed <- replace_outliers_with_median(bike_data$windspeed)
boxplot(bike_data[, plot_cols])  # after treating windspeed
# p-7 ----
# Titanic survival: a logistic regression and a decision tree are fitted on
# a 70/30 train/test split and each is evaluated with a confusion matrix.
# The section label above is a comment because a bare `p-7` line is
# evaluated as the arithmetic expression `p - 7`.

# Install only the packages that are missing (instead of reinstalling on
# every run), then attach them. caret is included because the script
# attaches it but never installed it.
required_pkgs <- c("caTools", "dplyr", "rpart", "rpart.plot", "caret")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(caTools)
library(dplyr)
library(rpart)
library(rpart.plot)
library(caret)

# Read the data through file.path() so the working directory is not changed.
data_dir <- "D:\\TYCS\\B1\\DS_16\\DataSets"
x <- read.csv(file.path(data_dir, "titanic.csv"), header = TRUE)

# Drop rows containing NA, then drop the Cabin column.
newdata <- na.omit(x)
data <- select(newdata, -Cabin)

# sample.split() takes the vector of outcome labels and returns one flag per
# element. Passing the whole data frame (as before) produced one flag per
# COLUMN, which was then silently recycled across the rows.
sample_data <- sample.split(data$Survived, SplitRatio = 0.7)
train_data <- data[sample_data, ]
test_data <- data[!sample_data, ]

# Logistic regression. family = binomial is required for a logistic fit;
# without it glm() fits an ordinary (gaussian) linear model.
# NOTE(review): assumes Survived is coded 0/1, as the 0.5 cut-off below
# implies - confirm against titanic.csv.
# NOTE(review): PassengerId looks like a row identifier rather than a real
# predictor; it is kept because the original formula used it.
Model <- glm(
  Survived ~ PassengerId + Pclass + Age + SibSp + Parch,
  data = train_data,
  family = binomial
)
summary(Model)

# type = "response" returns probabilities, so the 0.5 threshold is applied
# on the probability scale rather than on the linear predictor.
predicted <- predict(Model, newdata = test_data, type = "response")
test_data$predicted <- ifelse(predicted > 0.5, 1, 0)

# Fix the factor levels so confusionMatrix() still works when only one
# class happens to be predicted.
actual <- factor(test_data$Survived, levels = c(0, 1))
predicted <- factor(test_data$predicted, levels = c(0, 1))
confusionMatrix(predicted, actual, mode = "everything")

# Decision tree on the same predictors; its predictions are thresholded at
# 0.5 in the same way as the regression above.
rtree <- rpart(
  Survived ~ PassengerId + Pclass + Age + SibSp + Parch,
  data = train_data
)
y <- predict(rtree, newdata = test_data)
test_data$y <- ifelse(y > 0.5, 1, 0)
rpart.plot(rtree, main = "Decision Tree for Titanic Dataset")

a <- factor(test_data$Survived, levels = c(0, 1))
p <- factor(test_data$y, levels = c(0, 1))
confusionMatrix(p, a, mode = "everything")
# p-8 ----
# Principal component analysis of columns 1 to 4 of the built-in iris data,
# followed by factoextra plots of the result.
# The section label above is a comment because a bare `p-8` line is
# evaluated as the arithmetic expression `p - 8`.

# Install only what is missing, then attach the packages up front.
required_pkgs <- c("dplyr", "factoextra")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(dplyr)
library(factoextra)

# The data viewer is only opened in an interactive session.
if (interactive()) {
  View(iris)
}

# Keep the first four columns of iris.
mydata <- select(iris, 1:4)
# cor(mydata)
mean(cor(mydata))  # average of all entries of the correlation matrix

# princomp() is called with its defaults, as in the original script.
PCA <- princomp(mydata)
summary(PCA)
PCA$loadings

# Component scores and the correlations between them.
PC <- PCA$scores
cor(PC)

# Eigenvalues, scree plot, variable plot and biplot.
get_eig(PCA)
fviz_eig(PCA, addlabels = TRUE)
fviz_pca_var(PCA, col.var = "contrib")
fviz_pca_biplot(PCA, col.ind = "Blue", geom = "point") +
  labs(title = "PCA", x = "PC1", y = "PC2")
# p-6 ----
# Linear regression
# Simple linear regression of mpg on drat using the built-in mtcars data,
# fitted on a 70/30 split produced by caTools::sample.split().
# The section label above is a comment because a bare `p-6` line is
# evaluated as the arithmetic expression `p - 6`.

# Install caTools only when it is missing.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools)

head(mtcars)

# Logical mask with one flag per row: TRUE rows train, FALSE rows test.
a <- sample.split(mtcars$mpg, SplitRatio = 0.7)
training_data <- mtcars[a, ]
testing_data <- mtcars[!a, ]
# testing_data
dim(training_data)
dim(testing_data)

plot(mpg ~ drat, data = mtcars)
Model <- lm(mpg ~ drat, data = training_data)

# Stored as model_summ rather than `t`, which would mask base::t().
model_summ <- summary(Model)
model_summ

# Training points with the fitted line drawn over them.
plot(mpg ~ drat, col = "blue", cex = 1.3, pch = 16, data = training_data)
abline(Model)

# calculate MSE (mean squared residual of the fitted model)
mean(model_summ$residuals^2)

# Predictions for the held-out rows, stored next to the observed values.
Test <- predict(Model, newdata = testing_data)
testing_data$Test <- Test
if (interactive()) {
  View(testing_data)
}

# Prediction for a single new observation with drat = 3.90.
result <- predict(Model, data.frame(drat = 3.90))
print(result)
# multiple regression
# Regression of mpg on cyl and disp using the built-in mtcars data, fitted
# on a 70/30 split produced by caret::createDataPartition().
library(caret)
data(mtcars)
head(mtcars)

# Row indices of the training partition; the remaining rows form the test set.
in_train <- createDataPartition(y = mtcars$mpg, p = 0.7, list = FALSE)
training_data <- mtcars[in_train, ]
testing_data <- mtcars[-in_train, ]

# Fit on the training partition. The original referred to `trainig_data`,
# a misspelling that fails with "object not found".
Model <- lm(mpg ~ cyl + disp, data = training_data)
model_summ <- summary(Model)
model_summ

# calculate MSE (mean squared residual of the fitted model)
mean(model_summ$residuals^2)

# Predictions for the held-out rows, stored next to the observed values.
Test <- predict(Model, newdata = testing_data)
testing_data$Test <- Test
if (interactive()) {
  View(testing_data)
}

# Prediction for a single new observation (cyl = 6, disp = 160).
new <- data.frame(cyl = c(6), disp = c(160))
results <- predict(Model, newdata = new)
print(results)
# No comments