Contenu connexe
Similaire à מערכות לומדות תרגול 3 עצים (20)
Plus de Igor Kleiner (20)
מערכות לומדות תרגול 3 עצים
# First model: logistic regression ----
# Fit over50k against every other column of the training set.
set.seed(2017)
clogit <- glm(over50k ~ ., family = "binomial", data = ctrain)
summary(clogit)

# Predicted probabilities on the test set, classified at a 0.5 threshold.
predictTest <- predict(clogit, newdata = ctest, type = "response")
# Fix both levels so the confusion matrix stays 2 x 2 even if every
# prediction falls on one side of the threshold.
logit_pred <- factor(predictTest >= 0.5, levels = c(FALSE, TRUE))
logit_cm <- table(ctest$over50k, logit_pred)
t(logit_cm)
# Accuracy = correctly classified cases (the diagonal) over all cases,
# computed from the table instead of counts copied from one run.
sum(diag(logit_cm)) / sum(logit_cm)

# Baseline: share of the most frequent class in the full data set.
class_counts <- table(cdata$over50k)
class_counts
max(class_counts) / sum(class_counts)

# AUC on the test set.
ROCRpred <- prediction(predictTest, ctest$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
###################################################
# summary logit: accuracy for t=0.5 0.859 auc=0.91 #
# (figures recorded from the original run)          #
###################################################
# Decision tree ----
# Classification tree with at least 100 observations per terminal node.
set.seed(2017)
ctree <- rpart(over50k ~ ., method = "class", data = ctrain, minbucket = 100)
prp(ctree)

# Class predictions and accuracy on the test set.
predictTest <- predict(ctree, newdata = ctest, type = "class")
tree_cm <- table(ctest$over50k, predictTest)
t(tree_cm)
# Accuracy = diagonal of the confusion matrix over the total, computed
# from the table instead of counts copied from one run.
sum(diag(tree_cm)) / sum(tree_cm)
# accuracy recorded from the original run: 0.846

# AUC: the default predict() output is a matrix with one column per class;
# column 2 is used as the score for ROCR.
predictTest <- predict(ctree, newdata = ctest)
predictTest <- predictTest[, 2]
ROCRpred <- prediction(predictTest, ctest$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
##################################################################
# summary tree minbucket=100: accuracy for t=0.5 0.846 auc=0.855 #
# (figures recorded from the original run)                        #
##################################################################
# Parameter tuning for the decision tree ----
# 10-fold cross-validation over cp values 0.001, 0.002, ..., 0.1.
set.seed(2017)
fitControl <- trainControl(method = "cv", number = 10)
cartGrid <- expand.grid(.cp = seq(0.001, 0.1, 0.001))
rezCV <- train(
  over50k ~ .,
  data = ctrain,
  method = "rpart",
  trControl = fitControl,
  tuneGrid = cartGrid
)
rezCV
# Refit a single tree with cp = 0.001.
# NOTE(review): presumably the cp picked by the cross-validation above - confirm
# against the printed rezCV output.
cvmod <- rpart(over50k ~ ., data = ctrain, method = "class", cp = 0.001)
# Evaluate the fitted model `crf` on the test set.
# NOTE(review): `crf` is fitted outside the visible part of this file;
# presumably a randomForest model - confirm.
predictTest <- predict(crf, newdata = ctest)
# accuracy calculation
rf_cm <- table(ctest$over50k, predictTest)
t(rf_cm)
# Accuracy = diagonal of the confusion matrix over the total, computed
# from the table instead of counts copied from one run.
sum(diag(rf_cm)) / sum(rf_cm)
# AUC calculation: column 2 of the class-probability output is the score.
predictTest <- predict(crf, newdata = ctest, type = "prob")
ROCRpred <- prediction(predictTest[, 2], ctest$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
##################################################################
# auc=0.889 acc=0.826 (figures recorded from the original run)   #
##################################################################
# Parameter tuning for the random forest ----
# May take a lot of time: one 10-fold CV fit per (ntree, mtry) pair.
metric <- "Accuracy"
control <- trainControl(method = "cv", number = 10, search = "grid")
modellist <- list()
for (ntree in c(700, 1000, 1200, 1400)) {
  for (mtry in c(2, 3, 4, 5, 6, 7, 9)) {
    # Single-row grid: each train() call evaluates exactly one mtry value.
    tunegrid <- expand.grid(.mtry = mtry)
    # Reseed before every fit so all combinations use the same random stream.
    set.seed(2017)
    fit <- train(
      over50k ~ .,
      # Tune on the first 5000 rows only; head() avoids NA rows if ctrain
      # has fewer than 5000 rows.
      data = head(ctrain, 5000),
      method = "rf",
      metric = metric,
      tuneGrid = tunegrid,
      trControl = control,
      ntree = ntree
    )
    # Key encodes both parameters in one number, e.g. 7000002 for
    # ntree = 700, mtry = 2.
    key <- toString(ntree * 10000 + mtry)
    modellist[[key]] <- fit
    print(c(ntree, mtry))
    print(fit)
  }
}
# Best random forest ----
# Refit on the full training set with ntree = 1400 and mtry = 2.
set.seed(2017)
crf <- randomForest(over50k ~ ., data = ctrain, ntree = 1400, mtry = 2)

# Class predictions and accuracy on the test set.
predictTest <- predict(crf, newdata = ctest)
best_rf_cm <- table(ctest$over50k, predictTest)
t(best_rf_cm)
# Accuracy = diagonal of the confusion matrix over the total, computed
# from the table instead of counts copied from one run.
sum(diag(best_rf_cm)) / sum(best_rf_cm)

# AUC: column 2 of the class-probability matrix is the score.
predictTest <- predict(crf, newdata = ctest, type = "prob")
ROCRpred <- prediction(predictTest[, 2], ctest$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
##################################################################
# auc=0.901 accuracy=0.827 (figures recorded from the original run) #
##################################################################
#################################################################
# Winner: logit. Evaluate it on the validation set ----
set.seed(2017)
clogit <- glm(over50k ~ ., family = "binomial", data = ctrain)
summary(clogit)

# Predicted probabilities and accuracy on the validation set (threshold 0.5).
predictTest <- predict(clogit, newdata = cvalidation, type = "response")
# Fix both levels so the confusion matrix stays 2 x 2 even if every
# prediction falls on one side of the threshold.
validation_pred <- factor(predictTest >= 0.5, levels = c(FALSE, TRUE))
validation_cm <- table(cvalidation$over50k, validation_pred)
t(validation_cm)
# Accuracy = diagonal of the confusion matrix over the total, computed
# from the table instead of counts copied from one run.
sum(diag(validation_cm)) / sum(validation_cm)

# AUC on the validation set.
ROCRpred <- prediction(predictTest, cvalidation$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
################################
# Bagging: random forest with mtry = number of features ----
# NOTE(review): mtry = 12 is meant to equal the number of predictors, per the
# original note - verify against ncol(ctrain) - 1.
set.seed(2017)
# build model
crf <- randomForest(over50k ~ ., data = ctrain, mtry = 12)
# predict on test
predictTest <- predict(crf, newdata = ctest)
# accuracy calculation
bagging_cm <- table(ctest$over50k, predictTest)
t(bagging_cm)
# Accuracy = diagonal of the confusion matrix over the total, computed
# from the table instead of counts copied from one run.
sum(diag(bagging_cm)) / sum(bagging_cm)
# AUC calculation: column 2 of the class-probability matrix is the score.
predictTest <- predict(crf, newdata = ctest, type = "prob")
ROCRpred <- prediction(predictTest[, 2], ctest$over50k)
as.numeric(performance(ROCRpred, "auc")@y.values)
importance(crf)
##################################################################
# auc=0.848 acc=0.817 (figures recorded from the original run)   #
##################################################################
#####################
# Boosting ----
# Install gbm only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("gbm", quietly = TRUE)) {
  install.packages("gbm")
}
library(gbm)
# Transform the dependent variable into a 0/1 variable: factor codes 1/2
# become 0/1. Guarded with is.factor() so running this section twice does not
# shift already-converted values to -1/0.
if (is.factor(ctrain$over50k)) {
  ctrain$over50k <- as.integer(ctrain$over50k) - 1
}
if (is.factor(ctest$over50k)) {
  ctest$over50k <- as.integer(ctest$over50k) - 1
}
# build model
set.seed(2017)
cboost <- gbm(
  over50k ~ .,
  data = ctrain,
  distribution = "bernoulli",
  n.trees = 5000,
  interaction.depth = 4
)
# prediction: type = "response" requests predictions on the response scale,
# using all 5000 trees.
predictTest <- predict(cboost, newdata = ctest, n.trees = 5000, type = "response")