I'm getting this error:
Something is wrong; all the Accuracy metric values are missing:
Accuracy Kappa
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :5 NA's :5
Error in train.default(x, y, weights = w, ...) : Stopping
In addition: Warning message:
In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures.
The first link suggests that the levels of the response variable cannot be `0` and `1`. This is not the case in my data:
R> str(test$y)
Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
R> levels(test$y)
[1] "No" "Yes"
So, I'm not sure what's going on.
test <- structure(list(y = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("No", "Yes"), class = "factor"), x1 = structure(c(6L,
40L, 26L, 7L, 18L, 9L, 26L, 36L, 23L, 16L, 6L, 20L, 23L, 26L,
41L, 20L, 31L, 7L, 2L, 2L, 18L, 2L, 12L, 9L, 40L, 40L, 14L, 8L,
2L, 20L, 15L, 12L, 8L, 17L, 17L, 21L, 18L, 32L, 2L, 2L), .Label = c("Accommodation and Restaurant Services",
"Admin/Support Services", "Agriculture", "Arts, Entertainment, and Rec.",
"Construction: Heavy and Civil Engineering", "Construction: of Buildings",
"Construction: Specialty Trade Contractors", "EDU Services",
"Finance / Insurance", "Fishing, Hunting, Trapping", "Forestry & Logging",
"Health Care and Social Assistance", "Information", "Management of Companies and Enterprises",
"Manufacturing: Food/Bev/Textile", "Manufacturing: Metals/Machinery/Computers/Appliances",
"Manufacturing: Wood/Paper/Chemical/Mineral", "Merchandise Trade",
"Mining, Quarrying, and Oil and Gas Extraction", "Other Services (Blue Collar)",
"Prof./Sci./Tech: Acct / Tax", "Prof./Sci./Tech: Advertising / Media",
"Prof./Sci./Tech: Architecture / Eng.", "Prof./Sci./Tech: Computer Design",
"Prof./Sci./Tech: Law", "Prof./Sci./Tech: Mgmt Consulting", "Prof./Sci./Tech: Other",
"Prof./Sci./Tech: R&D", "Prof./Sci./Tech: Specialized Design",
"Public Admin.", "Real Estate", "Retail Trade", "Support Agriculture",
"Transportation", "Unknown", "Utilities", "Warehousing", "Waste Management & Remediation Services",
"Wholesale Trade: Brokers", "Wholesale Trade: Durable Goods",
"Wholesale Trade: NonDurable Goods"), class = "factor"), x2 = structure(c(36L,
11L, 35L, 46L, 5L, 10L, 37L, 41L, 11L, 5L, 5L, 10L, 20L, 10L,
5L, 5L, 45L, 20L, 11L, 10L, 18L, 35L, 5L, 6L, 41L, 5L, 44L, 36L,
39L, 10L, 44L, 8L, 34L, 15L, 39L, 10L, 18L, 19L, 35L, 11L), .Label = c("AK",
"AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI",
"IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI",
"MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV",
"NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT",
"VA", "VT", "WA", "WI", "WV", "WY"), class = "factor"), x3 = c(0.004714,
0, 0.015551, 0.360246999999988, 5e-04, 0.035714, 0.357143, 0.00591043019290109,
0.138889, 0.028846, 0.0075, 0.00051, 0.006329, 0.065789, 0.1125,
0.003125, 0.003889, 0.000391, 0.011905, 0.004, 0, 0.00025, 0.005,
0.076923, 0.149254, 0.0220719438793245, 0.360246999999988, 0.057692,
0, 0.015625, 0.000714, 0, 0.001087, 0.006135, 0.003846, 0.066667,
0.009091, 0, 0.360246999999988, 0.012821), x4 = c(3.69626899674553,
0, 4.34824643385123, 4.22834902062364, 2.94001815500766, 3.27207378750001,
4.61543448110941, 4.56919828334781, 4.32498170308737, 3.73719264270474,
3.87511916546257, 1.70757017609794, 3.76499759928488, 3.7635028654676,
4.15094055396548, 3.43949059038968, 3.70423633730879, 3.18864729599972,
2.85186960072977, 2.37291200297011, 0, 2.69983772586725, 3.23829706787539,
3.17695898058691, 4.32314893008404, 0, 4.64518638929519, 3.17405980772503,
0, 2.5092025223311, 2.47856649559384, 0, 2.06818586174616, 4.08439751914115,
3.50906804501716, 3.02160271602824, 2.71349054309394, 0, 4.6020708485543,
2.79657433321043), x5 = c(472, 502, 506, 510, 497, 493, 515,
542, 557, 465, 480, 369.618950156498, 518, 571, 512, 520, 464,
578, 500, 526, 489.830047438596, 345, 664.964755505884, 546,
505, 572, 540, 567, 473, 575, 558, 509.58218597766, 579, 616,
561, 581, 291, 415.846613389669, 476, 442), x6 = c(374, 482,
491, 540, 534, 493, 514, 570, 577, 485, 488, 627, 542, 529, 445,
531, 456, 535, 381, 586, 474.392596434054, 484, 487.854513298151,
518, 524, 582, 530, 571, 582.582737417662, 572, 592, 477, 585,
594, 574, 609, 389, 581.722630168064, 550, 458), x7 = c(5.8e-05,
0, 0.015551, 0.01, 0, 0, 0.0683816249999983, -0.00050051658067362,
0.068194, 0.056615, 0, 0, 0.001097, 0, 0.0683816249999983, 0,
0.002361, 0.000781, 0.021667, 0, 0, 0, 0, 0.001154, 0.001, -0.000657947357427473,
0, 0, 0, 0, 0, 0, 0, 0.001479, 0.001269, 0.005333, 0.000455,
0, 0, 0), x8 = c(14, 13, 53, 24, 8, 13, 13, 20, 17, 35, 19, 11,
42, 15, 33, 1, 20, 6, 24, 3, 14, 3, 3, 17, 42, 8, 4, 0, 5, 4,
10, 5, 8, 41, 31, 6, 2, 18, 7, 7), x9 = c(18, 2, 49, 19, 14,
8, 7, 6, 7, 21, 19, 1, 34, 2, 24, 3, 30, 5, 3, 12, 9, 4, 2, 9,
59, 15, 7, 0, 20, 1, 6, 13, 1, 64, 34, 18, 12, 0, 0, 6), x10 = c(48,
68.8884165199473, 63, 54, 78, 80, 77.3502747403963, 74, 79, 71,
76.7682937433346, 65.0624751538981, 63, 80, 41, 81.4257054732527,
67, 78, 80, 73, 52.5390991618267, 60.8813703575155, 66, 72, 64,
61.266324949851, 43.2207804060158, 80, 61.708917114202, 80, 75,
73.3412226739437, 80, 78, 57, 78, 23, 30.321279640657, 69.1391208799255,
60.9766796474371), x11 = c(4.62, 0.81, 1.98, 1.51, 1.51, 1.2,
0.74, 1.2, 4.04, 2.06, 1.43, 1.51, 4.16, 0.81, 0.81, 1.82, 2.1,
0.89, 0.73, 0.97, 20.49, 1.51, 1.51, 4.09, 1.33, 0.89, 1.59,
1.43, 4.54, 1.51, 1.2, 1.04, 1.59, 2.57, 4.4, 1.28, 0.89, 17.94,
1.29, 1.59), x12 = c(-3, -44.4574826440087, 1, 5, 2, 2, 39.0861520260711,
14, 0, -6, 40.5638314058397, 22.0124501206663, 3, 12, 27, 7.55072978911628,
5, -1, -12, 0, 14.5217398963732, -2.06782290930381, -13, 4, 1,
39.251983622172, 0, 0, 33.2355632837177, 0, 6, 20.3416928763606,
40.7136165846826, -2, 7, 0, 9, 0.622995283657772, -6.64967287401836,
-3.6632790085156)), .Names = c("y", "x1", "x2", "x3", "x4", "x5",
"x6", "x7", "x8", "x9", "x10", "x11", "x12"), row.names = c(59110L,
266133L, 110275L, 271642L, 54361L, 54818L, 59197L, 94902L, 80531L,
291L, 51460L, 228662L, 174960L, 27500L, 105584L, 132839L, 233895L,
194802L, 123435L, 165332L, 318615L, 133731L, 256878L, 99780L,
31551L, 106032L, 280841L, 130066L, 136252L, 29868L, 282962L,
55762L, 312670L, 152593L, 50020L, 220877L, 13104L, 20888L, 319386L,
229603L), class = "data.frame")
Based on comments both here and on github/caret, I have updated the code. The non-parallel forest now works, but the parallel forests do not.
# Reproduction script: fits caret random-forest variants on `test`, first
# sequentially and then with a doParallel cluster registered, to show which
# train() calls fail. The calls are kept exactly as they were run.
test$x7 <- NULL # remove low variance "dummy" variable
# based on comments on github (link above).
library(caret)
library(randomForest)
library(party) # conditional RF
library(kernlab)
library(parallel)
library(doParallel)
# Resampling scheme shared by every train() call below: 10-fold CV, one repeat.
t_control <- trainControl(method= "repeatedcv", number= 10,
repeats= 1)
# ncol(test) counts the response column y as well as the predictors.
mtry_def <- floor(sqrt(ncol(test)))
# Tuning grid: half, equal to, and double the default mtry.
# NOTE(review): with 12 columns left, mtry_def is 3, so mtry_def/2 is 1.5 --
# a non-integer mtry; confirm this is intended.
t_grid <- expand.grid(mtry= c(mtry_def/2, mtry_def, 2 * mtry_def))
set.seed(14387)
## works without parallel (after removing options per @topepo):
rf1 <- train(y ~ ., data= test,
method= "cforest", trControl= t_control,
tuneGrid= t_grid) # remove verbose, importance, proximity
## doesn't work with parallel:
# Start a cluster on all but one core and register it as the foreach backend.
cl <- makeCluster(detectCores() - 1)
registerDoParallel(cl)
# In the three calls below allowParallel is given to train(), not to
# trainControl(), so it travels through `...` toward the model-fitting function.
rf1 <- train(y ~ ., data= test,
method= "cforest", trControl= t_control,
tuneGrid= t_grid, allowParallel= TRUE) # same errors as prior to edit
rf2 <- train(y ~ ., data= test,
method= "parRF", trControl= t_control, verbose= FALSE,
tuneGrid= t_grid, allowParallel= TRUE, proximity= FALSE,
importance= TRUE) # same errors as prior to edit
# moving from method= "parRF" --> method= "rf" does work:
rf3 <- train(y ~ ., data= test,
method= "rf", trControl= t_control, verbose= FALSE,
tuneGrid= t_grid, allowParallel= TRUE, proximity= FALSE,
importance= TRUE)
# Shut the worker processes down once the parallel fits are finished.
stopCluster(cl)
# defaults (ie-- outside caret) work
rf3a <- randomForest(y ~ ., data= test, mtry= 3, importance=TRUE)
rf3b <- cforest(y ~ ., data= test, controls= cforest_control(mtry= 3))
# updated sessionInfo() -- AM running on a different computer
R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] stats4 grid parallel stats graphics grDevices utils datasets methods base
other attached packages:
[1] kernlab_0.9-22 party_1.0-23 strucchange_1.5-1 sandwich_2.3-4 zoo_1.7-12 modeltools_0.2-21
[7] mvtnorm_1.0-3 randomForest_4.6-10 caret_6.0-52 ggplot2_1.0.1 lattice_0.20-33 doParallel_1.0.8
[13] iterators_1.0.7 foreach_1.4.2
loaded via a namespace (and not attached):
[1] Rcpp_0.12.1 compiler_3.2.2 nloptr_1.0.4 plyr_1.8.3 class_7.3-13 tools_3.2.2
[7] digest_0.6.8 lme4_1.1-9 nlme_3.1-122 gtable_0.1.2 mgcv_1.8-7 Matrix_1.2-2
[13] brglm_0.5-9 SparseM_1.7 coin_1.1-0 proto_0.3-10 e1071_1.6-7 BradleyTerry2_1.0-6
[19] stringr_1.0.0 gtools_3.5.0 MatrixModels_0.4-1 nnet_7.3-11 survival_2.38-3 multcomp_1.4-1
[25] TH.data_1.0-6 minqa_1.2.4 reshape2_1.4.1 car_2.1-0 magrittr_1.5 scales_0.3.0
[31] codetools_0.2-14 MASS_7.3-43 splines_3.2.2 pbkrtest_0.4-2 colorspace_1.2-6 quantreg_5.19
[37] stringi_0.5-5 munsell_0.4.2
#### original sessionInfo()
R> sessionInfo()
R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] parallel stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] doParallel_1.0.8 iterators_1.0.7 foreach_1.4.2 kernlab_0.9-22 party_1.0-23 strucchange_1.5-1
[7] sandwich_2.3-3 zoo_1.7-12 modeltools_0.2-21 mvtnorm_1.0-3 randomForest_4.6-10 caret_6.0-52
[13] ggplot2_1.0.1 lattice_0.20-33
loaded via a namespace (and not attached):
[1] Rcpp_0.12.1 compiler_3.2.2 nloptr_1.0.4 plyr_1.8.3 class_7.3-13 tools_3.2.2
[7] digest_0.6.8 lme4_1.1-9 gtable_0.1.2 nlme_3.1-121 mgcv_1.8-7 Matrix_1.2-2
[13] SparseM_1.7 brglm_0.5-9 coin_1.1-0 proto_0.3-10 e1071_1.6-7 BradleyTerry2_1.0-6
[19] stringr_1.0.0 MatrixModels_0.4-1 gtools_3.5.0 nnet_7.3-10 survival_2.38-3 multcomp_1.4-1
[25] TH.data_1.0-6 minqa_1.2.4 car_2.1-0 reshape2_1.4.1 magrittr_1.5 scales_0.3.0
[31] codetools_0.2-14 splines_3.2.2 MASS_7.3-43 pbkrtest_0.4-2 colorspace_1.2-6 quantreg_5.19
[37] stringi_0.5-5 munsell_0.4.2
Any help would be greatly appreciated, thanks!!
When I run the first `cforest` model, I can see that "In addition: There were 31 warnings (use warnings() to see them)". These say:
unused arguments (verbose = FALSE, proximity = FALSE, importance = TRUE)
These are arguments to the `randomForest` function and not `cforest`. Removing them removes the errors.
Update for the update:
This looks like confusion over the `...` and where `allowParallel` can be invoked. When running the code for `rf1`, I get these warnings:
unused argument (allowParallel = TRUE)
Looking at `?train` and `?cforest`, neither has that argument; it is in `trainControl`.
Here is the confusing part: running `rf3` with `allowParallel` as an argument to `train` does not generate an error. This is because `cforest` does not have the ellipsis (`...`) and `randomForest` does:
> names(formals(cforest))
[1] "formula" "data" "subset" "weights" "controls" "xtrafo"
[7] "ytrafo" "scores"
> names(formals(randomForest:::randomForest.default))
[1] "x" "y" "xtest" "ytest"
[5] "ntree" "mtry" "replace" "classwt"
[9] "cutoff" "strata" "sampsize" "nodesize"
[13] "maxnodes" "importance" "localImp" "nPerm"
[17] "proximity" "oob.prox" "norm.votes" "do.trace"
[21] "keep.forest" "corr.bias" "keep.inbag" "..."
So, for `rf1` there is no "bottomless pit" to send the inappropriate argument (`allowParallel`), but for `rf3` there is a sequence of `...` arguments and none of the functions ever has a terminal test to see if `allowParallel` is an inappropriate argument.
tl;dr
Pass `allowParallel` to `trainControl` and not `train`.
Max