I'm using the R Learner node in KNIME. I want to discretize the following matrix:
> my_matrix= as(knime.in,"matrix");
> dput(head(my_matrix, 5))
structure(c("KS", "OH", "NJ", "OH", "OK", "128", "107", "137",
" 84", " 75", "415", "415", "415", "408", "415", "No", "No",
"No", "Yes", "Yes", "Yes", "Yes", "No", "No", "No", "25", "26",
" 0", " 0", " 0", "265.1", "161.6", "243.4", "299.4", "166.7",
"110", "123", "114", " 71", "113", "45.07", "27.47", "41.38",
"50.90", "28.34", "197.4", "195.5", "121.2", " 61.9", "148.3",
" 99", "103", "110", " 88", "122", "16.78", "16.62", "10.30",
" 5.26", "12.61", "244.7", "254.4", "162.6", "196.9", "186.9",
" 91", "103", "104", " 89", "121", "11.01", "11.45", " 7.32",
" 8.86", " 8.41", "10.0", "13.7", "12.2", " 6.6", "10.1", " 3",
" 3", " 5", " 7", " 3", "2.70", "3.70", "3.29", "1.78", "2.73",
"1", "1", "0", "2", "3", "False", "False", "False", "False",
"False"), .Dim = c(5L, 20L), .Dimnames = list(c("Row0", "Row1",
"Row2", "Row3", "Row4"), c("State", "Account length", "Area code",
"International plan", "Voice mail plan", "Number vmail messages",
"Total day minutes", "Total day calls", "Total day charge", "Total eve minutes",
"Total eve calls", "Total eve charge", "Total night minutes",
"Total night calls", "Total night charge", "Total intl minutes",
"Total intl calls", "Total intl charge", "Customer service calls",
"Churn")))
I'm using the following code to discretize the matrix:
# Script for the KNIME R Learner node: convert the input table to a matrix and
# discretize its columns one at a time.
# arules presumably provides discretize() (signature quoted below) - confirm.
require(arules)
#require(arulesViz)
# Convert the KNIME input table to a matrix. typeof() below reports
# "character", so every cell (including the numeric-looking ones) is a string.
my_matrix= as(knime.in,"matrix");
# Matrix dimensions; not used in the lines shown here.
my_rows= nrow(my_matrix);
my_cols= ncol(my_matrix);
#discretize(x, method="interval", categories = 3, labels = NULL,
# ordered=FALSE, onlycuts=FALSE, ...)
typeof(my_matrix)
# Column 2 is "Account length"; the extracted vector is still character here.
vector = my_matrix[,2]
# Discretize the column into 3 labelled categories with method="interval" and
# assign the result back into column 2. This is the call that raises the error.
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
# Placeholders from the original post: the remaining columns follow the same pattern.
my_matrix[,3] = ...
etc...
At this line of code:
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
I get the following error:
Error in seq.default(from = min(x, na.rm = TRUE), to = max(x, na.rm = TRUE), : 'from' cannot be NA, NaN or infinite
If I put "sum(is.na(vector))" here:
vector = my_matrix[,2]
sum(is.na(vector))
my_matrix[,2] = discretize(vector, method="interval", categories = 3, labels=c("length0","length1","length2"))
I get:
> sum(is.na(vector))
[1] 0
So there are no NA elements in the vector. However, typeof(my_matrix) is "character". If I print the vector, I get the following:
> vector = my_matrix[,2]
> sum(is.na(vector))
[1] 0
> head(vector, 20)
Row0 Row1 Row2 Row3 Row4 Row5 Row6 Row7 Row8 Row9 Row10 Row11 Row12
"128" "107" "137" " 84" " 75" "118" "121" "147" "117" "141" " 65" " 74" "168"
Row13 Row14 Row15 Row16 Row17 Row18 Row19
" 95" " 62" "161" " 85" " 93" " 76" " 73"
The problem is that your vector consists of strings, while discretize() needs numeric input to compute the interval boundaries. Ideally you would solve this problem in KNIME itself; nodes for this kind of conversion exist.
However, you can also replace
vector = my_matrix[,2]
by
vector = as.numeric(my_matrix[,2])