Confusion matrix error when array dimensions are of size 3

blue-sky picture blue-sky · May 18, 2017 · Viewed 7.2k times · Source

This code:

from pandas_ml import ConfusionMatrix
y_actu = [1,2]
y_pred = [1,2]
cm = ConfusionMatrix(y_actu, y_pred)
cm.print_stats()

prints:

population: 2
P: 1
N: 1
PositiveTest: 1
NegativeTest: 1
TP: 1
TN: 1
FP: 0
FN: 0
TPR: 1.0
TNR: 1.0
PPV: 1.0
NPV: 1.0
FPR: 0.0
FDR: 0.0
FNR: 0.0
ACC: 1.0
F1_score: 1.0
MCC: 1.0
informedness: 1.0
markedness: 1.0
prevalence: 0.5
LRP: inf
LRN: 0.0
DOR: inf
FOR: 0.0
/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/bcm.py:332: RuntimeWarning: divide by zero encountered in double_scalars
  return(np.float64(self.TPR) / self.FPR)

This is expected.

Modifying the code to:

from pandas_ml import ConfusionMatrix
y_actu = [1,2,3]
y_pred = [1,2,3]
cm = ConfusionMatrix(y_actu, y_pred)
cm.print_stats()

The only change I made is:

y_actu = [1,2,3]
y_pred = [1,2,3]

This results in an error:

OrderedDict([('Accuracy', 1.0), ('95% CI', (0.29240177382128668, nan)), ('No Information Rate', 'ToDo'), ('P-Value [Acc > NIR]', 0.29629629629629622), ('Kappa', 1.0), ("Mcnemar's Test P-Value", 'ToDo')])

ValueError                                Traceback (most recent call last)
<ipython-input-30-d8c5dc2bea73> in <module>()
      3 y_pred = [1,2,3]
      4 cm = ConfusionMatrix(y_actu, y_pred)
----> 5 cm.print_stats()

/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in print_stats(self, lst_stats)
    446         Prints statistics
    447         """
--> 448         print(self._str_stats(lst_stats))
    449 
    450     def get(self, actual=None, predicted=None):

/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in _str_stats(self, lst_stats)
    427         }
    428 
--> 429         stats = self.stats(lst_stats)
    430 
    431         d_stats_str = collections.OrderedDict([

/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in stats(self, lst_stats)
    390         d_stats = collections.OrderedDict()
    391         d_stats['cm'] = self
--> 392         d_stats['overall'] = self.stats_overall
    393         d_stats['class'] = self.stats_class
    394         return(d_stats)

/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/cm.py in __getattr__(self, attr)
     33         Returns (weighted) average statistics
     34         """
---> 35         return(self._avg_stat(attr))

/opt/conda/lib/python3.5/site-packages/pandas_ml/confusion_matrix/abstract.py in _avg_stat(self, stat)
    509             v = getattr(binary_cm, stat)
    510             print(v)
--> 511             s_values[cls] = v
    512         value = (s_values * self.true).sum() / self.population
    513         return(value)

/opt/conda/lib/python3.5/site-packages/pandas/core/series.py in __setitem__(self, key, value)
    771         # do the setitem
    772         cacher_needs_updating = self._check_is_chained_assignment_possible()
--> 773         setitem(key, value)
    774         if cacher_needs_updating:
    775             self._maybe_update_cacher()

/opt/conda/lib/python3.5/site-packages/pandas/core/series.py in setitem(key, value)
    767                     pass
    768 
--> 769             self._set_with(key, value)
    770 
    771         # do the setitem

/opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_with(self, key, value)
    809             if key_type == 'integer':
    810                 if self.index.inferred_type == 'integer':
--> 811                     self._set_labels(key, value)
    812                 else:
    813                     return self._set_values(key, value)

/opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_labels(self, key, value)
    826         if mask.any():
    827             raise ValueError('%s not contained in the index' % str(key[mask]))
--> 828         self._set_values(indexer, value)
    829 
    830     def _set_values(self, key, value):

/opt/conda/lib/python3.5/site-packages/pandas/core/series.py in _set_values(self, key, value)
    831         if isinstance(key, Series):
    832             key = key._values
--> 833         self._data = self._data.setitem(indexer=key, value=value)
    834         self._maybe_update_cacher()
    835 

/opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in setitem(self, **kwargs)
   3166 
   3167     def setitem(self, **kwargs):
-> 3168         return self.apply('setitem', **kwargs)
   3169 
   3170     def putmask(self, **kwargs):

/opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
   3054 
   3055             kwargs['mgr'] = self
-> 3056             applied = getattr(b, f)(**kwargs)
   3057             result_blocks = _extend_blocks(applied, result_blocks)
   3058 

/opt/conda/lib/python3.5/site-packages/pandas/core/internals.py in setitem(self, indexer, value, mgr)
    685                         indexer.dtype == np.bool_ and
    686                         len(indexer[indexer]) == len(value)):
--> 687                     raise ValueError("cannot set using a list-like indexer "
    688                                      "with a different length than the value")
    689 

ValueError: cannot set using a list-like indexer with a different length than the value

Reading "Assignment to containers in Pandas", I found the statement: "Using endemic lists is not allowed on assignment and is not recommended to do this at all." Have I created an endemic list? What is an endemic list?

Answer

spies006 picture spies006 · May 22, 2017

I would recommend using confusion_matrix from scikit-learn. The other metrics that you mention, such as precision, recall and F1-score, are also available from sklearn.metrics.

>>> from sklearn.metrics import confusion_matrix
>>> y_actu = [1,2,3]
>>> y_pred = [1,2,3]
>>> confusion_matrix(y_actu, y_pred)
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]])