I have a training data CSV with three columns (two for data and a third for targets) and I successfully predicted the target column for my test CSV. The problem is I need to inverse transform the results back to strings for further analysis. Below is the code and error.
from sklearn import datasets
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
# Train an SVM on label-encoded string columns, predict the test targets,
# and map the integer predictions back to their original string labels.
df_train = pd.read_csv('/Users/justinchristensen/Documents/Python_Education/SKLearn/Path_Training_Data.csv')
df_test = pd.read_csv('/Users/justinchristensen/Documents/Python_Education/SKLearn/Path_Test_Data.csv')
#Separate columns in training data set
x_train = df_train.iloc[:,:-1]
y_train = df_train.iloc[:,-1:]
#Separate columns in test data set
x_test = df_test.iloc[:,:-1]
#Initiate classifier
clf = svm.SVC(gamma=0.001, C=100)
le = LabelEncoder()
#Transform strings into integers
# One fitted encoder per feature column, kept so the test set can be encoded
# with the SAME string -> integer mapping that the model was trained on.
feature_encoders = {
    name: LabelEncoder().fit(x_train[name]) for name in x_train.columns
}
x_train_encoded = x_train.apply(
    lambda col: feature_encoders[col.name].transform(col)
)
# Fit `le` itself on the targets (flattened to 1-D, which is what
# LabelEncoder and SVC expect) so it can invert the predictions later.
y_train_encoded = le.fit_transform(y_train.values.ravel())
# NOTE: assumes the test CSV has the same feature column names as the
# training CSV; a value never seen during training makes transform() raise
# ValueError instead of silently receiving an unrelated integer code.
x_test_encoded = x_test.apply(
    lambda col: feature_encoders[col.name].transform(col)
)
#Fit the model into the classifier
clf.fit(x_train_encoded, y_train_encoded)
#Predict test values
y_pred = clf.predict(x_test_encoded)
#Convert the predicted integers back into the original string labels
y_pred_inverse = le.inverse_transform(y_pred)
The error
NotFittedError
Traceback (most recent call last)
<ipython-input-38-09840b0071d5> in <module>()
1
----> 2 y_pred_inverse = le.inverse_transform(y_pred)
~/anaconda3/lib/python3.6/site-packages/sklearn/preprocessing/label.py in inverse_transform(self, y)
146 y : numpy array of shape [n_samples]
147 """
--> 148 check_is_fitted(self, 'classes_')
149
150 diff = np.setdiff1d(y, np.arange(len(self.classes_)))
~/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
766
767 if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
--> 768 raise NotFittedError(msg % {'name': type(estimator).__name__})
769
770
NotFittedError: This LabelEncoder instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.
You need to use the same LabelEncoder object that you used to transform your targets in order to transform them back. Each time you used LabelEncoder you instantiated a new object, so the `le` instance you call `inverse_transform` on was never fitted. Use the same object.
Change the following line
# `le` is already an instance, so call its methods directly (no `le()`).
# Fit on the training targets only; .values.ravel() passes them as a 1-D array.
y_train_encoded = le.fit_transform(y_train.values.ravel())
# Reuse the fitted mapping for the test targets: transform(), not fit_transform(),
# so the encoding learned from the training targets is not overwritten.
y_test_encoded = le.transform(y_test.values.ravel())
Then use the same object to reverse the transformation, e.g. `le.inverse_transform(y_pred)`. You can also check the first example in the scikit-learn `LabelEncoder` documentation for reference.