https://stackoverflow.com/questions/37292872/how-can-i-one-hot-encode-in-python
# Transform a given column into one-hot columns. Use a prefix to have multiple dummies.
# >>> import pandas as pd
# >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], 'B': ['b', 'a', 'c']})
# >>> # Get one hot encoding of column B
# ...
# >>> df
#    A  B
# 0  a  b
# 1  b  a
# 2  c  c
# >>> one_hot = pd.get_dummies(df['B'])
# >>> # Drop column B as it is now encoded
# ...
# >>> df = df.drop('B', axis=1)
# >>> # Join the encoded df
# ...
# >>> df = df.join(one_hot)
# >>> df
#    A  a  b  c
# 0  a  0  1  0
# 1  b  1  0  0
# 2  c  0  0  1
def one_hot(df, cols):
    """Append one-hot (dummy) columns for every column named in ``cols``.

    The source columns are kept. For each name in ``cols`` the indicator
    columns produced by ``pd.get_dummies`` (prefixed with the column name)
    are appended to the right of the frame, in the order given by ``cols``.
    The input frame is not modified.

    @param df pandas DataFrame
    @param cols a list of columns to encode
    @return a DataFrame with the original columns followed by the one-hot
            columns; ``df`` itself is returned when ``cols`` is empty
    """
    # Build every dummy frame first and concatenate once: calling pd.concat
    # inside the loop re-copies the growing frame on each iteration.
    dummies = [
        pd.get_dummies(df[each], prefix=each, drop_first=False)
        for each in cols
    ]
    if not dummies:
        # Nothing to encode: hand back the caller's frame untouched.
        return df
    return pd.concat([df] + dummies, axis=1)
>>> from sklearn.preprocessing import OneHotEncoder
>>> enc = OneHotEncoder()
>>> enc.fit([[0, 0, 3], [1,1,0], [0,2,1], [1,0,2]])
OneHotEncoder(categorical_features='all', dtype=<class 'numpy.float64'>, handle_unknown='error', n_values='auto', sparse=True)
>>> enc.n_values_
array([2, 3, 4])
>>> enc.feature_indices_
array([0, 2, 5, 9])
>>> enc.transform([[0,1,1]])
<1x9 sparse matrix of type '<class 'numpy.float64'>' with 3 stored elements in Compressed Sparse Row format>
>>> enc.transform([[0,1,1]]).toarray()
array([[ 1., 0., 0., 1., 0., 0., 1., 0., 0.]])
from sklearn.preprocessing import LabelBinarizer

# The binarizer is fitted once at module level; one_hot_encode() reuses it, so
# it needs to be global (or otherwise remembered) to be usable later.
# NOTE(review): `all_your_labels_list` is a placeholder that is not defined in
# the visible source; it must exist before these statements run.
label_binarizer = LabelBinarizer()
label_binarizer.fit(all_your_labels_list)


def one_hot_encode(x):
    """
    One-hot encode a list of sample labels using the module-level binarizer.

    : x: List of sample labels
    : return: Numpy array of one-hot encoded labels, one vector per label
    """
    encoded = label_binarizer.transform(x)
    return encoded