/snippetz

Snippet functions I use frequently to simplify life

Primary Language: Python

Snippet Functions

This repo consists of snippet functions that I use to make life easier while working on data science projects.

Encoding

# Label-encode several categorical columns of the DataFrame `data` in place.
# `data` is expected to already exist and to contain every column listed in
# `categorical_cols`.
categorical_cols = ['a', 'b', 'c', 'd']

from sklearn.preprocessing import LabelEncoder

# Use one encoder per column. Re-fitting a single shared LabelEncoder for every
# column keeps only the mapping of the last column, which makes it impossible
# to inverse_transform the others or to apply the same mapping to new data.
label_encoders = {col: LabelEncoder() for col in categorical_cols}

# Replace each categorical column with its integer codes; the fitted encoder
# for a column stays available as label_encoders[col].
for col, encoder in label_encoders.items():
    data[col] = encoder.fit_transform(data[col])

# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels; OrdinalEncoder is the feature-oriented equivalent - consider it if
# these columns are model inputs.
# One-hot-encode the columns listed in `categorical_cols` of the DataFrame
# `data` and join the result with the remaining, untouched columns.
# `data` and `categorical_cols` are expected to already exist.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()

# One-hot-encode the categorical columns.
# With default settings fit_transform returns a SciPy sparse matrix rather than
# a DataFrame, so convert it to a dense array before handing it to pandas.
array_hot_encoded = ohe.fit_transform(data[categorical_cols]).toarray()

# Convert it to a DataFrame, reusing the original index so the rows line up
# when concatenating below. Columns are labelled 0..k-1; on scikit-learn >= 1.0
# you can pass columns=ohe.get_feature_names_out(categorical_cols) for
# readable names instead.
data_hot_encoded = pd.DataFrame(array_hot_encoded, index=data.index)

# Extract only the columns that did not need to be encoded.
data_other_cols = data.drop(columns=categorical_cols)

# Concatenate the two DataFrames side by side: encoded columns first, then the
# untouched ones.
data_out = pd.concat([data_hot_encoded, data_other_cols], axis=1)