Data analytics model for an SMS spam classifier: based on the text/message entered, it classifies the text as spam or non-spam.
- to classify the messages as spam or non-spam
- can be used in other projects using the model
Loading important Files/Modules
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import string
# Load the pickled vectorizer and model from disk. Context managers make sure
# both file handles are closed as soon as the objects are deserialized.
# NOTE: pickle can execute arbitrary code while loading; only load .pkl files
# that come from a trusted source.
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
sms_text Pre-Processing
# Fetch the NLTK data this notebook relies on:
#   - 'stopwords' backs stopwords.words('english')
#   - 'punkt' / 'punkt_tab' back nltk.word_tokenize (which resource is needed
#     depends on the installed NLTK version, so both are requested)
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)

# Shared stemmer instance used by transform_text.
ps = PorterStemmer()
def transform_text(text: str) -> str:
    """Normalize a raw SMS message into a space-separated string of stems.

    Steps: lower-case the text, tokenize it with ``nltk.word_tokenize``,
    keep only alphanumeric tokens, drop English stopwords, and stem the
    remaining tokens with the module-level ``ps`` stemmer.

    Args:
        text: The raw message text.

    Returns:
        The processed tokens joined by single spaces (an empty string when
        no token survives the filtering).
    """
    # Build the stopword set once per call: calling stopwords.words() inside
    # the comprehension would re-evaluate it for every token, and a set gives
    # O(1) membership tests instead of a list scan.
    stop_words = set(stopwords.words('english'))

    tokens = nltk.word_tokenize(text.lower())

    # Keep only purely alphanumeric tokens. This already discards punctuation
    # tokens, so no separate string.punctuation check is needed afterwards.
    kept = [
        word for word in tokens
        if word.isalnum() and word not in stop_words
    ]

    return " ".join(ps.stem(word) for word in kept)
# transform_text requires the raw message as its argument; calling it with no
# argument raises TypeError. Read the message to classify from the user.
sms_text = input("Enter the SMS text: ")
transformed_sms = transform_text(sms_text)
Result Generation
# Vectorize the pre-processed message; transform() takes an iterable of
# documents, so the single message is wrapped in a list.
vector_input = tfidf.transform(
    [transformed_sms]
)
# predict() returns one entry per input row; take the only one.
predictions = model.predict(vector_input)
result = predictions[0]
<h5>Showing Result</h5>
# The two assignments must be separate statements; on one line without a
# separator they are a syntax error.
# NOTE(review): `y_test` is not defined in the visible code, and `result` is a
# single prediction rather than one per test sample. Confirm what these
# metrics should be computed over.
# NOTE(review): presumably `accuracy_score` / `precision_score` are meant to be
# the scikit-learn metric functions rather than methods on `model`. Verify.
accuracy = model.accuracy_score(y_test, result)
precision = model.precision_score(y_test, result)