# Install the dependency first (run in a shell, not in Python): pip install nltk
import nltk
import random
from nltk import classify
from nltk import NaiveBayesClassifier as NBC
# File: name.py
def feature_extraction(word):
    """Build the classifier feature dict for a single name.

    The only feature is ``"last"``: the final three characters of ``word``.
    When ``word`` has fewer than three characters, the slice simply yields
    the whole of ``word`` (an empty string stays empty).

    Args:
        word: The name to extract the feature from.

    Returns:
        A dict of the form ``{"last": <last three characters of word>}``.
    """
    return {"last": word[-3:]}
# Tag every name with its gender label.
maleNames = [(male_name, "male") for male_name in malelist]
femaleNames = [(female_name, "female") for female_name in femalelist]

# Pool both labelled lists into one, then randomise the order in place.
allNames = [*maleNames, *femaleNames]
random.shuffle(allNames)

# Each entry becomes (feature dict, gender), where the feature dict is
# whatever feature_extraction returns for that name.
featureData = [
    (feature_extraction(person), label) for person, label in allNames
]

# Train the Naive Bayes classifier on the complete labelled feature set.
genderIdentifier = NBC.train(featureData)
# To check the accuracy, split the data and train on one part only, e.g.:
# num = 7 * len(featureData) // 10
# train_data = featureData[num:]
# test_data = featureData[:num]
# genderIdentifier = NBC.train(train_data)
import pickle

# Persist the trained classifier to disk. The ``with`` block guarantees the
# file is closed even if pickling raises part-way through.
with open('my_classifier.pickle', 'wb') as f:
    pickle.dump(genderIdentifier, f)
import pickle

# Restore the previously saved classifier from disk. The ``with`` block
# guarantees the file is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load pickle files you created yourself or otherwise trust.
with open('my_classifier.pickle', 'rb') as f:
    classifier = pickle.load(f)
Save Naive Bayes Trained Classifier in NLTK
Machine Learning Model - Gender Identifier with NLTK in less than 15 lines of code
Machine Learning Model - Gender Identifier with NLTK in less than 15 lines of code
Recommended Posts