pip install nltk
import nltk
import random
from nltk import classify
from nltk import NaiveBayesClassifier as NBC
name.py
def feature_extraction(word):
    """Return the feature dict used by the classifier for *word*.

    The only feature is ``"last"``: the final three characters of the
    name. Names shorter than three characters are returned whole, and an
    empty string yields an empty suffix.
    """
    return {"last": word[-3:]}
# Tag every name with its gender label.
maleNames = [(person, "male") for person in malelist]
femaleNames = [(person, 'female') for person in femalelist]

# Merge the male and female lists, then shuffle the combined list in place.
allNames = [*maleNames, *femaleNames]
random.shuffle(allNames)

# Build the training set in the form
# [({"last": <suffix>}, gender), ({"last": <suffix>}, gender), ...]
featureData = [
    (feature_extraction(person), label)
    for person, label in allNames
]

# Train the Naive Bayes classifier on the whole labelled feature set.
genderIdentifier = NBC.train(featureData)
# To check the accuracy, split the data before training, for example:
# num = 7 * len(featureData) // 10  # index at 7/10 of the data
# train_data = featureData[num:]
# test_data = featureData[:num]
# genderIdentifier = NBC.train(train_data)
import pickle

# Persist the trained classifier to disk so it can be reused later.
# The context manager closes the file even if pickle.dump raises.
with open('my_classifier.pickle', 'wb') as f:
    pickle.dump(genderIdentifier, f)
import pickle

# Load the previously saved classifier back from disk.
# NOTE: unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
# The context manager closes the file even if pickle.load raises.
with open('my_classifier.pickle', 'rb') as f:
    classifier = pickle.load(f)
Save Naive Bayes Trained Classifier in NLTK
Machine Learning Model - Gender Identifier with NLTK in less than 15 lines of code
Machine Learning Model - Gender Identifier with NLTK in less than 15 lines of code
Recommended Posts