Notes on detecting English spelling mistakes in Python
Libraries I tried this time
autocorrect
pip3 install autocorrect
test.py
from autocorrect import Speller
# Build an English speller, then print its suggestion for each sample
# misspelling, one result per line.
spell = Speller(lang='en')
for typo in ('caaaar', 'mussage', 'survice', 'hte'):
    print(spell(typo))
$ python3 test.py
aaaaaa
message
service
the
pyspellchecker
pip3 install pyspellchecker
test.py
from spellchecker import SpellChecker
spell = SpellChecker()

# Find the words that are not in the dictionary (possible misspellings).
misspelled = spell.unknown(['something', 'is', 'hapenning', 'here'])

for word in misspelled:
    # Print the single most likely correction.
    print(spell.correction(word))

    # Print every likely candidate for the word.
    print(spell.candidates(word))
$ python3 test.py
happenning
{'hapening', 'happenning'}
text blob library
pip install -U textblob
python -m textblob.download_corpora
test.py
from textblob import TextBlob
# Correct a sample sentence and show the before/after pair only when
# TextBlob actually changed something.
str_org = "I havv goood speling!"
b = TextBlob(str_org)
str_correct = str(b.correct())
if str_org != str_correct:
    print("org : " + str_org)
    print("mod : " + str_correct)
$ python3 test.py
org : I havv goood speling!
mod : I have good spelling!
pyenchant
brew install --build-from-source enchant
test.py
import enchant
# en_US dictionary shared by every check() call.
d = enchant.Dict("en_US")


def check(text):
    """Print *text* and whether the en_US dictionary accepts it.

    Returns the dictionary's verdict so that callers can act on it.
    """
    ret = d.check(text)
    print("check string : " + text)
    print(ret)
    return ret
# Run check() on a misspelling, the correct spelling, and another misspelling.
for sample in ("Helo", "Hello", "Helllo"):
    check(sample)
$ python3 test.py
check string : Helo
False
check string : Hello
True
check string : Helllo
False
$ brew install enchant
Error:
homebrew-core is a shallow clone.
homebrew-cask is a shallow clone.
To `brew update`, first run:
git -C /usr/local/Homebrew/Library/Taps/homebrew/homebrew-core fetch --unshallow
git -C /usr/local/Homebrew/Library/Taps/homebrew/homebrew-cask fetch --unshallow
This restriction has been made on GitHub's request because updating shallow
clones is an extremely expensive operation due to the tree layout and traffic of
Homebrew/homebrew-core and Homebrew/homebrew-cask. We don't do this for you
automatically to avoid repeatedly performing an expensive unshallow operation in
CI systems (which should instead be fixed to not use shallow clones). Sorry for
the inconvenience!
Warning: You are using macOS 10.13.
We (and Apple) do not provide support for this old version.
You will encounter build failures with some formulae.
Please create pull requests instead of asking for help on Homebrew's GitHub,
Twitter or any other official channels. You are responsible for resolving
any issues you experience while you are running this
old version.
Error: enchant: no bottle available!
You can try to install from source with e.g.
brew install --build-from-source enchant
Please note building from source is unsupported. You will encounter build
failures with some formulae. If you experience any issues please create pull
requests instead of asking for help on Homebrew's GitHub, Twitter or any other
official channels.
brew install --build-from-source enchant
Check the README.md file(s) for spelling mistakes
test.py
import enchant
# en_US dictionary used for every lookup in this script.
d = enchant.Dict("en_US")


def check(text):
    """Look *text* up in the en_US dictionary, echo the result, and return it.

    Prints the checked string followed by the dictionary's verdict, then
    returns that verdict to the caller.
    """
    ret = d.check(text)
    print("check string : " + text)
    print(ret)
    return ret
# Punctuation / Markdown markup deleted from every token before it is checked.
# '-' and '/' are deliberately kept.  Whitespace never reaches this table
# because the tokens come from str.split().
_STRIP_TABLE = str.maketrans('', '', '!.#,:"][*`\'()|')

# Words that are never reported, even when the dictionary rejects them.
NG_list = ["openembedded"]


def _misspelled_words(path):
    """Yield each cleaned, lower-cased word of the file at *path* that check() rejects."""
    with open(path, 'r') as md:
        for line in md:
            for word in line.lower().split():
                word = word.translate(_STRIP_TABLE)
                # Skip tokens that were pure punctuation and allow-listed words.
                if not word or word in NG_list:
                    continue
                if not check(word):
                    yield word


# Start the report from scratch and keep it open for the whole run instead of
# reopening it in append mode for every rejected word.
# NOTE(review): the header says "textblob" although this script uses enchant;
# the text is kept unchanged.
with open('file.txt', 'w') as txt:
    txt.write("textblob result\n")

    # flist.log holds one file path per line.
    with open('flist.log', 'r') as flist:
        for fname in flist:
            fname = fname.replace('\n', '')
            # A blank line would otherwise make open('') fail.
            if not fname:
                continue
            for word in _misspelled_words(fname):
                txt.write("False : " + word + "\n")

# The script used to end with os._exit(0), but os is never imported, so that
# line raised NameError.  Falling off the end exits with status 0 as well.
$ cd .
$ find . -name README.md > flist.log
$ python3 test.py
(Work in progress)
TEST.md
I havv goood speling!
I hava god speling!
I hava godi speling!
test.py
from textblob import TextBlob
# Start the report from scratch and keep it open while TEST.md is processed,
# instead of reopening it in append mode for every corrected line.
with open('file.txt', 'w') as txt:
    txt.write("textblob result\n")

    with open('TEST.md', 'r') as md:
        for line in md:
            # Drop periods and the trailing newline before correcting.
            str_org = line.replace('.', '').replace('\n', '')
            str_correct = str(TextBlob(str_org).correct())

            # Report only the lines that TextBlob changed.
            if str_org != str_correct:
                print("org : " + str_org)
                print("mod : " + str_correct)
                txt.write("---\n" + str_org + "\n" + str_correct + "\n")
$ python3 test.py
jamspell
Trying https://github.com/bakwc/JamSpell
References:
- https://pypi.python.org/pypi/autocorrect
- http://norvig.com/spell-correct.html
- https://pypi.org/project/pyspellchecker/
- https://textblob.readthedocs.io/en/dev/
- Use PyEnchant for quick spell checking
- https://pyenchant.github.io/pyenchant/install.html
- Replace, translate, re.sub, re.subn in Python
- extract English words from string in python
Recommended Posts