100 exercices de traitement du langage naturel
Je n'ai pas beaucoup de temps, donc de nombreux passages sont approximatifs.
# coding:utf-8
# 00: Reverse the characters of a string.
string = "stressed"
# A slice with step -1 walks the string from the last character to the first.
print(string[::-1])
# Same result, built from the reversed() iterator.
print("".join(reversed(string)))
# 01: Split a string into its even-index and odd-index characters.
string = u"Patatoku Kashii"
print(string[0::2])  # characters at indices 0, 2, 4, ...
print(string[1::2])  # characters at indices 1, 3, 5, ...
# 02: Interleave the characters of two strings ("Pat car" + "Tax").
string1 = u"Voiture Pat"
string2 = u"Taxi"
# zip() stops at the end of the shorter string, so the remaining characters
# of the longer one are not included in the result.
ret = "".join(s1 + s2 for s1, s2 in zip(string1, string2))
print(ret)
# 03: Pi -- the lengths of the words, read in order, spell out digits of pi.
string = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# str.replace returns a new string (str is immutable), so its result must be
# kept. Only the punctuation marks themselves are removed: deleting ", " as a
# whole would also drop the space and fuse the two neighbouring words.
words = string.replace(",", "").replace(".", "").split(" ")
print("".join(str(len(word)) for word in words))
# 04: Element symbols.
string = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
words = string.split(" ")
# 1-based word positions from which only the first letter is taken; every
# other word contributes its first two letters.
n = [1, 5, 6, 7, 8, 9, 15, 16, 19]
# Maps each 1-based word position to the letters extracted from that word.
d = {}
for idx, word in enumerate(words):
    position = idx + 1
    d[position] = word[0] if position in n else word[0:2]
print(d)
# 05: n-gram
def ngram(seq, n=2):
    """Return all contiguous length-``n`` slices of ``seq``, in order.

    ``seq`` may be any sliceable sequence with a length: a string yields
    character n-grams (as strings), a list yields item n-grams (as lists).

    Args:
        seq: The sequence to slice.
        n: Size of each n-gram (default 2).

    Returns:
        A list of ``len(seq) - n + 1`` slices; empty when ``seq`` is shorter
        than ``n``.
    """
    # range() is used instead of xrange() so the code runs on Python 2 and 3.
    return [seq[i:i + n] for i in range(len(seq) - n + 1)]


seq1 = "I am an NLPer"
seq2 = seq1.split(" ")
print(ngram(seq1))  # character bigrams
print(ngram(seq2))  # word bigrams
# 06: Set operations on the character bigrams of two words.
seq1 = "paraparaparadise"
seq2 = "paragraph"
# ngram() is the bigram helper defined earlier in this file.
X = set(ngram(seq1))
Y = set(ngram(seq2))
print("X: %s" % X)
print("Y: %s" % Y)
print("X+Y: %s" % (X | Y))  # union
print("X-Y: %s" % (X - Y))  # difference
print("X&Y: %s" % (X & Y))  # intersection
# 07: Sentence generation from a template.
def template(x=12, y="Température", z=22.4):
    """Return a sentence built by substituting ``x``, ``y`` and ``z``.

    Args:
        x: First value inserted into the sentence (default 12).
        y: Second value inserted into the sentence (default "Température").
        z: Third value inserted into the sentence (default 22.4).

    Returns:
        The formatted sentence; each argument is rendered with ``%s``.
    """
    # The previous format string started with "%t", which is not a valid
    # conversion specifier and made every call raise ValueError. Each of the
    # three arguments now has its own "%s" placeholder.
    return "À %s heures, %s est %s" % (x, y, z)


print(template())
# 08: Cipher
string = u"Implémentez le chiffrement de fonction qui convertit chaque caractère de la chaîne de caractères donnée selon les spécifications suivantes."


def cipher(string):
    """Return ``string`` with every ASCII lowercase letter mirrored.

    Each character found in "a".."z" is replaced by ``chr(219 - ord(c))``;
    since 219 == ord("a") + ord("z"), this maps a<->z, b<->y, and so on.
    All other characters are copied unchanged. Applying the function twice
    gives back the original string.

    Args:
        string: The text to convert.

    Returns:
        The converted text.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    return "".join(chr(219 - ord(s)) if s in alphabet else s for s in string)


print(cipher(string))  # encode
print(cipher(cipher(string)))  # decode
# 09: Typoglycemia
import random


def typoglycemia(string):
    """Shuffle the inner letters of every word longer than four characters.

    The text is split on single spaces. Words of length 4 or less are kept
    as they are; for longer words the first and last characters stay in
    place and the characters in between are put in random order.

    Args:
        string: Space-separated text.

    Returns:
        The words, joined again with single spaces.
    """
    ret = []
    for word in string.split(" "):
        if len(word) > 4:
            middle = word[1:-1]
            # Sampling len(middle) items without replacement yields a random
            # permutation of the inner characters.
            word = word[0] + "".join(random.sample(middle, len(middle))) + word[-1]
        ret.append(word)
    return " ".join(ret)


string = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind."
print(typoglycemia(string))
Recommended Posts