I want to learn Python but was not sure where to start, so I will work through the "100 Language Processing Knocks 2015" exercises. The environment is Windows 10 with Python 3.6.0.
Get a string in which the characters of the string "stressed" are arranged in reverse (from the end to the beginning).
00.py
string = "stressed"
# Slice syntax is string[start:stop:step].
# A negative start or stop counts from the end of the string, and a step of
# -1 walks backwards, so starting at the last character reverses the string.
print(string[-1::-1])
# Leaving the start out gives the same result.
print(string[::-1])
Implemented with slice
Take out the 1st, 3rd, 5th, and 7th characters of the character string "Patatokukashi" and get the concatenated character string.
01.py
string = "Patatoku Kashii"
# As in 00, use a slice; a step of 2 starting at index 0 keeps every other
# character (the 1st, 3rd, 5th, ... characters).
print(string[0::2])
Solved with a slice step, as in 00.
Get the character string "Patatokukashi" by alternately connecting the characters "Police car" + "Taxi" from the beginning.
02.py
from itertools import zip_longest

string1 = "Police car"
string2 = "taxi"
# Interleave the two strings character by character.
# zip_longest pads the shorter string with "" so that inputs of different
# lengths no longer raise IndexError; the remaining tail of the longer string
# is appended unchanged.
string3 = "".join(
    a + b for a, b in zip_longest(string1, string2, fillvalue="")
)
print(string3)
A little subtle ... Only the same character length can be connected
Break down the sentence "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
03.py
import re #Use of regular expressions
from collections import defaultdict #For counting characters
string = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# Delete commas and periods, then split on whitespace to get the words.
words_list = re.sub('[,.]', '', string).split()
# Tally every character of every word; missing keys start at 0.
counter = defaultdict(int)
for c in "".join(words_list):
    counter[c] += 1
# Convert the tally to a list of (character, count) tuples.
# In Python 3, dict.items() returns a view rather than a list, so it is
# wrapped in list() to really produce the list of tuples.
count_list = list(counter.items())
print(count_list)
There is too much type conversion processing overall ... I want to reduce it a little more.
Break down the sentence "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can." into words. Take the first character of the 1st, 5th, 6th, 7th, 8th, 9th, 15th, 16th, and 19th words and the first two characters of the other words, then create an associative array (dictionary type or map type) from each extracted string to the word position (the word's number counted from the beginning).
04.py
import re #Use of regular expressions
elements = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
# 1-based positions of the words that are abbreviated to a single character.
mono_words = [1, 5, 6, 7, 8, 9, 15, 16, 19]
# Delete the periods, then split on whitespace to get one entry per word.
elements_list = elements.replace('.', '').split()
# Map each abbreviated element name to its 1-based word position.
# enumerate(..., start=1) yields the position together with the word; words
# listed in mono_words keep one character, all others keep two.
shortened_elements = {
    word[:1 if position in mono_words else 2]: position
    for position, word in enumerate(elements_list, start=1)
}
print(shortened_elements)
For "I am an NLPer", the word bi-grams should be {'I am', 'am an', 'an NLPer'} and the character bi-grams should be {'I a', 'a m', 'm a', 'a n', 'n N', 'N L', 'L P', 'P e', 'e r'}.
05.py
import re #Use of regular expressions
# The input is prepared both as a string and as a list of words.
sentence_string = "I am an NLPer"
sentence_list = sentence_string.split()


def n_gram(n, sequence):
    """Return the n-grams of ``sequence`` as a list of length-``n`` slices.

    A string argument is first converted to a list of characters with
    commas, periods and spaces removed, so that strings and lists share the
    same slicing code. The result is empty when the sequence is shorter
    than ``n``.
    """
    if isinstance(sequence, str):
        sequence = list(re.sub('[,. ]', '', sequence))
    # The last window starts at index len(sequence) - n, hence the +1 on the
    # range bound.
    return [sequence[start:start + n]
            for start in range(len(sequence) - n + 1)]


# Word bi-gram
print(n_gram(2, sentence_list))
# Character bi-gram
print(n_gram(2, sentence_string))
Find the sets of character bi-grams contained in "paraparaparadise" and "paragraph" as X and Y, respectively, and find the union, intersection, and difference of X and Y. In addition, find out whether the bi-gram 'se' is included in X and in Y.
06.py
import re #Use of regular expressions
# The two input words; X and Y are rebound to their bi-gram sets further down.
X = "paraparaparadise"
Y = "paragraph"


def n_gram(n, sequence):
    """Return the n-grams of ``sequence`` as a list of ``n``-tuples.

    A string argument is first converted to a list of characters with
    commas, periods and spaces removed. Each n-gram is returned as a tuple
    rather than a list, because lists are unhashable and could not be stored
    in the sets built below. The result is empty when the sequence is
    shorter than ``n``.
    """
    if isinstance(sequence, str):
        sequence = list(re.sub('[,. ]', '', sequence))
    return [tuple(sequence[i:i + n]) for i in range(len(sequence) - n + 1)]


# Build the bi-grams of X and Y as sets so that set operations can be used.
X = set(n_gram(2, X))
Y = set(n_gram(2, Y))
# Union
print(X | Y)
# Intersection
print(X & Y)
# Difference sets
print(X - Y)
print(Y - X)
# Check 'se' against each set separately.  Testing the intersection and
# otherwise reporting "not in X or Y" is wrong when the bi-gram is present in
# only one of the two sets.
se = ('s', 'e')
se_in_x = se in X
se_in_y = se in Y
print("'se' is " + ("" if se_in_x else "not ") + "included in X")
print("'se' is " + ("" if se_in_y else "not ") + "included in Y")
Implement a function that takes arguments x, y, z and returns the string "y at x is z". Furthermore, set x = 12, y = "temperature", z = 22.4, and check the execution result.
07.py
def tostr(x, y, z):
    """Return the sentence "<y> at <x> is <z>".

    The arguments are rendered with their default string form, with ``y``
    placed first, then ``x``, then ``z``, separated by single spaces.
    """
    return "{} at {} is {}".format(y, x, z)


print(tostr(12, "temperature", 22.4))
Implement the function cipher that converts each character of the given character string according to the following specifications.
Replace each lowercase letter with the character whose code is (219 - character code). Output all other characters as they are. Use this function to encrypt and decrypt English messages.
08.py
import re #Use of regular expressions
def cipher(str):
    """Return ``str`` with every lowercase ASCII letter mirrored within a-z.

    Lowercase letters have character codes 97-122, so ``chr(219 - code)``
    maps a->z, b->y, c->x, ..., z->a. Every other character is kept as it
    is. Applying the function twice restores the original text.
    """
    def mirror(ch):
        # Only characters matching [a-z] are converted.
        return chr(219 - ord(ch)) if re.search('[a-z]', ch) else ch

    return "".join(mirror(ch) for ch in str)


test_str = "I am a esaka!!"
print(cipher(test_str))
# result: I zn z vhzpz!!
print(cipher(cipher(test_str)))
# result: I am a esaka!!
09.py
import random #Use of random number processing
def rand_str(str):
    """Return ``str`` with the longer inner words shuffled.

    The text is split on single spaces. A word is replaced by a random
    permutation of all its characters when it is longer than four characters
    and is neither the first nor the last word; every other word is kept as
    it is. The words are joined back with single spaces.
    """
    words = str.split(' ')
    last = len(words) - 1

    def scramble(pos, word):
        # Short words and the two outermost words are left untouched.
        if len(word) <= 4 or pos == 0 or pos == last:
            return word
        return "".join(random.sample(word, len(word)))

    return " ".join(scramble(pos, word) for pos, word in enumerate(words))


test_str = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
print(rand_str(test_str))
# Sample result (differs on every run): I tdcuon'l evibele that I ludoc ltyucala andnetrsdu what I was drienag : the lnpaeneohm erpow of the uahmn mind .
I misunderstood the task: I thought that only the first and last words of the given string should be left unshuffled. It is actually the first and last characters of *each word* that stay in place. Is this the thing that was popular a while ago?
That's why I fixed it as shown below.
09.py
import random #Use of random number processing
def rand_str(str):
    """Return ``str`` with the inner letters of each longer word shuffled.

    The text is split on single spaces. For every word longer than four
    characters the first and last characters stay in place and the
    characters between them are randomly permuted; shorter words are kept as
    they are. The words are joined back with single spaces.
    """
    def scramble(word):
        if len(word) <= 4:
            return word
        inner = word[1:-1]
        return word[0] + "".join(random.sample(inner, len(inner))) + word[-1]

    return " ".join(scramble(word) for word in str.split(' '))


test_str = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
test_str2 = "If you have the research and the research, the research will be messed up, but based on the research that you can read it properly, how about replacing the research with the research on purpose? You'll read it properly, right?"
print(rand_str(test_str))
# Sample result (differs on every run): I cnlu'dot belevie that I colud allctauy udeatsnnrd what I was radineg : the pehnomanel pwoer of the huamn mind .
print(rand_str(test_str2))
# result:As long as there is a sword and a sword, the sword is messed up, but the sword is that you can read it properly. Isn't it right?
# NOTE(review): the recorded result above contains words that do not occur in test_str2 - confirm against the original text.
For the time being, with this
Recommended Posts