Everybody, there are times when you want to find out if a string pattern exists from a string. We have implemented three algorithms that are perfect for such situations.
I will add a commentary when I have time to spare.
import random
def random_str(x, char_list):
return "".join([random.choice(char_list) for _ in range(x)])
if __name__ == "__main__":
print(random_str(10, ["a","b","c","d"]))
Now, I will introduce three string search algorithms from here.
from make_random_str import random_str
string = random_str(10000,["A","B","C","D","E"])
def brute_forth(string, pattern):
i = 0
j = 0
lp = len(pattern)
ls = len(string)
ans = []
while i <= ls-1:
if string[i] == pattern[j]:
if j == lp-1:
ans.append(i-lp+1)
i +=1
j = 0
else:
i += 1
j += 1
else:
i = i-j+1
j = 0
if not ans:
ans = -1
return ans
print(brute_forth(string,"ABCDE"))
#Below, using regular expressions, brute_Check if the forth algo is producing the correct output
import re
L =[]
check = re.finditer(r"ABCDE", string)
for i in check:
L.append(i.span()[0])
print(L)
from make_random_str import random_str
string = random_str(1000000, ["A","B","C","D","E"])
def kmp(string, pattern):
#First, create a table from pattern of how many to shift
table = [0] * len(pattern)
j = 0
for i in range(1, len(pattern)):
if pattern[i] == pattern[j]:
table[i] = j
j += 1
else:
j = 0
ans = []
s = 0
t = 0
while s < len(string):
if string[s] == pattern[t]:
if t == len(pattern)-1:
ans.append(s-t)
s += 1
t = 0
else:
s += 1
t += 1
elif t == 0:
s += 1
else:
t = table[j]
return ans
print(kmp(string, "ABCEABC"))
#Below, using regular expressions, brute_Check if the forth algo is producing the correct output
import re
L =[]
check = re.finditer(r"ABCEABC", string)
for i in check:
L.append(i.span()[0])
print(L)
from make_random_str import random_str
string = random_str(10000,["A","B","C","D","E"])
def bm(string,pattern):
lp = len(pattern)
ls = len(string)
#First create a table
#Since ascii is used, a table with a length of 256 is prepared.
table = [lp for _ in range(256)]
for i in range(lp):
table[ord(pattern[i])] = lp-i-1
ans = []
i = lp - 1
while i < ls:
j = lp - 1
while string[i] == pattern[j]:
if j == 0:
ans.append(i)
i -=1
j -=1
#Don't forget to take max
i = i + max(table[ord(string[i])], lp-j)
if not ans:
return -1
return ans
print(bm(string, "ABCDE"))
#Below, using regular expressions, brute_Check if the forth algo is producing the correct output
import re
L =[]
check = re.finditer(r"ABCDE", string)
for i in check:
L.append(i.span()[0])
print(L)
Recommended Posts