Original article: A program to remove duplicate lines in Python
How about this? I wanted to leave a comment, but I thought it would be easier to just write the code.
check.py
# -*- coding: utf-8 -*-
import sys
from hashlib import sha256
from os.path import isfile
def output_unique_text(path, enc='utf-8', output='output.txt'):
    """Copy *path* to *output*, keeping only the first occurrence of each line.

    The file is processed as raw bytes, so any encoding can be deduplicated.
    Lines are compared without their trailing CR/LF so that a final line
    lacking a newline is still recognised as a repeat of an earlier line.
    Every dropped line is reported on standard output as
    ``<line number>: "<text>" is duplicate.``.

    Args:
        path: File to read.
        enc: Encoding used only to render the duplicate report; bytes that
            cannot be decoded/encoded are replaced instead of raising.
        output: File to write the unique lines to (created or truncated).

    Raises:
        ValueError: If *path* and *output* resolve to the same file, since
            opening the output would truncate the input before it is read.
        OSError: If either file cannot be opened.
    """
    # Local import keeps this block self-contained (the file only imports
    # ``isfile`` from os.path).
    from os.path import realpath

    if realpath(path) == realpath(output):
        raise ValueError('input and output must be different files')

    # Only fixed-size digests are retained, so memory use does not grow with
    # line length.
    seen = set()
    template = '{}: "{}" is duplicate.\n'
    # Prefer the binary layer so the report is emitted in ``enc``; fall back
    # to the text layer when stdout has no ``buffer`` (e.g. a StringIO).
    raw_stdout = getattr(sys.stdout, 'buffer', None)

    with open(path, 'rb') as reader, open(output, 'wb') as writer:
        for lineno, line in enumerate(reader, 1):
            key = sha256(line.rstrip(b'\r\n')).digest()
            if key not in seen:
                seen.add(key)
                writer.write(line)
                continue

            text = line.decode(enc, errors='replace').strip()
            message = template.format(lineno, text)
            if raw_stdout is not None:
                raw_stdout.write(message.encode(enc, errors='replace'))
            else:
                sys.stdout.write(message)
def main():
    """Command-line entry point: deduplicate the file named by the first argument.

    Prints a usage hint and exits with status 1 when no argument was given
    or the argument does not name an existing file; otherwise runs the
    deduplication and tells the user where to look for the result.
    """
    cli_args = sys.argv[1:]
    has_input_file = bool(cli_args) and isfile(cli_args[0])
    if not has_input_file:
        print('specify path/to/filename')
        sys.exit(1)

    output_unique_text(cli_args[0])
    print('confirm output.txt')
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
Recommended Posts