**Added on June 1, 2017:** Made corrections in response to comments from @kota9 and @pashango2.
This is my first time writing an article. As background: when I downloaded the data I needed for my research, the zip file contained another zip file, which in turn contained yet another zip file, so I decided to write a script that expands them all automatically. For beginners, I will introduce the code step by step, following the process I went through while thinking about it. If you just want the result, skip to the final code.
-Extract the zip file recursively -Allow execution even if a directory is specified -Automatically delete the unzipped zip file
Write down the general flow of processing.
(1) Check if the command line argument is a zip file or a directory (2) Exception handling
(3) If it is a zip file (3.1) Extract the specified zip file (3.2) Delete the zip file after extracting
(4) If it is a directory (4.1) Perform (3) for the zip file in the directory (4.2) Repeat (4) for the directories in the directory
(4) is a little hard to follow, but the point is this: extract every zip file in the directory, and then search each subdirectory for zip files in the same way. Because extracting a zip file may itself produce a directory, I thought it would be more efficient to extract the zip files first and only then proceed with the directory search. (See below)
The execution statement assumed this time is
$ python expand_zip.py ZIP_FILE_NAME
Or
$ python expand_zip.py DIR_NAME
Therefore, it is necessary to change the process depending on which one was executed.
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
# Entry point skeleton: decide whether the first argument is a directory or a
# zip file. The branch bodies are filled in by the later steps of the article.
if __name__ == "__main__":
    args = sys.argv
    if os.path.isdir(args[1]):
        # When a directory is given
        pass
    else:
        # When a zip file is given
        pass
Exception handling is required because input is accepted from the user. I'm not used to this part either, so please correct me if I have made a mistake.
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
# Entry point skeleton with error handling: a missing argument prints the usage
# line, and an I/O failure prints the path that could not be opened.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            # When a directory is given
            pass
        else:
            # When a zip file is given
            pass
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
import zipfile
def unzip(filename):
    """Extract every member of the zip archive into its own directory.

    Args:
        filename: path of the zip file to extract. The members are written
            to ``os.path.dirname(filename)``.

    Raises:
        IOError: if ``filename`` cannot be opened.
        zipfile.BadZipFile: if ``filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filename, "r") as zf:
        zf.extractall(path=os.path.dirname(filename))
# Entry point: extract the zip file named on the command line. Directory
# handling is added in a later step of the article.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            # When a directory is given
            pass
        else:
            # The path is passed as-is; os.path.join with one argument is a no-op.
            unzip(args[1])
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
**zf.extractall(path)** extracts the zip file into the location given by path. This time, I extract it into the directory where the zip file itself is located.
(3.2) Delete the zip file after extracting
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
import zipfile
def unzip(filename):
    """Extract the zip archive into its own directory, then delete it.

    Args:
        filename: path of the zip file to extract. The members are written
            to ``os.path.dirname(filename)``.

    Raises:
        IOError: if ``filename`` cannot be opened.
        zipfile.BadZipFile: if ``filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filename, "r") as zf:
        zf.extractall(path=os.path.dirname(filename))
    # Delete only after the ``with`` block has closed the archive; an open
    # file cannot be removed on some platforms.
    delete_zip(filename)
def delete_zip(zip_file):
    """Remove the file at ``zip_file`` from disk.

    Args:
        zip_file: path of the file to delete.

    Raises:
        OSError: if the path does not exist or cannot be removed.
    """
    os.remove(zip_file)
# Entry point: extract (and delete) the zip file named on the command line.
# Directory handling is added in a later step of the article.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            # When a directory is given
            pass
        else:
            # The path is passed as-is; os.path.join with one argument is a no-op.
            unzip(args[1])
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
import zipfile
import glob
def unzip(filename):
    """Extract the zip archive into its own directory, then delete it.

    Args:
        filename: path of the zip file to extract. The members are written
            to ``os.path.dirname(filename)``.

    Raises:
        IOError: if ``filename`` cannot be opened.
        zipfile.BadZipFile: if ``filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filename, "r") as zf:
        zf.extractall(path=os.path.dirname(filename))
    # Delete only after the ``with`` block has closed the archive; an open
    # file cannot be removed on some platforms.
    delete_zip(filename)
def delete_zip(zip_file):
    """Remove the file at ``zip_file`` from disk.

    Args:
        zip_file: path of the file to delete.

    Raises:
        OSError: if the path does not exist or cannot be removed.
    """
    os.remove(zip_file)
def walk_in_dir(dir_path):
    """Pass every ``*.zip`` entry directly inside ``dir_path`` to ``unzip``.

    Args:
        dir_path: directory to scan. Subdirectories are not visited.
    """
    # glob.glob already returns each match prefixed with dir_path, so the
    # match is used as-is. Joining dir_path a second time breaks relative
    # directories ("data" + "data/a.zip" -> "data/data/a.zip").
    for filename in glob.glob(os.path.join(dir_path, "*.zip")):
        unzip(filename=filename)
# Entry point: a directory argument has its zip files extracted; any other
# argument is treated as a single zip file.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            walk_in_dir(args[1])
        else:
            # The path is passed as-is; os.path.join with one argument is a no-op.
            unzip(args[1])
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
~~In the for statement, **os.listdir(dir_path)** gets all the files and directories in dir_path, then **if os.path.isfile(os.path.join(dir_path, f))** keeps only the files among them, and the final **if u".zip" in f** keeps only those with the .zip extension.~~
** Added on June 1, 2017 ** In response to @ pashango2's comment
for filename in (f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) if u".zip" in f):
The description
for filename in glob.glob(os.path.join(dir_path, "*.zip")):
Changed to.
** Addendum to here **
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
import zipfile
import glob
def unzip(filename):
    """Extract the zip archive into its own directory, then delete it.

    Args:
        filename: path of the zip file to extract. The members are written
            to ``os.path.dirname(filename)``.

    Raises:
        IOError: if ``filename`` cannot be opened.
        zipfile.BadZipFile: if ``filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filename, "r") as zf:
        zf.extractall(path=os.path.dirname(filename))
    # Delete only after the ``with`` block has closed the archive; an open
    # file cannot be removed on some platforms.
    delete_zip(filename)
def delete_zip(zip_file):
    """Remove the file at ``zip_file`` from disk.

    Args:
        zip_file: path of the file to delete.

    Raises:
        OSError: if the path does not exist or cannot be removed.
    """
    os.remove(zip_file)
def walk_in_dir(dir_path):
    """Unzip every ``*.zip`` under ``dir_path``, descending into subdirectories.

    The zip files directly inside ``dir_path`` are handed to ``unzip`` first,
    and only afterwards are the subdirectories visited, because extracting an
    archive may create new subdirectories that must be searched as well.

    Args:
        dir_path: directory to process.
    """
    # Re-scan until no archive is left: extracting one archive can drop
    # further .zip files into this same directory, which a single glob pass
    # taken up front would never see.
    while True:
        archives = glob.glob(os.path.join(dir_path, "*.zip"))
        if not archives:
            break
        for filename in archives:
            # glob.glob already returns the match prefixed with dir_path;
            # joining dir_path again breaks relative directories
            # ("data" + "data/a.zip" -> "data/data/a.zip").
            unzip(filename=filename)
    for dirname in os.listdir(dir_path):
        child = os.path.join(dir_path, dirname)
        if os.path.isdir(child):
            walk_in_dir(child)
# Entry point: a directory argument is searched recursively for zip files;
# any other argument is treated as a single zip file.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            walk_in_dir(args[1])
        else:
            # The path is passed as-is; os.path.join with one argument is a no-op.
            unzip(args[1])
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
The zip files in a directory are extracted first, and only then are all of its subdirectories processed recursively. The steps are in this order because extracting a zip file may itself produce a new directory.
Also, with this code, if the command-line argument is a zip file, it is extracted in (3) and the program simply ends, so the extracted contents also need to be included as a target of the recursive search.
So the final code is
expand_zip.py
# -*- coding: utf-8 -*-
import os
import sys
import zipfile
import glob
def unzip(filename):
    """Extract the zip archive into its own directory, then delete it.

    Args:
        filename: path of the zip file to extract. The members are written
            to ``os.path.dirname(filename)``.

    Raises:
        IOError: if ``filename`` cannot be opened.
        zipfile.BadZipFile: if ``filename`` is not a valid zip archive.
    """
    with zipfile.ZipFile(filename, "r") as zf:
        zf.extractall(path=os.path.dirname(filename))
    # Delete only after the ``with`` block has closed the archive; an open
    # file cannot be removed on some platforms.
    delete_zip(filename)
def delete_zip(zip_file):
    """Remove the file at ``zip_file`` from disk.

    Args:
        zip_file: path of the file to delete.

    Raises:
        OSError: if the path does not exist or cannot be removed.
    """
    os.remove(zip_file)
def walk_in_dir(dir_path):
    """Unzip every ``*.zip`` under ``dir_path``, descending into subdirectories.

    The zip files directly inside ``dir_path`` are handed to ``unzip`` first,
    and only afterwards are the subdirectories visited, because extracting an
    archive may create new subdirectories that must be searched as well.

    Args:
        dir_path: directory to process.
    """
    # Re-scan until no archive is left: extracting one archive can drop
    # further .zip files into this same directory, which a single glob pass
    # taken up front would never see.
    while True:
        archives = glob.glob(os.path.join(dir_path, "*.zip"))
        if not archives:
            break
        for filename in archives:
            # glob.glob already returns the match prefixed with dir_path;
            # joining dir_path again breaks relative directories
            # ("data" + "data/a.zip" -> "data/data/a.zip").
            unzip(filename=filename)
    for dirname in os.listdir(dir_path):
        child = os.path.join(dir_path, dirname)
        if os.path.isdir(child):
            walk_in_dir(child)
# Entry point: a directory argument is searched recursively for zip files.
# Any other argument is treated as a zip file: it is extracted, and the
# directory it produced (if any) is then searched recursively as well.
if __name__ == "__main__":
    args = sys.argv
    try:
        if os.path.isdir(args[1]):
            walk_in_dir(args[1])
        else:
            # The path is passed as-is; os.path.join with one argument is a no-op.
            unzip(args[1])
            # If a directory named after the archive without its extension
            # ("foo.zip" -> "foo") now exists, search it for nested archives.
            name, _ = os.path.splitext(args[1])
            if os.path.isdir(name):
                walk_in_dir(name)
    except IndexError:
        # args[1] does not exist: the script was run without an argument.
        print('IndexError: Usage "python %s ZIPFILE_NAME" or "python %s DIR_NAME"' % (args[0], args[0]))
    except IOError:
        print('IOError: Couldn\'t open "%s"' % args[1])
Will be.
This was my first post, but it will be a good opportunity to organize my knowledge, so I would like to continue posting in the future. If you find any mistakes in this article, please correct them.
Recommended Posts