# Reconstructed from the patch "add CompatibleZipFile" for
# jupyter_archive/handlers.py.  In that patch ``make_reader`` opens ".zip"
# archives with ``CompatibleZipFile(archive_path, mode="r")`` instead of
# ``zipfile.ZipFile``.
import os
import shutil
from zipfile import ZipFile
from zipfile import ZipInfo


class CompatibleZipFile(ZipFile):
    """ZipFile that repairs legacy-encoded member names on extraction.

    ``zipfile`` decodes member names as cp437 unless the archive marks them
    as UTF-8.  Archives produced on macOS (raw UTF-8 bytes) or on Chinese
    Windows (GBK bytes) often omit that marker, so their names come out as
    mojibake.  When extracting, this class undoes the cp437 decoding and
    retries with UTF-8 and then GBK.

    Only the path written to disk is affected; ``ZipInfo.filename`` and
    ``namelist()`` still report the name as ``zipfile`` decoded it.
    """

    # General purpose bit 11 ("language encoding flag"): when set, the
    # member name is stored as UTF-8 and zipfile has already decoded it
    # correctly.
    _UTF8_FILENAME_FLAG = 0x800

    # Encodings tried, in order, for names that were mis-decoded as cp437.
    _LEGACY_NAME_ENCODINGS = ("utf-8", "gbk")

    @staticmethod
    def _redecode_legacy_name(name, encodings=_LEGACY_NAME_ENCODINGS):
        """Return *name* re-decoded from its cp437 bytes, or *name* itself.

        The first encoding in *encodings* that decodes the bytes to a
        non-empty string wins.  If the name cannot be turned back into
        cp437 bytes, or no encoding fits, the name is returned unchanged.
        """
        try:
            raw = name.encode("cp437")
        except UnicodeError:
            return name
        for encoding in encodings:
            try:
                decoded = raw.decode(encoding)
            except UnicodeError:
                continue
            if decoded:
                return decoded
        return name

    def _extract_member(self, member, targetpath, pwd):
        """Extract *member* below *targetpath* and return the created path.

        Mirrors ``ZipFile._extract_member`` with one addition: the archive
        name is passed through ``_redecode_legacy_name`` before it is
        sanitised.

        Args:
            member: a ``ZipInfo`` or the name of a member in the archive.
            targetpath: directory the member is extracted into.
            pwd: password used for encrypted members, or ``None``.

        Returns:
            The normalised filesystem path of the extracted file/directory.
        """
        if not isinstance(member, ZipInfo):
            member = self.getinfo(member)

        # Build the destination pathname, replacing forward slashes with
        # the platform-specific separator.
        arcname = member.filename.replace('/', os.path.sep)

        # Re-decode only names that zipfile had to guess at.  A name flagged
        # as UTF-8 is already correct; re-decoding it could corrupt it, e.g.
        # "éé" is b"\x82\x82" in cp437, which is also a valid GBK character.
        # The same holds when the caller chose an explicit metadata_encoding
        # (available on newer Python versions).
        name_is_utf8 = bool(member.flag_bits & self._UTF8_FILENAME_FLAG)
        explicit_encoding = getattr(self, "metadata_encoding", None)
        if not name_is_utf8 and not explicit_encoding:
            arcname = self._redecode_legacy_name(arcname)

        if os.path.altsep:
            arcname = arcname.replace(os.path.altsep, os.path.sep)
        # Interpret absolute pathname as relative: remove drive letter or
        # UNC path, redundant separators, "." and ".." components.
        arcname = os.path.splitdrive(arcname)[1]
        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
        arcname = os.path.sep.join(
            part for part in arcname.split(os.path.sep)
            if part not in invalid_path_parts
        )
        if os.path.sep == '\\':
            # Filter illegal characters on Windows.
            arcname = self._sanitize_windows_name(arcname, os.path.sep)

        targetpath = os.path.normpath(os.path.join(targetpath, arcname))

        # Create all upper directories if necessary.  exist_ok covers the
        # case where another extraction creates them concurrently.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            os.makedirs(upperdirs, exist_ok=True)

        if member.is_dir():
            if not os.path.isdir(targetpath):
                try:
                    os.mkdir(targetpath)
                except FileExistsError:
                    # Tolerate a directory that appeared in the meantime;
                    # anything else occupying the path is still an error.
                    if not os.path.isdir(targetpath):
                        raise
            return targetpath

        with self.open(member, pwd=pwd) as source, \
                open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

        return targetpath