From d6718f2f7d15be8f6801caa7cd13fd78e2d400b3 Mon Sep 17 00:00:00 2001
From: Wh1isper <9573586@qq.com>
Date: Mon, 12 Dec 2022 15:45:18 +0800
Subject: [PATCH] add CompatibleZipFile
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: 晚橙 <383301857@qq.com>
---
 jupyter_archive/handlers.py | 66 +++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/jupyter_archive/handlers.py b/jupyter_archive/handlers.py
index 351efff..cd5b424 100644
--- a/jupyter_archive/handlers.py
+++ b/jupyter_archive/handlers.py
@@ -7,12 +7,75 @@
 import zipfile
 import threading
 from http.client import responses
+import shutil
+from zipfile import ZipFile
+from zipfile import ZipInfo
 
 from jupyter_server.base.handlers import JupyterHandler
 from jupyter_server.utils import url2path, url_path_join, ensure_async
 from tornado import ioloop, web
 from urllib.parse import quote
 
+
+class CompatibleZipFile(ZipFile):
+    """ZipFile that repairs member names stored without the UTF-8 flag.
+
+    ``zipfile`` decodes unflagged names as cp437, so archives written on
+    macOS (raw UTF-8) or Chinese Windows (GBK) extract as mojibake.
+    """
+
+    def _extract_member(self, member, targetpath, pwd):
+        """Extract ``member`` below ``targetpath`` and return the written path.
+
+        Mirrors ``ZipFile._extract_member`` except that unflagged names are
+        re-decoded as UTF-8, then GBK, before the path is sanitized.
+        """
+        if not isinstance(member, ZipInfo):
+            member = self.getinfo(member)
+
+        arcname = member.filename
+        # Bit 11 (0x800) marks a name that is already correct UTF-8; guessing
+        # again could corrupt it (cp437 bytes of "é" + a letter parse as GBK).
+        if not member.flag_bits & 0x800:
+            for encoding in ("utf-8", "gbk"):
+                try:
+                    # Done before '/' becomes os.path.sep (a GBK trail byte).
+                    arcname = arcname.encode("cp437").decode(encoding) or arcname
+                    break
+                except UnicodeError:
+                    continue
+
+        # Use platform separators, then interpret absolute paths as relative:
+        # drop the drive/UNC prefix, empty parts, "." and "..".
+        arcname = arcname.replace('/', os.path.sep)
+        if os.path.altsep:
+            arcname = arcname.replace(os.path.altsep, os.path.sep)
+        arcname = os.path.splitdrive(arcname)[1]
+        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
+        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
+                                   if x not in invalid_path_parts)
+        if os.path.sep == '\\':
+            # filter illegal characters on Windows
+            arcname = self._sanitize_windows_name(arcname, os.path.sep)
+
+        targetpath = os.path.normpath(os.path.join(targetpath, arcname))
+
+        # Create all upper directories if necessary.
+        upperdirs = os.path.dirname(targetpath)
+        if upperdirs and not os.path.exists(upperdirs):
+            os.makedirs(upperdirs)
+
+        if member.is_dir():
+            if not os.path.isdir(targetpath):
+                os.mkdir(targetpath)
+            return targetpath
+
+        with self.open(member, pwd=pwd) as source, \
+                open(targetpath, "wb") as target:
+            shutil.copyfileobj(source, target)
+        return targetpath
+
+
 SUPPORTED_FORMAT = [
     "zip",
     "tgz",
@@ -77,11 +140,10 @@ def make_writer(handler, archive_format="zip"):
 
 
 def make_reader(archive_path):
-
     archive_format = "".join(archive_path.suffixes)
 
     if archive_format.endswith(".zip"):
-        archive_file = zipfile.ZipFile(archive_path, mode="r")
+        archive_file = CompatibleZipFile(archive_path, mode="r")
     elif any([archive_format.endswith(ext) for ext in [".tgz", ".tar.gz"]]):
         archive_file = tarfile.open(archive_path, mode="r|gz")
     elif any([archive_format.endswith(ext) for ext in [".tbz", ".tbz2", ".tar.bz", ".tar.bz2"]]):