From 7c63dddd8c88de5347e7f7cbcd12078a71e8e7ce Mon Sep 17 00:00:00 2001
From: DrakkLord <DrakkLord@users.noreply.github.com>
Date: Thu, 25 Jul 2024 19:18:04 +0200
Subject: [PATCH] added support for windows UTF-8 file name handling

---
 CMakeLists.txt                                |  4 +-
 source/cppfs/CMakeLists.txt                   |  9 +++
 .../cppfs/include/cppfs/windows/UTF8Handler.h | 29 +++++++
 .../cppfs/source/windows/LocalFileHandle.cpp  | 24 +++---
 .../source/windows/LocalFileIterator.cpp      |  7 +-
 .../cppfs/source/windows/LocalFileWatcher.cpp | 78 ++++++++-----------
 source/cppfs/source/windows/UTF8Handler.cpp   | 50 ++++++++++++
 7 files changed, 139 insertions(+), 62 deletions(-)
 create mode 100644 source/cppfs/include/cppfs/windows/UTF8Handler.h
 create mode 100644 source/cppfs/source/windows/UTF8Handler.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34d80c0..ad3ab72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,7 +70,9 @@ option(OPTION_BUILD_DOCS               "Build documentation."
 option(OPTION_BUILD_EXAMPLES           "Build examples."                                        OFF)
 option(OPTION_BUILD_SSH_BACKEND        "Build SSH backend"                                      OFF)
 option(OPTION_FORCE_SYSTEM_DIR_INSTALL "Force system dir install"                               OFF)
-
+if(WIN32)
+	option(OPTION_WINDOWS_UTF8         "Build Windows UTF8 support"                             ON)
+endif()
 
 #
 # Declare project
diff --git a/source/cppfs/CMakeLists.txt b/source/cppfs/CMakeLists.txt
index 520512c..379fc41 100644
--- a/source/cppfs/CMakeLists.txt
+++ b/source/cppfs/CMakeLists.txt
@@ -119,10 +119,12 @@ endif()
 if("${CMAKE_SYSTEM_NAME}" MATCHES "Windows")
     set(headers ${headers}
         ${include_path}/windows/LocalFileWatcher.h
+        ${include_path}/windows/UTF8Handler.h
     )
 
     set(sources ${sources}
         ${source_path}/windows/LocalFileWatcher.cpp
+        ${source_path}/windows/UTF8Handler.cpp
     )
 endif()
 
@@ -188,6 +190,13 @@ set_target_properties(${target}
     SOVERSION ${META_VERSION_MAJOR}
 )
 
+if(WIN32 AND OPTION_WINDOWS_UTF8)
+	target_compile_definitions(${target}
+		PRIVATE
+		UNICODE
+		_UNICODE
+	)
+endif()
 
 #
 # Include directories
diff --git a/source/cppfs/include/cppfs/windows/UTF8Handler.h b/source/cppfs/include/cppfs/windows/UTF8Handler.h
new file mode 100644
index 0000000..0c6bbcf
--- /dev/null
+++ b/source/cppfs/include/cppfs/windows/UTF8Handler.h
@@ -0,0 +1,65 @@
+
+#pragma once
+
+
+#include <memory>
+#include <string>
+
+#include <cppfs/AbstractFileSystem.h>
+
+/**
+*  @brief
+*    Convert between UTF-8 strings and the string type passed to the Windows API
+*
+*  @remarks
+*    With _UNICODE defined, the macros convert to/from std::wstring via cppfs::UTF8Handler.
+*    Without it, they evaluate to their argument unchanged.
+*/
+#if defined(_UNICODE)
+    #define UTF8Convert_UTF8toW(s) ::cppfs::UTF8Handler::utf8_to_wstring(s)
+    #define UTF8Convert_WtoUTF8(s) ::cppfs::UTF8Handler::wstring_to_utf8(s)
+#else
+    #define UTF8Convert_UTF8toW(s) (s)
+    #define UTF8Convert_WtoUTF8(s) (s)
+#endif
+
+namespace cppfs
+{
+    /**
+    *  @brief
+    *    Windows specific UTF-8 string conversions
+    */
+    namespace UTF8Handler
+    {
+        /**
+        *  @brief
+        *    Convert a UTF-8 encoded string to a wide-character string
+        *
+        *  @param[in] utf8_str
+        *    UTF-8 encoded input
+        *
+        *  @return
+        *    Converted string (empty if the input is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the conversion fails
+        */
+        std::wstring utf8_to_wstring(const std::string& utf8_str);
+
+        /**
+        *  @brief
+        *    Convert a wide-character string to a UTF-8 encoded string
+        *
+        *  @param[in] wstr
+        *    Wide-character input
+        *
+        *  @return
+        *    UTF-8 encoded string (empty if the input is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the conversion fails
+        */
+        std::string wstring_to_utf8(const std::wstring& wstr);
+    } // namespace UTF8Handler
+
+} // namespace cppfs
diff --git a/source/cppfs/source/windows/LocalFileHandle.cpp b/source/cppfs/source/windows/LocalFileHandle.cpp
index c8fda29..0d0fbc3 100644
--- a/source/cppfs/source/windows/LocalFileHandle.cpp
+++ b/source/cppfs/source/windows/LocalFileHandle.cpp
@@ -9,7 +9,7 @@
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
 #include <cppfs/windows/LocalFileIterator.h>
-
+#include <cppfs/windows/UTF8Handler.h>
 
 namespace cppfs
 {
@@ -105,7 +105,7 @@ std::vector<std::string> LocalFileHandle::listFiles() const
     // Open directory
     WIN32_FIND_DATA findData;
     std::string query = FilePath(m_path).fullPath() + "/*";
-    HANDLE findHandle = FindFirstFileA(query.c_str(), &findData);
+    HANDLE findHandle = FindFirstFile(UTF8Convert_UTF8toW(query).c_str(), &findData);
 
     if (findHandle == INVALID_HANDLE_VALUE)
     {
@@ -116,7 +116,7 @@ std::vector<std::string> LocalFileHandle::listFiles() const
     do
     {
         // Get name
-        std::string name = findData.cFileName;
+        std::string name = UTF8Convert_WtoUTF8(findData.cFileName);
 
         // Ignore . and ..
         if (name != ".." && name != ".")
@@ -213,7 +213,7 @@ bool LocalFileHandle::createDirectory()
     if (exists()) return false;
 
     // Create directory
-    if (!CreateDirectoryA(m_path.c_str(), nullptr))
+    if (!CreateDirectory(UTF8Convert_UTF8toW(m_path).c_str(), nullptr))
     {
         return false;
     }
@@ -229,7 +229,7 @@ bool LocalFileHandle::removeDirectory()
     if (!isDirectory()) return false;
 
     // Remove directory
-    if (!RemoveDirectoryA(m_path.c_str()))
+    if (!RemoveDirectory(UTF8Convert_UTF8toW(m_path).c_str()))
     {
         return false;
     }
@@ -255,7 +255,7 @@ bool LocalFileHandle::copy(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CopyFileA(src.c_str(), dst.c_str(), FALSE))
+    if (!CopyFile(UTF8Convert_UTF8toW(src).c_str(), UTF8Convert_UTF8toW(dst).c_str(), FALSE))
     {
         // Error!
         return false;
@@ -282,7 +282,7 @@ bool LocalFileHandle::move(AbstractFileHandleBackend & dest)
     }
 
     // Move file
-    if (!MoveFileA(src.c_str(), dst.c_str()))
+    if (!MoveFile(UTF8Convert_UTF8toW(src).c_str(), UTF8Convert_UTF8toW(dst).c_str()))
     {
         // Error!
         return false;
@@ -312,7 +312,7 @@ bool LocalFileHandle::createLink(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CreateHardLinkA(dst.c_str(), src.c_str(), 0))
+    if (!CreateHardLink(UTF8Convert_UTF8toW(dst).c_str(), UTF8Convert_UTF8toW(src).c_str(), 0))
     {
         // Error!
         return false;
@@ -338,7 +338,7 @@ bool LocalFileHandle::createSymbolicLink(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CreateSymbolicLinkA(dst.c_str(), src.c_str(), 0))
+    if (!CreateSymbolicLink(UTF8Convert_UTF8toW(dst).c_str(), UTF8Convert_UTF8toW(src).c_str(), 0))
     {
         // Error!
         return false;
@@ -357,7 +357,7 @@ bool LocalFileHandle::rename(const std::string & filename)
     std::string path = FilePath(FilePath(m_path).directoryPath()).resolve(filename).fullPath();
 
     // Rename
-    if (!MoveFileA(m_path.c_str(), path.c_str()))
+    if (!MoveFile(UTF8Convert_UTF8toW(m_path).c_str(), UTF8Convert_UTF8toW(path).c_str()))
     {
         // Error!
         return false;
@@ -377,7 +377,7 @@ bool LocalFileHandle::remove()
     if (!isFile()) return false;
 
     // Delete file
-    if (!DeleteFileA(m_path.c_str()))
+    if (!DeleteFile(UTF8Convert_UTF8toW(m_path).c_str()))
     {
         return false;
     }
@@ -406,7 +406,7 @@ void LocalFileHandle::readFileInfo() const
     m_fileInfo = (void *)new WIN32_FILE_ATTRIBUTE_DATA;
 
     // Get file info
-    if (!GetFileAttributesExA(m_path.c_str(), GetFileExInfoStandard, (WIN32_FILE_ATTRIBUTE_DATA*)m_fileInfo))
+    if (!GetFileAttributesEx(UTF8Convert_UTF8toW(m_path).c_str(), GetFileExInfoStandard, (WIN32_FILE_ATTRIBUTE_DATA*)m_fileInfo))
     {
         // Error!
         delete (WIN32_FILE_ATTRIBUTE_DATA *)m_fileInfo;
diff --git a/source/cppfs/source/windows/LocalFileIterator.cpp b/source/cppfs/source/windows/LocalFileIterator.cpp
index 5e01f0e..c01a0a2 100644
--- a/source/cppfs/source/windows/LocalFileIterator.cpp
+++ b/source/cppfs/source/windows/LocalFileIterator.cpp
@@ -5,6 +5,7 @@
 
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
+#include <cppfs/windows/UTF8Handler.h>
 
 
 namespace cppfs
@@ -78,7 +79,7 @@ std::string LocalFileIterator::name() const
     }
 
     // Return filename of current item
-	return std::string(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
+	return UTF8Convert_WtoUTF8(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
 }
 
 void LocalFileIterator::next()
@@ -97,7 +98,7 @@ void LocalFileIterator::readNextEntry()
 		{
 			// Open directory
 			std::string query = FilePath(m_path).fullPath() + "/*";
-			m_findHandle = FindFirstFileA(query.c_str(), static_cast<WIN32_FIND_DATA *>(m_findData));
+			m_findHandle = FindFirstFile(UTF8Convert_UTF8toW(query).c_str(), static_cast<WIN32_FIND_DATA *>(m_findData));
 
 			// Abort if directory could not be opened
 			if (m_findHandle == INVALID_HANDLE_VALUE)
@@ -122,7 +123,7 @@ void LocalFileIterator::readNextEntry()
 		m_index++;
 
 		// Get filename
-		filename = std::string(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
+		filename = UTF8Convert_WtoUTF8(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
 	} while (filename == ".." || filename == ".");
 }
 
diff --git a/source/cppfs/source/windows/LocalFileWatcher.cpp b/source/cppfs/source/windows/LocalFileWatcher.cpp
index b9d6664..c0d69c7 100644
--- a/source/cppfs/source/windows/LocalFileWatcher.cpp
+++ b/source/cppfs/source/windows/LocalFileWatcher.cpp
@@ -5,7 +5,7 @@
 
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
-
+#include <cppfs/windows/UTF8Handler.h>
 
 namespace
 {
@@ -64,8 +64,8 @@ AbstractFileSystem * LocalFileWatcher::fs() const
 void LocalFileWatcher::add(FileHandle & dir, unsigned int events, RecursiveMode recursive)
 {
     // Open directory
-    ::HANDLE dirHandle = ::CreateFileA(
-        dir.path().c_str(),                                     // Pointer to the directory name
+    ::HANDLE dirHandle = ::CreateFile(
+        UTF8Convert_UTF8toW(dir.path()).c_str(),                // Pointer to the directory name
         FILE_LIST_DIRECTORY,                                    // Access (read/write) mode
         FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, // File share mode
         NULL,                                                   // Security descriptor
@@ -189,50 +189,36 @@ void LocalFileWatcher::watch(int timeout)
             // Get file event notification
             FILE_NOTIFY_INFORMATION * fileEvent = reinterpret_cast<FILE_NOTIFY_INFORMATION*>(entry);
 
-            // Convert filename to 8-bit character string
-            char fileName[4096];
-            int numBytes = ::WideCharToMultiByte(CP_ACP,
-                0,
-                fileEvent->FileName,
-                fileEvent->FileNameLength / sizeof(WCHAR),
-                fileName,
-                sizeof(fileName),
-                NULL,
-                NULL);
-
-            // Check if conversion was successful
-            if (numBytes != 0) {
-                // Get filename in unified format
-                std::string fname = FilePath(std::string(fileName, fileName + numBytes)).path();
-
-                // Determine event type
-                FileEvent eventType = (FileEvent)0;
-                switch (fileEvent->Action) {
-                    case FILE_ACTION_ADDED:
-                        eventType = FileCreated;
-                        break;
-
-                    case FILE_ACTION_REMOVED:
-                        eventType = FileRemoved;
-                        break;
-
-                    case FILE_ACTION_MODIFIED:
-                    case FILE_ACTION_RENAMED_NEW_NAME:
-                        eventType = FileModified;
-                        break;
+            // Get filename in unified format (FileName is not null-terminated; FileNameLength is its size in bytes)
+            std::string fname = FilePath(UTF8Handler::wstring_to_utf8(std::wstring(fileEvent->FileName, fileEvent->FileNameLength / sizeof(WCHAR)))).path();
+
+            // Determine event type
+            FileEvent eventType = (FileEvent)0;
+            switch (fileEvent->Action) {
+                case FILE_ACTION_ADDED:
+                    eventType = FileCreated;
+                    break;
+
+                case FILE_ACTION_REMOVED:
+                    eventType = FileRemoved;
+                    break;
+
+                case FILE_ACTION_MODIFIED:
+                case FILE_ACTION_RENAMED_NEW_NAME:
+                    eventType = FileModified;
+                    break;
                 
-                    default:
-                        break;
-                }
-
-                // Check if event is watched for
-                if (watcher.events & eventType) {
-                    // Get file handle
-                    FileHandle fh = watcher.dir.open(fname);
-
-                    // Invoke callback function
-                    onFileEvent(fh, eventType);
-                }
+                default:
+                    break;
+            }
+
+            // Check if event is watched for
+            if (watcher.events & eventType) {
+                // Get file handle
+                FileHandle fh = watcher.dir.open(fname);
+
+                // Invoke callback function
+                onFileEvent(fh, eventType);
             }
 
             // Get next event
diff --git a/source/cppfs/source/windows/UTF8Handler.cpp b/source/cppfs/source/windows/UTF8Handler.cpp
new file mode 100644
index 0000000..be11d58
--- /dev/null
+++ b/source/cppfs/source/windows/UTF8Handler.cpp
@@ -0,0 +1,93 @@
+
+#include <cppfs/windows/UTF8Handler.h>
+
+#include <windows.h>
+
+#include <climits>
+#include <stdexcept>
+#include <string>
+
+
+namespace cppfs
+{
+    namespace UTF8Handler
+    {
+        /**
+        *  @brief
+        *    Convert a UTF-8 encoded string to UTF-16
+        *
+        *  @param[in] utf8_str
+        *    UTF-8 encoded input; converted over its full length
+        *
+        *  @return
+        *    UTF-16 string (empty if the input is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the input exceeds INT_MAX bytes or the conversion fails
+        */
+        std::wstring utf8_to_wstring(const std::string& utf8_str) {
+            if (utf8_str.empty()) {
+                return std::wstring();
+            }
+
+            // The Windows API takes the input length as an int
+            if (utf8_str.size() > static_cast<size_t>(INT_MAX)) {
+                throw std::runtime_error("Error converting UTF-8 string to UTF-16.");
+            }
+            const int utf8_len = static_cast<int>(utf8_str.size());
+
+            // The first call only queries the required number of UTF-16 code units.
+            // With an explicit input length, no null terminator is counted or written.
+            const int wstr_len = MultiByteToWideChar(CP_UTF8, 0, utf8_str.data(), utf8_len, nullptr, 0);
+            if (wstr_len <= 0) {
+                throw std::runtime_error("Error converting UTF-8 string to UTF-16.");
+            }
+
+            std::wstring wstr(static_cast<size_t>(wstr_len), L'\0');
+            if (MultiByteToWideChar(CP_UTF8, 0, utf8_str.data(), utf8_len, &wstr[0], wstr_len) == 0) {
+                throw std::runtime_error("Error converting UTF-8 string to UTF-16.");
+            }
+
+            return wstr;
+        }
+
+        /**
+        *  @brief
+        *    Convert a UTF-16 string to a UTF-8 encoded string
+        *
+        *  @param[in] wstr
+        *    UTF-16 input; converted over its full length
+        *
+        *  @return
+        *    UTF-8 encoded string (empty if the input is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the input exceeds INT_MAX code units or the conversion fails
+        */
+        std::string wstring_to_utf8(const std::wstring& wstr) {
+            if (wstr.empty()) {
+                return std::string();
+            }
+
+            // The Windows API takes the input length as an int
+            if (wstr.size() > static_cast<size_t>(INT_MAX)) {
+                throw std::runtime_error("Error converting UTF-16 string to UTF-8.");
+            }
+            const int wstr_len = static_cast<int>(wstr.size());
+
+            // The first call only queries the required number of UTF-8 bytes.
+            // With an explicit input length, no null terminator is counted or written.
+            const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wstr_len, nullptr, 0, nullptr, nullptr);
+            if (utf8_len <= 0) {
+                throw std::runtime_error("Error converting UTF-16 string to UTF-8.");
+            }
+
+            std::string utf8_str(static_cast<size_t>(utf8_len), '\0');
+            if (WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wstr_len, &utf8_str[0], utf8_len, nullptr, nullptr) == 0) {
+                throw std::runtime_error("Error converting UTF-16 string to UTF-8.");
+            }
+
+            return utf8_str;
+        }
+    } // namespace UTF8Handler
+} // namespace cppfs