From 7c63dddd8c88de5347e7f7cbcd12078a71e8e7ce Mon Sep 17 00:00:00 2001
From: DrakkLord <DrakkLord@users.noreply.github.com>
Date: Thu, 25 Jul 2024 19:18:04 +0200
Subject: [PATCH] added support for windows UTF-8 file name handling

---
Review notes (ignored by git am):
 - LocalFileWatcher: FILE_NOTIFY_INFORMATION::FileName is not null-terminated,
   the name is now bounded by FileNameLength before conversion.
 - LocalFileIterator: uses the UTF8Convert_* macros like LocalFileHandle, so
   the sources also build with OPTION_WINDOWS_UTF8=OFF.
 - UTF8Handler.h: includes <string>, macros key on UNICODE (the macro that
   selects the Win32 A/W variants), are fully qualified and parenthesized.
 - CMake: UNICODE/_UNICODE are set via target_compile_definitions.
 - Blob ids on the index lines were not recomputed.

 CMakeLists.txt                                |  4 +-
 source/cppfs/CMakeLists.txt                   |  9 +++
 .../cppfs/include/cppfs/windows/UTF8Handler.h | 61 ++++++++++++++
 .../cppfs/source/windows/LocalFileHandle.cpp  | 24 +++---
 .../source/windows/LocalFileIterator.cpp      |  7 +-
 .../cppfs/source/windows/LocalFileWatcher.cpp | 80 ++++++++-----------
 source/cppfs/source/windows/UTF8Handler.cpp   | 50 ++++++++++++
 7 files changed, 173 insertions(+), 62 deletions(-)
 create mode 100644 source/cppfs/include/cppfs/windows/UTF8Handler.h
 create mode 100644 source/cppfs/source/windows/UTF8Handler.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 34d80c0..ad3ab72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,7 +70,9 @@ option(OPTION_BUILD_DOCS "Build documentation."
 option(OPTION_BUILD_EXAMPLES "Build examples." OFF)
 option(OPTION_BUILD_SSH_BACKEND "Build SSH backend" OFF)
 option(OPTION_FORCE_SYSTEM_DIR_INSTALL "Force system dir install" OFF)
-
+if(WIN32)
+    option(OPTION_WINDOWS_UTF8 "Build Windows UTF8 support" ON)
+endif()
 
 #
 # Declare project
diff --git a/source/cppfs/CMakeLists.txt b/source/cppfs/CMakeLists.txt
index 520512c..379fc41 100644
--- a/source/cppfs/CMakeLists.txt
+++ b/source/cppfs/CMakeLists.txt
@@ -119,10 +119,12 @@ endif()
 if("${CMAKE_SYSTEM_NAME}" MATCHES "Windows")
     set(headers ${headers}
         ${include_path}/windows/LocalFileWatcher.h
+        ${include_path}/windows/UTF8Handler.h
     )
 
     set(sources ${sources}
         ${source_path}/windows/LocalFileWatcher.cpp
+        ${source_path}/windows/UTF8Handler.cpp
     )
 endif()
 
@@ -188,6 +190,13 @@ set_target_properties(${target}
     SOVERSION ${META_VERSION_MAJOR}
 )
 
+if(WIN32 AND OPTION_WINDOWS_UTF8)
+    target_compile_definitions(${target}
+        PRIVATE
+        UNICODE
+        _UNICODE
+    )
+endif()
 
 #
 # Include directories
diff --git a/source/cppfs/include/cppfs/windows/UTF8Handler.h b/source/cppfs/include/cppfs/windows/UTF8Handler.h
new file mode 100644
index 0000000..0c6bbcf
--- /dev/null
+++ b/source/cppfs/include/cppfs/windows/UTF8Handler.h
@@ -0,0 +1,61 @@
+
+#pragma once
+
+
+#include <memory>
+#include <string>
+
+#include <cppfs/AbstractFileSystem.h>
+
+// Helpers for passing strings to the generic Win32 API names and back.
+// When UNICODE is defined the generic names (FindFirstFile, CreateFile, ...)
+// resolve to the wide (W) variants, so strings are converted between UTF-8
+// and UTF-16; otherwise the ANSI (A) variants are used and strings pass through.
+#if defined(UNICODE)
+    #define UTF8Convert_UTF8toW(s) ::cppfs::UTF8Handler::utf8_to_wstring(s)
+    #define UTF8Convert_WtoUTF8(s) ::cppfs::UTF8Handler::wstring_to_utf8(s)
+#else
+    #define UTF8Convert_UTF8toW(s) (s)
+    #define UTF8Convert_WtoUTF8(s) (s)
+#endif
+
+namespace cppfs
+{
+    /**
+    *  @brief
+    *    Windows specific UTF-8 string conversions
+    */
+    namespace UTF8Handler
+    {
+        /**
+        *  @brief
+        *    Convert a UTF-8 encoded string to a UTF-16 wide string
+        *
+        *  @param[in] utf8_str
+        *    UTF-8 encoded string
+        *
+        *  @return
+        *    UTF-16 encoded string (empty if utf8_str is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the conversion fails
+        */
+        std::wstring utf8_to_wstring(const std::string& utf8_str);
+
+        /**
+        *  @brief
+        *    Convert a UTF-16 wide string to a UTF-8 encoded string
+        *
+        *  @param[in] wstr
+        *    UTF-16 encoded string
+        *
+        *  @return
+        *    UTF-8 encoded string (empty if wstr is empty)
+        *
+        *  @throws std::runtime_error
+        *    If the conversion fails
+        */
+        std::string wstring_to_utf8(const std::wstring& wstr);
+    } // namespace UTF8Handler
+
+} // namespace cppfs
diff --git a/source/cppfs/source/windows/LocalFileHandle.cpp b/source/cppfs/source/windows/LocalFileHandle.cpp
index c8fda29..0d0fbc3 100644
--- a/source/cppfs/source/windows/LocalFileHandle.cpp
+++ b/source/cppfs/source/windows/LocalFileHandle.cpp
@@ -9,7 +9,7 @@
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
 #include <cppfs/windows/LocalFileIterator.h>
-
+#include <cppfs/windows/UTF8Handler.h>
 
 namespace cppfs
 {
@@ -105,7 +105,7 @@ std::vector<std::string> LocalFileHandle::listFiles() const
     // Open directory
     WIN32_FIND_DATA findData;
     std::string query = FilePath(m_path).fullPath() + "/*";
-    HANDLE findHandle = FindFirstFileA(query.c_str(), &findData);
+    HANDLE findHandle = FindFirstFile(UTF8Convert_UTF8toW(query).c_str(), &findData);
 
     if (findHandle == INVALID_HANDLE_VALUE)
     {
@@ -116,7 +116,7 @@ std::vector<std::string> LocalFileHandle::listFiles() const
     do
     {
         // Get name
-        std::string name = findData.cFileName;
+        std::string name = UTF8Convert_WtoUTF8(findData.cFileName);
 
         // Ignore . and ..
         if (name != ".." && name != ".")
@@ -213,7 +213,7 @@ bool LocalFileHandle::createDirectory()
     if (exists()) return false;
 
     // Create directory
-    if (!CreateDirectoryA(m_path.c_str(), nullptr))
+    if (!CreateDirectory(UTF8Convert_UTF8toW(m_path).c_str(), nullptr))
     {
         return false;
     }
@@ -229,7 +229,7 @@ bool LocalFileHandle::removeDirectory()
     if (!isDirectory()) return false;
 
     // Remove directory
-    if (!RemoveDirectoryA(m_path.c_str()))
+    if (!RemoveDirectory(UTF8Convert_UTF8toW(m_path).c_str()))
     {
         return false;
     }
@@ -255,7 +255,7 @@ bool LocalFileHandle::copy(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CopyFileA(src.c_str(), dst.c_str(), FALSE))
+    if (!CopyFile(UTF8Convert_UTF8toW(src).c_str(), UTF8Convert_UTF8toW(dst).c_str(), FALSE))
     {
         // Error!
         return false;
@@ -282,7 +282,7 @@ bool LocalFileHandle::move(AbstractFileHandleBackend & dest)
     }
 
     // Move file
-    if (!MoveFileA(src.c_str(), dst.c_str()))
+    if (!MoveFile(UTF8Convert_UTF8toW(src).c_str(), UTF8Convert_UTF8toW(dst).c_str()))
     {
         // Error!
         return false;
@@ -312,7 +312,7 @@ bool LocalFileHandle::createLink(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CreateHardLinkA(dst.c_str(), src.c_str(), 0))
+    if (!CreateHardLink(UTF8Convert_UTF8toW(dst).c_str(), UTF8Convert_UTF8toW(src).c_str(), 0))
     {
         // Error!
         return false;
@@ -338,7 +338,7 @@ bool LocalFileHandle::createSymbolicLink(AbstractFileHandleBackend & dest)
     }
 
     // Copy file
-    if (!CreateSymbolicLinkA(dst.c_str(), src.c_str(), 0))
+    if (!CreateSymbolicLink(UTF8Convert_UTF8toW(dst).c_str(), UTF8Convert_UTF8toW(src).c_str(), 0))
     {
         // Error!
         return false;
@@ -357,7 +357,7 @@ bool LocalFileHandle::rename(const std::string & filename)
     std::string path = FilePath(FilePath(m_path).directoryPath()).resolve(filename).fullPath();
 
     // Rename
-    if (!MoveFileA(m_path.c_str(), path.c_str()))
+    if (!MoveFile(UTF8Convert_UTF8toW(m_path).c_str(), UTF8Convert_UTF8toW(path).c_str()))
     {
         // Error!
         return false;
@@ -377,7 +377,7 @@ bool LocalFileHandle::remove()
     if (!isFile()) return false;
 
     // Delete file
-    if (!DeleteFileA(m_path.c_str()))
+    if (!DeleteFile(UTF8Convert_UTF8toW(m_path).c_str()))
     {
         return false;
     }
@@ -406,7 +406,7 @@ void LocalFileHandle::readFileInfo() const
     m_fileInfo = (void *)new WIN32_FILE_ATTRIBUTE_DATA;
 
     // Get file info
-    if (!GetFileAttributesExA(m_path.c_str(), GetFileExInfoStandard, (WIN32_FILE_ATTRIBUTE_DATA*)m_fileInfo))
+    if (!GetFileAttributesEx(UTF8Convert_UTF8toW(m_path).c_str(), GetFileExInfoStandard, (WIN32_FILE_ATTRIBUTE_DATA*)m_fileInfo))
     {
         // Error!
         delete (WIN32_FILE_ATTRIBUTE_DATA *)m_fileInfo;
diff --git a/source/cppfs/source/windows/LocalFileIterator.cpp b/source/cppfs/source/windows/LocalFileIterator.cpp
index 5e01f0e..c01a0a2 100644
--- a/source/cppfs/source/windows/LocalFileIterator.cpp
+++ b/source/cppfs/source/windows/LocalFileIterator.cpp
@@ -5,6 +5,7 @@
 
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
+#include <cppfs/windows/UTF8Handler.h>
 
 
 namespace cppfs
@@ -78,7 +79,7 @@ std::string LocalFileIterator::name() const
     }
 
     // Return filename of current item
-    return std::string(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
+    return UTF8Convert_WtoUTF8(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
 }
 
 void LocalFileIterator::next()
@@ -97,7 +98,7 @@ void LocalFileIterator::readNextEntry()
     {
         // Open directory
         std::string query = FilePath(m_path).fullPath() + "/*";
-        m_findHandle = FindFirstFileA(query.c_str(), static_cast<WIN32_FIND_DATA *>(m_findData));
+        m_findHandle = FindFirstFile(UTF8Convert_UTF8toW(query).c_str(), static_cast<WIN32_FIND_DATA *>(m_findData));
 
         // Abort if directory could not be opened
         if (m_findHandle == INVALID_HANDLE_VALUE)
@@ -122,7 +123,7 @@ void LocalFileIterator::readNextEntry()
         m_index++;
 
         // Get filename
-        filename = std::string(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
+        filename = UTF8Convert_WtoUTF8(static_cast<WIN32_FIND_DATA *>(m_findData)->cFileName);
     } while (filename == ".." || filename == ".");
 }
 
diff --git a/source/cppfs/source/windows/LocalFileWatcher.cpp b/source/cppfs/source/windows/LocalFileWatcher.cpp
index b9d6664..c0d69c7 100644
--- a/source/cppfs/source/windows/LocalFileWatcher.cpp
+++ b/source/cppfs/source/windows/LocalFileWatcher.cpp
@@ -5,7 +5,7 @@
 
 #include <cppfs/FilePath.h>
 #include <cppfs/windows/LocalFileSystem.h>
-
+#include <cppfs/windows/UTF8Handler.h>
 
 namespace
 {
@@ -64,8 +64,8 @@ AbstractFileSystem * LocalFileWatcher::fs() const
 void LocalFileWatcher::add(FileHandle & dir, unsigned int events, RecursiveMode recursive)
 {
     // Open directory
-    ::HANDLE dirHandle = ::CreateFileA(
-        dir.path().c_str(),                                     // Pointer to the directory name
+    ::HANDLE dirHandle = ::CreateFile(
+        UTF8Convert_UTF8toW(dir.path()).c_str(),                // Pointer to the directory name
         FILE_LIST_DIRECTORY,                                    // Access (read/write) mode
         FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, // File share mode
         NULL,                                                   // Security descriptor
@@ -189,50 +189,38 @@ void LocalFileWatcher::watch(int timeout)
                 // Get file event notification
                 FILE_NOTIFY_INFORMATION * fileEvent = reinterpret_cast<FILE_NOTIFY_INFORMATION*>(entry);
 
-                // Convert filename to 8-bit character string
-                char fileName[4096];
-                int numBytes = ::WideCharToMultiByte(CP_ACP,
-                    0,
-                    fileEvent->FileName,
-                    fileEvent->FileNameLength / sizeof(WCHAR),
-                    fileName,
-                    sizeof(fileName),
-                    NULL,
-                    NULL);
-
-                // Check if conversion was successful
-                if (numBytes != 0) {
-                    // Get filename in unified format
-                    std::string fname = FilePath(std::string(fileName, fileName + numBytes)).path();
-
-                    // Determine event type
-                    FileEvent eventType = (FileEvent)0;
-                    switch (fileEvent->Action) {
-                        case FILE_ACTION_ADDED:
-                            eventType = FileCreated;
-                            break;
-
-                        case FILE_ACTION_REMOVED:
-                            eventType = FileRemoved;
-                            break;
-
-                        case FILE_ACTION_MODIFIED:
-                        case FILE_ACTION_RENAMED_NEW_NAME:
-                            eventType = FileModified;
-                            break;
+                // Get filename in unified format.
+                // FileName is not null-terminated: its length is FileNameLength (in bytes).
+                const std::wstring wideName(fileEvent->FileName, fileEvent->FileNameLength / sizeof(WCHAR));
+                std::string fname = FilePath(UTF8Handler::wstring_to_utf8(wideName)).path();
+
+                // Determine event type
+                FileEvent eventType = (FileEvent)0;
+                switch (fileEvent->Action) {
+                    case FILE_ACTION_ADDED:
+                        eventType = FileCreated;
+                        break;
+
+                    case FILE_ACTION_REMOVED:
+                        eventType = FileRemoved;
+                        break;
+
+                    case FILE_ACTION_MODIFIED:
+                    case FILE_ACTION_RENAMED_NEW_NAME:
+                        eventType = FileModified;
+                        break;
 
-                        default:
-                            break;
-                    }
-
-                    // Check if event is watched for
-                    if (watcher.events & eventType) {
-                        // Get file handle
-                        FileHandle fh = watcher.dir.open(fname);
-
-                        // Invoke callback function
-                        onFileEvent(fh, eventType);
-                    }
+                    default:
+                        break;
+                }
+
+                // Check if event is watched for
+                if (watcher.events & eventType) {
+                    // Get file handle
+                    FileHandle fh = watcher.dir.open(fname);
+
+                    // Invoke callback function
+                    onFileEvent(fh, eventType);
                 }
 
                 // Get next event
diff --git a/source/cppfs/source/windows/UTF8Handler.cpp b/source/cppfs/source/windows/UTF8Handler.cpp
new file mode 100644
index 0000000..be11d58
--- /dev/null
+++ b/source/cppfs/source/windows/UTF8Handler.cpp
@@ -0,0 +1,50 @@
+
+#include <cppfs/windows/UTF8Handler.h>
+#include <windows.h>
+#include <stdexcept>
+
+
+namespace cppfs
+{
+    namespace UTF8Handler
+    {
+        std::wstring utf8_to_wstring(const std::string& utf8_str) {
+            if (utf8_str.empty()) {
+                return std::wstring();
+            }
+
+            const int wstr_len = MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, nullptr, 0);
+            if (wstr_len == 0) {
+                throw std::runtime_error("Error converting UTF-8 string to UTF-16.");
+            }
+
+            std::wstring wstr(wstr_len, L'\0');
+
+            if (MultiByteToWideChar(CP_UTF8, 0, utf8_str.c_str(), -1, &wstr[0], wstr_len) == 0) {
+                throw std::runtime_error("Error converting UTF-8 string to UTF-16.");
+            }
+            wstr.resize(static_cast<size_t>(wstr_len) - 1); // -1 because MultiByteToWideChar includes the null terminator in its count
+
+            return wstr;
+        }
+
+        std::string wstring_to_utf8(const std::wstring& wstr) {
+            if (wstr.empty()) {
+                return std::string();
+            }
+
+            const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, nullptr, 0, nullptr, nullptr);
+            if (utf8_len == 0) {
+                throw std::runtime_error("Error converting UTF-16 string to UTF-8.");
+            }
+
+            std::string utf8_str(utf8_len, '\0');
+            if (WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), -1, &utf8_str[0], utf8_len, nullptr, nullptr) == 0) {
+                throw std::runtime_error("Error converting UTF-16 string to UTF-8.");
+            }
+            utf8_str.resize(static_cast<size_t>(utf8_len) - 1); // -1 because WideCharToMultiByte includes the null terminator in its count
+
+            return utf8_str;
+        }
+    }
+} // namespace cppfs