From ee7cfe5e6d49c6d8f58d62725855573ca9839d9b Mon Sep 17 00:00:00 2001
From: Mouhib Agrebi
Date: Thu, 20 Feb 2025 09:55:53 +0100
Subject: [PATCH] Resolve character encoding mismatch by enforcing UTF-8 in
 sqlline.py

The issue with certain characters, such as Japanese, Polish, and other
non-ASCII characters, being exported incorrectly has been resolved. The
root cause was an encoding mismatch, where character data was not
properly handled in the `java_cmd` command in `sqlline.py`. This command
constructs and executes a Java process to launch SQLLine, a command-line
tool for executing SQL queries against Apache Phoenix.

To address this, the `sqlline.py` file was modified to explicitly
enforce UTF-8 encoding. This ensures that all character data is
correctly encoded and exported, preventing data corruption or
misinterpretation due to incorrect encoding settings.
---
 bin/sqlline.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/sqlline.py b/bin/sqlline.py
index a45ed68c8cc..2cd776890da 100755
--- a/bin/sqlline.py
+++ b/bin/sqlline.py
@@ -101,6 +101,7 @@ def kill_child():
     '" -Djava.util.logging.config.class=org.apache.hadoop.hbase.logging.JulToSlf4jInitializer ' + \
     ' -Dlog4j2.configurationFile=file:' + os.path.join(phoenix_utils.current_dir, "log4j2.properties") + \
     disable_jna + \
+    " -Dfile.encoding=UTF-8 " + \
     " sqlline.SqlLine -d org.apache.phoenix.jdbc.PhoenixDriver" + \
     (not args.noconnect and " -u " + phoenix_utils.shell_quote([jdbc_url]) or "") + \
     " -n none -p none --color=" + \