pylint-dev · Pierre-Sassoulas · Jun 14, 2024 · Jun 14, 2024 · Jun 14, 2024 · Pierre-Sassoulas
diff --git a/doc/whatsnew/fragments/8736.bugfix b/doc/whatsnew/fragments/8736.bugfix
@@ -0,0 +1,5 @@
+When displaying unicode with surrogates (or any other text that triggers a
+``UnicodeEncodeError``), pylint now prints a best-effort representation of the
+string, dropping the characters that cannot be encoded, instead of crashing.
+
+Closes #8736.
diff --git a/pylint/reporters/base_reporter.py b/pylint/reporters/base_reporter.py
@@ -42,7 +42,11 @@ def handle_message(self, msg: Message) -> None:
 
     def writeln(self, string: str = "") -> None:
         """Write a line in the output buffer."""
-        print(string, file=self.out)
+        try:
+            print(string, file=self.out)
+        except UnicodeEncodeError:
+            best_effort_string = string.encode(encoding="utf-8", errors="ignore")
+            print(best_effort_string.decode("utf8"), file=self.out)
 
     def display_reports(self, layout: Section) -> None:
         """Display results encapsulated in the layout tree."""

diff --git a/pylint/testutils/functional/lint_module_output_update.py b/pylint/testutils/functional/lint_module_output_update.py
@@ -40,4 +40,12 @@ def _check_output_text(
         with open(self._test_file.expected_output, "w", encoding="utf-8") as f:
             writer = csv.writer(f, dialect="test")
             for line in actual_output:
-                writer.writerow(line.to_csv())
+                try:
+                    writer.writerow(line.to_csv())
+                except UnicodeEncodeError:
+                    writer.writerow(
+                        [
+                            s.encode("utf8", "ignore").decode("utf8")
+                            for s in line.to_csv()
+                        ]
+                    )
diff --git a/pylint/testutils/lint_module_test.py b/pylint/testutils/lint_module_test.py
@@ -303,7 +303,15 @@ def error_msg_for_unequal_output(
             expected_csv = StringIO()
             writer = csv.writer(expected_csv, dialect="test")
             for line in sorted(received_lines, key=sort_by_line_number):
-                writer.writerow(line.to_csv())
+                try:
+                    writer.writerow(line.to_csv())
+                except UnicodeEncodeError:
+                    writer.writerow(
+                        [
+                            s.encode("utf8", "ignore").decode("utf8")
+                            for s in line.to_csv()
+                        ]
+                    )
             error_msg += expected_csv.getvalue()
         return error_msg
 

diff --git a/tests/functional/r/regression_02/regression_8736.py b/tests/functional/r/regression_02/regression_8736.py
@@ -0,0 +1,3 @@
+"""This does not crash in the functional tests, but it did when called directly."""
+
+assert "\U00010000" == "\ud800\udc00"  # [comparison-of-constants]
diff --git a/tests/functional/r/regression_02/regression_8736.txt b/tests/functional/r/regression_02/regression_8736.txt
@@ -0,0 +1 @@
+comparison-of-constants:3:7:3:37::"Comparison between constants: '𐀀 == ' has a constant value":HIGH
-comparison-of-constants:3:7:3:37::"Comparison between constants: '𐀀 == ' has a constant value":HIGH
+comparison-of-constants:3:7:3:37::"Comparison between constants: '"\U00010000" == "\ud800\udc00"' has a constant value":HIGH
-comparison-of-constants:3:7:3:37::"Comparison between constants: '𐀀 == ' has a constant value":HIGH
+comparison-of-constants:3:7:3:37::"Comparison between constants: '"\U00010000" == "\ud800\udc00"' has a constant value":HIGH
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		"""This does not crash in the functional tests, but it did when called directly."""

		assert "\U00010000" == "\ud800\udc00" # [comparison-of-constants]