From eb9bc29fdfa65665575db85d52ecd2604fe8052a Mon Sep 17 00:00:00 2001 From: Magnus Holm Date: Thu, 20 Jun 2019 10:56:03 +0200 Subject: [PATCH] JS: Handle surrogate pairs correctly --- javascript/diff_match_patch_uncompressed.js | 21 +++++++++ javascript/tests/diff_match_patch_test.html | 1 + javascript/tests/diff_match_patch_test.js | 51 +++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/javascript/diff_match_patch_uncompressed.js b/javascript/diff_match_patch_uncompressed.js index f9fa82b..940b6e1 100644 --- a/javascript/diff_match_patch_uncompressed.js +++ b/javascript/diff_match_patch_uncompressed.js @@ -22,6 +22,16 @@ * @author fraser@google.com (Neil Fraser) */ +/** + * Determine if the index is inside a surrogate pair. + * @param {string} str The string + * @param {number} idx The index + */ +function insideSurrogate(str, idx) { + var code = str.charCodeAt(idx); + return code >= 0xDC00 && code <= 0xDFFF; +} + /** * Class containing the diff, match and patch methods. * @constructor @@ -361,6 +371,11 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) { x1++; y1++; } + if (insideSurrogate(text1, x1)) { + x1--; + y1--; + } + v1[k1_offset] = x1; if (x1 > text1_length) { // Ran off the right of the graph. 
@@ -569,6 +584,9 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) { } pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); } + if (insideSurrogate(text1, pointermid)) { + pointermid--; + } return pointermid; }; @@ -601,6 +619,9 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) { } pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); } + if (insideSurrogate(text1, text1.length - pointermid)) { + pointermid--; + } return pointermid; }; diff --git a/javascript/tests/diff_match_patch_test.html b/javascript/tests/diff_match_patch_test.html index 4661730..3b738b4 100644 --- a/javascript/tests/diff_match_patch_test.html +++ b/javascript/tests/diff_match_patch_test.html @@ -113,6 +113,7 @@ 'testPatchObj', 'testPatchFromText', 'testPatchToText', + 'testPatchSurrogates', 'testPatchAddContext', 'testPatchMake', 'testPatchSplitMax', diff --git a/javascript/tests/diff_match_patch_test.js b/javascript/tests/diff_match_patch_test.js index 109e56a..6b74e87 100644 --- a/javascript/tests/diff_match_patch_test.js +++ b/javascript/tests/diff_match_patch_test.js @@ -767,6 +767,57 @@ function testPatchToText() { strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n'; p = dmp.patch_fromText(strp); assertEquals(strp, dmp.patch_toText(p)); + +} + +function testPatchSurrogates() { + var p, p2, strp; + + // These share the same high surrogate prefix + p = dmp.patch_make('\u{1F30D}', '\u{1F308}'); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); + + // These share the same low surrogate suffix + p = dmp.patch_make('\u{10120}', '\u{10520}'); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); + + // No common prefix, but later there's the same high surrogate char + p = dmp.patch_make('abbb\u{1F30D}', 'cbbb\u{1F308}'); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); + + 
// No common suffix, but earlier there's the same low surrogate char + p = dmp.patch_make('\u{10120}aaac', '\u{10520}aaab'); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); + + // Same shared low surrogate and differing suffix, but the first text has an extra prefix + p = dmp.patch_make('abbb\u{10120}aaac', '\u{10520}aaab'); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); + + var padding1 = ""; + while (padding1.length < 100) { + padding1 += String.fromCharCode(50 + padding1.length); + } + + var padding2 = ""; + while (padding2.length < 100) { + padding2 += String.fromCharCode(200 + padding2.length); + } + + // Surround the characters with long deterministic padding, swapped between the two texts + p = dmp.patch_make(padding1+'\u{10120}'+padding2, padding2+'\u{10520}'+padding1); + strp = dmp.patch_toText(p); + p2 = dmp.patch_fromText(strp); + assertEquivalent(p, p2); } function testPatchAddContext() {