Skip to content

Commit 55e9cb0

Browse files
committed
fix : fix surrogate pairs splitting for toText()/fromText()
1 parent 116615b commit 55e9cb0

File tree

2 files changed

+53
-13
lines changed

2 files changed

+53
-13
lines changed

javascript/diff_match_patch_uncompressed.js

+46-6
Original file line numberDiff line numberDiff line change
@@ -1423,17 +1423,17 @@ diff_match_patch.prototype.digit16 = function(c) {
14231423

14241424
/**
14251425
* Decode URI-encoded string but allow for encoded surrogate halves
1426-
*
1426+
*
14271427
* diff_match_patch needs this relaxation of the requirements because
14281428
* not all libraries and versions produce valid URI strings in toDelta
14291429
* and we don't want to crash this code when the input is valid input
14301430
* but at the same time invalid utf-8
1431-
*
1431+
*
14321432
* @example: decodeURI( 'abcd%3A %F0%9F%85%B0' ) = 'abcd: \ud83c\udd70'
14331433
* @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c'
1434-
*
1434+
*
14351435
* @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
1436-
*
1436+
*
14371437
* @param {String} text input string encoded by encodeURI() or equivalent
14381438
* @return {String}
14391439
*/
@@ -2215,6 +2215,46 @@ diff_match_patch.prototype.patch_splitMax = function(patches) {
22152215
}
22162216
};
22172217

2218+
/**
 * Rejoin UTF-16 surrogate pairs that were split across diff boundaries.
 *
 * When an edit's text ends with a high surrogate, that code unit is trimmed
 * off and remembered. Every following edit whose text starts with a low
 * surrogate gets the remembered high surrogate prepended, so the pair ends
 * up whole inside a single edit. Edits whose text is (or becomes) empty are
 * left out of the result.
 *
 * Notes:
 * - The diff entries are modified in place (index 1 is reassigned), and the
 *   same entry objects are placed in the returned array.
 * - A trimmed high surrogate that is never followed by an edit starting
 *   with a low surrogate is dropped from the output.
 * - Relies on this.isHighSurrogate / this.isLowSurrogate, which are defined
 *   outside this block.
 *
 * @param {Array} diffs List of diff entries; index 1 of each entry is the
 *     edit text (index 0 is not read here).
 * @return {Array} New list holding the non-empty, rejoined entries.
 */
diff_match_patch.prototype.diffs_joinSurrogatePairs = function(diffs) {
  var newDiffs = [];

  // Most recently trimmed trailing high surrogate. It is deliberately not
  // cleared after use so that several successive edits (e.g. a delete
  // followed by an insert) can each receive it.
  var lastEnd;

  for (var x = 0; x < diffs.length; x++) {
    var thisDiff = diffs[x];
    var text = thisDiff[1];

    if (0 === text.length) {
      continue;
    }

    var thisTop = text[0];
    var thisEnd = text[text.length - 1];

    // Capture the surrogate carried over from earlier edits BEFORE this
    // edit's own trailing surrogate can replace it. Otherwise an edit that
    // both starts with a low surrogate and ends with a high surrogate would
    // be prefixed with its own trailing half instead of the carried one.
    var carried = lastEnd;

    // trap a trailing high-surrogate so we can
    // distribute it to the successive edits
    if (thisEnd && this.isHighSurrogate(thisEnd)) {
      lastEnd = thisEnd;
      text = text.slice(0, -1);
    }

    // `carried` is only ever assigned a value that passed isHighSurrogate,
    // so a truthiness check is sufficient here.
    if (carried && thisTop && this.isLowSurrogate(thisTop)) {
      text = carried + text;
    }

    thisDiff[1] = text;

    if (0 === text.length) {
      continue;
    }

    newDiffs.push(thisDiff);
  }

  return newDiffs;
};
2252+
2253+
/**
 * Rejoin split surrogate pairs inside one patch.
 *
 * Passes patch.diffs through diffs_joinSurrogatePairs, stores the result
 * back on the patch, and hands the same patch object back to the caller.
 *
 * @param {Object} patch Object with a `diffs` property; it is modified in
 *     place.
 * @return {Object} The patch that was passed in.
 */
diff_match_patch.prototype.patch_joinSurrogatePairs = function(patch) {
  var joinedDiffs = this.diffs_joinSurrogatePairs(patch.diffs);
  patch.diffs = joinedDiffs;
  return patch;
};
2257+
22182258

22192259
/**
22202260
* Take a list of patches and return a textual representation.
@@ -2224,7 +2264,7 @@ diff_match_patch.prototype.patch_splitMax = function(patches) {
22242264
diff_match_patch.prototype.patch_toText = function(patches) {
22252265
var text = [];
22262266
for (var x = 0; x < patches.length; x++) {
2227-
text[x] = patches[x];
2267+
text[x] = this.patch_joinSurrogatePairs(patches[x]);
22282268
}
22292269
return text.join('');
22302270
};
@@ -2277,7 +2317,7 @@ diff_match_patch.prototype.patch_fromText = function(textline) {
22772317
while (textPointer < text.length) {
22782318
var sign = text[textPointer].charAt(0);
22792319
try {
2280-
var line = decodeURI(text[textPointer].substring(1));
2320+
var line = this.decodeURI(text[textPointer].substring(1));
22812321
} catch (ex) {
22822322
// Malformed URI sequence.
22832323
throw new Error('Illegal escape in patch_fromText: ' + line);

javascript/tests/diff_match_patch_test.js

+7-7
Original file line numberDiff line numberDiff line change
@@ -502,14 +502,14 @@ function testDiffDelta() {
502502

503503
(function(){
504504
const originalText = `U+1F17x 🅰️ 🅱️ 🅾️ 🅿️ safhawifhkw
505-
U+1F18x 🆎
505+
U+1F18x 🆎
506506
0 1 2 3 4 5 6 7 8 9 A B C D E F
507-
U+1F19x 🆑 🆒 🆓 🆔 🆕 🆖 🆗 🆘 🆙 🆚
508-
U+1F20x 🈁 🈂️ sfss.,_||saavvvbbds
509-
U+1F21x 🈚
507+
U+1F19x 🆑 🆒 🆓 🆔 🆕 🆖 🆗 🆘 🆙 🆚
508+
U+1F20x 🈁 🈂️ sfss.,_||saavvvbbds
509+
U+1F21x 🈚
510510
U+1F22x 🈯
511-
U+1F23x 🈲 🈳 🈴 🈵 🈶 🈷️ 🈸 🈹 🈺
512-
U+1F25x 🉐 🉑
511+
U+1F23x 🈲 🈳 🈴 🈵 🈶 🈷️ 🈸 🈹 🈺
512+
U+1F25x 🉐 🉑
513513
U+1F30x 🌀 🌁 🌂 🌃 🌄 🌅 🌆 🌇 🌈 🌉 🌊 🌋 🌌 🌍 🌎 🌏
514514
U+1F31x 🌐 🌑 🌒 🌓 🌔 🌕 🌖 🌗 🌘 🌙 🌚 🌛 🌜 🌝 🌞 `;
515515

@@ -892,7 +892,7 @@ function testPatchFromText() {
892892
function testPatchToText() {
893893
var strp = '@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n';
894894
var p = dmp.patch_fromText(strp);
895-
assertEquals(strp, dmp.patch_toText(p));
895+
assertEquals(strp, dmp.patch_toText(p));
896896

897897
strp = '@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n';
898898
p = dmp.patch_fromText(strp);

0 commit comments

Comments
 (0)