Skip to content

Commit d6ab887

Browse files
committed
Fix: Second update to diff-match-patch fix
In 1.0.2 we applied an update from google/diff-match-patch#80 to resolve the surrogate pair encoding issue. Since then we found and resolved a bug in that fix, and this patch brings those additional updates to Simperium. Issues previously remained when decoding broken patches that already existed or that came from unpatched versions of the iOS library, and when unexpectedly receiving empty diff groups in `toDelta`.
1 parent 752c223 commit d6ab887

File tree

4 files changed

+134
-10
lines changed

4 files changed

+134
-10
lines changed

RELEASE-NOTES.txt

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 1.0.4
4+
5+
- Update diff-match-patch to newer revision of surrogate-pair encoding fix
6+
37
## 1.0.2
48

59
- Update diff-match-patch to fix problem when encoding consecutive surrogate pairs

package-lock.json

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "simperium",
3-
"version": "1.0.3",
3+
"version": "1.0.4",
44
"description": "A simperium client for node.js",
55
"main": "./lib/simperium/index.js",
66
"repository": {

src/simperium/jsondiff/diff_match_patch.js

+128-8
Original file line numberDiff line numberDiff line change
@@ -1361,39 +1361,159 @@ diff_match_patch.prototype.diff_toDelta = function(diffs) {
13611361
var text = [];
13621362
var lastEnd;
13631363
for (var x = 0; x < diffs.length; x++) {
1364-
13651364
var thisDiff = diffs[x];
13661365
var thisTop = thisDiff[1][0];
13671366
var thisEnd = thisDiff[1][thisDiff[1].length - 1];
13681367

1368+
if (0 === thisDiff[1].length) {
1369+
continue;
1370+
}
1371+
1372+
// trap a trailing high-surrogate so we can
1373+
// distribute it to the successive edits
13691374
if (thisEnd && this.isHighSurrogate(thisEnd)) {
1375+
lastEnd = thisEnd;
13701376
thisDiff[1] = thisDiff[1].slice(0, -1);
13711377
}
13721378

13731379
if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) {
13741380
thisDiff[1] = lastEnd + thisDiff[1];
13751381
}
13761382

1377-
lastEnd = thisEnd;
1378-
if ( 0 === thisDiff[1].length ) {
1383+
if (0 === thisDiff[1].length) {
13791384
continue;
13801385
}
13811386

1382-
switch (diffs[x][0]) {
1387+
switch (thisDiff[0]) {
13831388
case DIFF_INSERT:
1384-
text[x] = '+' + encodeURI(diffs[x][1]);
1389+
text.push('+' + encodeURI(thisDiff[1]));
13851390
break;
13861391
case DIFF_DELETE:
1387-
text[x] = '-' + diffs[x][1].length;
1392+
text.push('-' + thisDiff[1].length);
13881393
break;
13891394
case DIFF_EQUAL:
1390-
text[x] = '=' + diffs[x][1].length;
1395+
text.push('=' + thisDiff[1].length);
13911396
break;
13921397
}
13931398
}
13941399
return text.join('\t').replace(/%20/g, ' ');
13951400
};
13961401

1402+
/**
 * Convert one hexadecimal digit character into its numeric value.
 *
 * @param {String} c a single character: '0'-'9', 'A'-'F' or 'a'-'f'
 * @return {Number} integer between 0 and 15
 * @throws {Error} 'Invalid hex-code' when c is anything else
 */
diff_match_patch.prototype.digit16 = function(c) {
  // '0'-'9' sit at indices 0..9, 'A'-'F' at 10..15, 'a'-'f' at 16..21
  var HEX_DIGITS = '0123456789ABCDEFabcdef';
  var isSingleChar = typeof c === 'string' && c.length === 1;
  var position = isSingleChar ? HEX_DIGITS.indexOf(c) : -1;

  if (position < 0) {
    throw new Error('Invalid hex-code');
  }

  // lowercase letters share the values of their uppercase counterparts
  return position < 16 ? position : position - 6;
};
1423+
1424+
/**
 * Decode URI-encoded string but allow for encoded surrogate halves
 *
 * diff_match_patch needs this relaxation of the requirements because
 * not all libraries and versions produce valid URI strings in toDelta
 * and we don't want to crash this code when the input is valid input
 * but at the same time invalid utf-8
 *
 * The native decodeURI() is tried first. Only when it throws does the
 * manual decoder below run; it reads each percent-encoded UTF-8
 * sequence (1 to 4 bytes) by hand and additionally accepts three-byte
 * sequences that encode a lone surrogate half. Note that the manual
 * decoder decodes every percent-sequence, whereas the native function
 * leaves escapes of reserved characters (such as %3A) untouched.
 *
 * @example: decodeURI( 'abcd%3A %F0%9F%85%B0' ) = 'abcd%3A \ud83c\udd70'
 * @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c'
 *
 * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
 *
 * @param {String} text input string encoded by encodeURI() or equivalent
 * @return {String}
 * @throws {URIError} when a multi-byte sequence is truncated, has a bad
 *     continuation byte, or encodes a value outside U+10000..U+10FFFF
 *     in its four-byte form
 * @throws {Error} when a percent sign is not followed by two hex digits
 *     (raised by digit16)
 */
diff_match_patch.prototype.decodeURI = function(text) {
  try {
    return decodeURI(text);
  } catch ( e ) {
    var i = 0;
    var decoded = '';

    while (i < text.length) {
      if ( text[i] !== '%' ) {
        decoded += text[i++];
        continue;
      }

      // start a percent-sequence
      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
      if ((byte1 & 0x80) === 0) {
        // single byte: plain ASCII
        decoded += String.fromCharCode(byte1);
        i += 3;
        continue;
      }

      if ('%' !== text[i + 3]) {
        throw new URIError('URI malformed');
      }

      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
      if ((byte2 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte2 = byte2 & 0x3F;
      if ((byte1 & 0xE0) === 0xC0) {
        // two-byte sequence: 110xxxxx 10xxxxxx
        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
        i += 6;
        continue;
      }

      if ('%' !== text[i + 6]) {
        throw new URIError('URI malformed');
      }

      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
      if ((byte3 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte3 = byte3 & 0x3F;
      if ((byte1 & 0xF0) === 0xE0) {
        // three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        // unpaired surrogate are fine here
        decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
        i += 9;
        continue;
      }

      if ('%' !== text[i + 9]) {
        throw new URIError('URI malformed');
      }

      var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
      if ((byte4 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte4 = byte4 & 0x3F;
      if ((byte1 & 0xF8) === 0xF0) {
        // four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
          // UTF-16 splits the 20-bit offset above U+10000 across the pair.
          // Subtracting 0x10000 (rather than masking with 0xFFFF) keeps the
          // plane bits, so code points beyond plane 1 get the right
          // high surrogate.
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 | (offset >>> 10));
          decoded += String.fromCharCode(0xDC00 | (offset & 0x3FF));
          i += 12;
          continue;
        }
      }

      throw new URIError('URI malformed');
    }

    return decoded;
  }
};
13971517

13981518
/**
13991519
* Given the original text1, and an encoded string which describes the
@@ -1416,7 +1536,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
14161536
case '+':
14171537
try {
14181538
diffs[diffsLength++] =
1419-
new diff_match_patch.Diff(DIFF_INSERT, decodeURI(param));
1539+
new diff_match_patch.Diff(DIFF_INSERT, this.decodeURI(param));
14201540
} catch (ex) {
14211541
// Malformed URI sequence.
14221542
throw new Error('Illegal escape in diff_fromDelta: ' + param);

0 commit comments

Comments
 (0)