@@ -1176,6 +1176,166 @@ diff_match_patch.prototype.diff_cleanupMerge = function(diffs) {
1176
1176
}
1177
1177
} ;
1178
1178
1179
/**
 * Rearrange diff boundaries that split Unicode surrogate pairs.
 *
 * A character-level diff can place a boundary between the high and the low
 * half of a surrogate pair. This function moves such halves so that they end
 * up in the same tuple, without changing the two texts the diff describes:
 *
 * - A half sitting at the edge of an equality belongs to both texts, so it
 *   is moved into both the delete and the insert side of the neighbouring
 *   run of edits (a missing side is created).
 * - A high surrogate ending a delete/insert is moved to the next tuple of
 *   the same operation in the same run when that tuple starts with a low
 *   surrogate.
 * - Between two adjacent equalities the high surrogate simply moves to the
 *   second one.
 *
 * Halves that have no partner on the other side of the boundary are left
 * where they are; nothing is ever dropped. Tuples with empty text are
 * removed. The array and its tuples are modified in place.
 *
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 * @return {!Array.<!diff_match_patch.Diff>} The same array, for chaining.
 */
diff_match_patch.prototype.diff_cleanupSplitSurrogates = function(diffs) {
  var dmp = this;
  var endsWithHigh = function(text) {
    return text.length > 0 &&
        dmp.isHighSurrogate(text.charAt(text.length - 1));
  };
  var startsWithLow = function(text) {
    return text.length > 0 && dmp.isLowSurrogate(text.charAt(0));
  };
  var dropEmpty = function() {
    for (var i = diffs.length - 1; i >= 0; i--) {
      if (diffs[i][1].length === 0) {
        diffs.splice(i, 1);
      }
    }
  };
  var x, y, op, text, half, deleteAt, insertAt, following;

  dropEmpty();

  // Pass 1: equalities hand over halves that belong to a neighbouring edit.
  x = 0;
  while (x < diffs.length) {
    if (diffs[x][0] !== DIFF_EQUAL) {
      x++;
      continue;
    }

    // Leading low surrogate whose high half ends the preceding run of edits.
    if (startsWithLow(diffs[x][1])) {
      deleteAt = -1;
      insertAt = -1;
      // Locate the last delete and the last insert before this equality.
      for (y = x - 1; y >= 0 && diffs[y][0] !== DIFF_EQUAL; y--) {
        if (deleteAt === -1 && diffs[y][0] === DIFF_DELETE) {
          deleteAt = y;
        } else if (insertAt === -1 && diffs[y][0] === DIFF_INSERT) {
          insertAt = y;
        }
      }
      if ((deleteAt !== -1 && endsWithHigh(diffs[deleteAt][1])) ||
          (insertAt !== -1 && endsWithHigh(diffs[insertAt][1]))) {
        half = diffs[x][1].charAt(0);
        diffs[x][1] = diffs[x][1].substring(1);
        // Update existing tuples before splicing so indices stay valid.
        if (deleteAt !== -1) {
          diffs[deleteAt][1] += half;
        }
        if (insertAt !== -1) {
          diffs[insertAt][1] += half;
        }
        if (deleteAt === -1) {
          // y + 1 is the first index of the run: keep deletes before inserts.
          diffs.splice(y + 1, 0, [DIFF_DELETE, half]);
          x++;
        }
        if (insertAt === -1) {
          diffs.splice(x, 0, [DIFF_INSERT, half]);
          x++;
        }
      }
    }

    // Trailing high surrogate whose low half starts what follows.
    text = diffs[x][1];
    if (x + 1 < diffs.length && endsWithHigh(text)) {
      half = text.charAt(text.length - 1);
      text = text.substring(0, text.length - 1);
      if (diffs[x + 1][0] === DIFF_EQUAL) {
        if (startsWithLow(diffs[x + 1][1])) {
          diffs[x][1] = text;
          diffs[x + 1][1] = half + diffs[x + 1][1];
        }
      } else {
        deleteAt = -1;
        insertAt = -1;
        // Locate the first delete and the first insert after this equality.
        for (y = x + 1; y < diffs.length && diffs[y][0] !== DIFF_EQUAL; y++) {
          if (deleteAt === -1 && diffs[y][0] === DIFF_DELETE) {
            deleteAt = y;
          } else if (insertAt === -1 && diffs[y][0] === DIFF_INSERT) {
            insertAt = y;
          }
        }
        // When one side has no edit in this run, its next characters come
        // from the equality that follows the run (if there is one).
        following = y < diffs.length ? diffs[y][1] : '';
        if (startsWithLow(deleteAt !== -1 ? diffs[deleteAt][1] : following) ||
            startsWithLow(insertAt !== -1 ? diffs[insertAt][1] : following)) {
          diffs[x][1] = text;
          if (deleteAt !== -1) {
            diffs[deleteAt][1] = half + diffs[deleteAt][1];
          }
          if (insertAt !== -1) {
            diffs[insertAt][1] = half + diffs[insertAt][1];
          }
          if (deleteAt === -1) {
            diffs.splice(x + 1, 0, [DIFF_DELETE, half]);
            y++;
          }
          if (insertAt === -1) {
            diffs.splice(y, 0, [DIFF_INSERT, half]);
          }
        }
      }
    }

    if (diffs[x][1].length === 0) {
      // The equality gave away all of its text; the next tuple is now at x.
      diffs.splice(x, 1);
    } else {
      x++;
    }
  }

  // Pass 2: pairs split between two tuples of the same edit operation.
  for (x = 0; x < diffs.length; x++) {
    op = diffs[x][0];
    text = diffs[x][1];
    if (op === DIFF_EQUAL || !endsWithHigh(text)) {
      continue;
    }
    for (y = x + 1; y < diffs.length && diffs[y][0] !== DIFF_EQUAL; y++) {
      if (diffs[y][0] === op) {
        if (startsWithLow(diffs[y][1])) {
          diffs[y][1] = text.charAt(text.length - 1) + diffs[y][1];
          diffs[x][1] = text.substring(0, text.length - 1);
        }
        break;
      }
    }
  }

  dropEmpty();
  return diffs;
};
1213
+
1214
/**
 * Test whether a string starts with a high (leading) surrogate code unit,
 * i.e. a UTF-16 code unit in the range 0xD800..0xDBFF.
 *
 * Only the first code unit of c is examined.
 *
 * @param {string} c Character to test.
 * @return {boolean} True if the first code unit is a high surrogate; false
 *     otherwise, including for an empty string or a non-string value.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Guard so that an out-of-range lookup such as text[i] === undefined
  // yields false instead of a TypeError.
  if (typeof c !== 'string' || c.length === 0) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
1218
+
1219
/**
 * Test whether a string starts with a low (trailing) surrogate code unit,
 * i.e. a UTF-16 code unit in the range 0xDC00..0xDFFF.
 *
 * Only the first code unit of c is examined.
 *
 * @param {string} c Character to test.
 * @return {boolean} True if the first code unit is a low surrogate; false
 *     otherwise, including for an empty string or a non-string value.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // Guard so that an out-of-range lookup such as text[i] === undefined
  // yields false instead of a TypeError.
  if (typeof c !== 'string' || c.length === 0) {
    return false;
  }
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
1223
+
1224
/**
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * @param {string} c Exactly one character out of 0-9, a-f or A-F.
 * @return {number} Integer value of the digit, 0 through 15.
 * @throws {URIError} If c is not a one-character hexadecimal digit. This
 *     includes undefined, which is what indexing past the end of a
 *     truncated percent-sequence produces. URIError is a subclass of Error,
 *     so callers that catch Error keep working.
 */
diff_match_patch.prototype.digit16 = function(c) {
  // Validate first: parseInt alone would accept inputs such as 'f!' or ' 7'.
  if (typeof c !== 'string' || !/^[0-9A-Fa-f]$/.test(c)) {
    throw new URIError('Invalid hex-code');
  }
  return parseInt(c, 16);
};
1245
+
1246
/**
 * Decode a URI-encoded string, but allow encoded surrogate halves.
 *
 * diff_match_patch needs this relaxation because not all libraries and
 * versions produce valid URI strings in toDelta, and this code should not
 * crash on input that is usable even though it is not valid UTF-8.
 *
 * The native decodeURI() is tried first. Only when it throws is the text
 * decoded by hand, as UTF-8 percent-sequences of one to four bytes. In that
 * fallback a three-byte sequence that encodes a lone surrogate half is
 * accepted, and code points above U+FFFF are emitted as a surrogate pair.
 *
 * Note that the two paths differ for reserved characters: native
 * decodeURI() leaves escapes such as %3A or %23 untouched, whereas the
 * fallback decodes every percent-sequence.
 *
 * @example decodeURI('abcd%20%F0%9F%85%B0') = 'abcd \ud83c\udd70'
 * @example decodeURI('abcd%3A %ED%A0%BC') = 'abcd: \ud83c'
 *
 * @cite @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
 *
 * @param {string} text Input string encoded by encodeURI() or equivalent.
 * @return {string} The decoded text.
 * @throws {URIError} If a multi-byte sequence is truncated, has an invalid
 *     lead or continuation byte, or encodes a value outside
 *     U+10000..U+10FFFF in four bytes.
 * @throws {Error} If a percent sign is not followed by two hexadecimal
 *     digits (raised by digit16).
 */
diff_match_patch.prototype.decodeURI = function(text) {
  try {
    return decodeURI(text);
  } catch (e) {
    // Native decoding refused the input; fall back to the lenient decoder.
    var i = 0;
    var decoded = '';

    while (i < text.length) {
      if (text[i] !== '%') {
        decoded += text[i++];
        continue;
      }

      // Start of a percent-sequence: read the lead byte.
      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
      if ((byte1 & 0x80) === 0) {
        // 0xxxxxxx: single-byte (ASCII) sequence.
        decoded += String.fromCharCode(byte1);
        i += 3;
        continue;
      }

      if ('%' !== text[i + 3]) {
        throw new URIError('URI malformed');
      }

      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
      if ((byte2 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte2 = byte2 & 0x3F;
      if ((byte1 & 0xE0) === 0xC0) {
        // 110xxxxx 10xxxxxx: two-byte sequence.
        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
        i += 6;
        continue;
      }

      if ('%' !== text[i + 6]) {
        throw new URIError('URI malformed');
      }

      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
      if ((byte3 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte3 = byte3 & 0x3F;
      if ((byte1 & 0xF0) === 0xE0) {
        // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence.
        // Unpaired surrogates are fine here.
        decoded += String.fromCharCode(
            ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
        i += 9;
        continue;
      }

      if ('%' !== text[i + 9]) {
        throw new URIError('URI malformed');
      }

      var byte4 =
          (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
      if ((byte4 & 0xC0) !== 0x80) {
        throw new URIError('URI malformed');
      }
      byte4 = byte4 & 0x3F;
      if ((byte1 & 0xF8) === 0xF0) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence.
        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) |
            (byte3 << 0x06) | byte4;
        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
          // UTF-16 encodes (codePoint - 0x10000) as two 10-bit halves.
          // Masking with 0xFFFF instead would only be right for plane 1.
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 | (offset >>> 10));
          decoded += String.fromCharCode(0xDC00 | (offset & 0x3FF));
          i += 12;
          continue;
        }
      }

      throw new URIError('URI malformed');
    }

    return decoded;
  }
};
1179
1339
1180
1340
/**
1181
1341
* loc is a location in text1, compute and return the equivalent location in
@@ -1219,6 +1379,7 @@ diff_match_patch.prototype.diff_xIndex = function(diffs, loc) {
1219
1379
* @return {string } HTML representation.
1220
1380
*/
1221
1381
diff_match_patch . prototype . diff_prettyHtml = function ( diffs ) {
1382
+ diffs = this . diff_cleanupSplitSurrogates ( diffs ) ;
1222
1383
var html = [ ] ;
1223
1384
var pattern_amp = / & / g;
1224
1385
var pattern_lt = / < / g;
@@ -1319,6 +1480,7 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) {
1319
1480
* @return {string } Delta text.
1320
1481
*/
1321
1482
diff_match_patch . prototype . diff_toDelta = function ( diffs ) {
1483
+ diffs = this . diff_cleanupSplitSurrogates ( diffs ) ;
1322
1484
var text = [ ] ;
1323
1485
for ( var x = 0 ; x < diffs . length ; x ++ ) {
1324
1486
switch ( diffs [ x ] [ 0 ] ) {
@@ -1361,7 +1523,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
1361
1523
switch ( tokens [ x ] . charAt ( 0 ) ) {
1362
1524
case '+' :
1363
1525
try {
1364
- diffs [ diffsLength ++ ] = [ DIFF_INSERT , decodeURI ( param ) ] ;
1526
+ diffs [ diffsLength ++ ] = [ DIFF_INSERT , this . decodeURI ( param ) ] ;
1365
1527
} catch ( ex ) {
1366
1528
// Malformed URI sequence.
1367
1529
throw new Error ( 'Illegal escape in diff_fromDelta: ' + param ) ;
@@ -1597,11 +1759,23 @@ diff_match_patch.prototype.patch_addContext_ = function(patch, text) {
1597
1759
padding += this . Patch_Margin ;
1598
1760
1599
1761
// Add the prefix.
1762
+ if (
1763
+ patch . start2 - padding > 0 &&
1764
+ diff_match_patch . prototype . isLowSurrogate ( text [ patch . start2 - padding ] )
1765
+ ) {
1766
+ padding ++ ;
1767
+ }
1600
1768
var prefix = text . substring ( patch . start2 - padding , patch . start2 ) ;
1601
1769
if ( prefix ) {
1602
1770
patch . diffs . unshift ( [ DIFF_EQUAL , prefix ] ) ;
1603
1771
}
1604
1772
// Add the suffix.
1773
+ if (
1774
+ patch . start2 + patch . length1 + padding < text . length &&
1775
+ diff_match_patch . prototype . isHighSurrogate ( text [ patch . start2 + patch . length1 + padding ] )
1776
+ ) {
1777
+ padding ++ ;
1778
+ }
1605
1779
var suffix = text . substring ( patch . start2 + patch . length1 ,
1606
1780
patch . start2 + patch . length1 + padding ) ;
1607
1781
if ( suffix ) {
@@ -1675,6 +1849,7 @@ diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) {
1675
1849
if ( diffs . length === 0 ) {
1676
1850
return [ ] ; // Get rid of the null case.
1677
1851
}
1852
+ diffs = this . diff_cleanupSplitSurrogates ( diffs ) ;
1678
1853
var patches = [ ] ;
1679
1854
var patch = new diff_match_patch . patch_obj ( ) ;
1680
1855
var patchDiffLength = 0 ; // Keeping our own length var is faster in JS.
@@ -2171,6 +2346,7 @@ diff_match_patch.patch_obj.prototype.toString = function() {
2171
2346
var text = [ '@@ -' + coords1 + ' +' + coords2 + ' @@\n' ] ;
2172
2347
var op ;
2173
2348
// Escape the body of the patch with %xx notation.
2349
+ diff_match_patch . prototype . diff_cleanupSplitSurrogates ( this . diffs ) ;
2174
2350
for ( var x = 0 ; x < this . diffs . length ; x ++ ) {
2175
2351
switch ( this . diffs [ x ] [ 0 ] ) {
2176
2352
case DIFF_INSERT :
0 commit comments