@@ -88,6 +88,15 @@ diff_match_patch.Diff.prototype.toString = function() {
88
88
return this [ 0 ] + ',' + this [ 1 ] ;
89
89
} ;
90
90
91
/**
 * Determine whether a string begins with a UTF-16 high (leading) surrogate.
 * Only the first code unit of the argument is examined.
 * @param {string|undefined|null} c Character to inspect, typically obtained
 *     by indexing into a string (e.g. text[i]).
 * @return {boolean} True if the first code unit is in 0xD800..0xDBFF.
 *     False for an empty string, undefined or null.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Indexing past the end of a string yields undefined; report "not a
  // surrogate" in that case instead of throwing on charCodeAt.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
95
+
96
/**
 * Determine whether a string begins with a UTF-16 low (trailing) surrogate.
 * Only the first code unit of the argument is examined.
 * @param {string|undefined|null} c Character to inspect, typically obtained
 *     by indexing into a string (e.g. text[i]).
 * @return {boolean} True if the first code unit is in 0xDC00..0xDFFF.
 *     False for an empty string, undefined or null.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // Indexing past the end of a string yields undefined; report "not a
  // surrogate" in that case instead of throwing on charCodeAt.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of '' is NaN, which fails both comparisons below.
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
91
100
92
101
/**
93
102
* Find the differences between two texts. Simplifies the problem by stripping
@@ -134,12 +143,18 @@ diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines,
134
143
135
144
// Trim off common prefix (speedup).
136
145
var commonlength = this . diff_commonPrefix ( text1 , text2 ) ;
146
+ if ( commonlength > 0 && this . isHighSurrogate ( text1 [ commonlength - 1 ] ) ) {
147
+ commonlength -- ;
148
+ }
137
149
var commonprefix = text1 . substring ( 0 , commonlength ) ;
138
150
text1 = text1 . substring ( commonlength ) ;
139
151
text2 = text2 . substring ( commonlength ) ;
140
152
141
153
// Trim off common suffix (speedup).
142
154
commonlength = this . diff_commonSuffix ( text1 , text2 ) ;
155
+ if ( commonlength > 0 && this . isLowSurrogate ( text1 [ text1 . length - commonlength ] ) ) {
156
+ commonlength -- ;
157
+ }
143
158
var commonsuffix = text1 . substring ( text1 . length - commonlength ) ;
144
159
text1 = text1 . substring ( 0 , text1 . length - commonlength ) ;
145
160
text2 = text2 . substring ( 0 , text2 . length - commonlength ) ;
@@ -187,13 +202,23 @@ diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines,
187
202
188
203
var longtext = text1 . length > text2 . length ? text1 : text2 ;
189
204
var shorttext = text1 . length > text2 . length ? text2 : text1 ;
205
+ var shortlength = shorttext . length ;
190
206
var i = longtext . indexOf ( shorttext ) ;
191
207
if ( i != - 1 ) {
208
+ // skip leading unpaired surrogate
209
+ if ( this . isLowSurrogate ( longtext [ i ] ) ) {
210
+ shortlength -- ;
211
+ i ++ ;
212
+ }
213
+ // skip trailing unpaired surrogate
214
+ if ( this . isHighSurrogate ( longtext [ i + shortlength ] ) ) {
215
+ shortlength -- ;
216
+ }
192
217
// Shorter text is inside the longer text (speedup).
193
218
diffs = [ new diff_match_patch . Diff ( DIFF_INSERT , longtext . substring ( 0 , i ) ) ,
194
219
new diff_match_patch . Diff ( DIFF_EQUAL , shorttext ) ,
195
220
new diff_match_patch . Diff ( DIFF_INSERT ,
196
- longtext . substring ( i + shorttext . length ) ) ] ;
221
+ longtext . substring ( i + shortlength ) ) ] ;
197
222
// Swap insertions for deletions if diff is reversed.
198
223
if ( text1 . length > text2 . length ) {
199
224
diffs [ 0 ] [ 0 ] = diffs [ 2 ] [ 0 ] = DIFF_DELETE ;
@@ -439,6 +464,15 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
439
464
*/
440
465
diff_match_patch . prototype . diff_bisectSplit_ = function ( text1 , text2 , x , y ,
441
466
deadline ) {
467
+ // backup if we split a surrogate
468
+ if (
469
+ x > 0 && x < text1 . length && this . isLowSurrogate ( text1 [ x ] ) &&
470
+ y > 0 && y < text2 . length && this . isLowSurrogate ( text2 [ y ] )
471
+ ) {
472
+ x -- ;
473
+ y -- ;
474
+ }
475
+
442
476
var text1a = text1 . substring ( 0 , x ) ;
443
477
var text2a = text2 . substring ( 0 , y ) ;
444
478
var text1b = text1 . substring ( x ) ;
@@ -569,6 +603,12 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
569
603
}
570
604
pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
571
605
}
606
+
607
+ // shorten the prefix if it splits a surrogate
608
+ if ( pointermid > 0 && this . isHighSurrogate ( text1 [ pointermid - 1 ] ) ) {
609
+ pointermid -- ;
610
+ }
611
+
572
612
return pointermid ;
573
613
} ;
574
614
@@ -601,6 +641,12 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
601
641
}
602
642
pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
603
643
}
644
+
645
+ // shorten the suffix if it splits a surrogate
646
+ if ( pointermid < length - 1 && this . isLowSurrogate ( text1 [ pointermid ] ) ) {
647
+ pointermid ++ ;
648
+ }
649
+
604
650
return pointermid ;
605
651
} ;
606
652
@@ -749,6 +795,24 @@ diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) {
749
795
text1_b = hm [ 3 ] ;
750
796
}
751
797
var mid_common = hm [ 4 ] ;
798
+
799
+ // move forward to prevent splitting a surrogate pair
800
+ if ( mid_common . length > 0 && this . isLowSurrogate ( mid_common [ 0 ] ) ) {
801
+ text1_a = text1_a + mid_common [ 0 ] ;
802
+ text2_a = text2_a + mid_common [ 0 ] ;
803
+ mid_common = mid_common . substring ( 1 ) ;
804
+ }
805
+
806
+ // back up to prevent splitting a surrogate pair
807
+ if (
808
+ text1_b . length > 0 && this . isLowSurrogate ( text1_b [ 0 ] ) &&
809
+ text2_b . length > 0 && this . isLowSurrogate ( text2_b [ 0 ] )
810
+ ) {
811
+ text1_b = mid_common [ mid_common . length - 1 ] + text1_b ;
812
+ text2_b = mid_common [ mid_common . length - 1 ] + text2_b ;
813
+ mid_common = mid_common . substring ( 0 , - 1 ) ;
814
+ }
815
+
752
816
return [ text1_a , text1_b , text2_a , text2_b , mid_common ] ;
753
817
} ;
754
818
0 commit comments