/* global StringPad, StringToCodePoints, UTF16EncodeCodePoint, UnicodeEscape */
// 22.2.5.1.1 EncodeForRegExpEscape ( c )
/**
 * Produces the escaped form of a single code point for use inside a regular
 * expression source string.
 *
 * Depending on `c`, the result is one of:
 *   - a backslash followed by the character itself (syntax characters and "/"),
 *   - a control escape ("\t", "\n", "\v", "\f", "\r") for U+0009..U+000D,
 *   - a "\x" escape built with StringPad (other punctuators, white space,
 *     line terminators and surrogates whose value is <= 0xFF),
 *   - the concatenation of UnicodeEscape(cu) for each code unit of the
 *     encoded code point (the same categories above 0xFF),
 *   - the code point encoded as-is via UTF16EncodeCodePoint (everything else).
 *
 * Relies on the abstract-operation helpers named in the `global` directive;
 * they are not defined here.
 * NOTE(review): UTF16EncodeCodePoint is assumed to return a String (it is
 * concatenated with "\\" and indexed per code unit) - confirm against its
 * definition.
 *
 * @param {number} c - The numeric value of the code point to encode.
 * @returns {string} The escaped (or unchanged) representation of `c`.
 */
// eslint-disable-next-line no-unused-vars
function EncodeForRegExpEscape(c) {
  // 1. If c is matched by SyntaxCharacter or c is U+002F (SOLIDUS), then
  if (
    c === 0x005e || // ^
    c === 0x0024 || // $
    c === 0x005c || // \
    c === 0x002e || // .
    c === 0x002a || // *
    c === 0x002b || // +
    c === 0x003f || // ?
    c === 0x0028 || // (
    c === 0x0029 || // )
    c === 0x005b || // [
    c === 0x005d || // ]
    c === 0x007b || // {
    c === 0x007d || // }
    c === 0x007c || // |
    c === 0x002f // /
  ) {
    // a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and UTF16EncodeCodePoint(c).
    return "\\" + UTF16EncodeCodePoint(c);
  }
  // 2. Else if c is the code point listed in some cell of the "Code Point" column of Table 63, then
  else if (c >= 0x0009 && c <= 0x000d) {
    // a. Return the string-concatenation of 0x005C (REVERSE SOLIDUS) and the string in the
    //    "ControlEscape" column of the row whose "Code Point" column contains c.
    // The range check above guarantees that the lookup below always hits a key.
    return (
      "\\" +
      {
        0x0009: "t", // CHARACTER TABULATION
        0x000a: "n", // LINE FEED
        0x000b: "v", // LINE TABULATION
        0x000c: "f", // FORM FEED
        0x000d: "r" // CARRIAGE RETURN
      }[c]
    );
  }
  // 3. Let otherPunctuators be the string-concatenation of ",-=<>#&!%:;@~'`" and the code unit 0x0022 (QUOTATION MARK).
  var otherPunctuators = ",-=<>#&!%:;@~'`\"";
  // 4. Let toEscape be StringToCodePoints(otherPunctuators).
  var toEscape = StringToCodePoints(otherPunctuators);
  // 5. If toEscape contains c, c is matched by either WhiteSpace or LineTerminator, or c has the
  //    same numeric value as a leading surrogate or trailing surrogate, then
  // (TAB, VT, FF, LF and CR never reach this point: step 2 already returned for them.)
  if (
    toEscape.indexOf(c) > -1 ||
    // https://www.compart.com/en/unicode/category/Zs
    c === 0xfeff || // ZERO WIDTH NO-BREAK SPACE
    c === 0x0020 || // SPACE
    c === 0x00a0 || // NO-BREAK SPACE
    c === 0x1680 || // OGHAM SPACE MARK
    (c >= 0x2000 && c <= 0x200a) || // other spaces
    c === 0x202f || // NARROW NO-BREAK SPACE
    c === 0x205f || // MEDIUM MATHEMATICAL SPACE
    c === 0x3000 || // IDEOGRAPHIC SPACE
    c === 0x2028 || // LINE SEPARATOR
    c === 0x2029 || // PARAGRAPH SEPARATOR
    (c >= 0xd800 && c <= 0xdbff) || // leading surrogate
    (c >= 0xdc00 && c <= 0xdfff) // trailing surrogate
  ) {
    // a. Let cNum be the numeric value of c.
    var cNum = c;
    // b. If cNum ≤ 0xFF, then
    if (cNum <= 0x00ff) {
      // i. Let hex be Number::toString(𝔽(cNum), 16).
      var hex = Number.prototype.toString.call(cNum, 16);
      // ii. Return the string-concatenation of the code unit 0x005C (REVERSE SOLIDUS), "x", and StringPad(hex, 2, "0", start).
      return "\\x" + StringPad(hex, 2, "0", "START");
    }
    // c. Let escaped be the empty String.
    var escaped = "";
    // d. Let codeUnits be UTF16EncodeCodePoint(c).
    var codeUnits = UTF16EncodeCodePoint(c);
    // e. For each code unit cu of codeUnits, do
    for (var i = 0; i < codeUnits.length; i++) {
      var cu = codeUnits[i];
      // i. Set escaped to the string-concatenation of escaped and UnicodeEscape(cu).
      escaped += UnicodeEscape(cu);
    }
    // f. Return escaped.
    return escaped;
  }
  // 6. Return UTF16EncodeCodePoint(c).
  return UTF16EncodeCodePoint(c);
}