@@ -26,6 +26,7 @@ limitations under the License.
26
26
#include " json.hpp"
27
27
#include " md5.h"
28
28
#include " parser.h"
29
+ #include " re2/re2.h"
29
30
#include " state.h"
30
31
#include " static_analysis.h"
31
32
#include " string_utils.h"
@@ -35,6 +36,10 @@ using json = nlohmann::json;
35
36
36
37
namespace {
37
38
39
+ static const Fodder EF; // Empty fodder.
40
+
41
+ static const LocationRange E; // Empty.
42
+
38
43
/* * Turn a path e.g. "/a/b/c" into a dir, e.g. "/a/b/". If there is no path returns "".
39
44
*/
40
45
std::string dir_name (const std::string &path)
@@ -881,6 +886,11 @@ class Interpreter {
881
886
builtins[" parseJson" ] = &Interpreter::builtinParseJson;
882
887
builtins[" encodeUTF8" ] = &Interpreter::builtinEncodeUTF8;
883
888
builtins[" decodeUTF8" ] = &Interpreter::builtinDecodeUTF8;
889
+ builtins[" regexFullMatch" ] = &Interpreter::builtinRegexFullMatch;
890
+ builtins[" regexPartialMatch" ] = &Interpreter::builtinRegexPartialMatch;
891
+ builtins[" regexQuoteMeta" ] = &Interpreter::builtinRegexQuoteMeta;
892
+ builtins[" regexReplace" ] = &Interpreter::builtinRegexReplace;
893
+ builtins[" regexGlobalReplace" ] = &Interpreter::builtinRegexGlobalReplace;
884
894
}
885
895
886
896
/* * Clean up the heap, stack, stash, and builtin function ASTs. */
@@ -1373,6 +1383,129 @@ class Interpreter {
1373
1383
return decodeUTF8 ();
1374
1384
}
1375
1385
1386
+ const AST *regexMatch (const std::string &pattern, const std::string &string, bool full)
1387
+ {
1388
+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1389
+ if (!re.ok ()) {
1390
+ std::stringstream ss;
1391
+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1392
+ throw makeError (stack.top ().location , ss.str ());
1393
+ }
1394
+
1395
+ int num_groups = re.NumberOfCapturingGroups ();
1396
+
1397
+ std::vector<std::string> rcaptures (num_groups);
1398
+ std::vector<RE2::Arg> rargv (num_groups);
1399
+ std::vector<const RE2::Arg*> rargs (num_groups);
1400
+ for (int i=0 ; i<num_groups; ++i) {
1401
+ rargs[i] = &rargv[i];
1402
+ rargv[i] = &rcaptures[i];
1403
+ }
1404
+
1405
+ if (full ? RE2::FullMatchN (string, re, rargs.data (), num_groups)
1406
+ : RE2::PartialMatchN (string, re, rargs.data (), num_groups)) {
1407
+ std::map<const Identifier *, HeapSimpleObject::Field> fields;
1408
+
1409
+ const Identifier *fid = alloc->makeIdentifier (U" string" );
1410
+ fields[fid].hide = ObjectField::VISIBLE;
1411
+ fields[fid].body = alloc->make <LiteralString>(E, EF, decode_utf8 (string), LiteralString::DOUBLE, " " , " " );
1412
+
1413
+ fid = alloc->makeIdentifier (U" captures" );
1414
+ fields[fid].hide = ObjectField::VISIBLE;
1415
+ std::vector<Array::Element> captures;
1416
+ for (int i=0 ; i<num_groups; ++i) {
1417
+ captures.push_back (Array::Element (
1418
+ alloc->make <LiteralString>(E, EF, decode_utf8 (rcaptures[i]), LiteralString::DOUBLE, " " , " " ),
1419
+ EF));
1420
+ }
1421
+ fields[fid].body = alloc->make <Array>(E, EF, captures, false , EF);
1422
+
1423
+ fid = alloc->makeIdentifier (U" namedCaptures" );
1424
+ fields[fid].hide = ObjectField::VISIBLE;
1425
+ DesugaredObject::Fields named_captures;
1426
+ const std::map<std::string, int > &named_groups = re.NamedCapturingGroups ();
1427
+ for (auto it=named_groups.cbegin (); it!=named_groups.cend (); ++it) {
1428
+ named_captures.push_back (DesugaredObject::Field (
1429
+ ObjectField::VISIBLE,
1430
+ alloc->make <LiteralString>(E, EF, decode_utf8 (it->first ), LiteralString::DOUBLE, " " , " " ),
1431
+ alloc->make <LiteralString>(E, EF, decode_utf8 (rcaptures[it->second -1 ]), LiteralString::DOUBLE, " " , " " )));
1432
+ }
1433
+ fields[fid].body = alloc->make <DesugaredObject>(E, ASTs{}, named_captures);
1434
+
1435
+ scratch = makeObject<HeapSimpleObject>(BindingFrame{}, fields, ASTs{});
1436
+ } else {
1437
+ scratch = makeNull ();
1438
+ }
1439
+ return nullptr ;
1440
+ }
1441
+
1442
+ const AST *builtinRegexFullMatch (const LocationRange &loc, const std::vector<Value> &args)
1443
+ {
1444
+ validateBuiltinArgs (loc, " regexFullMatch" , args, {Value::STRING, Value::STRING});
1445
+
1446
+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1447
+ std::string string = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1448
+
1449
+ return regexMatch (pattern, string, true );
1450
+ }
1451
+
1452
+ const AST *builtinRegexPartialMatch (const LocationRange &loc, const std::vector<Value> &args)
1453
+ {
1454
+ validateBuiltinArgs (loc, " regexPartialMatch" , args, {Value::STRING, Value::STRING});
1455
+
1456
+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1457
+ std::string string = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1458
+
1459
+ return regexMatch (pattern, string, false );
1460
+ }
1461
+
1462
+ const AST *builtinRegexQuoteMeta (const LocationRange &loc, const std::vector<Value> &args)
1463
+ {
1464
+ validateBuiltinArgs (loc, " regexQuoteMeta" , args, {Value::STRING});
1465
+ scratch = makeString (decode_utf8 (RE2::QuoteMeta (encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value ))));
1466
+ return nullptr ;
1467
+ }
1468
+
1469
+ const AST *builtinRegexReplace (const LocationRange &loc, const std::vector<Value> &args)
1470
+ {
1471
+ validateBuiltinArgs (loc, " regexReplace" , args, {Value::STRING, Value::STRING, Value::STRING});
1472
+
1473
+ std::string string = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1474
+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1475
+ std::string replace = encode_utf8 (static_cast <HeapString *>(args[2 ].v .h )->value );
1476
+
1477
+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1478
+ if (!re.ok ()) {
1479
+ std::stringstream ss;
1480
+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1481
+ throw makeError (stack.top ().location , ss.str ());
1482
+ }
1483
+
1484
+ RE2::Replace (&string, re, replace);
1485
+ scratch = makeString (decode_utf8 (string));
1486
+ return nullptr ;
1487
+ }
1488
+
1489
+ const AST *builtinRegexGlobalReplace (const LocationRange &loc, const std::vector<Value> &args)
1490
+ {
1491
+ validateBuiltinArgs (loc, " regexGlobalReplace" , args, {Value::STRING, Value::STRING, Value::STRING});
1492
+
1493
+ std::string string = encode_utf8 (static_cast <HeapString *>(args[0 ].v .h )->value );
1494
+ std::string pattern = encode_utf8 (static_cast <HeapString *>(args[1 ].v .h )->value );
1495
+ std::string replace = encode_utf8 (static_cast <HeapString *>(args[2 ].v .h )->value );
1496
+
1497
+ RE2 re (pattern, RE2::CannedOptions::Quiet);
1498
+ if (!re.ok ()) {
1499
+ std::stringstream ss;
1500
+ ss << " Invalid regex '" << re.pattern () << " ': " << re.error ();
1501
+ throw makeError (stack.top ().location , ss.str ());
1502
+ }
1503
+
1504
+ RE2::GlobalReplace (&string, re, replace);
1505
+ scratch = makeString (decode_utf8 (string));
1506
+ return nullptr ;
1507
+ }
1508
+
1376
1509
const AST *builtinTrace (const LocationRange &loc, const std::vector<Value> &args)
1377
1510
{
1378
1511
if (args[0 ].t != Value::STRING) {
0 commit comments