Skip to content

Commit bc5d07b

Browse files
committed
Pattern Analysis
Adds functionality to analyze the minimum and maximum # of characters a regex may match.
1 parent ac7cb6f commit bc5d07b

File tree

3 files changed

+251
-4
lines changed

3 files changed

+251
-4
lines changed

include/ctre/evaluation.hpp

+243
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,249 @@ constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, c
447447
// property matching
448448

449449

450+
// pattern analysis - computes the minimum and maximum number of characters a regex can match
451+
// a custom std::pair to overload some handy operations that we'll perform w/ a fold
452+
struct analysis_results : std::pair<size_t, size_t> {
	// `first` is the minimum and `second` the maximum number of characters.
	// Sentinels shared by both bounds: `inf` (size_t(-1)) means unbounded, `lim` (size_t(-2)) means
	// finite but too large to store; every other value is an exact count.
	static constexpr size_t inf = size_t{ 0 } - 1;
	static constexpr size_t lim = size_t{ 0 } - 2;

	// Saturating addition of two bounds: INF absorbs everything, and any finite overflow clamps to `lim`.
	static constexpr CTRE_FORCE_INLINE size_t saturate_limit(const size_t& lhs, const size_t& rhs) {
		if (lhs == inf || rhs == inf) {
			return inf;
		}
		const size_t sum = lhs + rhs;
		// sum < lhs: unsigned wrap-around; sum == inf: the exact total collides with the INF sentinel
		return (sum < lhs || sum == inf) ? lim : sum;
	}

	// Saturating multiplication of two bounds with the same sentinel rules as saturate_limit.
	static constexpr CTRE_FORCE_INLINE size_t mult_saturate_limit(const size_t& lhs, const size_t& rhs) {
		// INF is tested before zero, so 0 * INF yields INF rather than 0
		if (lhs == inf || rhs == inf) {
			return inf;
		}
		if (lhs == 0 || rhs == 0) {
			return 0;
		}
		// `inf` is the largest size_t, so this detects overflow without performing the multiplication
		if (lhs > (inf / rhs)) {
			return lim;
		}
		const size_t product = lhs * rhs;
		// a product that lands exactly on the INF sentinel is still finite
		return product == inf ? lim : product;
	}

	// True when the minimum is non-zero.
	constexpr CTRE_FORCE_INLINE operator bool() const noexcept {
		return first != 0;
	}

	// Concatenation: minimums and maximums add up (saturating).
	constexpr CTRE_FORCE_INLINE auto operator+(analysis_results other) const noexcept {
		return analysis_results{std::make_pair(
			saturate_limit(first, other.first),
			saturate_limit(second, other.second)
		)};
	}

	// Alternation: the smaller of the minimums and the larger of the maximums.
	constexpr CTRE_FORCE_INLINE auto operator||(analysis_results other) const noexcept {
		return analysis_results{std::make_pair(
			std::min(first, other.first),
			std::max(second, other.second)
		)};
	}
};
500+
501+
template <typename Pattern>
502+
static constexpr auto trampoline_analysis(Pattern) noexcept;
503+
504+
template <typename... Patterns>
505+
static constexpr auto trampoline_analysis(ctll::list<Patterns...>) noexcept;
506+
507+
template<typename T, typename R>
508+
static constexpr auto trampoline_analysis(T, R captures) noexcept;
509+
510+
//processing for each type
511+
512+
//repeat
513+
template<size_t A, size_t B, typename R, typename... Content>
514+
static constexpr auto _analyze(repeat<A,B,Content...>, R captures) noexcept {
515+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
516+
if constexpr (sizeof...(Content)) {
517+
ret = trampoline_analysis(ctll::list<Content...>(), captures);
518+
ret.first = analysis_results::mult_saturate_limit(ret.first, A);
519+
ret.second = analysis_results::mult_saturate_limit(ret.second, B);
520+
}
521+
return ret;
522+
}
523+
524+
//note: all * ? + operations are specialized variations of repeat {A,B}
525+
//lazy_repeat
526+
template<size_t A, size_t B, typename R, typename... Content>
527+
static constexpr auto _analyze(lazy_repeat<A, B, Content...>, R captures) noexcept {
528+
return _analyze(repeat<A, B, Content...>(), captures);
529+
}
530+
531+
//possessive_repeat
532+
template<size_t A, size_t B, typename R, typename... Content>
533+
static constexpr auto _analyze(possessive_repeat<A, B, Content...>, R captures) noexcept {
534+
return _analyze(repeat<A, B, Content...>(), captures);
535+
}
536+
537+
//star
538+
template<typename R, typename... Content>
539+
static constexpr auto _analyze(star<Content...>, R captures) noexcept {
540+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
541+
}
542+
543+
//lazy_star
544+
template<typename R, typename... Content>
545+
static constexpr auto _analyze(lazy_star<Content...>, R captures) noexcept {
546+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
547+
}
548+
549+
//possessive_star
550+
template<typename R, typename... Content>
551+
static constexpr auto _analyze(possessive_star<Content...>, R captures) noexcept {
552+
return _analyze(repeat<0ULL, ~(0ULL), Content...>(), captures);
553+
}
554+
555+
//plus
556+
template<typename R, typename... Content>
557+
static constexpr auto _analyze(plus<Content...>, R captures) noexcept {
558+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
559+
}
560+
561+
//lazy_plus
562+
template<typename R, typename... Content>
563+
static constexpr auto _analyze(lazy_star<Content...>, R captures) noexcept {
564+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
565+
}
566+
567+
//possessive_plus
568+
template<typename R, typename... Content>
569+
static constexpr auto _analyze(possessive_star<Content...>, R captures) noexcept {
570+
return _analyze(repeat<1ULL, ~(0ULL), Content...>(), captures);
571+
}
572+
573+
//optional
574+
template<typename R, typename... Content>
575+
static constexpr auto _analyze(optional<Content...>, R captures) noexcept {
576+
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
577+
}
578+
579+
//lazy_optional
580+
template<typename R, typename... Content>
581+
static constexpr auto _analyze(lazy_optional<Content...>, R captures) noexcept {
582+
return _analyze(repeat<0ULL, 1ULL, Content...>(), captures);
583+
}
584+
585+
//back_reference
586+
template<size_t Id, typename R>
587+
static constexpr auto _analyze(back_reference<Id>, R captures) noexcept {
588+
const auto ref = captures.template get<Id>();
589+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
590+
if constexpr (size(ref.get_expression())) {
591+
ret = trampoline_analysis(ref.get_expression(), captures);
592+
}
593+
return ret;
594+
}
595+
596+
//back_reference_with_name
597+
template<typename Name, typename R>
598+
static constexpr auto _analyze(back_reference_with_name<Name>, R captures) noexcept {
599+
const auto ref = captures.template get<Name>();
600+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
601+
if constexpr (size(ref.get_expression())) {
602+
ret = trampoline_analysis(ref.get_expression(), captures);
603+
}
604+
return ret;
605+
}
606+
607+
//select, this is specialized, we need to take the minimum of all minimums and maximum of all maximums
608+
template<typename R, typename... Content>
609+
static constexpr auto _analyze(select<Content...>, R captures) noexcept {
610+
analysis_results ret = trampoline_select_analysis(ctll::list<Content...>(), captures);
611+
return ret;
612+
}
613+
614+
//character, any character contributes exactly one to both counts
615+
template<auto C, typename R>
616+
static constexpr auto _analyze(character<C>, R captures) noexcept {
617+
analysis_results ret{ std::make_pair(1ULL, 1ULL) };
618+
return ret;
619+
}
620+
621+
//strings, any string contributes the # of characters it contains (if we have an empty string that'll be 0)
622+
template<auto... Str, typename R>
623+
static constexpr auto _analyze(string<Str...>, R captures) noexcept {
624+
analysis_results ret{ std::make_pair(sizeof...(Str), sizeof...(Str)) };
625+
return ret;
626+
}
627+
628+
//we'll process anything that has contents as a regex
629+
//ctll::list
630+
template<typename R, typename... Content>
631+
static constexpr auto _analyze(ctll::list<Content...>,R captures) noexcept {
632+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
633+
return ret;
634+
}
635+
636+
//sequence
637+
template<typename R, typename... Content>
638+
static constexpr auto _analyze(sequence<Content...>, R captures) noexcept {
639+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
640+
return ret;
641+
}
642+
643+
//capture
644+
template<size_t Id, typename R, typename... Content>
645+
static constexpr auto _analyze(capture<Id, Content...>, R captures) noexcept {
646+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
647+
return ret;
648+
}
649+
650+
//capture_with_name
651+
template<size_t Id, typename Name, typename R, typename... Content>
652+
static constexpr auto _analyze(capture_with_name<Id, Name, Content...>, R captures) noexcept {
653+
analysis_results ret = trampoline_analysis(ctll::list<Content...>(), captures);
654+
return ret;
655+
}
656+
657+
//everything else, anything we haven't matched already isn't supported and will contribute 0
658+
template<typename T, typename R>
659+
static constexpr auto _analyze(T, R captures) noexcept {
660+
analysis_results ret{ std::make_pair(0ULL, 0ULL) };
661+
return ret;
662+
}
663+
//note: ctll::list wraps patterns just like sequences, we'll treat anything that looks like a regex w/ ctll::list
664+
template <typename... Patterns, typename R>
665+
static constexpr auto trampoline_analysis(ctll::list<Patterns...>, R captures) noexcept {
666+
//fold, for every argument in a ctll::list, calculate its contribution to the limits
667+
auto r = ((_analyze(Patterns(), captures)) + ...);
668+
//note any reordering of parameters will result in the same limits
669+
return r;
670+
}
671+
672+
template <typename... Patterns, typename R>
673+
static constexpr auto trampoline_select_analysis(ctll::list<Patterns...>, R captures) noexcept {
674+
//fold, each argument in a selection of regexes we take the minimum and maximum of all values
675+
auto r = ((trampoline_analysis(Patterns(), captures)) || ...);
676+
//note again, order is unimportant
677+
return r;
678+
}
679+
680+
template <typename... Patterns>
681+
static constexpr auto pattern_analysis(ctll::list<Patterns...>) noexcept {
682+
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
683+
return trampoline_analysis(ctll::list<Patterns...>(), return_type{});
684+
}
685+
686+
template <typename Pattern = empty>
687+
static constexpr auto pattern_analysis(Pattern pattern = {}) noexcept {
688+
using return_type = decltype(regex_results(std::declval<std::basic_string_view<char>::iterator>(), find_captures(pattern)));
689+
return trampoline_analysis(ctll::list<Pattern>(), return_type{});
690+
}
691+
692+
450693
}
451694

452695
#endif

include/ctre/find_captures.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,12 @@ template <typename... Content, typename... Tail, typename Output> constexpr auto
112112

113113

114114
template <size_t Id, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture<Id,Content...>, Tail...>, ctll::list<Output...>) noexcept {
115-
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id>>());
115+
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, void, ctll::list<Content...>>>());
116116
}
117117

118118

119119
template <size_t Id, typename Name, typename... Content, typename... Tail, typename... Output> constexpr auto find_captures(ctll::list<capture_with_name<Id,Name,Content...>, Tail...>, ctll::list<Output...>) noexcept {
120-
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name>>());
120+
return find_captures(ctll::list<Content..., Tail...>(), ctll::list<Output..., captured_content<Id, Name, ctll::list<Content...>>>());
121121
}
122122

123123

include/ctre/return_type.hpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ struct not_matched_tag_t { };
1313

1414
static constexpr inline auto not_matched = not_matched_tag_t{};
1515

16-
template <size_t Id, typename Name = void> struct captured_content {
16+
template <size_t Id, typename Name = void, typename Content = void> struct captured_content {
1717
template <typename Iterator> class storage {
1818
Iterator _begin{};
1919
Iterator _end{};
2020

2121
bool _matched{false};
2222
public:
2323
using char_type = typename std::iterator_traits<Iterator>::value_type;
24-
24+
using content_type = Content;
2525
using name = Name;
2626

2727
constexpr CTRE_FORCE_INLINE storage() noexcept {}
@@ -86,6 +86,10 @@ template <size_t Id, typename Name = void> struct captured_content {
8686
constexpr CTRE_FORCE_INLINE static size_t get_id() noexcept {
8787
return Id;
8888
}
89+
90+
constexpr CTRE_FORCE_INLINE static content_type get_expression() noexcept {
91+
return {};
92+
}
8993
};
9094
};
9195

0 commit comments

Comments
 (0)