Skip to content

fix: add PCRE2 capability to standalone module #3377

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 10, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
399 changes: 237 additions & 162 deletions standalone/regex.c
Original file line number Diff line number Diff line change
@@ -1,162 +1,237 @@
/*
* ModSecurity for Apache 2.x, http://www.modsecurity.org/
* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*/

#include <limits.h>

#include "http_core.h"
#include "http_request.h"

#include "modsecurity.h"
#include "apache2.h"
#include "http_main.h"
#include "http_connection.h"

#include "apr_optional.h"
#include "mod_log_config.h"

#include "msc_logging.h"
#include "msc_util.h"

#include "ap_mpm.h"
#include "scoreboard.h"

#include "apr_version.h"

#include "apr_lib.h"
#include "ap_config.h"
#include "http_config.h"


static apr_status_t regex_cleanup(void *preg)
{
ap_regfree((ap_regex_t *) preg);
return APR_SUCCESS;
}

/*
 * Allocate an ap_regex_t from pool p and compile pattern into it.
 *
 * p        pool that owns the ap_regex_t structure
 * pattern  regular expression source, passed through to ap_regcomp()
 * cflags   AP_REG_* compile flags, passed through to ap_regcomp()
 *
 * Returns the compiled regex, or NULL when ap_regcomp() reports a failure.
 * On success regex_cleanup() is registered on p so the compiled pattern is
 * released through ap_regfree() when the pool cleanup runs.
 */
AP_DECLARE(ap_regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
                                     int cflags)
{
    ap_regex_t *compiled = apr_palloc(p, sizeof(*compiled));
    int status = ap_regcomp(compiled, pattern, cflags);

    if (status != 0) {
        return NULL;
    }

    apr_pool_cleanup_register(p, (void *)compiled, regex_cleanup,
                              apr_pool_cleanup_null);
    return compiled;
}

/* Release the compiled PCRE pattern stored in preg->re_pcre. */
AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
{
    void *compiled = preg->re_pcre;

    /* The parenthesised callee form is kept exactly as the original wrote it. */
    (pcre_free)(compiled);
}

/*
 * Compile pattern with PCRE and store the result in preg.
 *
 * preg     receives re_pcre (compiled code), re_erroffset (offset reported
 *          by pcre_compile) and, on success, re_nsub (capture count)
 * pattern  regular expression source
 * cflags   AP_REG_ICASE selects caseless matching, AP_REG_NEWLINE selects
 *          multiline matching
 *
 * Returns 0 on success, AP_REG_INVARG when pcre_compile() returns NULL.
 */
AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
{
    const char *err_text;
    int err_pos;
    int capture_count = 0;
    int compile_opts = 0;

    if (cflags & AP_REG_ICASE) {
        compile_opts |= PCRE_CASELESS;
    }
    if (cflags & AP_REG_NEWLINE) {
        compile_opts |= PCRE_MULTILINE;
    }

    preg->re_pcre = pcre_compile(pattern, compile_opts, &err_text, &err_pos,
                                 NULL);
    preg->re_erroffset = err_pos;

    if (preg->re_pcre == NULL) {
        return AP_REG_INVARG;
    }

    pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
                  &capture_count);
    preg->re_nsub = capture_count;
    return 0;
}

/*
 * Largest nmatch for which ap_regexec() uses its on-stack offset array
 * (POSIX_MALLOC_THRESHOLD * 3 ints); larger requests are served by malloc().
 * The #ifndef guard lets an earlier definition take precedence.
 */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD (10)
#endif

/*
 * Run a compiled pattern against a NUL-terminated subject string.
 *
 * preg    compiled pattern; its re_erroffset is reset to (apr_size_t)-1
 * string  subject text (its length is taken with strlen)
 * nmatch  number of entries available in pmatch
 * pmatch  receives start/end offsets for each reported group; entries past
 *         the reported count are filled with -1
 * eflags  AP_REG_NOTBOL / AP_REG_NOTEOL
 *
 * Returns 0 when pcre_exec() reports a match, otherwise an AP_REG_* code
 * derived from the PCRE error.
 */
AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
                           apr_size_t nmatch, ap_regmatch_t pmatch[],
                           int eflags)
{
    int stack_vec[POSIX_MALLOC_THRESHOLD * 3];
    int *vec = NULL;
    int vec_on_heap = 0;
    int exec_opts = 0;
    int rc;
    apr_size_t slot;

    if (eflags & AP_REG_NOTBOL) {
        exec_opts |= PCRE_NOTBOL;
    }
    if (eflags & AP_REG_NOTEOL) {
        exec_opts |= PCRE_NOTEOL;
    }

    /* Only has meaning after compile */
    ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);

    /* Small requests use the stack array, larger ones a heap buffer. */
    if (nmatch > 0) {
        if (nmatch > POSIX_MALLOC_THRESHOLD) {
            vec = (int *)malloc(sizeof(int) * nmatch * 3);
            if (vec == NULL) {
                return AP_REG_ESPACE;
            }
            vec_on_heap = 1;
        }
        else {
            vec = stack_vec;
        }
    }

    rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
                   (int)strlen(string), 0, exec_opts, vec, nmatch * 3);

    /* All captured slots were filled in */
    if (rc == 0) {
        rc = nmatch;
    }

    if (rc < 0) {
        if (vec_on_heap) {
            free(vec);
        }

        /* Translate the PCRE failure into the matching AP_REG_* code. */
        switch (rc) {
        case PCRE_ERROR_NOMATCH:
            return AP_REG_NOMATCH;

        case PCRE_ERROR_NULL:
        case PCRE_ERROR_BADOPTION:
        case PCRE_ERROR_BADMAGIC:
#ifdef PCRE_ERROR_BADUTF8
        case PCRE_ERROR_BADUTF8:
#endif
#ifdef PCRE_ERROR_BADUTF8_OFFSET
        case PCRE_ERROR_BADUTF8_OFFSET:
#endif
            return AP_REG_INVARG;

        case PCRE_ERROR_NOMEMORY:
#ifdef PCRE_ERROR_MATCHLIMIT
        case PCRE_ERROR_MATCHLIMIT:
#endif
            return AP_REG_ESPACE;

        case PCRE_ERROR_UNKNOWN_NODE:
        default:
            return AP_REG_ASSERT;
        }
    }

    /* Copy the reported offset pairs, then mark the remaining entries unset. */
    for (slot = 0; slot < (apr_size_t)rc; slot++) {
        pmatch[slot].rm_so = vec[slot * 2];
        pmatch[slot].rm_eo = vec[slot * 2 + 1];
    }
    if (vec_on_heap) {
        free(vec);
    }
    while (slot < nmatch) {
        pmatch[slot].rm_so = pmatch[slot].rm_eo = -1;
        slot++;
    }
    return 0;
}

/*
* ModSecurity for Apache 2.x, http://www.modsecurity.org/
* Copyright (c) 2004-2013 Trustwave Holdings, Inc. (http://www.trustwave.com/)
*
* You may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* If any of the files related to licensing are missing or if you have any
* other questions related to licensing please contact Trustwave Holdings, Inc.
* directly using the email address security@modsecurity.org.
*/

#include <limits.h>

#include "http_core.h"
#include "http_request.h"

#include "modsecurity.h"
#include "apache2.h"
#include "http_main.h"
#include "http_connection.h"

#include "apr_optional.h"
#include "mod_log_config.h"

#include "msc_logging.h"
#include "msc_util.h"

#include "ap_mpm.h"
#include "scoreboard.h"

#include "apr_version.h"

#include "apr_lib.h"
#include "ap_config.h"
#include "http_config.h"


static apr_status_t regex_cleanup(void *preg)
{
ap_regfree((ap_regex_t *) preg);
return APR_SUCCESS;
}

/*
 * Pool-backed wrapper around ap_regcomp().
 *
 * p        pool the ap_regex_t is allocated from
 * pattern  regular expression source, forwarded to ap_regcomp()
 * cflags   AP_REG_* compile flags, forwarded to ap_regcomp()
 *
 * Returns NULL if ap_regcomp() returns non-zero. Otherwise registers
 * regex_cleanup() on p (which releases the pattern through ap_regfree())
 * and returns the compiled regex.
 */
AP_DECLARE(ap_regex_t *) ap_pregcomp(apr_pool_t *p, const char *pattern,
                                     int cflags)
{
    ap_regex_t *regex = apr_palloc(p, sizeof(*regex));
    int failed = ap_regcomp(regex, pattern, cflags);

    if (failed) {
        return NULL;
    }

    apr_pool_cleanup_register(p, (void *)regex, regex_cleanup,
                              apr_pool_cleanup_null);
    return regex;
}

/*
 * Release the compiled pattern stored in preg->re_pcre, using the free
 * routine of whichever PCRE generation the module was built against.
 */
AP_DECLARE(void) ap_regfree(ap_regex_t *preg)
{
#ifdef WITH_PCRE2
    /* PCRE2 build: compiled code is released with pcre2_code_free(). */
    (pcre2_code_free)(preg->re_pcre);
#else
    /* Legacy PCRE build: compiled code is released with pcre_free(). */
    (pcre_free)(preg->re_pcre);
#endif
}

/*
 * Compile pattern with PCRE2 (WITH_PCRE2 builds) or legacy PCRE and store
 * the result in preg.
 *
 * preg     receives re_pcre (compiled code), re_erroffset (error offset
 *          reported by the compiler) and, on success, re_nsub (capture count)
 * pattern  NUL-terminated regular expression source
 * cflags   AP_REG_ICASE selects caseless matching, AP_REG_NEWLINE selects
 *          multiline matching
 *
 * Returns 0 on success, AP_REG_INVARG when the compiler returns NULL.
 */
AP_DECLARE(int) ap_regcomp(ap_regex_t *preg, const char *pattern, int cflags)
{
    int compile_opts = 0;
    int capture_count = 0;

#ifdef WITH_PCRE2
    int err_code = 0;
    PCRE2_SIZE err_pos = 0;

    if (cflags & AP_REG_ICASE) {
        compile_opts |= PCRE2_CASELESS;
    }
    if (cflags & AP_REG_NEWLINE) {
        compile_opts |= PCRE2_MULTILINE;
    }

    preg->re_pcre = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
                                  compile_opts, &err_code, &err_pos, NULL);
    preg->re_erroffset = err_pos;

    if (preg->re_pcre == NULL) {
        return AP_REG_INVARG;
    }

    pcre2_pattern_info((const pcre2_code *)preg->re_pcre,
                       PCRE2_INFO_CAPTURECOUNT, &capture_count);
#else
    const char *err_text;
    int err_pos;

    if (cflags & AP_REG_ICASE) {
        compile_opts |= PCRE_CASELESS;
    }
    if (cflags & AP_REG_NEWLINE) {
        compile_opts |= PCRE_MULTILINE;
    }

    preg->re_pcre = pcre_compile(pattern, compile_opts, &err_text, &err_pos,
                                 NULL);
    preg->re_erroffset = err_pos;

    if (preg->re_pcre == NULL) {
        return AP_REG_INVARG;
    }

    pcre_fullinfo((const pcre *)preg->re_pcre, NULL, PCRE_INFO_CAPTURECOUNT,
                  &capture_count);
#endif /* WITH_PCRE2 */

    preg->re_nsub = capture_count;
    return 0;
}

/*
 * ap_regexec() keeps its offset vector on the stack while nmatch is at most
 * this value (the array holds POSIX_MALLOC_THRESHOLD * 3 ints) and switches
 * to malloc() above it. A definition made before this point wins.
 */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD (10)
#endif

/*
 * Run a compiled pattern against a NUL-terminated subject string, using
 * PCRE2 (WITH_PCRE2 builds) or legacy PCRE.
 *
 * preg    compiled pattern; its re_erroffset is reset to (apr_size_t)-1
 * string  subject text (its length is taken with strlen)
 * nmatch  number of entries available in pmatch (may be 0)
 * pmatch  receives start/end offsets for each reported group; entries past
 *         the reported count are filled with -1
 * eflags  AP_REG_NOTBOL / AP_REG_NOTEOL
 *
 * Returns 0 on a match, AP_REG_NOMATCH when nothing matched, AP_REG_ESPACE
 * when a buffer cannot be obtained (or nmatch is too large to express as an
 * int offset count), and another AP_REG_* code for the remaining engine
 * errors.
 */
AP_DECLARE(int) ap_regexec(const ap_regex_t *preg, const char *string,
                           apr_size_t nmatch, ap_regmatch_t pmatch[],
                           int eflags)
{
    int rc;
    int options = 0;
    int *ovector = NULL;
    int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
    int allocated_ovector = 0;
    apr_size_t i;

#ifdef WITH_PCRE2
    if ((eflags & AP_REG_NOTBOL) != 0) options |= PCRE2_NOTBOL;
    if ((eflags & AP_REG_NOTEOL) != 0) options |= PCRE2_NOTEOL;
#else
    if ((eflags & AP_REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
    if ((eflags & AP_REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
#endif

    ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */

    if (nmatch > 0) {
        if (nmatch <= POSIX_MALLOC_THRESHOLD) {
            ovector = small_ovector;
        }
        else {
            /* nmatch * 3 is later used as an int count; refuse values that
             * would overflow it (and the allocation size below). */
            if (nmatch > (apr_size_t)(INT_MAX / 3)) return AP_REG_ESPACE;

            ovector = (int *)malloc(sizeof(int) * nmatch * 3);
            if (ovector == NULL) return AP_REG_ESPACE;
            allocated_ovector = 1;
        }
    }

#ifdef WITH_PCRE2
    {
        const pcre2_code *code = (const pcre2_code *)preg->re_pcre;
        pcre2_match_data *match_data =
            pcre2_match_data_create_from_pattern(code, NULL);

        if (match_data == NULL) {
            if (allocated_ovector) free(ovector);
            return AP_REG_ESPACE;
        }

        /* No match limits are configured here, so a NULL match context
         * (PCRE2 defaults) is sufficient and nothing extra needs freeing. */
        rc = pcre2_match(code, (PCRE2_SPTR)string, (PCRE2_SIZE)strlen(string),
                         0, (uint32_t)options, match_data, NULL);

        if (rc >= 0) {
            const PCRE2_SIZE *pairs = pcre2_get_ovector_pointer(match_data);
            /* rc == 0 means the match data block was too small for every
             * group; in that case all of its pairs are in use. */
            apr_size_t npairs = (rc == 0)
                ? (apr_size_t)pcre2_get_ovector_count(match_data)
                : (apr_size_t)rc;

            /* The match data block is sized from the pattern, not from the
             * caller, so never copy more pairs than pmatch can hold. */
            if (npairs > nmatch) npairs = nmatch;

            for (i = 0; i < npairs; i++) {
                PCRE2_SIZE so = pairs[2 * i];
                PCRE2_SIZE eo = pairs[2 * i + 1];

                /* Groups that did not participate are reported as -1. */
                ovector[2 * i]     = (so == PCRE2_UNSET) ? -1 : (int)so;
                ovector[2 * i + 1] = (eo == PCRE2_UNSET) ? -1 : (int)eo;
            }
            rc = (int)npairs;
        }

        pcre2_match_data_free(match_data);
    }
#else
    rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
                   0, options, ovector, (int)(nmatch * 3));
#endif

    if (rc == 0) rc = (int)nmatch; /* All captured slots were filled in */

    if (rc >= 0) {
        for (i = 0; i < (apr_size_t)rc; i++) {
            pmatch[i].rm_so = ovector[i * 2];
            pmatch[i].rm_eo = ovector[i * 2 + 1];
        }
        if (allocated_ovector) free(ovector);
        for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
        return 0;
    }

    if (allocated_ovector) free(ovector);

    switch (rc) {
#ifdef WITH_PCRE2
    case PCRE2_ERROR_NOMATCH: return AP_REG_NOMATCH;
    case PCRE2_ERROR_NULL: return AP_REG_INVARG;
    case PCRE2_ERROR_BADOPTION: return AP_REG_INVARG;
    case PCRE2_ERROR_BADMAGIC: return AP_REG_INVARG;
    /* PCRE_ERROR_UNKNOWN_NODE is not defined in PCRE2 */
    case PCRE2_ERROR_NOMEMORY: return AP_REG_ESPACE;
#ifdef PCRE2_ERROR_MATCHLIMIT
    case PCRE2_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
#endif
#ifdef PCRE2_ERROR_BADUTF8
    case PCRE2_ERROR_BADUTF8: return AP_REG_INVARG;
#endif
#ifdef PCRE2_ERROR_BADUTF8_OFFSET
    case PCRE2_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
#endif
#else /* legacy PCRE */
    case PCRE_ERROR_NOMATCH: return AP_REG_NOMATCH;
    case PCRE_ERROR_NULL: return AP_REG_INVARG;
    case PCRE_ERROR_BADOPTION: return AP_REG_INVARG;
    case PCRE_ERROR_BADMAGIC: return AP_REG_INVARG;
    case PCRE_ERROR_UNKNOWN_NODE: return AP_REG_ASSERT;
    case PCRE_ERROR_NOMEMORY: return AP_REG_ESPACE;
#ifdef PCRE_ERROR_MATCHLIMIT
    case PCRE_ERROR_MATCHLIMIT: return AP_REG_ESPACE;
#endif
#ifdef PCRE_ERROR_BADUTF8
    case PCRE_ERROR_BADUTF8: return AP_REG_INVARG;
#endif
#ifdef PCRE_ERROR_BADUTF8_OFFSET
    case PCRE_ERROR_BADUTF8_OFFSET: return AP_REG_INVARG;
#endif
#endif /* WITH_PCRE2 */
    default: return AP_REG_ASSERT;
    }
}