The following example should output:
Trying to find '([[:digit:]]+)[^[:digit:]]+([[:digit:]]+)' in 'This 1 is nice 2 so 33 for 4254'
$& is '1 is nice 2' (bytes 5:16)
$1 is '1' (bytes 5:6)
$2 is '2' (bytes 15:16)
$& is '33 for 4254' (bytes 20:31)
$1 is '33' (bytes 20:22)
$2 is '4254' (bytes 27:31)
No more matches.
The first match, which finds '1' and '2', works fine. But after that, _regexec() crashes.
I can't find any error! :(
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <regex.h>
// The following is the size of a buffer to contain any error messages encountered when the regular expression is compiled.
#define MAX_ERROR_MSG 0x1000
// Compile the pattern "regex_text" into "r" as an extended, newline-sensitive
// regular expression.
// Returns 0 on success. On failure the compiler's error text is printed to
// standard output and 1 is returned.
static int compile_regex (_regex_t * r, const char * regex_text)
{
    const int rc = _regcomp (r, regex_text, _REG_EXTENDED | _REG_NEWLINE);

    // Success is the common case: leave early.
    if (rc == 0) {
        return 0;
    }

    // Turn the status code into readable text and report it.
    char reason[MAX_ERROR_MSG];
    _regerror (rc, r, reason, MAX_ERROR_MSG);
    printf ("Regex error compiling '%s': %s\n", regex_text, reason);
    return 1;
}
// Print every match of the compiled regular expression "r" found in "to_match".
// For each match, the whole match ($&) and the sub-expressions ($1, $2, ...) are
// printed together with their byte offsets relative to the start of "to_match".
// Returns the non-zero status of the _regexec call that ended the search.
static int match_regex (_regex_t * r, const char * to_match)
{
// "p" is where the next search starts: the beginning of the string at first, afterwards the end of the previous whole match.
const char * p = to_match;
// "n_matches" is the number of slots in "m" (whole match plus sub-expressions) handed to each search.
const int n_matches = 10;
// "m" receives the offsets of the matches found.
// NOTE(review): "n_matches" is a const variable, not a constant expression, so in C this declares a variable-length array.
_regmatch_t m[n_matches];
while (1) {
int i = 0;
// The search starts at "p", so the offsets written to "m" are relative to "p" (see the conversion below).
int nomatch = _regexec (r, p, n_matches, m, 0);
if (nomatch) {
// Any non-zero status ends the search and is reported with the same message.
printf ("No more matches.\n");
return nomatch;
}
for (i = 0; i < n_matches; i++) {
int start;
int finish;
// A start offset of -1 is taken as the end of the filled slots.
if (m[i].rm_so == -1) {
break;
}
// Convert the offsets from "relative to p" to "relative to to_match".
start = m[i].rm_so + (p - to_match);
finish = m[i].rm_eo + (p - to_match);
// Slot 0 is the whole match; slots 1.. are labelled with their index.
if (i == 0) {
printf ("$& is ");
}
else {
printf ("$%d is ", i);
}
printf ("'%.*s' (bytes %d:%d)\n", (finish - start),
to_match + start, start, finish);
}
// Continue searching right after the whole match.
// NOTE(review): if the whole match ends at offset 0 (an empty match at "p"), "p" does not move and the same search is repeated.
p += m[0].rm_eo;
}
// Not reached: the loop is only left through the return inside it.
return 0;
}
// Entry point.
// With exactly two arguments, argv[1] is the pattern and argv[2] the text to
// search; otherwise a built-in demo pattern and text are used.
// Returns 0 after the search has run, or EXIT_FAILURE if the pattern could
// not be compiled.
int main(int argc, char ** argv)
{
    _regex_t r;
    const char * regex_text;
    const char * find_text;

    if (argc != 3) {
        regex_text = "([[:digit:]]+)[^[:digit:]]+([[:digit:]]+)";
        find_text = "This 1 is nice 2 so 33 for 4254";
    }
    else {
        regex_text = argv[1];
        find_text = argv[2];
    }
    printf ("Trying to find '%s' in '%s'\n", regex_text, find_text);

    // A failed compilation leaves "r" unusable: it must neither be searched
    // with nor freed, so stop here (compile_regex already printed the reason).
    if (compile_regex (& r, regex_text) != 0) {
        return EXIT_FAILURE;
    }

    match_regex (& r, find_text);
    _regfree (& r);
    return 0;
}
//p += m[0].rm_eo;
p += m[i].rm_eo;
This is wrong! 'p' should point to the rest of the string (" so 33 for 4254") in order to find '33' and '4254'.
Quote from: frankie on January 20, 2015, 01:37:05 PM
//p += m[0].rm_eo;
p += m[i].rm_eo;
So now there is no crash, but no second match either! :(
// Print every match of the compiled regular expression "r" found in "to_match".
// For each match, the whole match ($&) and the sub-expressions ($1, $2, ...) are
// printed together with their byte offsets relative to the start of "to_match".
// Returns the non-zero status of the _regexec call that ended the search.
static int match_regex (_regex_t * r, const char * to_match)
{
// "p" is where the next search starts: the beginning of the string at first, afterwards the end of the previous whole match.
const char * p = to_match;
// "n_matches" is the number of slots in "m" (whole match plus sub-expressions) handed to each search.
const int n_matches = 10;
// "m" receives the offsets of the matches found.
// NOTE(review): "n_matches" is a const variable, not a constant expression, so in C this declares a variable-length array.
_regmatch_t m[n_matches];
while (1) {
int i = 0;
// The search starts at "p", so the offsets written to "m" are relative to "p" (see the conversion below).
int nomatch = _regexec (r, p, n_matches, m, 0);
if (nomatch) {
// Any non-zero status ends the search and is reported with the same message.
printf ("No more matches.\n");
return nomatch;
}
for (i = 0; i < n_matches; i++) {
int start;
int finish;
// A start offset of -1 is taken as the end of the filled slots.
if (m[i].rm_so == -1) {
break;
}
// Convert the offsets from "relative to p" to "relative to to_match".
start = m[i].rm_so + (p - to_match);
finish = m[i].rm_eo + (p - to_match);
// Slot 0 is the whole match; slots 1.. are labelled with their index.
if (i == 0) {
printf ("$& is ");
}
else {
printf ("$%d is ", i);
}
printf ("'%.*s' (bytes %d:%d)\n", (finish - start),
to_match + start, start, finish);
}
// Continue searching right after the whole match.
// NOTE(review): if the whole match ends at offset 0 (an empty match at "p"), "p" does not move and the same search is repeated.
p += m[0].rm_eo;
}
// Not reached: the loop is only left through the return inside it.
return 0;
}
Look at your code: is the _regexec call inside the for loop?
Or did you intend it to be inside the while loop?
The code is not very readable...
Quote from: frankie on January 20, 2015, 02:45:41 PM
Look at your code: is the _regexec call inside the for loop?
Or did you intend it to be inside the while loop?
It is not (and should not be) inside the for loop.
It is in the while loop and that's ok!
Compliments! You won! This is another bug. ;D
If you change to:
// Match the string in "to_match" against the compiled regular expression in "r".
//
// Every match is printed: first the whole match ($&), then each sub-expression
// that took part in it ($1, $2, ...), with byte offsets relative to the start
// of "to_match". The search then resumes after the whole match.
//
// Returns the non-zero _regexec status that ended the search, or 0 if the
// search ended because an empty match was found at the end of the string.
//
// NOTE(review): every search is started with eflags 0 even when "p" is in the
// middle of the string, so a '^' anchor may match there. POSIX regexec has
// REG_NOTBOL for this; confirm the spelling this library uses before adding it.

// Number of match slots: the whole match plus up to NMATCHES - 1 sub-expressions.
// A compile-time constant keeps "m" an ordinary fixed-size array.
#define NMATCHES 10
static int match_regex (_regex_t * r, const char * to_match)
{
    // "p" is where the next search starts.
    const char * p = to_match;
    // "m" receives the match offsets, relative to "p".
    _regmatch_t m[NMATCHES];

    while (1) {
        int nomatch = _regexec (r, p, NMATCHES, m, 0);
        if (nomatch) {
            printf ("No more matches.\n");
            return nomatch;
        }

        for (int i = 0; i < NMATCHES; i++) {
            // -1 marks a slot that is unused or a sub-expression that did not
            // take part in this match. Skip it instead of stopping, because a
            // later sub-expression may still have matched (e.g. "(a)|(b)").
            if (m[i].rm_so == -1) {
                continue;
            }

            // Convert the offsets from "relative to p" to "relative to to_match".
            int start = (int) (m[i].rm_so + (p - to_match));
            int finish = (int) (m[i].rm_eo + (p - to_match));

            if (i == 0) {
                printf ("$& is ");
            }
            else {
                printf ("$%d is ", i);
            }
            printf ("'%.*s' (bytes %d:%d)\n", (finish - start),
                    to_match + start, start, finish);
        }

        if (m[0].rm_eo > 0) {
            // Normal case: continue right after the whole match.
            p += m[0].rm_eo;
        }
        else if (*p != '\0') {
            // Empty match at "p": step over one byte, otherwise the same
            // empty match would be found forever.
            p++;
        }
        else {
            // Empty match at the end of the string: nothing left to search.
            printf ("No more matches.\n");
            break;
        }
    }
    return 0;
}
It works.
For some strange reason the compiler restores the stack pointer inside the while loop (mov esp,ebx) before the scope of the variable has ended! >:(
I'm moving this topic to bug reports.
By the way, this seems to be a new bug. With 7RC4 everything works.