preg_match のキャプチャ #2
hnwさんが「おそらくバグ」といっているのだけど、どうなんだろ、これは。
とりあえず PCRE を直に叩いてみた。
#include <iostream> #include <iterator> #include <algorithm> #include <pcre.h> using namespace std; static const int OVECTORSIZE = 30; int main() { const char *error; int erroroffset; int rc; pcre* re; int ovector[OVECTORSIZE]; const char *words[] = {"abc", "ab", "ac"}; re = pcre_compile("\\A (a) (b)? (c)? \\z", PCRE_EXTENDED, &error, &erroroffset, NULL); if (re == NULL) { fprintf(stderr, "pcre pattern compile failed\n"); exit(1); } for (size_t i = 0; i < sizeof(words) / sizeof(words[0]); i++) { const char* word = words[i]; cout << "word: " << word << endl; rc = pcre_exec(re, NULL, word, strlen(word), 0, 0, ovector, OVECTORSIZE); if (re < 0) { fputs("match failed\n", stderr); } else { for (int j = 0; j < rc; j++) { int start = ovector[j * 2]; int end = ovector[j * 2 + 1]; cout << j << ": " << start << ", " << end << '\t'; copy(word + start, word + end, ostream_iterator<char>(cout)); cout << endl; } } cout << endl; } pcre_free(re); return 0; }
% g++ -O2 -Wall -W -o pcre_test pcre_test.cpp -lpcre
% ./pcre_test
word: abc
0: 0, 3 abc
1: 0, 1 a
2: 1, 2 b
3: 2, 3 c

word: ab
0: 0, 2 ab
1: 0, 1 a
2: 1, 2 b

word: ac
0: 0, 2 ac
1: 0, 1 a
2: -1, -1
3: 1, 2 c
うーむ。とりあえず PHP のせいではなさげ。PCRE の仕様かなぁ。pcre_exec の man を見てみる。
It returns offsets to captured substrings.
ぬぅ。