preg_match のキャプチャ #2

hnwさんが「おそらくバグ」といっているのだけど、どうなんだろ、これは。
とりあえず PCRE を直に叩いてみた。

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>

#include <pcre.h>

using namespace std;

// Number of ints in the offset vector handed to pcre_exec(). Captures are
// read back from it as consecutive (start, end) pairs. Per pcreapi(3) the
// size should be a multiple of 3: the first two thirds hold the pairs and
// the last third is scratch space, so 30 leaves room for 10 pairs.
static const int OVECTORSIZE = 30;

// Compiles the pattern \A (a) (b)? (c)? \z (extended syntax) and runs it
// against a few sample words, printing the offsets and text of every
// capture group that pcre_exec() reports.
//
// The point of the experiment is to see how optional groups that did not
// participate in the match are reported: they come back with offsets
// (-1, -1), and trailing unset groups are not counted in the return value.
//
// Returns 0 on success; exits with status 1 if the pattern fails to compile.
int main()
{
  const char *error;
  int erroroffset;
  int rc;
  pcre* re;
  int ovector[OVECTORSIZE];
  const char *words[] = {"abc", "ab", "ac"};

  re = pcre_compile("\\A (a) (b)? (c)? \\z", PCRE_EXTENDED,
                    &error, &erroroffset, NULL);
  if (re == NULL)
    {
      std::fprintf(stderr, "pcre pattern compile failed\n");
      // pcre_compile() filled these in; report them instead of dropping them.
      std::fprintf(stderr, "  at offset %d: %s\n", erroroffset, error);
      std::exit(1);
    }

  for (size_t i = 0; i < sizeof(words) / sizeof(words[0]); i++)
    {
      const char* word = words[i];
      std::cout << "word: " << word << std::endl;
      rc = pcre_exec(re, NULL, word, static_cast<int>(std::strlen(word)),
                     0, 0, ovector, OVECTORSIZE);
      // Check the match result (rc), not the compiled pattern pointer (re):
      // a negative rc means "no match" or an error.
      if (rc < 0)
        {
          std::fputs("match failed\n", stderr);
        }
      else
        {
          // rc == 0 means the match succeeded but ovector was too small;
          // only the pairs that fit were stored.
          if (rc == 0)
            rc = OVECTORSIZE / 3;

          for (int j = 0; j < rc; j++)
            {
              int start = ovector[j * 2];
              int end = ovector[j * 2 + 1];
              std::cout << j << ": " << start << ", " << end << '\t';
              // Unset groups are reported as (-1, -1). Skip the copy so we
              // never form a pointer before the beginning of the subject.
              if (start >= 0 && end >= start)
                {
                  std::copy(word + start, word + end,
                            std::ostream_iterator<char>(std::cout));
                }
              std::cout << std::endl;
            }
        }
      std::cout << std::endl;
    }

  pcre_free(re);

  return 0;
}
% g++ -O2 -Wall -W -o pcre_test pcre_test.cpp -lpcre
% ./pcre_test
word: abc
0: 0, 3 abc
1: 0, 1 a
2: 1, 2 b
3: 2, 3 c

word: ab
0: 0, 2 ab
1: 0, 1 a
2: 1, 2 b

word: ac
0: 0, 2 ac
1: 0, 1 a
2: -1, -1
3: 1, 2 c

うーむ。とりあえず PHP のせいではなさげ。PCRE の仕様かなぁ。pcre_exec の man を見てみる。

It returns offsets to captured substrings.

ぬぅ。