Hi Miry, After applying your patch, I only had to set matchArray to pcre2_get_ovector_pointer(patternMatchData) each time pcre2_match is called; with that change, regex searching and replacing work.
The source code has been updated: https://sourceforge.net/p/xml-copy-editor/code/ci/3d17bca4196670183ad45c0af369acf4acdc7d7e/ Regards, Zane
diff --git a/configure.ac b/configure.ac index d0ab3af..1c1f0dd 100755 --- a/configure.ac +++ b/configure.ac @@ -72,8 +72,7 @@ AC_ARG_ENABLE(debug, ]) # Check pcre is available -AC_CHECK_HEADER(pcre.h, , - AC_MSG_ERROR([PCRE headers not found])) +PKG_CHECK_MODULES([PCRE2], [libpcre2-8]) # Check boost::shared_ptr is available AC_LANG(C++) diff --git a/src/Makefile.am b/src/Makefile.am index 7b0c81c..15bf572 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -83,7 +83,8 @@ xmlcopyeditor_LDADD = $(WX_LIBS) \ $(ENCHANT_LIBS) \ $(GTK_LIBS) \ $(XSLT_LIBS) \ - -lexpat -lpcre -lxerces-c + $(PCRE2_LIBS) \ + -lexpat -lxerces-c nobase_dist_xmlcopyeditor_DATA = $(srcdir)/catalog/catalog \ $(srcdir)/dtd/*.* \ @@ -133,5 +134,5 @@ EXTRA_DIST = \ $(srcdir)/xmlcopyeditor.rc \ $(srcdir)/xmlschemaparser.cpp -AM_CPPFLAGS = $(XML2_CFLAGS) $(ENCHANT_CFLAGS) $(GTK_CFLAGS) +AM_CPPFLAGS = $(XML2_CFLAGS) $(ENCHANT_CFLAGS) $(GTK_CFLAGS) $(PCRE2_CFLAGS) diff --git a/src/rule.cpp b/src/rule.cpp index 487a364..37bfe3c 100644 --- a/src/rule.cpp +++ b/src/rule.cpp @@ -26,12 +26,11 @@ using namespace std; Rule::Rule ( const string& pattern, bool matchCase, - const string& replace, - const int arrayLength ) : WrapRegex ( + const string& replace + ) : WrapRegex ( pattern, matchCase, - replace, - arrayLength ) + replace ) { adjustCaseAttribute = tentativeAttribute = false; } diff --git a/src/rule.h b/src/rule.h index a89289e..1abfbab 100644 --- a/src/rule.h +++ b/src/rule.h @@ -32,8 +32,7 @@ class Rule : public WrapRegex Rule ( const string& pattern, bool matchCase, - const string& replace = "", - const int arrayLength = 60 ); + const string& replace = ""); bool getAdjustCaseAttribute(); bool getTentativeAttribute(); string getReport(); diff --git a/src/wrapregex.cpp b/src/wrapregex.cpp index ff8d622..99d2a7c 100644 --- a/src/wrapregex.cpp +++ b/src/wrapregex.cpp @@ -31,40 +31,39 @@ using namespace std; WrapRegex::WrapRegex ( const string& pattern, bool matchCase, - const string& replaceParameter, 
- const int arrayLengthParameter ) : + const string& replaceParameter ) : replace ( replaceParameter ), - arrayLength ( arrayLengthParameter ), returnValue ( 0 ) { if ( pattern.empty() || pattern == ".*" ) { disabled = true; - matchArray = NULL; - patternStructure = NULL; - patternExtraStructure = NULL; + patternCode = NULL; + patternMatchData = NULL; + patternMatchContext = NULL; return; } disabled = false; - matchArray = new int[arrayLength]; - // compile - int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8; - const char *errorPointer; - int errorOffset; - - if ( ( patternStructure = pcre_compile ( - pattern.c_str(), - optionsFlag, - &errorPointer, - &errorOffset, - NULL ) ) == NULL ) + uint32_t optionsFlag = ( matchCase ? 0 : PCRE2_CASELESS ) | PCRE2_UTF | PCRE2_NO_UTF_CHECK; + int errorCode; + PCRE2_SIZE errorOffset; + + if ( ( patternCode = pcre2_compile ( + (PCRE2_SPTR)pattern.c_str(), // pattern + PCRE2_ZERO_TERMINATED, // pattern is zero-terminated + optionsFlag, // options + &errorCode, // error number + &errorOffset, // error offset + NULL ) ) == NULL ) // default compile context { - throw runtime_error ( errorPointer ); + char buf[256]; + pcre2_get_error_message ( errorCode, (PCRE2_UCHAR *)buf, sizeof(buf) ); + throw runtime_error ( string(buf) ); } - - patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer ); + patternMatchData = pcre2_match_data_create_from_pattern ( patternCode, NULL ); + patternMatchContext = pcre2_match_context_create ( NULL ); } WrapRegex::~WrapRegex() @@ -72,9 +71,9 @@ WrapRegex::~WrapRegex() if ( disabled ) return; - pcre_free ( patternStructure ); - pcre_free ( patternExtraStructure ); - delete[] matchArray; + pcre2_match_data_free ( patternMatchData ); + pcre2_code_free ( patternCode ); + pcre2_match_context_free ( patternMatchContext ); } int WrapRegex::matchPatternGlobal ( @@ -108,18 +107,18 @@ string WrapRegex::replaceGlobal ( string output, match; output.reserve ( buffer.size() ); - 
while ( ( returnValue = pcre_exec ( - patternStructure, - patternExtraStructure, - s, - strlen ( s ), - 0, - 0, - matchArray, - arrayLength ) ) >= 0 ) + while ( ( returnValue = pcre2_match ( + patternCode, // compiled pattern + (PCRE2_SPTR)s, // subject string + strlen ( s ), // length of the subject + 0, // start at offset 0 in the subject + 0, // default options + patternMatchData, // block where results will be stored + patternMatchContext ) ) >= 0 ) // match context { ++ ( *matchCount ); + PCRE2_SIZE *matchArray = pcre2_get_ovector_pointer ( patternMatchData ); output.append ( s, matchArray[0] ); match.clear(); @@ -150,18 +149,18 @@ int WrapRegex::matchPatternGlobal_ ( matchcount = 0; offset = 0; - while ( ( returnValue = pcre_exec ( - patternStructure, - patternExtraStructure, - s, - buflen, - offset, - 0, - matchArray, - arrayLength ) ) >= 0 ) + while ( ( returnValue = pcre2_match ( + patternCode, // compiled pattern + (PCRE2_SPTR)s, // subject string + buflen, // length of the subject + offset, // start at this offset in the subject + 0, // default options + patternMatchData, // block where results will be stored + patternMatchContext ) ) >= 0 ) // match context { ++matchcount; + PCRE2_SIZE *matchArray = pcre2_get_ovector_pointer ( patternMatchData ); if ( context ) { match = ContextHandler::getContext ( @@ -255,11 +254,17 @@ string WrapRegex::getSubpattern_ ( const char *s, unsigned subpattern ) if ( disabled ) return ""; - const char *sub; - int ret = pcre_get_substring ( s, matchArray, returnValue, subpattern, &sub ); - if ( ret == PCRE_ERROR_NOSUBSTRING || ret == PCRE_ERROR_NOMEMORY ) + char *sub = NULL; + size_t sublen; + int ret = pcre2_substring_get_bynumber ( + patternMatchData, + subpattern, + (PCRE2_UCHAR **)&sub, /* output argument: must be the address of sub, not its NULL value */ + &sublen + ); + if ( ret != 0 ) /* zero means success; on any other result sub must not be read */ return ""; string subString ( sub ); - pcre_free_substring ( sub ); + pcre2_substring_free ( (PCRE2_UCHAR *)sub ); return subString; } diff --git 
a/src/wrapregex.h b/src/wrapregex.h index 3046dcc..b59b7c6 100644 --- a/src/wrapregex.h +++ b/src/wrapregex.h @@ -21,10 +21,14 @@ #ifndef WRAPREGEX_H #define WRAPREGEX_H +#ifndef PCRE2_CODE_UNIT_WIDTH +#define PCRE2_CODE_UNIT_WIDTH 8 +#endif + #include <iostream> #include <string> #include <vector> -#include <pcre.h> +#include <pcre2.h> #include <boost/utility.hpp> #include "contexthandler.h" @@ -36,8 +40,7 @@ class WrapRegex : private boost::noncopyable WrapRegex ( const string& pattern, bool matchCase, - const string& replaceParameter = "", - const int arrayLengthParameter = 60 ); + const string& replaceParameter = "" ); virtual ~WrapRegex(); string replaceGlobal ( const string& buffer, @@ -49,13 +52,12 @@ class WrapRegex : private boost::noncopyable int context = 0 ); private: string replace; - const int arrayLength; int returnValue; bool disabled; - pcre *patternStructure; - pcre_extra *patternExtraStructure; - int *matchArray; + pcre2_code *patternCode; + pcre2_match_data *patternMatchData; + pcre2_match_context *patternMatchContext; string getInterpolatedString_ ( const char *buffer, const char *source );