00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "precomp.h"
00030 #include "regexp.h"
00031 #include "string_format.h"
00032 #include "exception.h"
00033
00035
00036
00037 CL_RegExp::CL_RegExp(const char *expression, int compile_flags, bool study)
00038 : code(0), extra(0)
00039 {
00040 compile(expression, compile_flags, study);
00041 }
00042
00043 CL_RegExp::CL_RegExp(const CL_StringA &expression, int compile_flags, bool study)
00044 : code(0), extra(0)
00045 {
00046 compile(expression.c_str(), compile_flags, study);
00047 }
00048
00049 CL_RegExp::~CL_RegExp()
00050 {
00051 if (extra)
00052 pcre_free(extra);
00053 if (code)
00054 pcre_free(code);
00055 }
00056
00058
00059
00060 int CL_RegExp::get_string_number(const char *name) const
00061 {
00062 return pcre_get_stringnumber(code, name);
00063 }
00064
00065 int CL_RegExp::get_string_number(const CL_StringA &name) const
00066 {
00067 return get_string_number(name.c_str());
00068 }
00069
00071
00072
00073 CL_RegExpMatch CL_RegExp::search(
00074 const char *subject,
00075 int length,
00076 int start_offset,
00077 int search_flags) const
00078 {
00079 CL_RegExpMatch match;
00080 search(subject, length, start_offset, search_flags, match);
00081 return match;
00082 }
00083
00084 CL_RegExpMatch CL_RegExp::search(
00085 const CL_StringA &subject,
00086 int start_offset,
00087 int search_flags) const
00088 {
00089 CL_RegExpMatch match;
00090 search(subject.c_str(), subject.length(), start_offset, search_flags, match);
00091 return match;
00092 }
00093
00094 void CL_RegExp::search(
00095 const char *subject,
00096 int length,
00097 int start_offset,
00098 int search_flags,
00099 CL_RegExpMatch &result) const
00100 {
00101 int flags = 0;
00102 if (search_flags & search_anchored)
00103 flags |= PCRE_ANCHORED;
00104 if (search_flags & search_not_bol)
00105 flags |= PCRE_NOTBOL;
00106 if (search_flags & search_not_eol)
00107 flags |= PCRE_NOTEOL;
00108 if (search_flags & search_not_empty)
00109 flags |= PCRE_NOTEMPTY;
00110 if (search_flags & search_no_utf8_check)
00111 flags |= PCRE_NO_UTF8_CHECK;
00112 if (search_flags & search_partial)
00113 flags |= PCRE_PARTIAL;
00114
00115 int captures_count = 0;
00116 int r = pcre_fullinfo(code, extra, PCRE_INFO_CAPTURECOUNT, &captures_count);
00117 if (r != 0)
00118 throw CL_Exception(TEXT("Regular expression error: Unable to get captures count"));
00119 result.set_vector_size((captures_count+1)*3);
00120
00121 r = pcre_exec(
00122 code,
00123 extra,
00124 subject,
00125 length,
00126 start_offset,
00127 flags,
00128 result.get_vector(),
00129 result.get_vector_size());
00130 if (r == PCRE_ERROR_NOMATCH)
00131 {
00132 result.set_captures_count(0);
00133 }
00134 else if (r == PCRE_ERROR_PARTIAL)
00135 {
00136 result.set_partial_match(true);
00137 }
00138 else if (r > 0)
00139 {
00140 result.set_captures_count(r);
00141 }
00142 else
00143 {
00144 switch (r)
00145 {
00146 case PCRE_ERROR_NULL:
00147 throw CL_Exception("Regular expression error: NULL pointer passed");
00148 case PCRE_ERROR_BADOPTION:
00149 throw CL_Exception("Regular expression error: Unrecognized bit was set in the options argument");
00150 case PCRE_ERROR_BADMAGIC:
00151 throw CL_Exception("Regular expression error: Bad Magic! Accessing deleted CL_RegExp object?");
00152 case PCRE_ERROR_UNKNOWN_NODE:
00153 throw CL_Exception("Regular expression error: While running the pattern match, an unknown item was encountered in the compiled pattern");
00154 case PCRE_ERROR_NOMEMORY:
00155 throw CL_Exception("Regular expression error: Out of memory");
00156 case PCRE_ERROR_NOSUBSTRING:
00157 throw CL_Exception("Regular expression error: No sub string found");
00158 case PCRE_ERROR_MATCHLIMIT:
00159 throw CL_Exception("Regular expression error: Match recursion limit reached");
00160 case PCRE_ERROR_CALLOUT:
00161 throw CL_Exception("Regular expression error: Error signalled from callout handler");
00162 case PCRE_ERROR_BADUTF8:
00163 throw CL_Exception("Regular expression error: A string that contains an invalid UTF-8 byte sequence was passed as a subject");
00164 case PCRE_ERROR_BADUTF8_OFFSET:
00165 throw CL_Exception("Regular expression error: Start offset did not point to the beginning of a UTF-8 character");
00166 case PCRE_ERROR_BADPARTIAL:
00167 throw CL_Exception("Regular expression error: Expression includes items not supported by partial matching");
00168 case PCRE_ERROR_INTERNAL:
00169 throw CL_Exception("Regular expression error: An unexpected internal error has occurred.");
00170 case PCRE_ERROR_BADCOUNT:
00171 throw CL_Exception("Regular expression error: The value of the ovecsize argument is negative");
00172 default:
00173 throw CL_Exception("Regular expression error: Unknown error");
00174 }
00175 }
00176 }
00177
00178 void CL_RegExp::search(
00179 const CL_StringA &subject,
00180 int length,
00181 int start_offset,
00182 int search_flags,
00183 CL_RegExpMatch &result) const
00184 {
00185 search(subject.c_str(), subject.length(), start_offset, search_flags, result);
00186 }
00187
00189
00190
00191 void CL_RegExp::compile(const char *expression, int compile_flags, bool study)
00192 {
00193 const char *error = 0;
00194 int error_offset = 0;
00195 const unsigned char *tableptr = 0;
00196
00197 int flags = 0;
00198 if (compile_flags & compile_anchored)
00199 flags |= PCRE_ANCHORED;
00200 if (compile_flags & compile_auto_callout)
00201 flags |= PCRE_AUTO_CALLOUT;
00202 if (compile_flags & compile_caseless)
00203 flags |= PCRE_CASELESS;
00204 if (compile_flags & compile_dollar_endonly)
00205 flags |= PCRE_DOLLAR_ENDONLY;
00206 if (compile_flags & compile_dot_all)
00207 flags |= PCRE_DOTALL;
00208 if (compile_flags & compile_extended)
00209 flags |= PCRE_EXTENDED;
00210 if (compile_flags & compile_extra)
00211 flags |= PCRE_EXTRA;
00212 if (compile_flags & compile_multi_line)
00213 flags |= PCRE_MULTILINE;
00214 if (compile_flags & compile_no_auto_capture)
00215 flags |= PCRE_NO_AUTO_CAPTURE;
00216 if (compile_flags & compile_ungreedy)
00217 flags |= PCRE_UNGREEDY;
00218 if (compile_flags & compile_utf8)
00219 flags |= PCRE_UTF8;
00220 if (compile_flags & compile_no_utf8_check)
00221 flags |= PCRE_NO_UTF8_CHECK;
00222
00223 code = pcre_compile(
00224 expression,
00225 flags,
00226 &error,
00227 &error_offset,
00228 tableptr);
00229 if (code == 0)
00230 {
00231 CL_StringFormat s("Error in regular expression: %1 (at position %2)");
00232 s.set_arg(1, error);
00233 s.set_arg(2, error_offset);
00234 throw CL_Exception(s.get_result());
00235 }
00236
00237 if (study)
00238 {
00239 extra = pcre_study(
00240 code,
00241 0,
00242 &error);
00243 if (extra == 0)
00244 {
00245 CL_StringFormat s("Error studying regular expression: %1");
00246 s.set_arg(1, error);
00247 pcre_free(code);
00248 throw CL_Exception(s.get_result());
00249 }
00250 }
00251 }