Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

regexp.cpp

Go to the documentation of this file.
00001 /*
00002 **  ClanLib SDK
00003 **  Copyright (c) 1997-2005 The ClanLib Team
00004 **
00005 **  This software is provided 'as-is', without any express or implied
00006 **  warranty.  In no event will the authors be held liable for any damages
00007 **  arising from the use of this software.
00008 **
00009 **  Permission is granted to anyone to use this software for any purpose,
00010 **  including commercial applications, and to alter it and redistribute it
00011 **  freely, subject to the following restrictions:
00012 **
00013 **  1. The origin of this software must not be misrepresented; you must not
00014 **     claim that you wrote the original software. If you use this software
00015 **     in a product, an acknowledgment in the product documentation would be
00016 **     appreciated but is not required.
00017 **  2. Altered source versions must be plainly marked as such, and must not be
00018 **     misrepresented as being the original software.
00019 **  3. This notice may not be removed or altered from any source distribution.
00020 **
00021 **  Note: Some of the libraries ClanLib link to may have additional
00022 **  requirements or restrictions.
00023 **
00024 **  File Author(s):
00025 **
00026 **    Magnus Norddahl
00027 */
00028 
00029 #include "precomp.h"
00030 #include "regexp.h"
00031 #include "string_format.h"
00032 #include "exception.h"
00033 
00035 // CL_RegExp Construction:
00036 
00037 CL_RegExp::CL_RegExp(const char *expression, int compile_flags, bool study)
00038 : code(0), extra(0)
00039 {
00040         compile(expression, compile_flags, study);
00041 }
00042 
00043 CL_RegExp::CL_RegExp(const CL_StringA &expression, int compile_flags, bool study)
00044 : code(0), extra(0)
00045 {
00046         compile(expression.c_str(), compile_flags, study);
00047 }
00048 
00049 CL_RegExp::~CL_RegExp()
00050 {
00051         if (extra)
00052                 pcre_free(extra);
00053         if (code)
00054                 pcre_free(code);
00055 }
00056 
00058 // CL_RegExp Attributes:
00059 
00060 int CL_RegExp::get_string_number(const char *name) const
00061 {
00062         return pcre_get_stringnumber(code, name);
00063 }
00064         
00065 int CL_RegExp::get_string_number(const CL_StringA &name) const
00066 {
00067         return get_string_number(name.c_str());
00068 }
00069 
00071 // CL_RegExp Operations:
00072 
00073 CL_RegExpMatch CL_RegExp::search(
00074         const char *subject,
00075         int length,
00076         int start_offset,
00077         int search_flags) const
00078 {
00079         CL_RegExpMatch match;
00080         search(subject, length, start_offset, search_flags, match);
00081         return match;
00082 }
00083 
00084 CL_RegExpMatch CL_RegExp::search(
00085         const CL_StringA &subject,
00086         int start_offset,
00087         int search_flags) const
00088 {
00089         CL_RegExpMatch match;
00090         search(subject.c_str(), subject.length(), start_offset, search_flags, match);
00091         return match;
00092 }
00093 
00094 void CL_RegExp::search(
00095         const char *subject,
00096         int length,
00097         int start_offset,
00098         int search_flags,
00099         CL_RegExpMatch &result) const
00100 {
00101         int flags = 0;
00102         if (search_flags & search_anchored)
00103                 flags |= PCRE_ANCHORED;
00104         if (search_flags & search_not_bol)
00105                 flags |= PCRE_NOTBOL;
00106         if (search_flags & search_not_eol)
00107                 flags |= PCRE_NOTEOL;
00108         if (search_flags & search_not_empty)
00109                 flags |= PCRE_NOTEMPTY;
00110         if (search_flags & search_no_utf8_check)
00111                 flags |= PCRE_NO_UTF8_CHECK;
00112         if (search_flags & search_partial)
00113                 flags |= PCRE_PARTIAL;
00114 
00115         int captures_count = 0;
00116         int r = pcre_fullinfo(code, extra, PCRE_INFO_CAPTURECOUNT, &captures_count);
00117         if (r != 0)
00118                 throw CL_Exception(TEXT("Regular expression error: Unable to get captures count"));
00119         result.set_vector_size((captures_count+1)*3);
00120                 
00121         r = pcre_exec(
00122                 code,
00123                 extra,
00124                 subject,
00125                 length,
00126                 start_offset,
00127                 flags,
00128                 result.get_vector(),
00129                 result.get_vector_size());
00130         if (r == PCRE_ERROR_NOMATCH)
00131         {
00132                 result.set_captures_count(0);
00133         }
00134         else if (r == PCRE_ERROR_PARTIAL)
00135         {
00136                 result.set_partial_match(true);
00137         }
00138         else if (r > 0)
00139         {
00140                 result.set_captures_count(r);
00141         }
00142         else
00143         {
00144                 switch (r)
00145                 {
00146                 case PCRE_ERROR_NULL:
00147                         throw CL_Exception("Regular expression error: NULL pointer passed");
00148                 case PCRE_ERROR_BADOPTION:
00149                         throw CL_Exception("Regular expression error: Unrecognized bit was set in the options argument");
00150                 case PCRE_ERROR_BADMAGIC:
00151                         throw CL_Exception("Regular expression error: Bad Magic! Accessing deleted CL_RegExp object?");
00152                 case PCRE_ERROR_UNKNOWN_NODE:
00153                         throw CL_Exception("Regular expression error: While running the pattern match, an unknown item was encountered in the compiled pattern");
00154                 case PCRE_ERROR_NOMEMORY:
00155                         throw CL_Exception("Regular expression error: Out of memory");
00156                 case PCRE_ERROR_NOSUBSTRING:
00157                         throw CL_Exception("Regular expression error: No sub string found");
00158                 case PCRE_ERROR_MATCHLIMIT:
00159                         throw CL_Exception("Regular expression error: Match recursion limit reached");
00160                 case PCRE_ERROR_CALLOUT:
00161                         throw CL_Exception("Regular expression error: Error signalled from callout handler");
00162                 case PCRE_ERROR_BADUTF8:
00163                         throw CL_Exception("Regular expression error: A string that contains an invalid UTF-8 byte sequence was passed as a subject");
00164                 case PCRE_ERROR_BADUTF8_OFFSET:
00165                         throw CL_Exception("Regular expression error: Start offset did not point to the beginning of a UTF-8 character");
00166                 case PCRE_ERROR_BADPARTIAL:
00167                         throw CL_Exception("Regular expression error: Expression includes items not supported by partial matching");
00168                 case PCRE_ERROR_INTERNAL:
00169                         throw CL_Exception("Regular expression error: An unexpected internal error has occurred.");
00170                 case PCRE_ERROR_BADCOUNT:
00171                         throw CL_Exception("Regular expression error: The value of the ovecsize argument is negative");
00172                 default:
00173                         throw CL_Exception("Regular expression error: Unknown error");
00174                 }
00175         }
00176 }
00177 
00178 void CL_RegExp::search(
00179         const CL_StringA &subject,
00180         int length,
00181         int start_offset,
00182         int search_flags,
00183         CL_RegExpMatch &result) const
00184 {
00185         search(subject.c_str(), subject.length(), start_offset, search_flags, result);
00186 }
00187 
00189 // CL_RegExp Implementation:
00190 
00191 void CL_RegExp::compile(const char *expression, int compile_flags, bool study)
00192 {
00193         const char *error = 0;
00194         int error_offset = 0;
00195         const unsigned char *tableptr = 0;
00196         
00197         int flags = 0;
00198         if (compile_flags & compile_anchored)
00199                 flags |= PCRE_ANCHORED;
00200         if (compile_flags & compile_auto_callout)
00201                 flags |= PCRE_AUTO_CALLOUT;
00202         if (compile_flags & compile_caseless)
00203                 flags |= PCRE_CASELESS;
00204         if (compile_flags & compile_dollar_endonly)
00205                 flags |= PCRE_DOLLAR_ENDONLY;
00206         if (compile_flags & compile_dot_all)
00207                 flags |= PCRE_DOTALL;
00208         if (compile_flags & compile_extended)
00209                 flags |= PCRE_EXTENDED;
00210         if (compile_flags & compile_extra)
00211                 flags |= PCRE_EXTRA;
00212         if (compile_flags & compile_multi_line)
00213                 flags |= PCRE_MULTILINE;
00214         if (compile_flags & compile_no_auto_capture)
00215                 flags |= PCRE_NO_AUTO_CAPTURE;
00216         if (compile_flags & compile_ungreedy)
00217                 flags |= PCRE_UNGREEDY;
00218         if (compile_flags & compile_utf8)
00219                 flags |= PCRE_UTF8;
00220         if (compile_flags & compile_no_utf8_check)
00221                 flags |= PCRE_NO_UTF8_CHECK;
00222 
00223         code = pcre_compile(
00224                 expression,
00225                 flags,
00226                 &error,
00227                 &error_offset,
00228                 tableptr);
00229         if (code == 0)
00230         {
00231                 CL_StringFormat s("Error in regular expression: %1 (at position %2)");
00232                 s.set_arg(1, error);
00233                 s.set_arg(2, error_offset);
00234                 throw CL_Exception(s.get_result());
00235         }
00236         
00237         if (study)
00238         {
00239                 extra = pcre_study(
00240                         code,
00241                         0,
00242                         &error);
00243                 if (extra == 0)
00244                 {
00245                         CL_StringFormat s("Error studying regular expression: %1");
00246                         s.set_arg(1, error);
00247                         pcre_free(code);
00248                         throw CL_Exception(s.get_result());
00249                 }
00250         }
00251 }

Generated on Sat Feb 19 22:51:16 2005 for npcore by  doxygen 1.4.1