Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

xml_tokenizer.cpp

Go to the documentation of this file.
00001 /*
00002 **  ClanLib SDK
00003 **  Copyright (c) 1997-2005 The ClanLib Team
00004 **
00005 **  This software is provided 'as-is', without any express or implied
00006 **  warranty.  In no event will the authors be held liable for any damages
00007 **  arising from the use of this software.
00008 **
00009 **  Permission is granted to anyone to use this software for any purpose,
00010 **  including commercial applications, and to alter it and redistribute it
00011 **  freely, subject to the following restrictions:
00012 **
00013 **  1. The origin of this software must not be misrepresented; you must not
00014 **     claim that you wrote the original software. If you use this software
00015 **     in a product, an acknowledgment in the product documentation would be
00016 **     appreciated but is not required.
00017 **  2. Altered source versions must be plainly marked as such, and must not be
00018 **     misrepresented as being the original software.
00019 **  3. This notice may not be removed or altered from any source distribution.
00020 **
00021 **  Note: Some of the libraries ClanLib link to may have additional
00022 **  requirements or restrictions.
00023 **
00024 **  File Author(s):
00025 **
00026 **    Magnus Norddahl
00027 */
00028 
00029 #include "precomp.h"
00030 #include "xml_tokenizer.h"
00031 #include "xml_token_load.h"
00032 #include "string_help.h"
00033 #include "string_format.h"
00034 #include "exception.h"
00035 #include "xml_tokenizer_generic.h"
00036 #include "xml_token_string.h"
00037 
00038 #include <algorithm>
00039 #include <utility>
00040 
00042 // CL_XMLTokenizer construction:
00043 
00044 CL_XMLTokenizer::CL_XMLTokenizer()
00045 {
00046 }
00047 
00048 CL_XMLTokenizer::CL_XMLTokenizer(const CL_XMLTokenizer &copy) : impl(copy.impl)
00049 {
00050 }
00051 
00052 CL_XMLTokenizer::CL_XMLTokenizer(CL_IODevice *input, bool delete_input) : impl(new CL_XMLTokenizer_Generic)
00053 {
00054         impl->input = input;
00055         impl->delete_input = delete_input;
00056         impl->size = input->get_size();
00057         impl->data.resize(impl->size);
00058         input->receive(&impl->data[0], (int) impl->size);
00059         impl->pos = 0;
00060 }
00061 
00062 CL_XMLTokenizer::~CL_XMLTokenizer()
00063 {
00064 }
00065 
00067 // CL_XMLTokenizer attributes:
00068 
00069 bool CL_XMLTokenizer::get_eat_whitespace() const
00070 {
00071         return impl->eat_whitespace;
00072 }
00073 
00074 void CL_XMLTokenizer::set_eat_whitespace(bool enable)
00075 {
00076         impl->eat_whitespace = enable;
00077 }
00078 
00080 // CL_XMLTokenizer operations:
00081 
00082 CL_XMLTokenLoad CL_XMLTokenizer::next()
00083 {
00084         if (impl == 0)
00085                 return CL_XMLTokenLoad();
00086 
00087         if (impl->pos == impl->size)
00088                 return CL_XMLTokenLoad(); // EOF, return null token.
00089 
00090         bool is_need_escape = true;
00091 
00092         if (impl->data[impl->pos] != '<') // Text node
00093         {
00094                 std::string::size_type start_pos = impl->pos;
00095                 std::string::size_type end_pos = impl->data.find('<', start_pos);
00096                 if (end_pos == impl->data.npos) end_pos = impl->size;
00097                 impl->pos = end_pos;
00098 
00099                 CL_XMLTokenString text(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape);
00100                 if (impl->eat_whitespace)
00101                 {
00102                         text = trim_whitespace(text);
00103                         if (text.empty())
00104                                 return next();
00105                 }
00106 
00107                 CL_XMLTokenLoad token;
00108                 token.set_type(CL_XMLToken::TEXT_TOKEN);
00109                 token.set_value(text);
00110 
00111                 return token;
00112         }
00113         else // Tag node
00114         {
00115                 impl->pos++;
00116                 if (impl->pos == impl->size)
00117                         throw CL_Exception(TEXT("Premature end of XML data!"));
00118 
00119                 // Try to early predict what sort of node it might be:
00120                 bool closing = false;
00121                 bool questionMark = false;
00122                 bool exclamationMark = false;
00123                 if (impl->data[impl->pos] == '/')
00124                         closing = true;
00125                 else
00126                         if (impl->data[impl->pos] == '?')
00127                                 questionMark = true;
00128                         else
00129                                 if (impl->data[impl->pos] == '!')
00130                                         exclamationMark = true;
00131 
00132                 if (closing || questionMark || exclamationMark)
00133                 {
00134                         impl->pos++;
00135                         if (impl->pos == impl->size) throw CL_Exception(TEXT("Premature end of XML data!"));
00136                 }
00137 
00138                 if (exclamationMark) // check for cdata section or comments
00139                 {
00140                         if (impl->data.compare(impl->pos, 2, "--") == 0) // comment block
00141                         {
00142                                 std::string::size_type start_pos = impl->pos+2;
00143                                 std::string::size_type end_pos = impl->data.find("-->", start_pos);
00144                                 if (end_pos == impl->data.npos)
00145                                         throw CL_Exception(TEXT("Premature end of XML data!"));
00146                                 impl->pos = end_pos+3;
00147 
00148                                 CL_XMLTokenLoad token;
00149                                 token.set_type(CL_XMLToken::COMMENT_TOKEN);
00150                                 token.set_variant(CL_XMLToken::SINGLE);
00151                                 token.set_value(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00152                                 return token;
00153                         }
00154 
00155                         if (impl->data.compare(impl->pos, 7, "[CDATA[") != 0)
00156                                 throw CL_Exception(cl_format(TEXT("Error in XML stream at position %1"), static_cast<int>(impl->pos)));
00157                         std::string::size_type start_pos = impl->pos+7;
00158                         std::string::size_type end_pos = impl->data.find("]]>", start_pos);
00159                         if (end_pos == impl->data.npos)
00160                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00161                         impl->pos = end_pos+3;
00162 
00163                         CL_XMLTokenLoad token;
00164                         token.set_type(CL_XMLToken::CDATA_SECTION_TOKEN);
00165                         token.set_variant(CL_XMLToken::SINGLE);
00166                         token.set_value(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00167                         return token;
00168                 }
00169 
00170                 // Extract the tag name:
00171                 std::string::size_type start_pos = impl->pos;
00172                 std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t?/>", start_pos);
00173                 if (end_pos == impl->data.npos)
00174                         throw CL_Exception(TEXT("Premature end of XML data!"));
00175                 impl->pos = end_pos;
00176 
00177                 CL_XMLTokenLoad token;
00178                 token.set_type(questionMark ? CL_XMLToken::PROCESSING_INSTRUCTION_TOKEN : CL_XMLToken::ELEMENT_TOKEN);
00179                 token.set_variant(closing ? CL_XMLToken::END : CL_XMLToken::BEGIN);
00180                 token.set_name(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00181                 //token.set_name(replace_escapes_fast(impl->data.begin() + start_pos, impl->data.begin() + end_pos));
00182 
00183                 // Check for possible attributes:
00184                 while (true)
00185                 {
00186                         // Strip whitespace:
00187                         impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00188                         if (impl->pos == impl->data.npos)
00189                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00190 
00191                         // End of tag, stop searching for more attributes:
00192                         if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?' || impl->data[impl->pos] == '>')
00193                                 break;
00194 
00195                         // Extract attribute name:
00196                         std::string::size_type start_pos = impl->pos;
00197                         std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t=", start_pos);
00198                         if (end_pos == impl->data.npos)
00199                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00200                         impl->pos = end_pos;
00201 
00202                         CL_XMLTokenString attributeName(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape);
00203 
00204                         // Find seperator:
00205                         impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00206                         if (impl->pos == impl->data.npos || impl->pos == impl->size-1)
00207                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00208                         if (impl->data[impl->pos++] != '=')
00209                                 throw CL_Exception(cl_format("XML error(s), parser confused at line %1 (tag=%2, attributeName=%3)", impl->get_line_number(), token.get_name(), attributeName.to_string()));
00210 
00211                         // Strip whitespace:
00212                         impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00213                         if (impl->pos == impl->data.npos)
00214                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00215 
00216                         // Extract attribute value:
00217                         char const * first_of = " \r\n\t";
00218                         if (impl->data[impl->pos] == '"')
00219                         {
00220                                 first_of = "\"";
00221                                 impl->pos++;
00222                                 if (impl->pos == impl->size)
00223                                         throw CL_Exception(TEXT("Premature end of XML data!"));
00224                         }
00225                         else
00226                                 if (impl->data[impl->pos] == '\'')
00227                                 {
00228                                         first_of = "'";
00229                                         impl->pos++;
00230                                         if (impl->pos == impl->size)
00231                                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00232                                 }
00233 
00234                         start_pos = impl->pos;
00235                         end_pos = impl->data.find_first_of(first_of, start_pos);
00236                         if (end_pos == impl->data.npos)
00237                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00238                         
00239                         CL_XMLTokenString attributeValue(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00240 
00241                         impl->pos = end_pos + 1;
00242                         if (impl->pos == impl->size)
00243                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00244 
00245                         // Finally apply attribute to token:
00246                         token.set_attribute(attributeName, attributeValue);
00247                 }
00248 
00249                 // Check if its singular:
00250                 if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?')
00251                 {
00252                         token.set_variant(CL_XMLToken::SINGLE);
00253                         impl->pos++;
00254                         if (impl->pos == impl->size)
00255                                 throw CL_Exception(TEXT("Premature end of XML data!"));
00256                 }
00257 
00258                 // Data stream should be ending now.
00259                 if (impl->data[impl->pos] != '>')
00260                         throw CL_Exception(cl_format("Error in XML stream, line %1 (expected end of tag)", impl->get_line_number()));
00261                 impl->pos++;
00262 
00263                 return token;
00264         }
00265 }
00266 
00268 // CL_XMLTokenizer implementation:

Generated on Sat Feb 19 22:51:16 2005 for npcore by  doxygen 1.4.1