00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "precomp.h"
00030 #include "xml_tokenizer.h"
00031 #include "xml_token_load.h"
00032 #include "string_help.h"
00033 #include "string_format.h"
00034 #include "exception.h"
00035 #include "xml_tokenizer_generic.h"
00036 #include "xml_token_string.h"
00037
00038 #include <algorithm>
00039 #include <utility>
00040
00042
00043
00044 CL_XMLTokenizer::CL_XMLTokenizer()
00045 {
00046 }
00047
00048 CL_XMLTokenizer::CL_XMLTokenizer(const CL_XMLTokenizer ©) : impl(copy.impl)
00049 {
00050 }
00051
00052 CL_XMLTokenizer::CL_XMLTokenizer(CL_IODevice *input, bool delete_input) : impl(new CL_XMLTokenizer_Generic)
00053 {
00054 impl->input = input;
00055 impl->delete_input = delete_input;
00056 impl->size = input->get_size();
00057 impl->data.resize(impl->size);
00058 input->receive(&impl->data[0], (int) impl->size);
00059 impl->pos = 0;
00060 }
00061
00062 CL_XMLTokenizer::~CL_XMLTokenizer()
00063 {
00064 }
00065
00067
00068
00069 bool CL_XMLTokenizer::get_eat_whitespace() const
00070 {
00071 return impl->eat_whitespace;
00072 }
00073
00074 void CL_XMLTokenizer::set_eat_whitespace(bool enable)
00075 {
00076 impl->eat_whitespace = enable;
00077 }
00078
00080
00081
00082 CL_XMLTokenLoad CL_XMLTokenizer::next()
00083 {
00084 if (impl == 0)
00085 return CL_XMLTokenLoad();
00086
00087 if (impl->pos == impl->size)
00088 return CL_XMLTokenLoad();
00089
00090 bool is_need_escape = true;
00091
00092 if (impl->data[impl->pos] != '<')
00093 {
00094 std::string::size_type start_pos = impl->pos;
00095 std::string::size_type end_pos = impl->data.find('<', start_pos);
00096 if (end_pos == impl->data.npos) end_pos = impl->size;
00097 impl->pos = end_pos;
00098
00099 CL_XMLTokenString text(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape);
00100 if (impl->eat_whitespace)
00101 {
00102 text = trim_whitespace(text);
00103 if (text.empty())
00104 return next();
00105 }
00106
00107 CL_XMLTokenLoad token;
00108 token.set_type(CL_XMLToken::TEXT_TOKEN);
00109 token.set_value(text);
00110
00111 return token;
00112 }
00113 else
00114 {
00115 impl->pos++;
00116 if (impl->pos == impl->size)
00117 throw CL_Exception(TEXT("Premature end of XML data!"));
00118
00119
00120 bool closing = false;
00121 bool questionMark = false;
00122 bool exclamationMark = false;
00123 if (impl->data[impl->pos] == '/')
00124 closing = true;
00125 else
00126 if (impl->data[impl->pos] == '?')
00127 questionMark = true;
00128 else
00129 if (impl->data[impl->pos] == '!')
00130 exclamationMark = true;
00131
00132 if (closing || questionMark || exclamationMark)
00133 {
00134 impl->pos++;
00135 if (impl->pos == impl->size) throw CL_Exception(TEXT("Premature end of XML data!"));
00136 }
00137
00138 if (exclamationMark)
00139 {
00140 if (impl->data.compare(impl->pos, 2, "--") == 0)
00141 {
00142 std::string::size_type start_pos = impl->pos+2;
00143 std::string::size_type end_pos = impl->data.find("-->", start_pos);
00144 if (end_pos == impl->data.npos)
00145 throw CL_Exception(TEXT("Premature end of XML data!"));
00146 impl->pos = end_pos+3;
00147
00148 CL_XMLTokenLoad token;
00149 token.set_type(CL_XMLToken::COMMENT_TOKEN);
00150 token.set_variant(CL_XMLToken::SINGLE);
00151 token.set_value(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00152 return token;
00153 }
00154
00155 if (impl->data.compare(impl->pos, 7, "[CDATA[") != 0)
00156 throw CL_Exception(cl_format(TEXT("Error in XML stream at position %1"), static_cast<int>(impl->pos)));
00157 std::string::size_type start_pos = impl->pos+7;
00158 std::string::size_type end_pos = impl->data.find("]]>", start_pos);
00159 if (end_pos == impl->data.npos)
00160 throw CL_Exception(TEXT("Premature end of XML data!"));
00161 impl->pos = end_pos+3;
00162
00163 CL_XMLTokenLoad token;
00164 token.set_type(CL_XMLToken::CDATA_SECTION_TOKEN);
00165 token.set_variant(CL_XMLToken::SINGLE);
00166 token.set_value(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00167 return token;
00168 }
00169
00170
00171 std::string::size_type start_pos = impl->pos;
00172 std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t?/>", start_pos);
00173 if (end_pos == impl->data.npos)
00174 throw CL_Exception(TEXT("Premature end of XML data!"));
00175 impl->pos = end_pos;
00176
00177 CL_XMLTokenLoad token;
00178 token.set_type(questionMark ? CL_XMLToken::PROCESSING_INSTRUCTION_TOKEN : CL_XMLToken::ELEMENT_TOKEN);
00179 token.set_variant(closing ? CL_XMLToken::END : CL_XMLToken::BEGIN);
00180 token.set_name(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00181
00182
00183
00184 while (true)
00185 {
00186
00187 impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00188 if (impl->pos == impl->data.npos)
00189 throw CL_Exception(TEXT("Premature end of XML data!"));
00190
00191
00192 if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?' || impl->data[impl->pos] == '>')
00193 break;
00194
00195
00196 std::string::size_type start_pos = impl->pos;
00197 std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t=", start_pos);
00198 if (end_pos == impl->data.npos)
00199 throw CL_Exception(TEXT("Premature end of XML data!"));
00200 impl->pos = end_pos;
00201
00202 CL_XMLTokenString attributeName(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape);
00203
00204
00205 impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00206 if (impl->pos == impl->data.npos || impl->pos == impl->size-1)
00207 throw CL_Exception(TEXT("Premature end of XML data!"));
00208 if (impl->data[impl->pos++] != '=')
00209 throw CL_Exception(cl_format("XML error(s), parser confused at line %1 (tag=%2, attributeName=%3)", impl->get_line_number(), token.get_name(), attributeName.to_string()));
00210
00211
00212 impl->pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
00213 if (impl->pos == impl->data.npos)
00214 throw CL_Exception(TEXT("Premature end of XML data!"));
00215
00216
00217 char const * first_of = " \r\n\t";
00218 if (impl->data[impl->pos] == '"')
00219 {
00220 first_of = "\"";
00221 impl->pos++;
00222 if (impl->pos == impl->size)
00223 throw CL_Exception(TEXT("Premature end of XML data!"));
00224 }
00225 else
00226 if (impl->data[impl->pos] == '\'')
00227 {
00228 first_of = "'";
00229 impl->pos++;
00230 if (impl->pos == impl->size)
00231 throw CL_Exception(TEXT("Premature end of XML data!"));
00232 }
00233
00234 start_pos = impl->pos;
00235 end_pos = impl->data.find_first_of(first_of, start_pos);
00236 if (end_pos == impl->data.npos)
00237 throw CL_Exception(TEXT("Premature end of XML data!"));
00238
00239 CL_XMLTokenString attributeValue(CL_XMLTokenString(&impl->data[start_pos], int(end_pos-start_pos), is_need_escape));
00240
00241 impl->pos = end_pos + 1;
00242 if (impl->pos == impl->size)
00243 throw CL_Exception(TEXT("Premature end of XML data!"));
00244
00245
00246 token.set_attribute(attributeName, attributeValue);
00247 }
00248
00249
00250 if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?')
00251 {
00252 token.set_variant(CL_XMLToken::SINGLE);
00253 impl->pos++;
00254 if (impl->pos == impl->size)
00255 throw CL_Exception(TEXT("Premature end of XML data!"));
00256 }
00257
00258
00259 if (impl->data[impl->pos] != '>')
00260 throw CL_Exception(cl_format("Error in XML stream, line %1 (expected end of tag)", impl->get_line_number()));
00261 impl->pos++;
00262
00263 return token;
00264 }
00265 }
00266
00268