Orocos Real-Time Toolkit: rtt/marsh/tinyxmlparser.cpp Source File

00001 /***************************************************************************
00002   tag: Peter Soetens  do nov 2 13:06:01 CET 2006  tinyxmlparser.cpp
00003 
00004                         tinyxmlparser.cpp -  description
00005                            -------------------
00006     begin                : do november 02 2006
00007     copyright            : (C) 2006 Peter Soetens
00008     email                : peter.soetens@gmail.com
00009 
00010  ***************************************************************************
00011  *   This library is free software; you can redistribute it and/or         *
00012  *   modify it under the terms of the GNU General Public                   *
00013  *   License as published by the Free Software Foundation;                 *
00014  *   version 2 of the License.                                             *
00015  *                                                                         *
00016  *   As a special exception, you may use this file as part of a free       *
00017  *   software library without restriction.  Specifically, if other files   *
00018  *   instantiate templates or use macros or inline functions from this     *
00019  *   file, or you compile this file and link it with other files to        *
00020  *   produce an executable, this file does not by itself cause the         *
00021  *   resulting executable to be covered by the GNU General Public          *
00022  *   License.  This exception does not however invalidate any other        *
00023  *   reasons why the executable file might be covered by the GNU General   *
00024  *   Public License.                                                       *
00025  *                                                                         *
00026  *   This library is distributed in the hope that it will be useful,       *
00027  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00028  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
00029  *   Lesser General Public License for more details.                       *
00030  *                                                                         *
00031  *   You should have received a copy of the GNU General Public             *
00032  *   License along with this library; if not, write to the Free Software   *
00033  *   Foundation, Inc., 59 Temple Place,                                    *
00034  *   Suite 330, Boston, MA  02111-1307  USA                                *
00035  *                                                                         *
00036  ***************************************************************************/
00037 
00038 
00039 /*
00040 www.sourceforge.net/projects/tinyxml
00041 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
00042 
00043 This software is provided 'as-is', without any express or implied
00044 warranty. In no event will the authors be held liable for any
00045 damages arising from the use of this software.
00046 
00047 Permission is granted to anyone to use this software for any
00048 purpose, including commercial applications, and to alter it and
00049 redistribute it freely, subject to the following restrictions:
00050 
00051 1. The origin of this software must not be misrepresented; you must
00052 not claim that you wrote the original software. If you use this
00053 software in a product, an acknowledgment in the product documentation
00054 would be appreciated but is not required.
00055 
00056 2. Altered source versions must be plainly marked as such, and
00057 must not be misrepresented as being the original software.
00058 
00059 3. This notice may not be removed or altered from any source
00060 distribution.
00061 */
00062 
00063 #include "tinyxml.h"
00064 #include <ctype.h>
00065 #include <stddef.h>
00066 
00067 //#define DEBUG_PARSER
00068 #if defined( DEBUG_PARSER )
00069 #   if defined( DEBUG ) && defined( _MSC_VER )
00070 #       include <windows.h>
00071 #       define TIXML_LOG OutputDebugString
00072 #   else
00073 #       define TIXML_LOG printf
00074 #   endif
00075 #endif
00076 
00077 namespace RTT { namespace marsh {
00078 
00079 // Note tha "PutString" hardcodes the same list. This
00080 // is less flexible than it appears. Changing the entries
00081 // or order will break putstring.
00082 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
00083 {
00084     { "&amp;",  5, '&' },
00085     { "&lt;",   4, '<' },
00086     { "&gt;",   4, '>' },
00087     { "&quot;", 6, '\"' },
00088     { "&apos;", 6, '\'' }
00089 };
00090 
00091 // Bunch of unicode info at:
00092 //      http://www.unicode.org/faq/utf_bom.html
00093 // Including the basic of this table, which determines the #bytes in the
00094 // sequence from the lead byte. 1 placed for invalid sequences --
00095 // although the result will be junk, pass it through as much as possible.
00096 // Beware of the non-characters in UTF-8:
00097 //              ef bb bf (Microsoft "lead bytes")
00098 //              ef bf be
00099 //              ef bf bf
00100 
00101 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
00102 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
00103 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
00104 
00105 const int TiXmlBase::utf8ByteTable[256] =
00106 {
00107     //  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
00108         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x00
00109         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x10
00110         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x20
00111         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x30
00112         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x40
00113         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x50
00114         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x60
00115         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x70 End of ASCII range
00116         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x80 0x80 to 0xc1 invalid
00117         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0x90
00118         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xa0
00119         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  // 0xb0
00120         1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xc0 0xc2 to 0xdf 2 byte
00121         2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  // 0xd0
00122         3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  // 0xe0 0xe0 to 0xef 3 byte
00123         4,  4,  4,  4,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1   // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
00124 };
00125 
00126 
00127 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
00128 {
00129     const unsigned long BYTE_MASK = 0xBF;
00130     const unsigned long BYTE_MARK = 0x80;
00131     const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
00132 
00133     if (input < 0x80)
00134         *length = 1;
00135     else if ( input < 0x800 )
00136         *length = 2;
00137     else if ( input < 0x10000 )
00138         *length = 3;
00139     else if ( input < 0x200000 )
00140         *length = 4;
00141     else
00142         { *length = 0; return; }    // This code won't covert this correctly anyway.
00143 
00144     output += *length;
00145 
00146     // Scary scary fall throughs.
00147     switch (*length)
00148     {
00149         case 4:
00150             --output;
00151             *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00152             input >>= 6;
00153         case 3:
00154             --output;
00155             *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00156             input >>= 6;
00157         case 2:
00158             --output;
00159             *output = (char)((input | BYTE_MARK) & BYTE_MASK);
00160             input >>= 6;
00161         case 1:
00162             --output;
00163             *output = (char)(input | FIRST_BYTE_MARK[*length]);
00164     }
00165 }
00166 
00167 
00168 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00169 {
00170     // This will only work for low-ascii, everything else is assumed to be a valid
00171     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00172     // to figure out alhabetical vs. not across encoding. So take a very
00173     // conservative approach.
00174 
00175 //  if ( encoding == TIXML_ENCODING_UTF8 )
00176 //  {
00177         if ( anyByte < 127 )
00178             return isalpha( anyByte );
00179         else
00180             return 1;   // What else to do? The unicode set is huge...get the english ones right.
00181 //  }
00182 //  else
00183 //  {
00184 //      return isalpha( anyByte );
00185 //  }
00186 }
00187 
00188 
00189 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
00190 {
00191     // This will only work for low-ascii, everything else is assumed to be a valid
00192     // letter. I'm not sure this is the best approach, but it is quite tricky trying
00193     // to figure out alhabetical vs. not across encoding. So take a very
00194     // conservative approach.
00195 
00196 //  if ( encoding == TIXML_ENCODING_UTF8 )
00197 //  {
00198         if ( anyByte < 127 )
00199             return isalnum( anyByte );
00200         else
00201             return 1;   // What else to do? The unicode set is huge...get the english ones right.
00202 //  }
00203 //  else
00204 //  {
00205 //      return isalnum( anyByte );
00206 //  }
00207 }
00208 
00209 
00210 class TiXmlParsingData
00211 {
00212     friend class TiXmlDocument;
00213   public:
00214     void Stamp( const char* now, TiXmlEncoding encoding );
00215 
00216     const TiXmlCursor& Cursor() { return cursor; }
00217 
00218   private:
00219     // Only used by the document!
00220     TiXmlParsingData( const char* start, int _tabsize, int row, int col )
00221     {
00222         assert( start );
00223         stamp = start;
00224         tabsize = _tabsize;
00225         cursor.row = row;
00226         cursor.col = col;
00227     }
00228 
00229     TiXmlCursor     cursor;
00230     const char*     stamp;
00231     int             tabsize;
00232 };
00233 
00234 
00235 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
00236 {
00237     assert( now );
00238 
00239     // Do nothing if the tabsize is 0.
00240     if ( tabsize < 1 )
00241     {
00242         return;
00243     }
00244 
00245     // Get the current row, column.
00246     int row = cursor.row;
00247     int col = cursor.col;
00248     const char* p = stamp;
00249     assert( p );
00250 
00251     while ( p < now )
00252     {
00253         // Treat p as unsigned, so we have a happy compiler.
00254         const unsigned char* pU = (const unsigned char*)p;
00255 
00256         // Code contributed by Fletcher Dunn: (modified by lee)
00257         switch (*pU) {
00258             case 0:
00259                 // We *should* never get here, but in case we do, don't
00260                 // advance past the terminating null character, ever
00261                 return;
00262 
00263             case '\r':
00264                 // bump down to the next line
00265                 ++row;
00266                 col = 0;
00267                 // Eat the character
00268                 ++p;
00269 
00270                 // Check for \r\n sequence, and treat this as a single character
00271                 if (*p == '\n') {
00272                     ++p;
00273                 }
00274                 break;
00275 
00276             case '\n':
00277                 // bump down to the next line
00278                 ++row;
00279                 col = 0;
00280 
00281                 // Eat the character
00282                 ++p;
00283 
00284                 // Check for \n\r sequence, and treat this as a single
00285                 // character.  (Yes, this bizarre thing does occur still
00286                 // on some arcane platforms...)
00287                 if (*p == '\r') {
00288                     ++p;
00289                 }
00290                 break;
00291 
00292             case '\t':
00293                 // Eat the character
00294                 ++p;
00295 
00296                 // Skip to next tab stop
00297                 col = (col / tabsize + 1) * tabsize;
00298                 break;
00299 
00300             case TIXML_UTF_LEAD_0:
00301                 if ( encoding == TIXML_ENCODING_UTF8 )
00302                 {
00303                     if ( *(p+1) && *(p+2) )
00304                     {
00305                         // In these cases, don't advance the column. These are
00306                         // 0-width spaces.
00307                         if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
00308                             p += 3;
00309                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
00310                             p += 3;
00311                         else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
00312                             p += 3;
00313                         else
00314                             { p +=3; ++col; }   // A normal character.
00315                     }
00316                 }
00317                 else
00318                 {
00319                     ++p;
00320                     ++col;
00321                 }
00322                 break;
00323 
00324             default:
00325                 if ( encoding == TIXML_ENCODING_UTF8 )
00326                 {
00327                     // Eat the 1 to 4 byte utf8 character.
00328                     int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
00329                     if ( step == 0 )
00330                         step = 1;       // Error case from bad encoding, but handle gracefully.
00331                     p += step;
00332 
00333                     // Just advance one column, of course.
00334                     ++col;
00335                 }
00336                 else
00337                 {
00338                     ++p;
00339                     ++col;
00340                 }
00341                 break;
00342         }
00343     }
00344     cursor.row = row;
00345     cursor.col = col;
00346     assert( cursor.row >= -1 );
00347     assert( cursor.col >= -1 );
00348     stamp = p;
00349     assert( stamp );
00350 }
00351 
00352 
00353 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
00354 {
00355     if ( !p || !*p )
00356     {
00357         return 0;
00358     }
00359     if ( encoding == TIXML_ENCODING_UTF8 )
00360     {
00361         while ( *p )
00362         {
00363             const unsigned char* pU = (const unsigned char*)p;
00364 
00365             // Skip the stupid Microsoft UTF-8 Byte order marks
00366             if (    *(pU+0)==TIXML_UTF_LEAD_0
00367                  && *(pU+1)==TIXML_UTF_LEAD_1
00368                  && *(pU+2)==TIXML_UTF_LEAD_2 )
00369             {
00370                 p += 3;
00371                 continue;
00372             }
00373             else if(*(pU+0)==TIXML_UTF_LEAD_0
00374                  && *(pU+1)==0xbfU
00375                  && *(pU+2)==0xbeU )
00376             {
00377                 p += 3;
00378                 continue;
00379             }
00380             else if(*(pU+0)==TIXML_UTF_LEAD_0
00381                  && *(pU+1)==0xbfU
00382                  && *(pU+2)==0xbfU )
00383             {
00384                 p += 3;
00385                 continue;
00386             }
00387 
00388             if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )        // Still using old rules for white space.
00389                 ++p;
00390             else
00391                 break;
00392         }
00393     }
00394     else
00395     {
00396         while ( *p && (IsWhiteSpace( *p ) || *p == '\n' || *p =='\r') )
00397             ++p;
00398     }
00399 
00400     return p;
00401 }
00402 
00403 #ifdef TIXML_USE_STL
00404 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
00405 {
00406     for( ;; )
00407     {
00408         if ( !in->good() ) return false;
00409 
00410         int c = in->peek();
00411         // At this scope, we can't get to a document. So fail silently.
00412         if ( !IsWhiteSpace( c ) || c <= 0 )
00413             return true;
00414 
00415         *tag += (char) in->get();
00416     }
00417 }
00418 
00419 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
00420 {
00421     //assert( character > 0 && character < 128 );   // else it won't work in utf-8
00422     while ( in->good() )
00423     {
00424         int c = in->peek();
00425         if ( c == character )
00426             return true;
00427         if ( c <= 0 )       // Silent failure: can't get document at this scope
00428             return false;
00429 
00430         in->get();
00431         *tag += (char) c;
00432     }
00433     return false;
00434 }
00435 #endif
00436 
00437 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
00438 {
00439     *name = "";
00440     assert( p );
00441 
00442     // Names start with letters or underscores.
00443     // Of course, in unicode, tinyxml has no idea what a letter *is*. The
00444     // algorithm is generous.
00445     //
00446     // After that, they can be letters, underscores, numbers,
00447     // hyphens, or colons. (Colons are valid ony for namespaces,
00448     // but tinyxml can't tell namespaces from names.)
00449     if (    p && *p
00450          && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
00451     {
00452         while(      p && *p
00453                 &&  (       IsAlphaNum( (unsigned char ) *p, encoding )
00454                          || *p == '_'
00455                          || *p == '-'
00456                          || *p == '.'
00457                          || *p == ':' ) )
00458         {
00459             (*name) += *p;
00460             ++p;
00461         }
00462         return p;
00463     }
00464     return 0;
00465 }
00466 
00467 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
00468 {
00469     // Presume an entity, and pull it out.
00470     TIXML_STRING ent;
00471     int i;
00472     *length = 0;
00473 
00474     if ( *(p+1) && *(p+1) == '#' && *(p+2) )
00475     {
00476         unsigned long ucs = 0;
00477         ptrdiff_t delta = 0;
00478         unsigned mult = 1;
00479 
00480         if ( *(p+2) == 'x' )
00481         {
00482             // Hexadecimal.
00483             if ( !*(p+3) ) return 0;
00484 
00485             const char* q = p+3;
00486             q = strchr( q, ';' );
00487 
00488             if ( !q || !*q ) return 0;
00489 
00490             delta = q-p;
00491             --q;
00492 
00493             while ( *q != 'x' )
00494             {
00495                 if ( *q >= '0' && *q <= '9' )
00496                     ucs += mult * (*q - '0');
00497                 else if ( *q >= 'a' && *q <= 'f' )
00498                     ucs += mult * (*q - 'a' + 10);
00499                 else if ( *q >= 'A' && *q <= 'F' )
00500                     ucs += mult * (*q - 'A' + 10 );
00501                 else
00502                     return 0;
00503                 mult *= 16;
00504                 --q;
00505             }
00506         }
00507         else
00508         {
00509             // Decimal.
00510             if ( !*(p+2) ) return 0;
00511 
00512             const char* q = p+2;
00513             q = strchr( q, ';' );
00514 
00515             if ( !q || !*q ) return 0;
00516 
00517             delta = q-p;
00518             --q;
00519 
00520             while ( *q != '#' )
00521             {
00522                 if ( *q >= '0' && *q <= '9' )
00523                     ucs += mult * (*q - '0');
00524                 else
00525                     return 0;
00526                 mult *= 10;
00527                 --q;
00528             }
00529         }
00530         if ( encoding == TIXML_ENCODING_UTF8 )
00531         {
00532             // convert the UCS to UTF-8
00533             ConvertUTF32ToUTF8( ucs, value, length );
00534         }
00535         else
00536         {
00537             *value = (char)ucs;
00538             *length = 1;
00539         }
00540         return p + delta + 1;
00541     }
00542 
00543     // Now try to match it.
00544     for( i=0; i<NUM_ENTITY; ++i )
00545     {
00546         if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
00547         {
00548             assert( strlen( entity[i].str ) == entity[i].strLength );
00549             *value = entity[i].chr;
00550             *length = 1;
00551             return ( p + entity[i].strLength );
00552         }
00553     }
00554 
00555     // So it wasn't an entity, its unrecognized, or something like that.
00556     *value = *p;    // Don't put back the last one, since we return it!
00557     return p+1;
00558 }
00559 
00560 
00561 bool TiXmlBase::StringEqual( const char* p,
00562                              const char* tag,
00563                              bool ignoreCase,
00564                              TiXmlEncoding encoding )
00565 {
00566     assert( p );
00567     assert( tag );
00568     if ( !p || !*p )
00569     {
00570         assert( 0 );
00571         return false;
00572     }
00573 
00574     const char* q = p;
00575 
00576     if ( ignoreCase )
00577     {
00578         while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
00579         {
00580             ++q;
00581             ++tag;
00582         }
00583 
00584         if ( *tag == 0 )
00585             return true;
00586     }
00587     else
00588     {
00589         while ( *q && *tag && *q == *tag )
00590         {
00591             ++q;
00592             ++tag;
00593         }
00594 
00595         if ( *tag == 0 )        // Have we found the end of the tag, and everything equal?
00596             return true;
00597     }
00598     return false;
00599 }
00600 
00601 const char* TiXmlBase::ReadText(    const char* p,
00602                                     TIXML_STRING * text,
00603                                     bool trimWhiteSpace,
00604                                     const char* endTag,
00605                                     bool caseInsensitive,
00606                                     TiXmlEncoding encoding )
00607 {
00608     *text = "";
00609     if (    !trimWhiteSpace         // certain tags always keep whitespace
00610          || !condenseWhiteSpace )   // if true, whitespace is always kept
00611     {
00612         // Keep all the white space.
00613         while (    p && *p
00614                 && !StringEqual( p, endTag, caseInsensitive, encoding )
00615               )
00616         {
00617             int len;
00618             char cArr[4] = { 0, 0, 0, 0 };
00619             p = GetChar( p, cArr, &len, encoding );
00620             text->append( cArr, len );
00621         }
00622     }
00623     else
00624     {
00625         bool whitespace = false;
00626 
00627         // Remove leading white space:
00628         p = SkipWhiteSpace( p, encoding );
00629         while (    p && *p
00630                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
00631         {
00632             if ( *p == '\r' || *p == '\n' )
00633             {
00634                 whitespace = true;
00635                 ++p;
00636             }
00637             else if ( IsWhiteSpace( *p ) )
00638             {
00639                 whitespace = true;
00640                 ++p;
00641             }
00642             else
00643             {
00644                 // If we've found whitespace, add it before the
00645                 // new character. Any whitespace just becomes a space.
00646                 if ( whitespace )
00647                 {
00648                     (*text) += ' ';
00649                     whitespace = false;
00650                 }
00651                 int len;
00652                 char cArr[4] = { 0, 0, 0, 0 };
00653                 p = GetChar( p, cArr, &len, encoding );
00654                 if ( len == 1 )
00655                     (*text) += cArr[0]; // more efficient
00656                 else
00657                     text->append( cArr, len );
00658             }
00659         }
00660     }
00661     return p + strlen( endTag );
00662 }
00663 
00664 #ifdef TIXML_USE_STL
00665 
00666 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
00667 {
00668     // The basic issue with a document is that we don't know what we're
00669     // streaming. Read something presumed to be a tag (and hope), then
00670     // identify it, and call the appropriate stream method on the tag.
00671     //
00672     // This "pre-streaming" will never read the closing ">" so the
00673     // sub-tag can orient itself.
00674 
00675     if ( !StreamTo( in, '<', tag ) )
00676     {
00677         SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00678         return;
00679     }
00680 
00681     while ( in->good() )
00682     {
00683         int tagIndex = (int) tag->length();
00684         while ( in->good() && in->peek() != '>' )
00685         {
00686             int c = in->get();
00687             if ( c <= 0 )
00688             {
00689                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
00690                 break;
00691             }
00692             (*tag) += (char) c;
00693         }
00694 
00695         if ( in->good() )
00696         {
00697             // We now have something we presume to be a node of
00698             // some sort. Identify it, and call the node to
00699             // continue streaming.
00700             TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
00701 
00702             if ( node )
00703             {
00704                 node->StreamIn( in, tag );
00705                 bool isElement = node->ToElement() != 0;
00706                 delete node;
00707                 node = 0;
00708 
00709                 // If this is the root element, we're done. Parsing will be
00710                 // done by the >> operator.
00711                 if ( isElement )
00712                 {
00713                     return;
00714                 }
00715             }
00716             else
00717             {
00718                 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00719                 return;
00720             }
00721         }
00722     }
00723     // We should have returned sooner.
00724     SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
00725 }
00726 
00727 #endif
00728 
00729 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
00730 {
00731     ClearError();
00732 
00733     // Parse away, at the document level. Since a document
00734     // contains nothing but other tags, most of what happens
00735     // here is skipping white space.
00736     if ( !p || !*p )
00737     {
00738         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00739         return 0;
00740     }
00741 
00742     // Note that, for a document, this needs to come
00743     // before the while space skip, so that parsing
00744     // starts from the pointer we are given.
00745     location.Clear();
00746     if ( prevData )
00747     {
00748         location.row = prevData->cursor.row;
00749         location.col = prevData->cursor.col;
00750     }
00751     else
00752     {
00753         location.row = 0;
00754         location.col = 0;
00755     }
00756     TiXmlParsingData data( p, TabSize(), location.row, location.col );
00757     location = data.Cursor();
00758 
00759     if ( encoding == TIXML_ENCODING_UNKNOWN )
00760     {
00761         // Check for the Microsoft UTF-8 lead bytes.
00762         const unsigned char* pU = (const unsigned char*)p;
00763         if (    *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
00764              && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
00765              && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
00766         {
00767             encoding = TIXML_ENCODING_UTF8;
00768             useMicrosoftBOM = true;
00769         }
00770     }
00771 
00772     p = SkipWhiteSpace( p, encoding );
00773     if ( !p )
00774     {
00775         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
00776         return 0;
00777     }
00778 
00779     while ( p && *p )
00780     {
00781         TiXmlNode* node = Identify( p, encoding );
00782         if ( node )
00783         {
00784             p = node->Parse( p, &data, encoding );
00785             LinkEndChild( node );
00786         }
00787         else
00788         {
00789             break;
00790         }
00791 
00792         // Did we get encoding info?
00793         if (    encoding == TIXML_ENCODING_UNKNOWN
00794              && node->ToDeclaration() )
00795         {
00796             TiXmlDeclaration* dec = node->ToDeclaration();
00797             const char* enc = dec->Encoding();
00798             assert( enc );
00799 
00800             if ( *enc == 0 )
00801                 encoding = TIXML_ENCODING_UTF8;
00802             else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
00803                 encoding = TIXML_ENCODING_UTF8;
00804             else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
00805                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
00806             else
00807                 encoding = TIXML_ENCODING_LEGACY;
00808         }
00809 
00810         p = SkipWhiteSpace( p, encoding );
00811     }
00812 
00813     // Was this empty?
00814     if ( !firstChild ) {
00815         SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
00816         return 0;
00817     }
00818 
00819     // All is well.
00820     return p;
00821 }
00822 
00823 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
00824 {
00825     // The first error in a chain is more accurate - don't set again!
00826     if ( error )
00827         return;
00828 
00829     assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
00830     error   = true;
00831     errorId = err;
00832     errorDesc = errorString[ errorId ];
00833 
00834     errorLocation.Clear();
00835     if ( pError && data )
00836     {
00837         data->Stamp( pError, encoding );
00838         errorLocation = data->Cursor();
00839     }
00840 }
00841 
00842 
00843 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
00844 {
00845     TiXmlNode* returnNode = 0;
00846 
00847     p = SkipWhiteSpace( p, encoding );
00848     if( !p || !*p || *p != '<' )
00849     {
00850         return 0;
00851     }
00852 
00853     TiXmlDocument* doc = GetDocument();
00854     p = SkipWhiteSpace( p, encoding );
00855 
00856     if ( !p || !*p )
00857     {
00858         return 0;
00859     }
00860 
00861     // What is this thing?
00862     // - Elements start with a letter or underscore, but xml is reserved.
00863     // - Comments: <!--
00864     // - Decleration: <?xml
00865     // - Everthing else is unknown to tinyxml.
00866     //
00867 
00868     const char* xmlHeader = { "<?xml" };
00869     const char* commentHeader = { "<!--" };
00870     const char* dtdHeader = { "<!" };
00871     const char* cdataHeader = { "<![CDATA[" };
00872 
00873     if ( StringEqual( p, xmlHeader, true, encoding ) )
00874     {
00875         #ifdef DEBUG_PARSER
00876             TIXML_LOG( "XML parsing Declaration\n" );
00877         #endif
00878         returnNode = new TiXmlDeclaration();
00879     }
00880     else if ( StringEqual( p, commentHeader, false, encoding ) )
00881     {
00882         #ifdef DEBUG_PARSER
00883             TIXML_LOG( "XML parsing Comment\n" );
00884         #endif
00885         returnNode = new TiXmlComment();
00886     }
00887     else if ( StringEqual( p, cdataHeader, false, encoding ) )
00888     {
00889         #ifdef DEBUG_PARSER
00890             TIXML_LOG( "XML parsing CDATA\n" );
00891         #endif
00892         TiXmlText* text = new TiXmlText( "" );
00893         text->SetCDATA( true );
00894         returnNode = text;
00895     }
00896     else if ( StringEqual( p, dtdHeader, false, encoding ) )
00897     {
00898         #ifdef DEBUG_PARSER
00899             TIXML_LOG( "XML parsing Unknown(1)\n" );
00900         #endif
00901         returnNode = new TiXmlUnknown();
00902     }
00903     else if (    IsAlpha( *(p+1), encoding )
00904               || *(p+1) == '_' )
00905     {
00906         #ifdef DEBUG_PARSER
00907             TIXML_LOG( "XML parsing Element\n" );
00908         #endif
00909         returnNode = new TiXmlElement( "" );
00910     }
00911     else
00912     {
00913         #ifdef DEBUG_PARSER
00914             TIXML_LOG( "XML parsing Unknown(2)\n" );
00915         #endif
00916         returnNode = new TiXmlUnknown();
00917     }
00918 
00919     if ( returnNode )
00920     {
00921         // Set the parent, so it can report errors
00922         returnNode->parent = this;
00923     }
00924     else
00925     {
00926         if ( doc )
00927             doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
00928     }
00929     return returnNode;
00930 }
00931 
00932 #ifdef TIXML_USE_STL
00933 
/**
 * Stream a complete element from 'in', appending every character consumed
 * to 'tag'.
 *
 * The function first copies characters up to and including the '>' that
 * closes the opening tag. If that tag is self-closing ("/>") it is done.
 * Otherwise it keeps consuming text, child nodes and finally the closing
 * tag of this element. Nothing is parsed here; the characters are only
 * accumulated in 'tag'.
 *
 * @param in   Input stream positioned inside this element's opening tag.
 * @param tag  Buffer that already holds the part of the element read by
 *             the caller; it is appended to.
 *
 * On a character value <= 0 the owning document (if any) is flagged with
 * TIXML_ERROR_EMBEDDED_NULL and the function returns early.
 */
void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
    // We're called with some amount of pre-parsing. That is, some of "this"
    // element is in "tag". Go ahead and stream to the closing ">"
    while( in->good() )
    {
        int c = in->get();
        if ( c <= 0 )
        {
            TiXmlDocument* document = GetDocument();
            if ( document )
                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
            return;
        }
        (*tag) += (char) c ;

        if ( c == '>' )
            break;
    }

    // Too short to hold even a minimal tag; nothing more to stream.
    if ( tag->length() < 3 ) return;

    // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
    // If not, identify and stream.

    if (    tag->at( tag->length() - 1 ) == '>'
         && tag->at( tag->length() - 2 ) == '/' )
    {
        // All good!
        return;
    }
    else if ( tag->at( tag->length() - 1 ) == '>' )
    {
        // There is more. Could be:
        //      text
        //      closing tag
        //      another node.
        for ( ;; )
        {
            StreamWhiteSpace( in, tag );

            // Do we have text?
            if ( in->good() && in->peek() != '<' )
            {
                // Yep, text.
                // The temporary text node is only used to drive the
                // streaming; the characters end up in 'tag'.
                TiXmlText text( "" );
                text.StreamIn( in, tag );

                // What follows text is a closing tag or another node.
                // Go around again and figure it out.
                continue;
            }

            // We now have either a closing tag...or another node.
            // We should be at a "<", regardless.
            if ( !in->good() ) return;
            assert( in->peek() == '<' );
            // Remember where the upcoming tag starts inside 'tag' so it can
            // be handed to Identify() below.
            int tagIndex = (int) tag->length();

            bool closingTag = false;
            bool firstCharFound = false;

            // Copy characters up to, but not including, the next '>'.
            // The '>' is only peeked here and stays in the stream.
            // NOTE(review): the scan stops at the first '>' it sees, even
            // if that '>' is inside a comment or CDATA section -- confirm
            // the child StreamIn implementations cope with that.
            for( ;; )
            {
                if ( !in->good() )
                    return;

                int c = in->peek();
                if ( c <= 0 )
                {
                    TiXmlDocument* document = GetDocument();
                    if ( document )
                        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                    return;
                }

                if ( c == '>' )
                    break;

                *tag += (char) c;
                in->get();

                // The first character after '<' that is not white space
                // decides whether this is a closing tag ("</...").
                if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
                {
                    firstCharFound = true;
                    if ( c == '/' )
                        closingTag = true;
                }
            }
            // If it was a closing tag, then read in the closing '>' to clean up the input stream.
            // If it was not, the streaming will be done by the tag.
            if ( closingTag )
            {
                if ( !in->good() )
                    return;

                int c = in->get();
                if ( c <= 0 )
                {
                    TiXmlDocument* document = GetDocument();
                    if ( document )
                        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                    return;
                }
                assert( c == '>' );
                *tag += (char) c;

                // We are done, once we've found our closing tag.
                return;
            }
            else
            {
                // If not a closing tag, id it, and stream.
                const char* tagloc = tag->c_str() + tagIndex;
                TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
                if ( !node )
                    return;
                // The node is a throw-away: it only selects the right
                // StreamIn implementation and is deleted right after.
                node->StreamIn( in, tag );
                delete node;
                node = 0;

                // No return: go around from the beginning: text, closing tag, or node.
            }
        }
    }
}
01060 #endif
01061 
01062 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01063 {
01064     p = SkipWhiteSpace( p, encoding );
01065     TiXmlDocument* document = GetDocument();
01066 
01067     if ( !p || !*p )
01068     {
01069         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
01070         return 0;
01071     }
01072 
01073     if ( data )
01074     {
01075         data->Stamp( p, encoding );
01076         location = data->Cursor();
01077     }
01078 
01079     if ( *p != '<' )
01080     {
01081         if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
01082         return 0;
01083     }
01084 
01085     p = SkipWhiteSpace( p+1, encoding );
01086 
01087     // Read the name.
01088     const char* pErr = p;
01089 
01090     p = ReadName( p, &value, encoding );
01091     if ( !p || !*p )
01092     {
01093         if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
01094         return 0;
01095     }
01096 
01097     TIXML_STRING endTag ("</");
01098     endTag += value;
01099     endTag += ">";
01100 
01101     // Check for and read attributes. Also look for an empty
01102     // tag or an end tag.
01103     while ( p && *p )
01104     {
01105         pErr = p;
01106         p = SkipWhiteSpace( p, encoding );
01107         if ( !p || !*p )
01108         {
01109             if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01110             return 0;
01111         }
01112         if ( *p == '/' )
01113         {
01114             ++p;
01115             // Empty tag.
01116             if ( *p  != '>' )
01117             {
01118                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
01119                 return 0;
01120             }
01121             return (p+1);
01122         }
01123         else if ( *p == '>' )
01124         {
01125             // Done with attributes (if there were any.)
01126             // Read the value -- which can include other
01127             // elements -- read the end tag, and return.
01128             ++p;
01129             p = ReadValue( p, data, encoding );     // Note this is an Element method, and will set the error if one happens.
01130             if ( !p || !*p )
01131                 return 0;
01132 
01133             // We should find the end tag now
01134             if ( StringEqual( p, endTag.c_str(), false, encoding ) )
01135             {
01136                 p += endTag.length();
01137                 return p;
01138             }
01139             else
01140             {
01141                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
01142                 return 0;
01143             }
01144         }
01145         else
01146         {
01147             // Try to read an attribute:
01148             TiXmlAttribute* attrib = new TiXmlAttribute();
01149             if ( !attrib )
01150             {
01151                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
01152                 return 0;
01153             }
01154 
01155             attrib->SetDocument( document );
01156             const char* pErr = p;
01157             p = attrib->Parse( p, data, encoding );
01158 
01159             if ( !p || !*p )
01160             {
01161                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
01162                 delete attrib;
01163                 return 0;
01164             }
01165 
01166             // Handle the strange case of double attributes:
01167             TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
01168             if ( node )
01169             {
01170                 node->SetValue( attrib->Value() );
01171                 delete attrib;
01172                 return 0;
01173             }
01174 
01175             attributeSet.Add( attrib );
01176         }
01177     }
01178     return p;
01179 }
01180 
01181 
01182 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01183 {
01184     TiXmlDocument* document = GetDocument();
01185 
01186     // Read in text and elements in any order.
01187     const char* pWithWhiteSpace = p;
01188     p = SkipWhiteSpace( p, encoding );
01189 
01190     while ( p && *p )
01191     {
01192         if ( *p != '<' )
01193         {
01194             // Take what we have, make a text element.
01195             TiXmlText* textNode = new TiXmlText( "" );
01196 
01197             if ( !textNode )
01198             {
01199                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
01200                     return 0;
01201             }
01202 
01203             if ( TiXmlBase::IsWhiteSpaceCondensed() )
01204             {
01205                 p = textNode->Parse( p, data, encoding );
01206             }
01207             else
01208             {
01209                 // Special case: we want to keep the white space
01210                 // so that leading spaces aren't removed.
01211                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
01212             }
01213 
01214             if ( !textNode->Blank() )
01215                 LinkEndChild( textNode );
01216             else
01217                 delete textNode;
01218         }
01219         else
01220         {
01221             // We hit a '<'
01222             // Have we hit a new element or an end tag? This could also be
01223             // a TiXmlText in the "CDATA" style.
01224             if ( StringEqual( p, "</", false, encoding ) )
01225             {
01226                 return p;
01227             }
01228             else
01229             {
01230                 TiXmlNode* node = Identify( p, encoding );
01231                 if ( node )
01232                 {
01233                     p = node->Parse( p, data, encoding );
01234                     LinkEndChild( node );
01235                 }
01236                 else
01237                 {
01238                     return 0;
01239                 }
01240             }
01241         }
01242         pWithWhiteSpace = p;
01243         p = SkipWhiteSpace( p, encoding );
01244     }
01245 
01246     if ( !p )
01247     {
01248         if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
01249     }
01250     return p;
01251 }
01252 
01253 
01254 #ifdef TIXML_USE_STL
01255 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01256 {
01257     while ( in->good() )
01258     {
01259         int c = in->get();
01260         if ( c <= 0 )
01261         {
01262             TiXmlDocument* document = GetDocument();
01263             if ( document )
01264                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01265             return;
01266         }
01267         (*tag) += (char) c;
01268 
01269         if ( c == '>' )
01270         {
01271             // All is well.
01272             return;
01273         }
01274     }
01275 }
01276 #endif
01277 
01278 
01279 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01280 {
01281     TiXmlDocument* document = GetDocument();
01282     p = SkipWhiteSpace( p, encoding );
01283 
01284     if ( data )
01285     {
01286         data->Stamp( p, encoding );
01287         location = data->Cursor();
01288     }
01289     if ( !p || !*p || *p != '<' )
01290     {
01291         if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
01292         return 0;
01293     }
01294     ++p;
01295     value = "";
01296 
01297     while ( p && *p && *p != '>' )
01298     {
01299         value += *p;
01300         ++p;
01301     }
01302 
01303     if ( !p )
01304     {
01305         if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
01306     }
01307     if ( *p == '>' )
01308         return p+1;
01309     return p;
01310 }
01311 
01312 #ifdef TIXML_USE_STL
01313 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01314 {
01315     while ( in->good() )
01316     {
01317         int c = in->get();
01318         if ( c <= 0 )
01319         {
01320             TiXmlDocument* document = GetDocument();
01321             if ( document )
01322                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01323             return;
01324         }
01325 
01326         (*tag) += (char) c;
01327 
01328         if ( c == '>'
01329              && tag->at( tag->length() - 2 ) == '-'
01330              && tag->at( tag->length() - 3 ) == '-' )
01331         {
01332             // All is well.
01333             return;
01334         }
01335     }
01336 }
01337 #endif
01338 
01339 
01340 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01341 {
01342     TiXmlDocument* document = GetDocument();
01343     value = "";
01344 
01345     p = SkipWhiteSpace( p, encoding );
01346 
01347     if ( data )
01348     {
01349         data->Stamp( p, encoding );
01350         location = data->Cursor();
01351     }
01352     const char* startTag = "<!--";
01353     const char* endTag   = "-->";
01354 
01355     if ( !StringEqual( p, startTag, false, encoding ) )
01356     {
01357         document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
01358         return 0;
01359     }
01360     p += strlen( startTag );
01361     p = ReadText( p, &value, false, endTag, false, encoding );
01362     return p;
01363 }
01364 
01365 
01366 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01367 {
01368     p = SkipWhiteSpace( p, encoding );
01369     if ( !p || !*p ) return 0;
01370 
01371     int tabsize = 4;
01372     if ( document )
01373         tabsize = document->TabSize();
01374 
01375     if ( data )
01376     {
01377         data->Stamp( p, encoding );
01378         location = data->Cursor();
01379     }
01380     // Read the name, the '=' and the value.
01381     const char* pErr = p;
01382     p = ReadName( p, &name, encoding );
01383     if ( !p || !*p )
01384     {
01385         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
01386         return 0;
01387     }
01388     p = SkipWhiteSpace( p, encoding );
01389     if ( !p || !*p || *p != '=' )
01390     {
01391         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01392         return 0;
01393     }
01394 
01395     ++p;    // skip '='
01396     p = SkipWhiteSpace( p, encoding );
01397     if ( !p || !*p )
01398     {
01399         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
01400         return 0;
01401     }
01402 
01403     const char* end;
01404 
01405     if ( *p == '\'' )
01406     {
01407         ++p;
01408         end = "\'";
01409         p = ReadText( p, &value, false, end, false, encoding );
01410     }
01411     else if ( *p == '"' )
01412     {
01413         ++p;
01414         end = "\"";
01415         p = ReadText( p, &value, false, end, false, encoding );
01416     }
01417     else
01418     {
01419         // All attribute values should be in single or double quotes.
01420         // But this is such a common error that the parser will try
01421         // its best, even without them.
01422         value = "";
01423         while (    p && *p                                      // existence
01424                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'  // whitespace
01425                 && *p != '/' && *p != '>' )                     // tag end
01426         {
01427             value += *p;
01428             ++p;
01429         }
01430     }
01431     return p;
01432 }
01433 
01434 #ifdef TIXML_USE_STL
01435 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01436 {
01437     if ( cdata )
01438     {
01439         int c = in->get();
01440         if ( c <= 0 )
01441         {
01442             TiXmlDocument* document = GetDocument();
01443             if ( document )
01444                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01445             return;
01446         }
01447 
01448         (*tag) += (char) c;
01449 
01450         if ( c == '>'
01451              && tag->at( tag->length() - 2 ) == ']'
01452              && tag->at( tag->length() - 3 ) == ']' )
01453         {
01454             // All is well.
01455             return;
01456         }
01457     }
01458     else
01459     {
01460         while ( in->good() )
01461         {
01462             int c = in->peek();
01463             if ( c == '<' )
01464                 return;
01465             if ( c <= 0 )
01466             {
01467                 TiXmlDocument* document = GetDocument();
01468                 if ( document )
01469                     document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01470                 return;
01471             }
01472 
01473             (*tag) += (char) c;
01474             in->get();
01475         }
01476     }
01477 }
01478 #endif
01479 
01480 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
01481 {
01482     value = "";
01483     TiXmlDocument* document = GetDocument();
01484 
01485     if ( data )
01486     {
01487         data->Stamp( p, encoding );
01488         location = data->Cursor();
01489     }
01490 
01491     const char* const startTag = "<![CDATA[";
01492     const char* const endTag   = "]]>";
01493 
01494     if ( cdata || StringEqual( p, startTag, false, encoding ) )
01495     {
01496         cdata = true;
01497 
01498         if ( !StringEqual( p, startTag, false, encoding ) )
01499         {
01500             document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
01501             return 0;
01502         }
01503         p += strlen( startTag );
01504 
01505         // Keep all the white space, ignore the encoding, etc.
01506         while (    p && *p
01507                 && !StringEqual( p, endTag, false, encoding )
01508               )
01509         {
01510             value += *p;
01511             ++p;
01512         }
01513 
01514         TIXML_STRING dummy;
01515         p = ReadText( p, &dummy, false, endTag, false, encoding );
01516         return p;
01517     }
01518     else
01519     {
01520         bool ignoreWhite = true;
01521 
01522         const char* end = "<";
01523         p = ReadText( p, &value, ignoreWhite, end, false, encoding );
01524         if ( p )
01525             return p-1; // don't truncate the '<'
01526         return 0;
01527     }
01528 }
01529 
01530 #ifdef TIXML_USE_STL
01531 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
01532 {
01533     while ( in->good() )
01534     {
01535         int c = in->get();
01536         if ( c <= 0 )
01537         {
01538             TiXmlDocument* document = GetDocument();
01539             if ( document )
01540                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
01541             return;
01542         }
01543         (*tag) += (char) c;
01544 
01545         if ( c == '>' )
01546         {
01547             // All is well.
01548             return;
01549         }
01550     }
01551 }
01552 #endif
01553 
01554 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
01555 {
01556     p = SkipWhiteSpace( p, _encoding );
01557     // Find the beginning, find the end, and look for
01558     // the stuff in-between.
01559     TiXmlDocument* document = GetDocument();
01560     if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
01561     {
01562         if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
01563         return 0;
01564     }
01565     if ( data )
01566     {
01567         data->Stamp( p, _encoding );
01568         location = data->Cursor();
01569     }
01570     p += 5;
01571 
01572     version = "";
01573     encoding = "";
01574     standalone = "";
01575 
01576     while ( p && *p )
01577     {
01578         if ( *p == '>' )
01579         {
01580             ++p;
01581             return p;
01582         }
01583 
01584         p = SkipWhiteSpace( p, _encoding );
01585         if ( StringEqual( p, "version", true, _encoding ) )
01586         {
01587             TiXmlAttribute attrib;
01588             p = attrib.Parse( p, data, _encoding );
01589             version = attrib.Value();
01590         }
01591         else if ( StringEqual( p, "encoding", true, _encoding ) )
01592         {
01593             TiXmlAttribute attrib;
01594             p = attrib.Parse( p, data, _encoding );
01595             encoding = attrib.Value();
01596         }
01597         else if ( StringEqual( p, "standalone", true, _encoding ) )
01598         {
01599             TiXmlAttribute attrib;
01600             p = attrib.Parse( p, data, _encoding );
01601             standalone = attrib.Value();
01602         }
01603         else
01604         {
01605             // Read over whatever it is.
01606             while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
01607                 ++p;
01608         }
01609     }
01610     return 0;
01611 }
01612 
01613 bool TiXmlText::Blank() const
01614 {
01615     for ( unsigned i=0; i<value.length(); i++ )
01616         if ( !IsWhiteSpace( value[i] ) )
01617             return false;
01618     return true;
01619 }
01620 
01621 }}