Follow this link to skip to the main content

pug::xml_parser Class Reference

#include <pugxml.h>

Collaboration diagram for pug::xml_parser:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 xml_parser (unsigned long optmsk=parse_default, bool autdel=true, long growby=parse_grow)
 xml_parser (TCHAR *xmlstr, unsigned long optmsk=parse_default, bool autdel=true, long growby=parse_grow)
virtual ~xml_parser ()
 operator xml_node_struct * ()
 operator xml_node ()
xml_node document ()
void create ()
void clear ()
xml_node_struct * attach (xml_node_struct *root)
xml_node_struct * detach ()
unsigned long options ()
unsigned long options (unsigned long optmsk)
unsigned long growby ()
unsigned long growby (long grow)
TCHAR * strpos ()
TCHAR * parse (TCHAR *s, unsigned long optmsk=parse_noset)
bool parse_file (const TCHAR *path, unsigned long optmsk=parse_noset, unsigned long tempsize=4096)
TCHAR * parse (TCHAR *s, xml_node_struct *xmldoc, long growby, unsigned long optmsk=parse_default)

Protected Attributes

xml_node_struct * _xmldoc
long _growby
bool _autdel
TCHAR * _buffer
TCHAR * _strpos
unsigned long _optmsk

Detailed Description

Definition at line 3814 of file pugxml.h.


Constructor & Destructor Documentation

pug::xml_parser::xml_parser ( unsigned long  optmsk = parse_default,
bool  autdel = true,
long  growby = parse_grow 
) [inline]

Definition at line 3845 of file pugxml.h.

03845                                                                                                   :
03846         _xmldoc(0),
03847         _growby(growby),
03848         _autdel(autdel),
03849         _buffer(0),
03850         _strpos(0),
03851         _optmsk(optmsk)
03852 #ifdef PUGOPT_MEMFIL
03853         ,
03854         _mmfile(0),
03855         _mmfmap(0),
03856         _mmaddr(0),
03857         _mfsize(0),
03858         _addeos(false)
03859 #endif
03860         {
03861         }

pug::xml_parser::xml_parser ( TCHAR *  xmlstr,
unsigned long  optmsk = parse_default,
bool  autdel = true,
long  growby = parse_grow 
) [inline]

Definition at line 3872 of file pugxml.h.

03872                                                                                                                  :
03873         _xmldoc(0),
03874         _growby(growby),
03875         _autdel(autdel),
03876         _buffer(0),
03877         _strpos(0),
03878         _optmsk(optmsk)
03879 #ifdef PUGOPT_MEMFIL
03880         ,
03881         _mmfile(0),
03882         _mmfmap(0),
03883         _mmaddr(0),
03884         _mfsize(0),
03885         _addeos(false)
03886 #endif
03887         {
03888           parse( xmlstr, _optmsk ); //Parse it.
03889         }

virtual pug::xml_parser::~xml_parser (  )  [inline, virtual]

Definition at line 3893 of file pugxml.h.

References pug::free_node().

03894         {
03895           if(_autdel && _xmldoc) free_node(_xmldoc);
03896           if(_buffer) free(_buffer);
03897 #ifdef PUGOPT_MEMFIL
03898           close_memfile();
03899 #endif
03900         }

Here is the call graph for this function:


Member Function Documentation

pug::xml_parser::operator xml_node_struct * (  )  [inline]

Definition at line 3905 of file pugxml.h.

03905 { return _xmldoc; } //Cast as xml_node_struct pointer to root.

pug::xml_parser::operator xml_node (  )  [inline]

Definition at line 3906 of file pugxml.h.

03906 { return xml_node(_xmldoc); } //Cast as xml_node (same as document).

xml_node pug::xml_parser::document (  )  [inline]

Definition at line 3907 of file pugxml.h.

Referenced by claraty::Parameter_Parser::_get_variable(), claraty::Parameter_Parser::document(), and claraty::Parameter_Parser::has_tag_name().

03907 { return xml_node(_xmldoc); } //Returns the root wrapped by an xml_node.

void pug::xml_parser::create (  )  [inline]

Definition at line 3914 of file pugxml.h.

References claraty::clear(), pug::new_node(), and pug::node_document.

03915         {
03916           clear(); //Free any allocated memory.
03917           _xmldoc = new_node(node_document); //Allocate a new root.
03918           _xmldoc->parent = _xmldoc; //Point to self.
03919         }

Here is the call graph for this function:

void pug::xml_parser::clear (  )  [inline]

Definition at line 3923 of file pugxml.h.

References pug::free_node().

03924         {
03925           if(_xmldoc){ free_node(_xmldoc); _xmldoc = 0; }
03926           if(_buffer){ free(_buffer); _buffer = 0; }
03927 #ifdef PUGOPT_MEMFIL
03928           close_memfile();
03929 #endif
03930         }

Here is the call graph for this function:

xml_node_struct* pug::xml_parser::attach ( xml_node_struct *  root  )  [inline]

Definition at line 3972 of file pugxml.h.

References pug::t_xml_node_struct::parent.

03973         {
03974           xml_node_struct* t = _xmldoc; //Save this root.
03975           _xmldoc = root; //Assign.
03976           _xmldoc->parent = _xmldoc; //Ensure we are the root.
03977           return t; //Return the old root if any.
03978         }

xml_node_struct* pug::xml_parser::detach (  )  [inline]

Definition at line 3982 of file pugxml.h.

03983         {
03984           xml_node_struct* t = _xmldoc; //Save this root.
03985           _xmldoc = 0; //So we don't delete later on if autodelete set.
03986           return t; //Return the old root if any.
03987         }

unsigned long pug::xml_parser::options (  )  [inline]

Definition at line 3991 of file pugxml.h.

03991 { return _optmsk; }

unsigned long pug::xml_parser::options ( unsigned long  optmsk  )  [inline]

Definition at line 3996 of file pugxml.h.

03997         {
03998           unsigned long prev = _optmsk;
03999           _optmsk = optmsk;
04000           return prev;
04001         }

unsigned long pug::xml_parser::growby (  )  [inline]

Definition at line 4005 of file pugxml.h.

04005 { return _growby; }

unsigned long pug::xml_parser::growby ( long  grow  )  [inline]

Definition at line 4010 of file pugxml.h.

04011         {
04012           long prev = _growby;
04013           _growby = grow;
04014           return prev;
04015         }

TCHAR* pug::xml_parser::strpos (  )  [inline]

Definition at line 4023 of file pugxml.h.

04024         {
04025           return _strpos;
04026         }

TCHAR* pug::xml_parser::parse ( TCHAR *  s,
unsigned long  optmsk = parse_noset 
) [inline]

Definition at line 4036 of file pugxml.h.

References claraty::clear(), pug::new_node(), pug::node_document, and pug::parse_noset.

Referenced by parse().

04037         {
04038           if(!s) return s;
04039           clear(); //Free any allocated memory.
04040           _xmldoc = new_node(node_document); //Allocate a new root.
04041           _xmldoc->parent = _xmldoc; //Point to self.
04042           if(optmsk != parse_noset) _optmsk = optmsk;
04043           return parse( s, _xmldoc, _growby, _optmsk ); // Parse the input string.
04044           // return pug::parse( s, _xmldoc, _growby, _optmsk ); //Parse the input string.
04045         }

Here is the call graph for this function:

bool pug::xml_parser::parse_file ( const TCHAR *  path,
unsigned long  optmsk = parse_noset,
unsigned long  tempsize = 4096 
) [inline]

Definition at line 4056 of file pugxml.h.

References claraty::clear(), pug::load_file(), pug::new_node(), pug::node_document, pug::parse_noset, and pug::parse_wnorm.

Referenced by claraty::Parameter_Parser::Parameter_Parser().

04057         {
04058 #ifdef PUGOPT_NONSEG
04059           assert((optmsk & parse_wnorm) == 0); // Normalization isn't implemented for non-segmented strings, as of 24 Mar 2003
04060 #endif
04061 
04062           if(!path) return false;
04063           clear(); //clear any existing data.
04064           unsigned long bytes;
04065           if(optmsk != parse_noset) _optmsk = optmsk;
04066           if(load_file(path, &_buffer, &bytes, tempsize) && bytes > 0) {
04067             _xmldoc = pug::new_node(node_document);
04068             _xmldoc->parent = _xmldoc; //Point to self.
04069             // TCHAR* s = pug::parse(_buffer,_xmldoc,_growby,_optmsk);
04070             TCHAR* s = parse( _buffer, _xmldoc, _growby, _optmsk );
04071             _strpos = s;
04072             return true;
04073           }
04074           return false;
04075         }

Here is the call graph for this function:

TCHAR * pug::xml_parser::parse ( TCHAR *  s,
xml_node_struct *  xmldoc,
long  growby,
unsigned long  optmsk = parse_default 
) [inline]

Definition at line 4209 of file pugxml.h.

References _T, _tcscmp, _tcsncmp, pug::append_attribute(), pug::append_node(), pug::chartype_close(), pug::chartype_dash(), pug::chartype_enter(), pug::chartype_equals(), pug::chartype_lbracket(), pug::chartype_leave(), pug::chartype_pi(), pug::chartype_quote(), pug::chartype_rbracket(), pug::chartype_space(), pug::chartype_special(), pug::chartype_symbol(), ENDSEG, pug::t_xml_node_struct::name, pug::t_xml_attribute_struct::name, pug::node_cdata, pug::node_comment, pug::node_doctype, pug::node_dtd_attlist, pug::node_dtd_element, pug::node_dtd_entity, pug::node_dtd_notation, pug::node_include, pug::node_pcdata, pug::node_pi, OPTSET, parse(), pug::parse_cdata, pug::parse_comments, pug::parse_doctype, pug::parse_dtd, pug::parse_dtd_only, pug::parse_pi, pug::parse_trim_attribute, pug::parse_trim_cdata, pug::parse_trim_comment, pug::parse_trim_doctype, pug::parse_trim_entity, pug::parse_trim_pcdata, pug::parse_wnorm, POPNODE, PUSHNODE, SCANFOR, SCANWHILE, SKIPWS, pug::strwnorm(), pug::strwtrim(), pug::t_xml_node_struct::type, pug::t_xml_attribute_struct::value, and pug::t_xml_node_struct::value.

04211     {
04212       if(!s || !xmldoc) return s;
04213       TCHAR ch = 0; //Current char, in cases where we must null-terminate before we test.
04214       xml_node_struct* cursor = xmldoc; //Tree node cursor.
04215       TCHAR* mark = s; //Marked string position for temporary look-ahead.
04216       while(*s!=0)
04217         {
04218         LOC_SEARCH: //Obliviously search for next element.
04219           SCANFOR(chartype_enter(*s)); //Find the next '<'.
04220           if(chartype_enter(*s))
04221             {
04222               ++s;
04223             LOC_CLASSIFY: //What kind of element?
04224               if(chartype_pi(*s)) //'<?...'
04225                 {
04226                   ++s;
04227                   if(chartype_symbol(*s) && OPTSET(parse_pi))
04228                     {
04229                       mark = s;
04230                       SCANFOR(chartype_pi(*s)); //Look for terminating '?'.
04231 #ifndef PUGOPT_NONSEG
04232                       if(chartype_pi(*s)) *s = _T('/'); //Same semantics as for '<.../>', so fudge it.
04233 #endif
04234                       s = mark;
04235                       PUSHNODE(node_pi); //Append a new node on the tree.
04236                       goto LOC_ELEMENT; //Go read the element name.
04237                     }
04238                   else //Bad PI or parse_pi not set.
04239                     {
04240                       SCANFOR(chartype_leave(*s)); //Look for '>'.
04241                       ++s;
04242                       mark = 0;
04243                       continue;
04244                     }
04245                 }
04246               else if(chartype_special(*s)) //'<!...'
04247                 {
04248                   ++s;
04249                   if(chartype_dash(*s)) //'<!-...'
04250                     {
04251                       ++s;
04252                       if(OPTSET(parse_comments) && chartype_dash(*s)) //'<!--...'
04253                         {
04254                           ++s;
04255                           PUSHNODE(node_comment); //Append a new node on the tree.
04256                           cursor->value = s; //Save the offset.
04257                           while(*s!=0 && *(s+1) && *(s+2) && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'.
04258                           if(*s==0) return s;
04259 #ifdef PUGOPT_NONSEG
04260                           SETLEN(); //NF 19 Jan 2003.
04261 #else
04262                           *s = 0; //Zero-terminate this segment at the first terminating '-'.
04263 #endif
04264                           if(OPTSET(parse_trim_comment)) //Trim whitespace.
04265                             {
04266 #ifdef PUGOPT_NONSEG
04267                               strwtrim(&cursor->value,cursor->value_size);
04268 #else
04269                               if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04270                               else strwtrim(&cursor->value);
04271 #endif
04272                             }
04273                           s += 2; //Step over the '\0-'.
04274                           POPNODE(); //Pop since this is a standalone.
04275                           goto LOC_LEAVE; //Look for any following PCDATA.
04276                         }
04277                       else
04278                         {
04279                           while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'.
04280                           if(*s==0) return s;
04281                           s += 2;
04282                           goto LOC_LEAVE; //Look for any following PCDATA.
04283                         }
04284                     }
04285                   else if(chartype_lbracket(*s)) //'<![...'
04286                     {
04287                       ++s;
04288                       if(*s==_T('I')) //'<![I...'
04289                         {
04290                           ++s;
04291                           if(*s==_T('N')) //'<![IN...'
04292                             {
04293                               ++s;
04294                               if(*s==_T('C')) //'<![INC...'
04295                                 {
04296                                   ++s;
04297                                   if(*s==_T('L')) //'<![INCL...'
04298                                     {
04299                                       ++s;
04300                                       if(*s==_T('U')) //'<![INCLU...'
04301                                         {
04302                                           ++s;
04303                                           if(*s==_T('D')) //'<![INCLUD...'
04304                                             {
04305                                               ++s;
04306                                               if(*s==_T('E')) //'<![INCLUDE...'
04307                                                 {
04308                                                   ++s;
04309                                                   if(chartype_lbracket(*s)) //'<![INCLUDE[...'
04310                                                     {
04311                                                       ++s;
04312                                                       if(OPTSET(node_cdata))
04313                                                         {
04314                                                           PUSHNODE(node_include); //Append a new node on the tree.
04315                                                           cursor->value = s; //Save the offset.
04316                                                           while(!(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
04317                                                           if(chartype_rbracket(*s))
04318                                                             {
04319 #ifdef PUGOPT_NONSEG
04320                                                               SETLEN(); //NF 19 Jan 2003.
04321 #else
04322                                                               *s = 0; //Zero-terminate this segment.
04323 #endif
04324                                                               ++s;
04325                                                               if(OPTSET(parse_trim_cdata)) //Trim whitespace.
04326                                                                 {
04327 #ifdef PUGOPT_NONSEG
04328                                                                   strwtrim(&cursor->value, cursor->value_size);
04329 #else
04330                                                                   if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04331                                                                   else strwtrim(&cursor->value);
04332 #endif
04333                                                                 }
04334                                                             }
04335                                                           POPNODE(); //Pop since this is a standalone.
04336                                                         }
04337                                                       else //Flagged for discard, but we still have to scan for the terminator.
04338                                                         {
04339                                                           while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
04340                                                           ++s;
04341                                                         }
04342                                                       ++s; //Step over the last ']'.
04343                                                       goto LOC_LEAVE; //Look for any following PCDATA.
04344                                                     }
04345                                                 }
04346                                             }
04347                                         }
04348                                     }
04349                                 }
04350                             }
04351                         }
04352                       else if(*s==_T('C')) //'<![C...'
04353                         {
04354                           ++s;
04355                           if(*s==_T('D')) //'<![CD...'
04356                             {
04357                               ++s;
04358                               if(*s==_T('A')) //'<![CDA...'
04359                                 {
04360                                   ++s;
04361                                   if(*s==_T('T')) //'<![CDAT...'
04362                                     {
04363                                       ++s;
04364                                       if(*s==_T('A')) //'<![CDATA...'
04365                                         {
04366                                           ++s;
04367                                           if(chartype_lbracket(*s)) //'<![CDATA[...'
04368                                             {
04369                                               ++s;
04370                                               if(OPTSET(parse_cdata))
04371                                                 {
04372                                                   PUSHNODE(node_cdata); //Append a new node on the tree.
04373                                                   cursor->value = s; //Save the offset.
04374                                                   while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
04375                                                   if(*(s+2)==0) return s; //Very badly formed.
04376                                                   if(chartype_rbracket(*s))
04377                                                     {
04378 #ifdef PUGOPT_NONSEG
04379                                                       SETLEN(); //NF 19 Jan 2003.
04380 #else
04381                                                       *s = 0; //Zero-terminate this segment.
04382 #endif
04383                                                       ++s;
04384                                                       if(OPTSET(parse_trim_cdata)) //Trim whitespace.
04385                                                         {
04386 #ifdef PUGOPT_NONSEG
04387                                                           strwtrim(&cursor->value,cursor->value_size);
04388 #else
04389                                                           if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04390                                                           else strwtrim(&cursor->value);
04391 #endif
04392                                                         }
04393                                                     }
04394                                                   POPNODE(); //Pop since this is a standalone.
04395                                                 }
04396                                               else //Flagged for discard, but we still have to scan for the terminator.
04397                                                 {
04398                                                   while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'.
04399                                                   ++s;
04400                                                 }
04401                                               ++s; //Step over the last ']'.
04402                                               goto LOC_LEAVE; //Look for any following PCDATA.
04403                                             }
04404                                         }
04405                                     }
04406                                 }
04407                             }
04408                         }
04409                       continue; //Probably a corrupted CDATA section, so just eat it.
04410                     }
04411                   else if(*s==_T('D')) //'<!D...'
04412                     {
04413                       ++s;
04414                       if(*s==_T('O')) //'<!DO...'
04415                         {
04416                           ++s;
04417                           if(*s==_T('C')) //'<!DOC...'
04418                             {
04419                               ++s;
04420                               if(*s==_T('T')) //'<!DOCT...'
04421                                 {
04422                                   ++s;
04423                                   if(*s==_T('Y')) //'<!DOCTY...'
04424                                     {
04425                                       ++s;
04426                                       if(*s==_T('P')) //'<!DOCTYP...'
04427                                         {
04428                                           ++s;
04429                                           if(*s==_T('E')) //'<!DOCTYPE...'
04430                                             {
04431                                               ++s;
04432                                               SKIPWS(); //Eat any whitespace.
04433                                               xml_attribute_struct* a = 0;
04434                                               if(OPTSET(parse_doctype))
04435                                                 {
04436                                                   PUSHNODE(node_doctype); //Append a new node on the tree.
04437                                                   a = append_attribute(cursor,3); //Store the DOCTYPE name.
04438                                                   a->value = a->name = s; //Save the offset.
04439                                                   // Why are value & name set to the same thing here and below. NF 25 Mar 2003
04440                                                   // Also why not store this in node.name. See my CP post. NF 25 Mar 2003
04441                                                 }
04442                                               SCANWHILE(chartype_symbol(*s)); //'<!DOCTYPE symbol...'
04443 #ifdef PUGOPT_NONSEG
04444                                               if(OPTSET(parse_doctype))
04445                                                 a->name_size = a->value_size = s - a->value; //Save the length. rem: Before ENDSEG()
04446 #endif
04447                                               ENDSEG(); //Save char in 'ch', terminate & step over.
04448                                               if(chartype_space(ch)) SKIPWS(); //Eat any whitespace.
04449                                             LOC_DOCTYPE_SYMBOL:
04450                                               if(chartype_symbol(*s))
04451                                                 {
04452                                                   mark = s;
04453                                                   SCANWHILE(chartype_symbol(*s)); //'...symbol SYSTEM...'
04454                                                   if(OPTSET(parse_doctype))
04455                                                     {
04456                                                       a = append_attribute(cursor,1);
04457                                                       a->value = a->name = mark;
04458 #ifdef PUGOPT_NONSEG
04459                                                       a->value_size = a->name_size = s - mark; //NF 19 Jan 2003.
04460 #else
04461                                                       *s = 0;
04462 #endif
04463                                                     }
04464                                                   ++s;
04465                                                   SKIPWS();
04466                                                 }
04467                                               if(chartype_quote(*s)) //'...SYSTEM "..."'
04468                                                 {
04469                                                 LOC_DOCTYPE_QUOTE:
04470                                                   ch = *s;
04471                                                   ++s;
04472                                                   mark = s;
04473                                                   while(*s!=0 && *s != ch) ++s;
04474                                                   if(*s!=0)
04475                                                     {
04476                                                       if(OPTSET(parse_doctype))
04477                                                         {
04478                                                           a = append_attribute(cursor,1);
04479                                                           a->value = mark;
04480 #ifdef PUGOPT_NONSEG
04481                                                           a->value_size = s - mark; //NF 19 Jan 2003.
04482 #else
04483                                                           *s = 0;
04484 #endif
04485                                                         }
04486                                                       ++s;
04487                                                       SKIPWS(); //Eat whitespace.
04488                                                       if(chartype_quote(*s)) goto LOC_DOCTYPE_QUOTE; //Another quoted section to store.
04489                                                       else if(chartype_symbol(*s)) goto LOC_DOCTYPE_SYMBOL; //Not wellformed, but just parse it.
04490                                                     }
04491                                                 }
04492                                               if(chartype_lbracket(*s)) //'...[...'
04493                                                 {
04494                                                   ++s; //Step over the bracket.
04495                                                   if(OPTSET(parse_doctype)) cursor->value = s; //Store the offset.
04496                                                   unsigned int bd = 1; //Bracket depth counter.
04497                                                   while(*s!=0) //Loop till we're out of all brackets.
04498                                                     {
04499                                                       if(chartype_rbracket(*s)) --bd;
04500                                                       else if(chartype_lbracket(*s)) ++bd;
04501                                                       if(bd == 0) break;
04502                                                       ++s;
04503                                                     }
04504                                                   //Note: 's' now points to end of DTD, i.e.: ']'.
04505                                                   if(OPTSET(parse_doctype))
04506                                                     {
04507                                                       //Note: If we aren't parsing the DTD ('!parse_dtd', etc.) then it is stored in the DOM as one whole chunk.
04508 #ifdef PUGOPT_NONSEG
04509                                                       SETLEN(); //NF 19 Jan 2003
04510 #else
04511                                                       *s = 0; //Zero-terminate.
04512 #endif
04513                                                       if(OPTSET(parse_dtd)||OPTSET(parse_dtd_only))
04514                                                         {
04515                                                           if(OPTSET(parse_dtd))
04516                                                             {
04517 #ifdef PUGOPT_NONSEG
04518                                                               TCHAR svch = *s;
04519                                                               try
04520                                                                 {
04521                                                                   *s = 0; //Zero-terminate.
04522                                                                   parse( cursor->value, cursor, growby, optmsk ); //Parse it.
04523                                                                   // cursor->value will contain the entire DTD.
04524                                                                   // probably better to set it to empty. See cmt below. NF 25 Mar 2003
04525                                                                 }
04526                                                               catch(...){ assert(false); }
04527                                                               *s = svch;
04528 #else
04529                                                               // this will change cursor->value. we'll finish up with
04530                                                               // a 0 after the first DTD item. I can't see any point
04531                                                               // in having cursor->value set after parse(). See cmt above & my CP post. NF 25 Mar 2003
04532                                                               parse( cursor->value, cursor, growby, optmsk ); //Parse it.
04533 #endif
04534                                                             }
04535                                                           if(OPTSET(parse_dtd_only)) return (s+1); //Flagged to parse DTD only, so leave here.
04536                                                         }
04537                                                       else if(OPTSET(parse_trim_doctype)) //Trim whitespace.
04538                                                         {
04539 #ifdef PUGOPT_NONSEG
04540                                                           strwtrim(&cursor->value, cursor->value_size);
04541 #else
04542                                                           if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04543                                                           else strwtrim(&cursor->value);
04544 #endif
04545                                                         }
04546                                                       ++s; //Step over the zero.
04547                                                       POPNODE(); //Pop since this is a standalone.
04548                                                     }
04549                                                   SCANFOR(chartype_leave(*s));
04550                                                   continue;
04551                                                 }
04552                                               //Fall-through; make sure we pop.
04553                                               POPNODE(); //Pop since this is a standalone.
04554                                               continue;
04555                                             }
04556                                         }
04557                                     }
04558                                 }
04559                             }
04560                         }
04561                     }
04562                   else if(chartype_symbol(*s)) //An inline DTD tag.
04563                     {
04564                       mark = s;
04565                       SCANWHILE(chartype_symbol(*s));
04566                       ENDSEG(); //Save char in 'ch', terminate & step over.
04567                       xml_node_type e = node_dtd_entity;
04568 #ifdef PUGOPT_NONSEG
04569                       const unsigned int dtdilen = (s - 1) - mark;
04570                       if(_tcsncmp(mark,_T("ATTLIST"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_attlist;
04571                       else if(_tcsncmp(mark,_T("ELEMENT"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_element;
04572                       else if(_tcsncmp(mark,_T("NOTATION"),max((8*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_notation;
04573 #else
04574                       if(_tcscmp(mark,_T("ATTLIST"))==0) e = node_dtd_attlist;
04575                       else if(_tcscmp(mark,_T("ELEMENT"))==0) e = node_dtd_element;
04576                       else if(_tcscmp(mark,_T("NOTATION"))==0) e = node_dtd_notation;
04577 #endif
04578                       PUSHNODE(e); //Append a new node on the tree.
04579                       if(*s!=0 && chartype_space(ch))
04580                         {
04581                           SKIPWS(); //Eat whitespace.
04582                           if(chartype_symbol(*s) || *s==_T('%'))
04583                             {
04584                               mark = s;
04585                               if(*s==_T('%')) //Could be '<!ENTITY % name' -or- '<!ENTITY %name'
04586                                 {
04587 #ifdef PUGOPT_NONSEG
04588                                   //Note: For memory-mapped file support we need to treat 's' as read-only so we can't do '*(s-1) = _T('%');' below.
04589                                   cursor->name = mark; //Sort out extraneous whitespace when we retrieve it. TODO: Whitespace cleanup.
04590 #endif
04591                                   ++s;
04592                                   if(chartype_space(*s))
04593                                     {
04594                                       SKIPWS(); //Eat whitespace.
04595 #ifndef PUGOPT_NONSEG
04596                                       *(s-1) = _T('%');
04597                                       cursor->name = (s-1);
04598 #endif
04599                                     }
04600 #ifndef PUGOPT_NONSEG
04601                                   else cursor->name = mark;
04602 #endif
04603                                 }
04604                               else cursor->name = s;
04605                               SCANWHILE(chartype_symbol(*s));
04606 #ifdef PUGOPT_NONSEG
04607                               cursor->name_size = s - cursor->name;
04608 #endif
04609                               ENDSEG(); //Save char in 'ch', terminate & step over.
04610                               if(chartype_space(ch))
04611                                 {
04612                                   SKIPWS(); //Eat whitespace.
04613                                   if(e == node_dtd_entity) //Special case; may have multiple quoted sections w/anything inside.
04614                                     {
04615                                       cursor->value = s; //Just store everything here.
04616                                       bool qq = false; //Quote in/out flag.
04617                                       while(*s != 0) //Loop till we find the right sequence.
04618                                         {
04619                                           if(!qq && chartype_quote(*s)){ ch = *s; qq = true; }
04620                                           else if(qq && *s == ch) qq = false;
04621                                           else if(!qq && chartype_leave(*s)) //Not in quoted reqion and '>' hit.
04622                                             {
04623 #ifdef PUGOPT_NONSEG
04624                                               SETLEN(); //NF 19 Jan 2003.
04625 #else
04626                                               *s = 0;
04627 #endif
04628                                               ++s;
04629                                               if(OPTSET(parse_trim_entity))
04630                                                 {
04631 #ifdef PUGOPT_NONSEG
04632                                                   strwtrim(&cursor->value,cursor->value_size);
04633 #else
04634                                                   if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04635                                                   else strwtrim(&cursor->value);
04636 #endif
04637                                                 }
04638                                               POPNODE();
04639                                               goto LOC_SEARCH;
04640                                             }
04641                                           ++s;
04642                                         }
04643                                       if(OPTSET(parse_trim_entity))
04644                                         {
04645 #ifdef PUGOPT_NONSEG
04646                                           strwtrim(&cursor->value, cursor->value_size);
04647 #else
04648                                           if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04649                                           else strwtrim(&cursor->value);
04650 #endif
04651                                         }
04652                                     }
04653                                   else
04654                                     {
04655                                       cursor->value = s;
04656                                       SCANFOR(chartype_leave(*s)); //Just look for '>'.
04657 #ifdef PUGOPT_NONSEG
04658                                       SETLEN(); //NF 19 Jan 2003.
04659 #else
04660                                       *s = 0;
04661 #endif
04662                                       ++s;
04663                                       if(OPTSET(parse_trim_entity))
04664                                         {
04665 #ifdef PUGOPT_NONSEG
04666                                           strwtrim(&cursor->value, cursor->value_size);
04667 #else
04668                                           if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04669                                           else strwtrim(&cursor->value);
04670 #endif
04671                                         }
04672                                       POPNODE();
04673                                       goto LOC_SEARCH;
04674                                     }
04675                                 }
04676                             }
04677                         }
04678                       POPNODE();
04679                     }
04680                 }
04681               else if(chartype_symbol(*s)) //'<#...'
04682                 {
04683                   cursor = append_node(cursor,growby); //Append a new node to the tree.
04684                 LOC_ELEMENT: //Scan for & store element name.
04685                   cursor->name = s;
04686                   SCANWHILE(chartype_symbol(*s)); //Scan for a terminator.
04687 #ifdef PUGOPT_NONSEG
04688                   cursor->name_size = s - cursor->name; //Note: Before ENDSEG().
04689 #endif
04690                   ENDSEG(); //Save char in 'ch', terminate & step over.
04691                   if
04692                     (
04693                      *s!=0 &&
04694                      (
04695                       chartype_close(ch) //'</...'
04696 #ifdef PUGOPT_NONSEG
04697                       //||
04698                       //chartype_pi(ch) //Treat '?>' as '/>' NF 19 Jan 2003
04699 #endif
04700                       )
04701                      )
04702                     {
04703                       SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over the tag name.
04704                       POPNODE(); //Pop.
04705                       continue;
04706                     }
04707                   else if(*s!=0 && !chartype_space(ch)) goto LOC_PCDATA; //No attributes, so scan for PCDATA.
04708                   else if(*s!=0 && chartype_space(ch))
04709                     {
04710                       SKIPWS(); //Eat any whitespace.
04711                     LOC_ATTRIBUTE:
04712                       if(chartype_symbol(*s)) //<... #...
04713                         {
04714                           xml_attribute_struct* a = append_attribute(cursor,growby); //Make space for this attribute.
04715                           a->name = s; //Save the offset.
04716                           SCANWHILE(chartype_symbol(*s)); //Scan for a terminator.
04717 #ifdef PUGOPT_NONSEG
04718                           ENDSEGNAM(a);
04719 #else
04720                           ENDSEG(); //Save char in 'ch', terminate & step over.
04721 #endif
04722                           if(*s!=0 && chartype_space(ch)) SKIPWS(); //Eat any whitespace.
04723                           if(*s!=0 && (chartype_equals(ch) || chartype_equals(*s))) //'<... #=...'
04724                             {
04725                               if(chartype_equals(*s)) ++s;
04726                               SKIPWS(); //Eat any whitespace.
04727                               if(chartype_quote(*s)) //'<... #="...'
04728                                 {
04729                                   ch = *s; //Save quote char to avoid breaking on "''" -or- '""'.
04730                                   ++s; //Step over the quote.
04731                                   a->value = s; //Save the offset.
04732                                   SCANFOR(*s == ch); //Scan for the terminating quote, or '>'.
04733 #ifdef PUGOPT_NONSEG
04734                                   ENDSEGATT(a);
04735 #else
04736                                   ENDSEG(); //Save char in 'ch', terminate & step over.
04737 #endif
04738                                   if(OPTSET(parse_trim_attribute)) //Trim whitespace.
04739                                     {
04740 #ifdef PUGOPT_NONSEG
04741                                       strwtrim(&a->value,a->value_size);
04742 #else
04743                                       if(OPTSET(parse_wnorm)) strwnorm(&a->value);
04744                                       else strwtrim(&a->value);
04745 #endif
04746                                     }
04747 
04748 #ifdef PUGOPT_NODE_FLAGS        // NF 29 May 2003
04749                                 // If we have an "expanded=true" attribute set expand().
04750                                   if ( _tcsncmp( a->name, _T("expanded"), max( (8*sizeof(TCHAR)), a->name_size ) ) == 0
04751                                        && _tcsncmp( a->value, _T("true"), max( 4*sizeof(TCHAR), a->value_size ) ) == 0
04752                                        )
04753                                     {
04754                                       cursor->expand( true );
04755                                       _expanded_nodes.push_back( cursor );
04756                                     }
04757 #endif
04758 
04759                                   if(chartype_leave(*s)){ ++s; goto LOC_PCDATA; }
04760                                   else if(chartype_close(*s))
04761                                     {
04762                                       ++s;
04763                                       POPNODE();
04764                                       SKIPWS(); //Eat any whitespace.
04765                                       if(chartype_leave(*s)) ++s;
04766                                       goto LOC_PCDATA;
04767                                     }
04768                                   if(chartype_space(*s)) //This may indicate a following attribute.
04769                                     {
04770                                       SKIPWS(); //Eat any whitespace.
04771                                       goto LOC_ATTRIBUTE; //Go scan for additional attributes.
04772                                     }
04773                                 }
04774                             }
04775                           if(chartype_symbol(*s)) goto LOC_ATTRIBUTE;
04776                           else if(*s!=0 && cursor->type == node_pi)
04777                             {
04778 #ifdef PUGOPT_NONSEG
04779                               SCANFOR(chartype_pi(*s)); //compliments change where we don't fudge to '/>' when we find the PI. NF 20 Jan 2003
04780                               SKIPWS(); //Eat any whitespace.
04781                               if(chartype_pi(*s)) ++s;
04782 #else
04783                               SCANFOR(chartype_close(*s));
04784                               SKIPWS(); //Eat any whitespace.
04785                               if(chartype_close(*s)) ++s;
04786 #endif
04787                               SKIPWS(); //Eat any whitespace.
04788                               if(chartype_leave(*s)) ++s;
04789                               POPNODE();
04790                               goto LOC_PCDATA;
04791                             }
04792                         }
04793                     }
04794                 LOC_LEAVE:
04795                   if(chartype_leave(*s)) //'...>'
04796                     {
04797                       ++s; //Step over the '>'.
04798                     LOC_PCDATA: //'>...<'
04799                       mark = s; //Save this offset while searching for a terminator.
04800                       SKIPWS(); //Eat whitespace if no genuine PCDATA here.
04801                       if(chartype_enter(*s)) //We hit a '<...', with only whitespace, so don't bother storing anything.
04802                         {
04803                           if(chartype_close(*(s+1))) //'</...'
04804                             {
04805                               SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over any end-tag name.
04806                               POPNODE(); //Pop.
04807                               continue; //Continue scanning.
04808                             }
04809                           else goto LOC_SEARCH; //Expect a new element enter, so go scan for it.
04810                         }
04811                       s = mark; //We hit something other than whitespace; restore the original offset.
04812                       PUSHNODE(node_pcdata); //Append a new node on the tree.
04813                       cursor->value = s; //Save the offset.
04814                       SCANFOR(chartype_enter(*s)); //'...<'
04815 #ifdef PUGOPT_NONSEG
04816                       ENDSEGDAT();
04817 #else
04818                       ENDSEG(); //Save char in 'ch', terminate & step over.
04819 #endif
04820                       if(OPTSET(parse_trim_pcdata)) //Trim whitespace.
04821                         {
04822 #ifdef PUGOPT_NONSEG
04823                           strwtrim(&cursor->value,cursor->value_size);
04824 #else
04825                           if(OPTSET(parse_wnorm)) strwnorm(&cursor->value);
04826                           else strwtrim(&cursor->value);
04827 #endif
04828                         }
04829                       POPNODE(); //Pop since this is a standalone.
04830                       if(chartype_enter(ch)) //Did we hit a '<...'?
04831                         {
04832                           if(chartype_close(*s)) //'</...'
04833                             {
04834                               SCANFOR(chartype_leave(*s)); //'...>'
04835                               POPNODE(); //Pop.
04836                               goto LOC_LEAVE;
04837                             }
04838                           else if(chartype_special(*s)) goto LOC_CLASSIFY; //We hit a '<!...'. We must test this here if we want comments intermixed w/PCDATA.
04839                           else if(*s) goto LOC_CLASSIFY;
04840                           else return s;
04841                         }
04842                     }
04843                   //Fall-through A.
04844                   else if(chartype_close(*s)) //'.../'
04845                     {
04846                       ++s;
04847                       if(chartype_leave(*s)) //'.../>'
04848                         {
04849                           POPNODE(); //Pop.
04850                           ++s;
04851                           continue;
04852                         }
04853                     }
04854                 }
04855               //Fall-through B.
04856               else if(chartype_close(*s)) //'.../'
04857                 {
04858                   SCANFOR(chartype_leave(*s)); //'.../>'
04859                   POPNODE(); //Pop.
04860                   continue;
04861                 }
04862             }
04863         }
04864       return s;
04865     }  // xml_parser::parse()

Here is the call graph for this function:


Member Data Documentation

xml_node_struct* pug::xml_parser::_xmldoc [protected]

Definition at line 3819 of file pugxml.h.

long pug::xml_parser::_growby [protected]

Definition at line 3820 of file pugxml.h.

bool pug::xml_parser::_autdel [protected]

Definition at line 3821 of file pugxml.h.

TCHAR* pug::xml_parser::_buffer [protected]

Definition at line 3822 of file pugxml.h.

TCHAR* pug::xml_parser::_strpos [protected]

Definition at line 3823 of file pugxml.h.

unsigned long pug::xml_parser::_optmsk [protected]

Definition at line 3824 of file pugxml.h.


The documentation for this class was generated from the following file: