pug::xml_parser Class Reference
#include <pugxml.h>
Collaboration diagram for pug::xml_parser:

Public Member Functions | |
| xml_parser (unsigned long optmsk=parse_default, bool autdel=true, long growby=parse_grow) | |
| xml_parser (TCHAR *xmlstr, unsigned long optmsk=parse_default, bool autdel=true, long growby=parse_grow) | |
| virtual | ~xml_parser () |
| operator xml_node_struct * () | |
| operator xml_node () | |
| xml_node | document () |
| void | create () |
| void | clear () |
| xml_node_struct * | attach (xml_node_struct *root) |
| xml_node_struct * | detach () |
| unsigned long | options () |
| unsigned long | options (unsigned long optmsk) |
| unsigned long | growby () |
| unsigned long | growby (long grow) |
| TCHAR * | strpos () |
| TCHAR * | parse (TCHAR *s, unsigned long optmsk=parse_noset) |
| bool | parse_file (const TCHAR *path, unsigned long optmsk=parse_noset, unsigned long tempsize=4096) |
| TCHAR * | parse (TCHAR *s, xml_node_struct *xmldoc, long growby, unsigned long optmsk=parse_default) |
Protected Attributes | |
| xml_node_struct * | _xmldoc |
| long | _growby |
| bool | _autdel |
| TCHAR * | _buffer |
| TCHAR * | _strpos |
| unsigned long | _optmsk |
Detailed Description
Definition at line 3814 of file pugxml.h.
Constructor & Destructor Documentation
| pug::xml_parser::xml_parser | ( | unsigned long optmsk = parse_default, bool autdel = true, long growby = parse_grow | ) | [inline] |
Definition at line 3845 of file pugxml.h.
03845 : 03846 _xmldoc(0), 03847 _growby(growby), 03848 _autdel(autdel), 03849 _buffer(0), 03850 _strpos(0), 03851 _optmsk(optmsk) 03852 #ifdef PUGOPT_MEMFIL 03853 , 03854 _mmfile(0), 03855 _mmfmap(0), 03856 _mmaddr(0), 03857 _mfsize(0), 03858 _addeos(false) 03859 #endif 03860 { 03861 }
| pug::xml_parser::xml_parser | ( | TCHAR * xmlstr, unsigned long optmsk = parse_default, bool autdel = true, long growby = parse_grow | ) | [inline] |
Definition at line 3872 of file pugxml.h.
03872 : 03873 _xmldoc(0), 03874 _growby(growby), 03875 _autdel(autdel), 03876 _buffer(0), 03877 _strpos(0), 03878 _optmsk(optmsk) 03879 #ifdef PUGOPT_MEMFIL 03880 , 03881 _mmfile(0), 03882 _mmfmap(0), 03883 _mmaddr(0), 03884 _mfsize(0), 03885 _addeos(false) 03886 #endif 03887 { 03888 parse( xmlstr, _optmsk ); //Parse it. 03889 }
| virtual pug::xml_parser::~xml_parser | ( | ) | [inline, virtual] |
Member Function Documentation
| pug::xml_parser::operator xml_node_struct * | ( | ) | [inline] |
| pug::xml_parser::operator xml_node | ( | ) | [inline] |
| xml_node pug::xml_parser::document | ( | ) | [inline] |
Definition at line 3907 of file pugxml.h.
Referenced by claraty::Parameter_Parser::_get_variable(), claraty::Parameter_Parser::document(), and claraty::Parameter_Parser::has_tag_name().
03907 { return xml_node(_xmldoc); } //Returns the root wrapped by an xml_node.
| void pug::xml_parser::create | ( | ) | [inline] |
Definition at line 3914 of file pugxml.h.
References claraty::clear(), pug::new_node(), and pug::node_document.
03915 { 03916 clear(); //Free any allocated memory. 03917 _xmldoc = new_node(node_document); //Allocate a new root. 03918 _xmldoc->parent = _xmldoc; //Point to self. 03919 }
Here is the call graph for this function:

| void pug::xml_parser::clear | ( | ) | [inline] |
| xml_node_struct* pug::xml_parser::attach | ( | xml_node_struct * | root | ) | [inline] |
Definition at line 3972 of file pugxml.h.
References pug::t_xml_node_struct::parent.
03973 { 03974 xml_node_struct* t = _xmldoc; //Save this root. 03975 _xmldoc = root; //Assign. 03976 _xmldoc->parent = _xmldoc; //Ensure we are the root. 03977 return t; //Return the old root if any. 03978 }
| xml_node_struct* pug::xml_parser::detach | ( | ) | [inline] |
Definition at line 3982 of file pugxml.h.
03983 { 03984 xml_node_struct* t = _xmldoc; //Save this root. 03985 _xmldoc = 0; //So we don't delete later on if autodelete set. 03986 return t; //Return the old root if any. 03987 }
| unsigned long pug::xml_parser::options | ( | ) | [inline] |
| unsigned long pug::xml_parser::options | ( | unsigned long | optmsk | ) | [inline] |
| unsigned long pug::xml_parser::growby | ( | ) | [inline] |
| unsigned long pug::xml_parser::growby | ( | long | grow | ) | [inline] |
| TCHAR* pug::xml_parser::strpos | ( | ) | [inline] |
| TCHAR* pug::xml_parser::parse | ( | TCHAR * s, unsigned long optmsk = parse_noset | ) | [inline] |
Definition at line 4036 of file pugxml.h.
References claraty::clear(), pug::new_node(), pug::node_document, and pug::parse_noset.
Referenced by parse().
04037 { 04038 if(!s) return s; 04039 clear(); //Free any allocated memory. 04040 _xmldoc = new_node(node_document); //Allocate a new root. 04041 _xmldoc->parent = _xmldoc; //Point to self. 04042 if(optmsk != parse_noset) _optmsk = optmsk; 04043 return parse( s, _xmldoc, _growby, _optmsk ); // Parse the input string. 04044 // return pug::parse( s, _xmldoc, _growby, _optmsk ); //Parse the input string. 04045 }
Here is the call graph for this function:

| bool pug::xml_parser::parse_file | ( | const TCHAR * path, unsigned long optmsk = parse_noset, unsigned long tempsize = 4096 | ) | [inline] |
Definition at line 4056 of file pugxml.h.
References claraty::clear(), pug::load_file(), pug::new_node(), pug::node_document, pug::parse_noset, and pug::parse_wnorm.
Referenced by claraty::Parameter_Parser::Parameter_Parser().
04057 { 04058 #ifdef PUGOPT_NONSEG 04059 assert((optmsk & parse_wnorm) == 0); // Normalization isn't implemented for non-segmented strings, as of 24 Mar 2003 04060 #endif 04061 04062 if(!path) return false; 04063 clear(); //clear any existing data. 04064 unsigned long bytes; 04065 if(optmsk != parse_noset) _optmsk = optmsk; 04066 if(load_file(path, &_buffer, &bytes, tempsize) && bytes > 0) { 04067 _xmldoc = pug::new_node(node_document); 04068 _xmldoc->parent = _xmldoc; //Point to self. 04069 // TCHAR* s = pug::parse(_buffer,_xmldoc,_growby,_optmsk); 04070 TCHAR* s = parse( _buffer, _xmldoc, _growby, _optmsk ); 04071 _strpos = s; 04072 return true; 04073 } 04074 return false; 04075 }
Here is the call graph for this function:

| TCHAR * pug::xml_parser::parse | ( | TCHAR * s, xml_node_struct * xmldoc, long growby, unsigned long optmsk = parse_default | ) | [inline] |
Definition at line 4209 of file pugxml.h.
References _T, _tcscmp, _tcsncmp, pug::append_attribute(), pug::append_node(), pug::chartype_close(), pug::chartype_dash(), pug::chartype_enter(), pug::chartype_equals(), pug::chartype_lbracket(), pug::chartype_leave(), pug::chartype_pi(), pug::chartype_quote(), pug::chartype_rbracket(), pug::chartype_space(), pug::chartype_special(), pug::chartype_symbol(), ENDSEG, pug::t_xml_node_struct::name, pug::t_xml_attribute_struct::name, pug::node_cdata, pug::node_comment, pug::node_doctype, pug::node_dtd_attlist, pug::node_dtd_element, pug::node_dtd_entity, pug::node_dtd_notation, pug::node_include, pug::node_pcdata, pug::node_pi, OPTSET, parse(), pug::parse_cdata, pug::parse_comments, pug::parse_doctype, pug::parse_dtd, pug::parse_dtd_only, pug::parse_pi, pug::parse_trim_attribute, pug::parse_trim_cdata, pug::parse_trim_comment, pug::parse_trim_doctype, pug::parse_trim_entity, pug::parse_trim_pcdata, pug::parse_wnorm, POPNODE, PUSHNODE, SCANFOR, SCANWHILE, SKIPWS, pug::strwnorm(), pug::strwtrim(), pug::t_xml_node_struct::type, pug::t_xml_attribute_struct::value, and pug::t_xml_node_struct::value.
04211 { 04212 if(!s || !xmldoc) return s; 04213 TCHAR ch = 0; //Current char, in cases where we must null-terminate before we test. 04214 xml_node_struct* cursor = xmldoc; //Tree node cursor. 04215 TCHAR* mark = s; //Marked string position for temporary look-ahead. 04216 while(*s!=0) 04217 { 04218 LOC_SEARCH: //Obliviously search for next element. 04219 SCANFOR(chartype_enter(*s)); //Find the next '<'. 04220 if(chartype_enter(*s)) 04221 { 04222 ++s; 04223 LOC_CLASSIFY: //What kind of element? 04224 if(chartype_pi(*s)) //'<?...' 04225 { 04226 ++s; 04227 if(chartype_symbol(*s) && OPTSET(parse_pi)) 04228 { 04229 mark = s; 04230 SCANFOR(chartype_pi(*s)); //Look for terminating '?'. 04231 #ifndef PUGOPT_NONSEG 04232 if(chartype_pi(*s)) *s = _T('/'); //Same semantics as for '<.../>', so fudge it. 04233 #endif 04234 s = mark; 04235 PUSHNODE(node_pi); //Append a new node on the tree. 04236 goto LOC_ELEMENT; //Go read the element name. 04237 } 04238 else //Bad PI or parse_pi not set. 04239 { 04240 SCANFOR(chartype_leave(*s)); //Look for '>'. 04241 ++s; 04242 mark = 0; 04243 continue; 04244 } 04245 } 04246 else if(chartype_special(*s)) //'<!...' 04247 { 04248 ++s; 04249 if(chartype_dash(*s)) //'<!-...' 04250 { 04251 ++s; 04252 if(OPTSET(parse_comments) && chartype_dash(*s)) //'<!--...' 04253 { 04254 ++s; 04255 PUSHNODE(node_comment); //Append a new node on the tree. 04256 cursor->value = s; //Save the offset. 04257 while(*s!=0 && *(s+1) && *(s+2) && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'. 04258 if(*s==0) return s; 04259 #ifdef PUGOPT_NONSEG 04260 SETLEN(); //NF 19 Jan 2003. 04261 #else 04262 *s = 0; //Zero-terminate this segment at the first terminating '-'. 04263 #endif 04264 if(OPTSET(parse_trim_comment)) //Trim whitespace. 
04265 { 04266 #ifdef PUGOPT_NONSEG 04267 strwtrim(&cursor->value,cursor->value_size); 04268 #else 04269 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04270 else strwtrim(&cursor->value); 04271 #endif 04272 } 04273 s += 2; //Step over the '\0-'. 04274 POPNODE(); //Pop since this is a standalone. 04275 goto LOC_LEAVE; //Look for any following PCDATA. 04276 } 04277 else 04278 { 04279 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !((chartype_dash(*s) && chartype_dash(*(s+1))) && chartype_leave(*(s+2)))) ++s; //Scan for terminating '-->'. 04280 if(*s==0) return s; 04281 s += 2; 04282 goto LOC_LEAVE; //Look for any following PCDATA. 04283 } 04284 } 04285 else if(chartype_lbracket(*s)) //'<![...' 04286 { 04287 ++s; 04288 if(*s==_T('I')) //'<![I...' 04289 { 04290 ++s; 04291 if(*s==_T('N')) //'<![IN...' 04292 { 04293 ++s; 04294 if(*s==_T('C')) //'<![INC...' 04295 { 04296 ++s; 04297 if(*s==_T('L')) //'<![INCL...' 04298 { 04299 ++s; 04300 if(*s==_T('U')) //'<![INCLU...' 04301 { 04302 ++s; 04303 if(*s==_T('D')) //'<![INCLUD...' 04304 { 04305 ++s; 04306 if(*s==_T('E')) //'<![INCLUDE...' 04307 { 04308 ++s; 04309 if(chartype_lbracket(*s)) //'<![INCLUDE[...' 04310 { 04311 ++s; 04312 if(OPTSET(node_cdata)) 04313 { 04314 PUSHNODE(node_include); //Append a new node on the tree. 04315 cursor->value = s; //Save the offset. 04316 while(!(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'. 04317 if(chartype_rbracket(*s)) 04318 { 04319 #ifdef PUGOPT_NONSEG 04320 SETLEN(); //NF 19 Jan 2003. 04321 #else 04322 *s = 0; //Zero-terminate this segment. 04323 #endif 04324 ++s; 04325 if(OPTSET(parse_trim_cdata)) //Trim whitespace. 04326 { 04327 #ifdef PUGOPT_NONSEG 04328 strwtrim(&cursor->value, cursor->value_size); 04329 #else 04330 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04331 else strwtrim(&cursor->value); 04332 #endif 04333 } 04334 } 04335 POPNODE(); //Pop since this is a standalone. 
04336 } 04337 else //Flagged for discard, but we still have to scan for the terminator. 04338 { 04339 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'. 04340 ++s; 04341 } 04342 ++s; //Step over the last ']'. 04343 goto LOC_LEAVE; //Look for any following PCDATA. 04344 } 04345 } 04346 } 04347 } 04348 } 04349 } 04350 } 04351 } 04352 else if(*s==_T('C')) //'<![C...' 04353 { 04354 ++s; 04355 if(*s==_T('D')) //'<![CD...' 04356 { 04357 ++s; 04358 if(*s==_T('A')) //'<![CDA...' 04359 { 04360 ++s; 04361 if(*s==_T('T')) //'<![CDAT...' 04362 { 04363 ++s; 04364 if(*s==_T('A')) //'<![CDATA...' 04365 { 04366 ++s; 04367 if(chartype_lbracket(*s)) //'<![CDATA[...' 04368 { 04369 ++s; 04370 if(OPTSET(parse_cdata)) 04371 { 04372 PUSHNODE(node_cdata); //Append a new node on the tree. 04373 cursor->value = s; //Save the offset. 04374 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'. 04375 if(*(s+2)==0) return s; //Very badly formed. 04376 if(chartype_rbracket(*s)) 04377 { 04378 #ifdef PUGOPT_NONSEG 04379 SETLEN(); //NF 19 Jan 2003. 04380 #else 04381 *s = 0; //Zero-terminate this segment. 04382 #endif 04383 ++s; 04384 if(OPTSET(parse_trim_cdata)) //Trim whitespace. 04385 { 04386 #ifdef PUGOPT_NONSEG 04387 strwtrim(&cursor->value,cursor->value_size); 04388 #else 04389 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04390 else strwtrim(&cursor->value); 04391 #endif 04392 } 04393 } 04394 POPNODE(); //Pop since this is a standalone. 04395 } 04396 else //Flagged for discard, but we still have to scan for the terminator. 04397 { 04398 while(*s!=0 && *(s+1)!=0 && *(s+2)!=0 && !(chartype_rbracket(*s) && chartype_rbracket(*(s+1)) && chartype_leave(*(s+2)))) ++s; //Scan for terminating ']]>'. 04399 ++s; 04400 } 04401 ++s; //Step over the last ']'. 
04402 goto LOC_LEAVE; //Look for any following PCDATA. 04403 } 04404 } 04405 } 04406 } 04407 } 04408 } 04409 continue; //Probably a corrupted CDATA section, so just eat it. 04410 } 04411 else if(*s==_T('D')) //'<!D...' 04412 { 04413 ++s; 04414 if(*s==_T('O')) //'<!DO...' 04415 { 04416 ++s; 04417 if(*s==_T('C')) //'<!DOC...' 04418 { 04419 ++s; 04420 if(*s==_T('T')) //'<!DOCT...' 04421 { 04422 ++s; 04423 if(*s==_T('Y')) //'<!DOCTY...' 04424 { 04425 ++s; 04426 if(*s==_T('P')) //'<!DOCTYP...' 04427 { 04428 ++s; 04429 if(*s==_T('E')) //'<!DOCTYPE...' 04430 { 04431 ++s; 04432 SKIPWS(); //Eat any whitespace. 04433 xml_attribute_struct* a = 0; 04434 if(OPTSET(parse_doctype)) 04435 { 04436 PUSHNODE(node_doctype); //Append a new node on the tree. 04437 a = append_attribute(cursor,3); //Store the DOCTYPE name. 04438 a->value = a->name = s; //Save the offset. 04439 // Why are value & name set to the same thing here and below. NF 25 Mar 2003 04440 // Also why not store this in node.name. See my CP post. NF 25 Mar 2003 04441 } 04442 SCANWHILE(chartype_symbol(*s)); //'<!DOCTYPE symbol...' 04443 #ifdef PUGOPT_NONSEG 04444 if(OPTSET(parse_doctype)) 04445 a->name_size = a->value_size = s - a->value; //Save the length. rem: Before ENDSEG() 04446 #endif 04447 ENDSEG(); //Save char in 'ch', terminate & step over. 04448 if(chartype_space(ch)) SKIPWS(); //Eat any whitespace. 04449 LOC_DOCTYPE_SYMBOL: 04450 if(chartype_symbol(*s)) 04451 { 04452 mark = s; 04453 SCANWHILE(chartype_symbol(*s)); //'...symbol SYSTEM...' 04454 if(OPTSET(parse_doctype)) 04455 { 04456 a = append_attribute(cursor,1); 04457 a->value = a->name = mark; 04458 #ifdef PUGOPT_NONSEG 04459 a->value_size = a->name_size = s - mark; //NF 19 Jan 2003. 
04460 #else 04461 *s = 0; 04462 #endif 04463 } 04464 ++s; 04465 SKIPWS(); 04466 } 04467 if(chartype_quote(*s)) //'...SYSTEM "..."' 04468 { 04469 LOC_DOCTYPE_QUOTE: 04470 ch = *s; 04471 ++s; 04472 mark = s; 04473 while(*s!=0 && *s != ch) ++s; 04474 if(*s!=0) 04475 { 04476 if(OPTSET(parse_doctype)) 04477 { 04478 a = append_attribute(cursor,1); 04479 a->value = mark; 04480 #ifdef PUGOPT_NONSEG 04481 a->value_size = s - mark; //NF 19 Jan 2003. 04482 #else 04483 *s = 0; 04484 #endif 04485 } 04486 ++s; 04487 SKIPWS(); //Eat whitespace. 04488 if(chartype_quote(*s)) goto LOC_DOCTYPE_QUOTE; //Another quoted section to store. 04489 else if(chartype_symbol(*s)) goto LOC_DOCTYPE_SYMBOL; //Not wellformed, but just parse it. 04490 } 04491 } 04492 if(chartype_lbracket(*s)) //'...[...' 04493 { 04494 ++s; //Step over the bracket. 04495 if(OPTSET(parse_doctype)) cursor->value = s; //Store the offset. 04496 unsigned int bd = 1; //Bracket depth counter. 04497 while(*s!=0) //Loop till we're out of all brackets. 04498 { 04499 if(chartype_rbracket(*s)) --bd; 04500 else if(chartype_lbracket(*s)) ++bd; 04501 if(bd == 0) break; 04502 ++s; 04503 } 04504 //Note: 's' now points to end of DTD, i.e.: ']'. 04505 if(OPTSET(parse_doctype)) 04506 { 04507 //Note: If we aren't parsing the DTD ('!parse_dtd', etc.) then it is stored in the DOM as one whole chunk. 04508 #ifdef PUGOPT_NONSEG 04509 SETLEN(); //NF 19 Jan 2003 04510 #else 04511 *s = 0; //Zero-terminate. 04512 #endif 04513 if(OPTSET(parse_dtd)||OPTSET(parse_dtd_only)) 04514 { 04515 if(OPTSET(parse_dtd)) 04516 { 04517 #ifdef PUGOPT_NONSEG 04518 TCHAR svch = *s; 04519 try 04520 { 04521 *s = 0; //Zero-terminate. 04522 parse( cursor->value, cursor, growby, optmsk ); //Parse it. 04523 // cursor->value will contain the entire DTD. 04524 // probably better to set it to empty. See cmt below. NF 25 Mar 2003 04525 } 04526 catch(...){ assert(false); } 04527 *s = svch; 04528 #else 04529 // this will change cursor->value. 
we'll finish up with 04530 // a 0 after the first DTD item. I can't see any point 04531 // in having cursor->value set after parse(). See cmt above & my CP post. NF 25 Mar 2003 04532 parse( cursor->value, cursor, growby, optmsk ); //Parse it. 04533 #endif 04534 } 04535 if(OPTSET(parse_dtd_only)) return (s+1); //Flagged to parse DTD only, so leave here. 04536 } 04537 else if(OPTSET(parse_trim_doctype)) //Trim whitespace. 04538 { 04539 #ifdef PUGOPT_NONSEG 04540 strwtrim(&cursor->value, cursor->value_size); 04541 #else 04542 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04543 else strwtrim(&cursor->value); 04544 #endif 04545 } 04546 ++s; //Step over the zero. 04547 POPNODE(); //Pop since this is a standalone. 04548 } 04549 SCANFOR(chartype_leave(*s)); 04550 continue; 04551 } 04552 //Fall-through; make sure we pop. 04553 POPNODE(); //Pop since this is a standalone. 04554 continue; 04555 } 04556 } 04557 } 04558 } 04559 } 04560 } 04561 } 04562 else if(chartype_symbol(*s)) //An inline DTD tag. 04563 { 04564 mark = s; 04565 SCANWHILE(chartype_symbol(*s)); 04566 ENDSEG(); //Save char in 'ch', terminate & step over. 04567 xml_node_type e = node_dtd_entity; 04568 #ifdef PUGOPT_NONSEG 04569 const unsigned int dtdilen = (s - 1) - mark; 04570 if(_tcsncmp(mark,_T("ATTLIST"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_attlist; 04571 else if(_tcsncmp(mark,_T("ELEMENT"),max((7*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_element; 04572 else if(_tcsncmp(mark,_T("NOTATION"),max((8*sizeof(TCHAR)),dtdilen))==0) e = node_dtd_notation; 04573 #else 04574 if(_tcscmp(mark,_T("ATTLIST"))==0) e = node_dtd_attlist; 04575 else if(_tcscmp(mark,_T("ELEMENT"))==0) e = node_dtd_element; 04576 else if(_tcscmp(mark,_T("NOTATION"))==0) e = node_dtd_notation; 04577 #endif 04578 PUSHNODE(e); //Append a new node on the tree. 04579 if(*s!=0 && chartype_space(ch)) 04580 { 04581 SKIPWS(); //Eat whitespace. 
04582 if(chartype_symbol(*s) || *s==_T('%')) 04583 { 04584 mark = s; 04585 if(*s==_T('%')) //Could be '<!ENTITY % name' -or- '<!ENTITY %name' 04586 { 04587 #ifdef PUGOPT_NONSEG 04588 //Note: For memory-mapped file support we need to treat 's' as read-only so we can't do '*(s-1) = _T('%');' below. 04589 cursor->name = mark; //Sort out extraneous whitespace when we retrieve it. TODO: Whitespace cleanup. 04590 #endif 04591 ++s; 04592 if(chartype_space(*s)) 04593 { 04594 SKIPWS(); //Eat whitespace. 04595 #ifndef PUGOPT_NONSEG 04596 *(s-1) = _T('%'); 04597 cursor->name = (s-1); 04598 #endif 04599 } 04600 #ifndef PUGOPT_NONSEG 04601 else cursor->name = mark; 04602 #endif 04603 } 04604 else cursor->name = s; 04605 SCANWHILE(chartype_symbol(*s)); 04606 #ifdef PUGOPT_NONSEG 04607 cursor->name_size = s - cursor->name; 04608 #endif 04609 ENDSEG(); //Save char in 'ch', terminate & step over. 04610 if(chartype_space(ch)) 04611 { 04612 SKIPWS(); //Eat whitespace. 04613 if(e == node_dtd_entity) //Special case; may have multiple quoted sections w/anything inside. 04614 { 04615 cursor->value = s; //Just store everything here. 04616 bool qq = false; //Quote in/out flag. 04617 while(*s != 0) //Loop till we find the right sequence. 04618 { 04619 if(!qq && chartype_quote(*s)){ ch = *s; qq = true; } 04620 else if(qq && *s == ch) qq = false; 04621 else if(!qq && chartype_leave(*s)) //Not in quoted reqion and '>' hit. 04622 { 04623 #ifdef PUGOPT_NONSEG 04624 SETLEN(); //NF 19 Jan 2003. 
04625 #else 04626 *s = 0; 04627 #endif 04628 ++s; 04629 if(OPTSET(parse_trim_entity)) 04630 { 04631 #ifdef PUGOPT_NONSEG 04632 strwtrim(&cursor->value,cursor->value_size); 04633 #else 04634 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04635 else strwtrim(&cursor->value); 04636 #endif 04637 } 04638 POPNODE(); 04639 goto LOC_SEARCH; 04640 } 04641 ++s; 04642 } 04643 if(OPTSET(parse_trim_entity)) 04644 { 04645 #ifdef PUGOPT_NONSEG 04646 strwtrim(&cursor->value, cursor->value_size); 04647 #else 04648 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04649 else strwtrim(&cursor->value); 04650 #endif 04651 } 04652 } 04653 else 04654 { 04655 cursor->value = s; 04656 SCANFOR(chartype_leave(*s)); //Just look for '>'. 04657 #ifdef PUGOPT_NONSEG 04658 SETLEN(); //NF 19 Jan 2003. 04659 #else 04660 *s = 0; 04661 #endif 04662 ++s; 04663 if(OPTSET(parse_trim_entity)) 04664 { 04665 #ifdef PUGOPT_NONSEG 04666 strwtrim(&cursor->value, cursor->value_size); 04667 #else 04668 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04669 else strwtrim(&cursor->value); 04670 #endif 04671 } 04672 POPNODE(); 04673 goto LOC_SEARCH; 04674 } 04675 } 04676 } 04677 } 04678 POPNODE(); 04679 } 04680 } 04681 else if(chartype_symbol(*s)) //'<#...' 04682 { 04683 cursor = append_node(cursor,growby); //Append a new node to the tree. 04684 LOC_ELEMENT: //Scan for & store element name. 04685 cursor->name = s; 04686 SCANWHILE(chartype_symbol(*s)); //Scan for a terminator. 04687 #ifdef PUGOPT_NONSEG 04688 cursor->name_size = s - cursor->name; //Note: Before ENDSEG(). 04689 #endif 04690 ENDSEG(); //Save char in 'ch', terminate & step over. 04691 if 04692 ( 04693 *s!=0 && 04694 ( 04695 chartype_close(ch) //'</...' 04696 #ifdef PUGOPT_NONSEG 04697 //|| 04698 //chartype_pi(ch) //Treat '?>' as '/>' NF 19 Jan 2003 04699 #endif 04700 ) 04701 ) 04702 { 04703 SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over the tag name. 04704 POPNODE(); //Pop. 
04705 continue; 04706 } 04707 else if(*s!=0 && !chartype_space(ch)) goto LOC_PCDATA; //No attributes, so scan for PCDATA. 04708 else if(*s!=0 && chartype_space(ch)) 04709 { 04710 SKIPWS(); //Eat any whitespace. 04711 LOC_ATTRIBUTE: 04712 if(chartype_symbol(*s)) //<... #... 04713 { 04714 xml_attribute_struct* a = append_attribute(cursor,growby); //Make space for this attribute. 04715 a->name = s; //Save the offset. 04716 SCANWHILE(chartype_symbol(*s)); //Scan for a terminator. 04717 #ifdef PUGOPT_NONSEG 04718 ENDSEGNAM(a); 04719 #else 04720 ENDSEG(); //Save char in 'ch', terminate & step over. 04721 #endif 04722 if(*s!=0 && chartype_space(ch)) SKIPWS(); //Eat any whitespace. 04723 if(*s!=0 && (chartype_equals(ch) || chartype_equals(*s))) //'<... #=...' 04724 { 04725 if(chartype_equals(*s)) ++s; 04726 SKIPWS(); //Eat any whitespace. 04727 if(chartype_quote(*s)) //'<... #="...' 04728 { 04729 ch = *s; //Save quote char to avoid breaking on "''" -or- '""'. 04730 ++s; //Step over the quote. 04731 a->value = s; //Save the offset. 04732 SCANFOR(*s == ch); //Scan for the terminating quote, or '>'. 04733 #ifdef PUGOPT_NONSEG 04734 ENDSEGATT(a); 04735 #else 04736 ENDSEG(); //Save char in 'ch', terminate & step over. 04737 #endif 04738 if(OPTSET(parse_trim_attribute)) //Trim whitespace. 04739 { 04740 #ifdef PUGOPT_NONSEG 04741 strwtrim(&a->value,a->value_size); 04742 #else 04743 if(OPTSET(parse_wnorm)) strwnorm(&a->value); 04744 else strwtrim(&a->value); 04745 #endif 04746 } 04747 04748 #ifdef PUGOPT_NODE_FLAGS // NF 29 May 2003 04749 // If we have an "expanded=true" attribute set expand(). 
04750 if ( _tcsncmp( a->name, _T("expanded"), max( (8*sizeof(TCHAR)), a->name_size ) ) == 0 04751 && _tcsncmp( a->value, _T("true"), max( 4*sizeof(TCHAR), a->value_size ) ) == 0 04752 ) 04753 { 04754 cursor->expand( true ); 04755 _expanded_nodes.push_back( cursor ); 04756 } 04757 #endif 04758 04759 if(chartype_leave(*s)){ ++s; goto LOC_PCDATA; } 04760 else if(chartype_close(*s)) 04761 { 04762 ++s; 04763 POPNODE(); 04764 SKIPWS(); //Eat any whitespace. 04765 if(chartype_leave(*s)) ++s; 04766 goto LOC_PCDATA; 04767 } 04768 if(chartype_space(*s)) //This may indicate a following attribute. 04769 { 04770 SKIPWS(); //Eat any whitespace. 04771 goto LOC_ATTRIBUTE; //Go scan for additional attributes. 04772 } 04773 } 04774 } 04775 if(chartype_symbol(*s)) goto LOC_ATTRIBUTE; 04776 else if(*s!=0 && cursor->type == node_pi) 04777 { 04778 #ifdef PUGOPT_NONSEG 04779 SCANFOR(chartype_pi(*s)); //compliments change where we don't fudge to '/>' when we find the PI. NF 20 Jan 2003 04780 SKIPWS(); //Eat any whitespace. 04781 if(chartype_pi(*s)) ++s; 04782 #else 04783 SCANFOR(chartype_close(*s)); 04784 SKIPWS(); //Eat any whitespace. 04785 if(chartype_close(*s)) ++s; 04786 #endif 04787 SKIPWS(); //Eat any whitespace. 04788 if(chartype_leave(*s)) ++s; 04789 POPNODE(); 04790 goto LOC_PCDATA; 04791 } 04792 } 04793 } 04794 LOC_LEAVE: 04795 if(chartype_leave(*s)) //'...>' 04796 { 04797 ++s; //Step over the '>'. 04798 LOC_PCDATA: //'>...<' 04799 mark = s; //Save this offset while searching for a terminator. 04800 SKIPWS(); //Eat whitespace if no genuine PCDATA here. 04801 if(chartype_enter(*s)) //We hit a '<...', with only whitespace, so don't bother storing anything. 04802 { 04803 if(chartype_close(*(s+1))) //'</...' 04804 { 04805 SCANFOR(chartype_leave(*s)); //Scan for '>', stepping over any end-tag name. 04806 POPNODE(); //Pop. 04807 continue; //Continue scanning. 04808 } 04809 else goto LOC_SEARCH; //Expect a new element enter, so go scan for it. 
04810 } 04811 s = mark; //We hit something other than whitespace; restore the original offset. 04812 PUSHNODE(node_pcdata); //Append a new node on the tree. 04813 cursor->value = s; //Save the offset. 04814 SCANFOR(chartype_enter(*s)); //'...<' 04815 #ifdef PUGOPT_NONSEG 04816 ENDSEGDAT(); 04817 #else 04818 ENDSEG(); //Save char in 'ch', terminate & step over. 04819 #endif 04820 if(OPTSET(parse_trim_pcdata)) //Trim whitespace. 04821 { 04822 #ifdef PUGOPT_NONSEG 04823 strwtrim(&cursor->value,cursor->value_size); 04824 #else 04825 if(OPTSET(parse_wnorm)) strwnorm(&cursor->value); 04826 else strwtrim(&cursor->value); 04827 #endif 04828 } 04829 POPNODE(); //Pop since this is a standalone. 04830 if(chartype_enter(ch)) //Did we hit a '<...'? 04831 { 04832 if(chartype_close(*s)) //'</...' 04833 { 04834 SCANFOR(chartype_leave(*s)); //'...>' 04835 POPNODE(); //Pop. 04836 goto LOC_LEAVE; 04837 } 04838 else if(chartype_special(*s)) goto LOC_CLASSIFY; //We hit a '<!...'. We must test this here if we want comments intermixed w/PCDATA. 04839 else if(*s) goto LOC_CLASSIFY; 04840 else return s; 04841 } 04842 } 04843 //Fall-through A. 04844 else if(chartype_close(*s)) //'.../' 04845 { 04846 ++s; 04847 if(chartype_leave(*s)) //'.../>' 04848 { 04849 POPNODE(); //Pop. 04850 ++s; 04851 continue; 04852 } 04853 } 04854 } 04855 //Fall-through B. 04856 else if(chartype_close(*s)) //'.../' 04857 { 04858 SCANFOR(chartype_leave(*s)); //'.../>' 04859 POPNODE(); //Pop. 04860 continue; 04861 } 04862 } 04863 } 04864 return s; 04865 } // xml_parser::parse()
Here is the call graph for this function:

Member Data Documentation
xml_node_struct* pug::xml_parser::_xmldoc [protected] |
long pug::xml_parser::_growby [protected] |
bool pug::xml_parser::_autdel [protected] |
TCHAR* pug::xml_parser::_buffer [protected] |
TCHAR* pug::xml_parser::_strpos [protected] |
unsigned long pug::xml_parser::_optmsk [protected] |
The documentation for this class was generated from the following file:
- pugxml.h
