<?php

/**
 * Lexer that tokenizes HTML by parsing it with the HTML5 class defined
 * later in this file and then walking the DOM document that parser saves.
 */
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {

    /**
     * Tokenizes an HTML string.
     *
     * The input is passed through normalize() and wrapHTML() (both inherited)
     * before parsing. Tokens are produced from the first <div> inside the
     * first <body> inside the first <html> element of the parsed document.
     *
     * If the parser throws a DOMException, the exception is registered in
     * the context under 'PH5PError' and the ORIGINAL, unprocessed $html is
     * tokenized with HTMLPurifier_Lexer_DirectLex instead.
     *
     * @param string $html    Raw HTML to tokenize
     * @param mixed  $config  Passed through to normalize(), wrapHTML() and the fallback lexer
     * @param mixed  $context Passed through likewise; receives 'PH5PError' on fallback
     * @return array Tokens collected by tokenizeDOM(), or the fallback lexer's result
     */
    public function tokenizeHTML($html, $config, $context) {
        $prepared = $this->normalize($html, $config, $context);
        $prepared = $this->wrapHTML($prepared, $config, $context);

        try {
            $parser   = new HTML5($prepared);
            $document = $parser->save();
        } catch (DOMException $error) {
            // The HTML5 parser gave up: fall back to DirectLex on the
            // original markup and record the error so callers can detect it.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $error);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Drill down html > body > div; tokenization starts at that <div>.
        $html_node = $document->getElementsByTagName('html')->item(0);
        $body_node = $html_node->getElementsByTagName('body')->item(0);
        $wrapper   = $body_node->getElementsByTagName('div')->item(0);

        $tokens = array();
        $this->tokenizeDOM($wrapper, $tokens);
        return $tokens;
    }

}

/*

Copyright 2007 Jeroen van der Meer <http://jero.net/>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00056 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 00057 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 00058 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 00059 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00060 00061 */ 00062 00063 class HTML5 { 00064 private $data; 00065 private $char; 00066 private $EOF; 00067 private $state; 00068 private $tree; 00069 private $token; 00070 private $content_model; 00071 private $escape = false; 00072 private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', 00073 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', 00074 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', 00075 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', 00076 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', 00077 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', 00078 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', 00079 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', 00080 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', 00081 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', 00082 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', 00083 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', 00084 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', 00085 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', 00086 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', 00087 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', 00088 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', 00089 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', 00090 
'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', 00091 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', 00092 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', 00093 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', 00094 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', 00095 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', 00096 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', 00097 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', 00098 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', 00099 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', 00100 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', 00101 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', 00102 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', 00103 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', 00104 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', 00105 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', 00106 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', 00107 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', 00108 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', 00109 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', 00110 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', 00111 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', 00112 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', 00113 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); 00114 00115 const PCDATA = 0; 00116 const RCDATA = 1; 00117 const 
CDATA = 2; 00118 const PLAINTEXT = 3; 00119 00120 const DOCTYPE = 0; 00121 const STARTTAG = 1; 00122 const ENDTAG = 2; 00123 const COMMENT = 3; 00124 const CHARACTR = 4; 00125 const EOF = 5; 00126 00127 public function __construct($data) { 00128 $data = str_replace("\r\n", "\n", $data); 00129 $data = str_replace("\r", null, $data); 00130 00131 $this->data = $data; 00132 $this->char = -1; 00133 $this->EOF = strlen($data); 00134 $this->tree = new HTML5TreeConstructer; 00135 $this->content_model = self::PCDATA; 00136 00137 $this->state = 'data'; 00138 00139 while($this->state !== null) { 00140 $this->{$this->state.'State'}(); 00141 } 00142 } 00143 00144 public function save() { 00145 return $this->tree->save(); 00146 } 00147 00148 private function char() { 00149 return ($this->char < $this->EOF) 00150 ? $this->data[$this->char] 00151 : false; 00152 } 00153 00154 private function character($s, $l = 0) { 00155 if($s + $l < $this->EOF) { 00156 if($l === 0) { 00157 return $this->data[$s]; 00158 } else { 00159 return substr($this->data, $s, $l); 00160 } 00161 } 00162 } 00163 00164 private function characters($char_class, $start) { 00165 return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); 00166 } 00167 00168 private function dataState() { 00169 // Consume the next input character 00170 $this->char++; 00171 $char = $this->char(); 00172 00173 if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { 00174 /* U+0026 AMPERSAND (&) 00175 When the content model flag is set to one of the PCDATA or RCDATA 00176 states: switch to the entity data state. Otherwise: treat it as per 00177 the "anything else" entry below. 
*/ 00178 $this->state = 'entityData'; 00179 00180 } elseif($char === '-') { 00181 /* If the content model flag is set to either the RCDATA state or 00182 the CDATA state, and the escape flag is false, and there are at 00183 least three characters before this one in the input stream, and the 00184 last four characters in the input stream, including this one, are 00185 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, 00186 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ 00187 if(($this->content_model === self::RCDATA || $this->content_model === 00188 self::CDATA) && $this->escape === false && 00189 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { 00190 $this->escape = true; 00191 } 00192 00193 /* In any case, emit the input character as a character token. Stay 00194 in the data state. */ 00195 $this->emitToken(array( 00196 'type' => self::CHARACTR, 00197 'data' => $char 00198 )); 00199 00200 /* U+003C LESS-THAN SIGN (<) */ 00201 } elseif($char === '<' && ($this->content_model === self::PCDATA || 00202 (($this->content_model === self::RCDATA || 00203 $this->content_model === self::CDATA) && $this->escape === false))) { 00204 /* When the content model flag is set to the PCDATA state: switch 00205 to the tag open state. 00206 00207 When the content model flag is set to either the RCDATA state or 00208 the CDATA state and the escape flag is false: switch to the tag 00209 open state. 00210 00211 Otherwise: treat it as per the "anything else" entry below. */ 00212 $this->state = 'tagOpen'; 00213 00214 /* U+003E GREATER-THAN SIGN (>) */ 00215 } elseif($char === '>') { 00216 /* If the content model flag is set to either the RCDATA state or 00217 the CDATA state, and the escape flag is true, and the last three 00218 characters in the input stream including this one are U+002D 00219 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), 00220 set the escape flag to false. 
*/ 00221 if(($this->content_model === self::RCDATA || 00222 $this->content_model === self::CDATA) && $this->escape === true && 00223 $this->character($this->char, 3) === '-->') { 00224 $this->escape = false; 00225 } 00226 00227 /* In any case, emit the input character as a character token. 00228 Stay in the data state. */ 00229 $this->emitToken(array( 00230 'type' => self::CHARACTR, 00231 'data' => $char 00232 )); 00233 00234 } elseif($this->char === $this->EOF) { 00235 /* EOF 00236 Emit an end-of-file token. */ 00237 $this->EOF(); 00238 00239 } elseif($this->content_model === self::PLAINTEXT) { 00240 /* When the content model flag is set to the PLAINTEXT state 00241 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of 00242 the text and emit it as a character token. */ 00243 $this->emitToken(array( 00244 'type' => self::CHARACTR, 00245 'data' => substr($this->data, $this->char) 00246 )); 00247 00248 $this->EOF(); 00249 00250 } else { 00251 /* Anything else 00252 THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that 00253 otherwise would also be treated as a character token and emit it 00254 as a single character token. Stay in the data state. */ 00255 $len = strcspn($this->data, '<&', $this->char); 00256 $char = substr($this->data, $this->char, $len); 00257 $this->char += $len - 1; 00258 00259 $this->emitToken(array( 00260 'type' => self::CHARACTR, 00261 'data' => $char 00262 )); 00263 00264 $this->state = 'data'; 00265 } 00266 } 00267 00268 private function entityDataState() { 00269 // Attempt to consume an entity. 00270 $entity = $this->entity(); 00271 00272 // If nothing is returned, emit a U+0026 AMPERSAND character token. 00273 // Otherwise, emit the character token that was returned. 00274 $char = (!$entity) ? '&' : $entity; 00275 $this->emitToken(array( 00276 'type' => self::CHARACTR, 00277 'data' => $char 00278 )); 00279 00280 // Finally, switch to the data state. 
00281 $this->state = 'data'; 00282 } 00283 00284 private function tagOpenState() { 00285 switch($this->content_model) { 00286 case self::RCDATA: 00287 case self::CDATA: 00288 /* If the next input character is a U+002F SOLIDUS (/) character, 00289 consume it and switch to the close tag open state. If the next 00290 input character is not a U+002F SOLIDUS (/) character, emit a 00291 U+003C LESS-THAN SIGN character token and switch to the data 00292 state to process the next input character. */ 00293 if($this->character($this->char + 1) === '/') { 00294 $this->char++; 00295 $this->state = 'closeTagOpen'; 00296 00297 } else { 00298 $this->emitToken(array( 00299 'type' => self::CHARACTR, 00300 'data' => '<' 00301 )); 00302 00303 $this->state = 'data'; 00304 } 00305 break; 00306 00307 case self::PCDATA: 00308 // If the content model flag is set to the PCDATA state 00309 // Consume the next input character: 00310 $this->char++; 00311 $char = $this->char(); 00312 00313 if($char === '!') { 00314 /* U+0021 EXCLAMATION MARK (!) 00315 Switch to the markup declaration open state. */ 00316 $this->state = 'markupDeclarationOpen'; 00317 00318 } elseif($char === '/') { 00319 /* U+002F SOLIDUS (/) 00320 Switch to the close tag open state. */ 00321 $this->state = 'closeTagOpen'; 00322 00323 } elseif(preg_match('/^[A-Za-z]$/', $char)) { 00324 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z 00325 Create a new start tag token, set its tag name to the lowercase 00326 version of the input character (add 0x0020 to the character's code 00327 point), then switch to the tag name state. (Don't emit the token 00328 yet; further details will be filled in before it is emitted.) */ 00329 $this->token = array( 00330 'name' => strtolower($char), 00331 'type' => self::STARTTAG, 00332 'attr' => array() 00333 ); 00334 00335 $this->state = 'tagName'; 00336 00337 } elseif($char === '>') { 00338 /* U+003E GREATER-THAN SIGN (>) 00339 Parse error. 
Emit a U+003C LESS-THAN SIGN character token and a 00340 U+003E GREATER-THAN SIGN character token. Switch to the data state. */ 00341 $this->emitToken(array( 00342 'type' => self::CHARACTR, 00343 'data' => '<>' 00344 )); 00345 00346 $this->state = 'data'; 00347 00348 } elseif($char === '?') { 00349 /* U+003F QUESTION MARK (?) 00350 Parse error. Switch to the bogus comment state. */ 00351 $this->state = 'bogusComment'; 00352 00353 } else { 00354 /* Anything else 00355 Parse error. Emit a U+003C LESS-THAN SIGN character token and 00356 reconsume the current input character in the data state. */ 00357 $this->emitToken(array( 00358 'type' => self::CHARACTR, 00359 'data' => '<' 00360 )); 00361 00362 $this->char--; 00363 $this->state = 'data'; 00364 } 00365 break; 00366 } 00367 } 00368 00369 private function closeTagOpenState() { 00370 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); 00371 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; 00372 00373 if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && 00374 (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', 00375 $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { 00376 /* If the content model flag is set to the RCDATA or CDATA states then 00377 examine the next few characters. If they do not match the tag name of 00378 the last start tag token emitted (case insensitively), or if they do but 00379 they are not immediately followed by one of the following characters: 00380 * U+0009 CHARACTER TABULATION 00381 * U+000A LINE FEED (LF) 00382 * U+000B LINE TABULATION 00383 * U+000C FORM FEED (FF) 00384 * U+0020 SPACE 00385 * U+003E GREATER-THAN SIGN (>) 00386 * U+002F SOLIDUS (/) 00387 * EOF 00388 ...then there is a parse error. 
Emit a U+003C LESS-THAN SIGN character 00389 token, a U+002F SOLIDUS character token, and switch to the data state 00390 to process the next input character. */ 00391 $this->emitToken(array( 00392 'type' => self::CHARACTR, 00393 'data' => '</' 00394 )); 00395 00396 $this->state = 'data'; 00397 00398 } else { 00399 /* Otherwise, if the content model flag is set to the PCDATA state, 00400 or if the next few characters do match that tag name, consume the 00401 next input character: */ 00402 $this->char++; 00403 $char = $this->char(); 00404 00405 if(preg_match('/^[A-Za-z]$/', $char)) { 00406 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z 00407 Create a new end tag token, set its tag name to the lowercase version 00408 of the input character (add 0x0020 to the character's code point), then 00409 switch to the tag name state. (Don't emit the token yet; further details 00410 will be filled in before it is emitted.) */ 00411 $this->token = array( 00412 'name' => strtolower($char), 00413 'type' => self::ENDTAG 00414 ); 00415 00416 $this->state = 'tagName'; 00417 00418 } elseif($char === '>') { 00419 /* U+003E GREATER-THAN SIGN (>) 00420 Parse error. Switch to the data state. */ 00421 $this->state = 'data'; 00422 00423 } elseif($this->char === $this->EOF) { 00424 /* EOF 00425 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F 00426 SOLIDUS character token. Reconsume the EOF character in the data state. */ 00427 $this->emitToken(array( 00428 'type' => self::CHARACTR, 00429 'data' => '</' 00430 )); 00431 00432 $this->char--; 00433 $this->state = 'data'; 00434 00435 } else { 00436 /* Parse error. Switch to the bogus comment state. 
*/ 00437 $this->state = 'bogusComment'; 00438 } 00439 } 00440 } 00441 00442 private function tagNameState() { 00443 // Consume the next input character: 00444 $this->char++; 00445 $char = $this->character($this->char); 00446 00447 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00448 /* U+0009 CHARACTER TABULATION 00449 U+000A LINE FEED (LF) 00450 U+000B LINE TABULATION 00451 U+000C FORM FEED (FF) 00452 U+0020 SPACE 00453 Switch to the before attribute name state. */ 00454 $this->state = 'beforeAttributeName'; 00455 00456 } elseif($char === '>') { 00457 /* U+003E GREATER-THAN SIGN (>) 00458 Emit the current tag token. Switch to the data state. */ 00459 $this->emitToken($this->token); 00460 $this->state = 'data'; 00461 00462 } elseif($this->char === $this->EOF) { 00463 /* EOF 00464 Parse error. Emit the current tag token. Reconsume the EOF 00465 character in the data state. */ 00466 $this->emitToken($this->token); 00467 00468 $this->char--; 00469 $this->state = 'data'; 00470 00471 } elseif($char === '/') { 00472 /* U+002F SOLIDUS (/) 00473 Parse error unless this is a permitted slash. Switch to the before 00474 attribute name state. */ 00475 $this->state = 'beforeAttributeName'; 00476 00477 } else { 00478 /* Anything else 00479 Append the current input character to the current tag token's tag name. 00480 Stay in the tag name state. */ 00481 $this->token['name'] .= strtolower($char); 00482 $this->state = 'tagName'; 00483 } 00484 } 00485 00486 private function beforeAttributeNameState() { 00487 // Consume the next input character: 00488 $this->char++; 00489 $char = $this->character($this->char); 00490 00491 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00492 /* U+0009 CHARACTER TABULATION 00493 U+000A LINE FEED (LF) 00494 U+000B LINE TABULATION 00495 U+000C FORM FEED (FF) 00496 U+0020 SPACE 00497 Stay in the before attribute name state. 
*/ 00498 $this->state = 'beforeAttributeName'; 00499 00500 } elseif($char === '>') { 00501 /* U+003E GREATER-THAN SIGN (>) 00502 Emit the current tag token. Switch to the data state. */ 00503 $this->emitToken($this->token); 00504 $this->state = 'data'; 00505 00506 } elseif($char === '/') { 00507 /* U+002F SOLIDUS (/) 00508 Parse error unless this is a permitted slash. Stay in the before 00509 attribute name state. */ 00510 $this->state = 'beforeAttributeName'; 00511 00512 } elseif($this->char === $this->EOF) { 00513 /* EOF 00514 Parse error. Emit the current tag token. Reconsume the EOF 00515 character in the data state. */ 00516 $this->emitToken($this->token); 00517 00518 $this->char--; 00519 $this->state = 'data'; 00520 00521 } else { 00522 /* Anything else 00523 Start a new attribute in the current tag token. Set that attribute's 00524 name to the current input character, and its value to the empty string. 00525 Switch to the attribute name state. */ 00526 $this->token['attr'][] = array( 00527 'name' => strtolower($char), 00528 'value' => null 00529 ); 00530 00531 $this->state = 'attributeName'; 00532 } 00533 } 00534 00535 private function attributeNameState() { 00536 // Consume the next input character: 00537 $this->char++; 00538 $char = $this->character($this->char); 00539 00540 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00541 /* U+0009 CHARACTER TABULATION 00542 U+000A LINE FEED (LF) 00543 U+000B LINE TABULATION 00544 U+000C FORM FEED (FF) 00545 U+0020 SPACE 00546 Stay in the before attribute name state. */ 00547 $this->state = 'afterAttributeName'; 00548 00549 } elseif($char === '=') { 00550 /* U+003D EQUALS SIGN (=) 00551 Switch to the before attribute value state. */ 00552 $this->state = 'beforeAttributeValue'; 00553 00554 } elseif($char === '>') { 00555 /* U+003E GREATER-THAN SIGN (>) 00556 Emit the current tag token. Switch to the data state. 
*/ 00557 $this->emitToken($this->token); 00558 $this->state = 'data'; 00559 00560 } elseif($char === '/' && $this->character($this->char + 1) !== '>') { 00561 /* U+002F SOLIDUS (/) 00562 Parse error unless this is a permitted slash. Switch to the before 00563 attribute name state. */ 00564 $this->state = 'beforeAttributeName'; 00565 00566 } elseif($this->char === $this->EOF) { 00567 /* EOF 00568 Parse error. Emit the current tag token. Reconsume the EOF 00569 character in the data state. */ 00570 $this->emitToken($this->token); 00571 00572 $this->char--; 00573 $this->state = 'data'; 00574 00575 } else { 00576 /* Anything else 00577 Append the current input character to the current attribute's name. 00578 Stay in the attribute name state. */ 00579 $last = count($this->token['attr']) - 1; 00580 $this->token['attr'][$last]['name'] .= strtolower($char); 00581 00582 $this->state = 'attributeName'; 00583 } 00584 } 00585 00586 private function afterAttributeNameState() { 00587 // Consume the next input character: 00588 $this->char++; 00589 $char = $this->character($this->char); 00590 00591 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00592 /* U+0009 CHARACTER TABULATION 00593 U+000A LINE FEED (LF) 00594 U+000B LINE TABULATION 00595 U+000C FORM FEED (FF) 00596 U+0020 SPACE 00597 Stay in the after attribute name state. */ 00598 $this->state = 'afterAttributeName'; 00599 00600 } elseif($char === '=') { 00601 /* U+003D EQUALS SIGN (=) 00602 Switch to the before attribute value state. */ 00603 $this->state = 'beforeAttributeValue'; 00604 00605 } elseif($char === '>') { 00606 /* U+003E GREATER-THAN SIGN (>) 00607 Emit the current tag token. Switch to the data state. */ 00608 $this->emitToken($this->token); 00609 $this->state = 'data'; 00610 00611 } elseif($char === '/' && $this->character($this->char + 1) !== '>') { 00612 /* U+002F SOLIDUS (/) 00613 Parse error unless this is a permitted slash. Switch to the 00614 before attribute name state. 
*/ 00615 $this->state = 'beforeAttributeName'; 00616 00617 } elseif($this->char === $this->EOF) { 00618 /* EOF 00619 Parse error. Emit the current tag token. Reconsume the EOF 00620 character in the data state. */ 00621 $this->emitToken($this->token); 00622 00623 $this->char--; 00624 $this->state = 'data'; 00625 00626 } else { 00627 /* Anything else 00628 Start a new attribute in the current tag token. Set that attribute's 00629 name to the current input character, and its value to the empty string. 00630 Switch to the attribute name state. */ 00631 $this->token['attr'][] = array( 00632 'name' => strtolower($char), 00633 'value' => null 00634 ); 00635 00636 $this->state = 'attributeName'; 00637 } 00638 } 00639 00640 private function beforeAttributeValueState() { 00641 // Consume the next input character: 00642 $this->char++; 00643 $char = $this->character($this->char); 00644 00645 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00646 /* U+0009 CHARACTER TABULATION 00647 U+000A LINE FEED (LF) 00648 U+000B LINE TABULATION 00649 U+000C FORM FEED (FF) 00650 U+0020 SPACE 00651 Stay in the before attribute value state. */ 00652 $this->state = 'beforeAttributeValue'; 00653 00654 } elseif($char === '"') { 00655 /* U+0022 QUOTATION MARK (") 00656 Switch to the attribute value (double-quoted) state. */ 00657 $this->state = 'attributeValueDoubleQuoted'; 00658 00659 } elseif($char === '&') { 00660 /* U+0026 AMPERSAND (&) 00661 Switch to the attribute value (unquoted) state and reconsume 00662 this input character. */ 00663 $this->char--; 00664 $this->state = 'attributeValueUnquoted'; 00665 00666 } elseif($char === '\'') { 00667 /* U+0027 APOSTROPHE (') 00668 Switch to the attribute value (single-quoted) state. */ 00669 $this->state = 'attributeValueSingleQuoted'; 00670 00671 } elseif($char === '>') { 00672 /* U+003E GREATER-THAN SIGN (>) 00673 Emit the current tag token. Switch to the data state. 
*/ 00674 $this->emitToken($this->token); 00675 $this->state = 'data'; 00676 00677 } else { 00678 /* Anything else 00679 Append the current input character to the current attribute's value. 00680 Switch to the attribute value (unquoted) state. */ 00681 $last = count($this->token['attr']) - 1; 00682 $this->token['attr'][$last]['value'] .= $char; 00683 00684 $this->state = 'attributeValueUnquoted'; 00685 } 00686 } 00687 00688 private function attributeValueDoubleQuotedState() { 00689 // Consume the next input character: 00690 $this->char++; 00691 $char = $this->character($this->char); 00692 00693 if($char === '"') { 00694 /* U+0022 QUOTATION MARK (") 00695 Switch to the before attribute name state. */ 00696 $this->state = 'beforeAttributeName'; 00697 00698 } elseif($char === '&') { 00699 /* U+0026 AMPERSAND (&) 00700 Switch to the entity in attribute value state. */ 00701 $this->entityInAttributeValueState('double'); 00702 00703 } elseif($this->char === $this->EOF) { 00704 /* EOF 00705 Parse error. Emit the current tag token. Reconsume the character 00706 in the data state. */ 00707 $this->emitToken($this->token); 00708 00709 $this->char--; 00710 $this->state = 'data'; 00711 00712 } else { 00713 /* Anything else 00714 Append the current input character to the current attribute's value. 00715 Stay in the attribute value (double-quoted) state. */ 00716 $last = count($this->token['attr']) - 1; 00717 $this->token['attr'][$last]['value'] .= $char; 00718 00719 $this->state = 'attributeValueDoubleQuoted'; 00720 } 00721 } 00722 00723 private function attributeValueSingleQuotedState() { 00724 // Consume the next input character: 00725 $this->char++; 00726 $char = $this->character($this->char); 00727 00728 if($char === '\'') { 00729 /* U+0022 QUOTATION MARK (') 00730 Switch to the before attribute name state. */ 00731 $this->state = 'beforeAttributeName'; 00732 00733 } elseif($char === '&') { 00734 /* U+0026 AMPERSAND (&) 00735 Switch to the entity in attribute value state. 
*/ 00736 $this->entityInAttributeValueState('single'); 00737 00738 } elseif($this->char === $this->EOF) { 00739 /* EOF 00740 Parse error. Emit the current tag token. Reconsume the character 00741 in the data state. */ 00742 $this->emitToken($this->token); 00743 00744 $this->char--; 00745 $this->state = 'data'; 00746 00747 } else { 00748 /* Anything else 00749 Append the current input character to the current attribute's value. 00750 Stay in the attribute value (single-quoted) state. */ 00751 $last = count($this->token['attr']) - 1; 00752 $this->token['attr'][$last]['value'] .= $char; 00753 00754 $this->state = 'attributeValueSingleQuoted'; 00755 } 00756 } 00757 00758 private function attributeValueUnquotedState() { 00759 // Consume the next input character: 00760 $this->char++; 00761 $char = $this->character($this->char); 00762 00763 if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { 00764 /* U+0009 CHARACTER TABULATION 00765 U+000A LINE FEED (LF) 00766 U+000B LINE TABULATION 00767 U+000C FORM FEED (FF) 00768 U+0020 SPACE 00769 Switch to the before attribute name state. */ 00770 $this->state = 'beforeAttributeName'; 00771 00772 } elseif($char === '&') { 00773 /* U+0026 AMPERSAND (&) 00774 Switch to the entity in attribute value state. */ 00775 $this->entityInAttributeValueState(); 00776 00777 } elseif($char === '>') { 00778 /* U+003E GREATER-THAN SIGN (>) 00779 Emit the current tag token. Switch to the data state. */ 00780 $this->emitToken($this->token); 00781 $this->state = 'data'; 00782 00783 } else { 00784 /* Anything else 00785 Append the current input character to the current attribute's value. 00786 Stay in the attribute value (unquoted) state. */ 00787 $last = count($this->token['attr']) - 1; 00788 $this->token['attr'][$last]['value'] .= $char; 00789 00790 $this->state = 'attributeValueUnquoted'; 00791 } 00792 } 00793 00794 private function entityInAttributeValueState() { 00795 // Attempt to consume an entity. 
00796 $entity = $this->entity(); 00797 00798 // If nothing is returned, append a U+0026 AMPERSAND character to the 00799 // current attribute's value. Otherwise, emit the character token that 00800 // was returned. 00801 $char = (!$entity) 00802 ? '&' 00803 : $entity; 00804 00805 $last = count($this->token['attr']) - 1; 00806 $this->token['attr'][$last]['value'] .= $char; 00807 } 00808 00809 private function bogusCommentState() { 00810 /* Consume every character up to the first U+003E GREATER-THAN SIGN 00811 character (>) or the end of the file (EOF), whichever comes first. Emit 00812 a comment token whose data is the concatenation of all the characters 00813 starting from and including the character that caused the state machine 00814 to switch into the bogus comment state, up to and including the last 00815 consumed character before the U+003E character, if any, or up to the 00816 end of the file otherwise. (If the comment was started by the end of 00817 the file (EOF), the token is empty.) */ 00818 $data = $this->characters('^>', $this->char); 00819 $this->emitToken(array( 00820 'data' => $data, 00821 'type' => self::COMMENT 00822 )); 00823 00824 $this->char += strlen($data); 00825 00826 /* Switch to the data state. */ 00827 $this->state = 'data'; 00828 00829 /* If the end of the file was reached, reconsume the EOF character. */ 00830 if($this->char === $this->EOF) { 00831 $this->char = $this->EOF - 1; 00832 } 00833 } 00834 00835 private function markupDeclarationOpenState() { 00836 /* If the next two characters are both U+002D HYPHEN-MINUS (-) 00837 characters, consume those two characters, create a comment token whose 00838 data is the empty string, and switch to the comment state. 
*/ 00839 if($this->character($this->char + 1, 2) === '--') { 00840 $this->char += 2; 00841 $this->state = 'comment'; 00842 $this->token = array( 00843 'data' => null, 00844 'type' => self::COMMENT 00845 ); 00846 00847 /* Otherwise if the next seven chacacters are a case-insensitive match 00848 for the word "DOCTYPE", then consume those characters and switch to the 00849 DOCTYPE state. */ 00850 } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { 00851 $this->char += 7; 00852 $this->state = 'doctype'; 00853 00854 /* Otherwise, is is a parse error. Switch to the bogus comment state. 00855 The next character that is consumed, if any, is the first character 00856 that will be in the comment. */ 00857 } else { 00858 $this->char++; 00859 $this->state = 'bogusComment'; 00860 } 00861 } 00862 00863 private function commentState() { 00864 /* Consume the next input character: */ 00865 $this->char++; 00866 $char = $this->char(); 00867 00868 /* U+002D HYPHEN-MINUS (-) */ 00869 if($char === '-') { 00870 /* Switch to the comment dash state */ 00871 $this->state = 'commentDash'; 00872 00873 /* EOF */ 00874 } elseif($this->char === $this->EOF) { 00875 /* Parse error. Emit the comment token. Reconsume the EOF character 00876 in the data state. */ 00877 $this->emitToken($this->token); 00878 $this->char--; 00879 $this->state = 'data'; 00880 00881 /* Anything else */ 00882 } else { 00883 /* Append the input character to the comment token's data. Stay in 00884 the comment state. */ 00885 $this->token['data'] .= $char; 00886 } 00887 } 00888 00889 private function commentDashState() { 00890 /* Consume the next input character: */ 00891 $this->char++; 00892 $char = $this->char(); 00893 00894 /* U+002D HYPHEN-MINUS (-) */ 00895 if($char === '-') { 00896 /* Switch to the comment end state */ 00897 $this->state = 'commentEnd'; 00898 00899 /* EOF */ 00900 } elseif($this->char === $this->EOF) { 00901 /* Parse error. Emit the comment token. 
/**
 * Comment end state (entered after "--" has been seen inside a comment).
 *
 * Consumes one input character:
 *  - '>' emits the comment token and returns to the data state;
 *  - '-' appends a single '-' to the comment data and stays in this state;
 *  - end of file emits the comment token and reconsumes EOF in the data
 *    state;
 *  - anything else appends "--" plus the character to the comment data and
 *    goes back to the comment state.
 */
private function commentEndState() {
    /* Consume the next input character. */
    $this->char++;
    $current = $this->char();

    if($current === '-') {
        $this->token['data'] .= '-';
        return;
    }

    if($current === '>' || $this->char === $this->EOF) {
        $this->emitToken($this->token);

        /* Only the EOF case steps back, so that EOF is seen again by the
        data state; a closing '>' is simply consumed. */
        if($current !== '>') {
            $this->char--;
        }

        $this->state = 'data';
        return;
    }

    $this->token['data'] .= '--'.$current;
    $this->state = 'comment';
}
/**
 * DOCTYPE name state.
 *
 * Consumes one input character and extends or finishes the DOCTYPE token
 * that is currently being built in $this->token:
 *  - whitespace switches to the after DOCTYPE name state;
 *  - '>' emits the token and returns to the data state;
 *  - a lowercase ASCII letter is appended in upper case;
 *  - end of file emits the token and reconsumes EOF in the data state;
 *  - any other character is appended unchanged.
 *
 * After every step the token's error flag is recomputed: it is cleared only
 * while the accumulated name is exactly "HTML".
 */
private function doctypeNameState() {
    /* Consume the next input character: */
    $this->char++;
    $char = $this->char();

    if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        // Spelled in lowerCamel like every other state name assigned in
        // this class (the handler is afterDoctypeNameState()); the previous
        // 'AfterDoctypeName' spelling only resolves when the state is
        // dispatched through PHP's case-insensitive method lookup.
        $this->state = 'afterDoctypeName';

    } elseif($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } elseif(preg_match('/^[a-z]$/', $char)) {
        $this->token['name'] .= strtoupper($char);

    } elseif($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';

    } else {
        $this->token['name'] .= $char;
    }

    // The DOCTYPE is only considered correct while its name is "HTML".
    $this->token['error'] = ($this->token['name'] !== 'HTML');
}
/**
 * Consumes a character reference. Used when parsing entities in text and
 * in attribute values.
 *
 * On entry $this->char is expected to index the U+0026 AMPERSAND that
 * introduced the reference (the code inspects the characters at offsets
 * +1 and +2 from it). On success $this->char is left on the last character
 * that belongs to the reference (its terminating ';' when present). On
 * failure $this->char is restored to its entry value.
 *
 * Fixes relative to the previous implementation:
 *  - hexadecimal references were never recognised, because the check for
 *    'x'/'X' looked at the '#' itself instead of the character after it;
 *  - numeric references built the entity text from an absolute offset used
 *    as a length and never advanced past the digits;
 *  - a matched name that already ended in ';' got a second ';' appended
 *    before decoding, which leaked a stray ';' into the output;
 *  - named references stopped at the shortest matching prefix instead of
 *    the longest one.
 *
 * NOTE(review): the decoded string may itself be falsy in PHP (for example
 * "0" from "&#48;"); callers should compare the result with === false.
 *
 * @return string|false The decoded character(s), or false when no
 *                      reference could be matched.
 */
private function entity() {
    $start = $this->char;
    $entity = null;

    // The behaviour depends on the identity of the next character (the
    // one immediately after the U+0026 AMPERSAND character):
    if($this->character($start + 1) === '#') {
        // U+0023 NUMBER SIGN (#): numeric character reference. The
        // behaviour further depends on the character after the '#'.
        $marker = $this->character($start + 2);
        $is_hex = ($marker === 'x' || $marker === 'X');

        // Hexadecimal references accept 0-9, A-F, a-f; decimal ones 0-9.
        $char_class = $is_hex ? '0-9A-Fa-f' : '0-9';

        // First digit sits after "&#" (decimal) or "&#x" (hexadecimal).
        $digits_at = $start + ($is_hex ? 3 : 2);

        // Consume as many characters as match the range given above.
        $e_name = $this->characters($char_class, $digits_at);

        // characters() is defined outside this block, so do not rely on
        // what it returns when nothing matches: accept the result only if
        // it is a non-empty run of the expected digits.
        if(is_string($e_name) && preg_match('/^['.$char_class.']+$/D', $e_name)) {
            $entity = '#'.($is_hex ? 'x' : '').$e_name;

            // Leave the position on the last digit, then also consume a
            // terminating semicolon when there is one.
            $this->char = $digits_at + strlen($e_name) - 1;

            if($this->character($this->char + 1) === ';') {
                $this->char++;
            }
        }

    } else {
        // Anything else: consume the maximum number of characters
        // possible, with the consumed characters case-sensitively matching
        // one of the identifiers in the entities table.
        $e_name = (string) $this->characters('0-9A-Za-z;', $start + 1);
        $len = strlen($e_name);

        // No candidate longer than the longest known identifier can match,
        // so cap the search there to keep it cheap on long alphanumeric
        // runs.
        $longest = 0;
        foreach($this->entities as $known) {
            $longest = max($longest, strlen($known));
        }

        // Try the longest candidate first so that e.g. a longer identifier
        // wins over a shorter one that is merely its prefix.
        for($c = min($len, $longest); $c >= 1; $c--) {
            $id = substr($e_name, 0, $c);

            if(in_array($id, $this->entities, true)) {
                $this->char = $start + $c;

                // The table also lists some names without their ';'. When
                // such a name matched and a ';' follows, consume it too.
                if($e_name[$c - 1] !== ';' && $c < $len && $e_name[$c] === ';') {
                    $this->char++;
                }

                $entity = $id;
                break;
            }
        }
    }

    if($entity === null) {
        // If no match can be made, then this is a parse error. No
        // characters are consumed, and nothing is returned.
        $this->char = $start;
        return false;
    }

    // Return the character corresponding to the entity. The matched text
    // may or may not carry its own ';', so normalise to exactly one.
    return html_entity_decode('&'.rtrim($entity, ';').';', ENT_QUOTES, 'UTF-8');
}
/**
 * Initial phase of tree construction.
 *
 * Handles the very first tokens of the document:
 *  - an erroneous DOCTYPE, a comment, a start tag, an end tag, an
 *    end-of-file token, or a character token that is not pure whitespace
 *    switches to the root element phase and reprocesses the token there;
 *  - a correct DOCTYPE token switches to the root element phase;
 *  - a token whose data is pure whitespace is appended to the document as
 *    a text node.
 *
 * @param array $token Token emitted by the tokeniser.
 * @return mixed Whatever rootElementPhase() returns when the token is
 *               forwarded to it, otherwise null.
 */
private function initPhase($token) {
    /* A DOCTYPE token that is marked as being in error
    A comment token
    A start tag token
    An end tag token
    A character token that is not one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE
    An end-of-file token */
    if((isset($token['error']) && $token['error']) ||
    $token['type'] === HTML5::COMMENT ||
    $token['type'] === HTML5::STARTTAG ||
    $token['type'] === HTML5::ENDTAG ||
    $token['type'] === HTML5::EOF ||
    ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
    !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) {
        /* The specification does not define how to handle this case; here
        the token is simply handed on to the root element phase. */
        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);

    /* A DOCTYPE token marked as being correct */
    } elseif(isset($token['error']) && !$token['error']) {
        /* Switch to the root element phase of the tree construction stage.

        No DocumentType node is added to the document. The previous code
        instantiated a DOMDocumentType here but never attached it to
        anything, so the allocation had no effect and has been dropped.
        TODO(review): append a real DocumentType node if one is wanted in
        the resulting DOM. */
        $this->phase = self::ROOT_PHASE;

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/',
    $token['data'])) {
        /* Append that character to the Document node. */
        $text = $this->dom->createTextNode($token['data']);
        $this->dom->appendChild($text);
    }
}
/**
 * Main phase of tree construction.
 *
 * DOCTYPE tokens are ignored, a start tag named "html" only contributes
 * attributes that the root element (bottom of the stack of open elements)
 * does not have yet, and an end-of-file token generates implied end tags.
 * Every other token is dispatched to the handler of the current insertion
 * mode.
 *
 * @param array $token Token emitted by the tokeniser.
 * @return mixed The value returned by the insertion mode handler, or null
 *               when the token is handled here.
 */
private function mainPhase($token) {
    /* A DOCTYPE token */
    if($token['type'] === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A start tag token with the tag name "html" */
    } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
        /* If this start tag token was not the first start tag token, then
        it is a parse error. */

        /* For each attribute on the token, check to see if the attribute
        is already present on the top element of the stack of open elements.
        If it is not, add the attribute and its corresponding value to that
        element. */
        foreach($token['attr'] as $attr) {
            if(!$this->stack[0]->hasAttribute($attr['name'])) {
                $this->stack[0]->setAttribute($attr['name'], $attr['value']);
            }
        }

    /* An end-of-file token */
    } elseif($token['type'] === HTML5::EOF) {
        /* Generate implied end tags. */
        $this->generateImpliedEndTags();

    /* Anything else. */
    } else {
        /* Depends on the insertion mode. Only insertion mode constants are
        listed: the former "case self::END_PHASE" entry was a phase
        constant whose value (3) equals self::IN_BODY, so it could never be
        selected and has been removed. */
        switch($this->mode) {
            case self::BEFOR_HEAD: return $this->beforeHead($token);
            case self::IN_HEAD:    return $this->inHead($token);
            case self::AFTER_HEAD: return $this->afterHead($token);
            case self::IN_BODY:    return $this->inBody($token);
            case self::IN_TABLE:   return $this->inTable($token);
            case self::IN_CAPTION: return $this->inCaption($token);
            case self::IN_CGROUP:  return $this->inColumnGroup($token);
            case self::IN_TBODY:   return $this->inTableBody($token);
            case self::IN_ROW:     return $this->inRow($token);
            case self::IN_CELL:    return $this->inCell($token);
            case self::IN_SELECT:  return $this->inSelect($token);
            case self::AFTER_BODY: return $this->afterBody($token);
            case self::IN_FRAME:   return $this->inFrameset($token);
            case self::AFTR_FRAME: return $this->afterFrameset($token);
        }
    }
}
01399 01400 private function beforeHead($token) { 01401 /* Handle the token as follows: */ 01402 01403 /* A character token that is one of one of U+0009 CHARACTER TABULATION, 01404 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), 01405 or U+0020 SPACE */ 01406 if($token['type'] === HTML5::CHARACTR && 01407 preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { 01408 /* Append the character to the current node. */ 01409 $this->insertText($token['data']); 01410 01411 /* A comment token */ 01412 } elseif($token['type'] === HTML5::COMMENT) { 01413 /* Append a Comment node to the current node with the data attribute 01414 set to the data given in the comment token. */ 01415 $this->insertComment($token['data']); 01416 01417 /* A start tag token with the tag name "head" */ 01418 } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { 01419 /* Create an element for the token, append the new element to the 01420 current node and push it onto the stack of open elements. */ 01421 $element = $this->insertElement($token); 01422 01423 /* Set the head element pointer to this new element node. */ 01424 $this->head_pointer = $element; 01425 01426 /* Change the insertion mode to "in head". */ 01427 $this->mode = self::IN_HEAD; 01428 01429 /* A start tag token whose tag name is one of: "base", "link", "meta", 01430 "script", "style", "title". Or an end tag with the tag name "html". 01431 Or a character token that is not one of U+0009 CHARACTER TABULATION, 01432 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), 01433 or U+0020 SPACE. Or any other start tag token */ 01434 } elseif($token['type'] === HTML5::STARTTAG || 01435 ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || 01436 ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', 01437 $token['data']))) { 01438 /* Act as if a start tag token with the tag name "head" and no 01439 attributes had been seen, then reprocess the current token. 
/**
 * "In head" insertion mode.
 *
 * Handles whitespace and comments, the title/style/script elements (whose
 * start tags switch the tokeniser's content model and whose end tags switch
 * it back to PCDATA), base/link/meta, and the end of the head element.
 * Anything else leaves the head and is reprocessed by afterHead().
 *
 * The head element pointer may be null (the comments below call this the
 * innerHTML case). Every use of it is guarded here; previously the "script"
 * branch and both isSameNode() checks dereferenced it unconditionally,
 * unlike the title/style/base branches.
 *
 * NOTE(review): insertElement() is defined outside this block. Its second
 * argument (false) is presumed to suppress appending to the current node
 * while still pushing the element on the stack of open elements -- the
 * base/link/meta branch pops the stack after such a call -- confirm.
 *
 * @param array $token Token emitted by the tokeniser.
 * @return mixed A content model constant (HTML5::PCDATA, HTML5::RCDATA or
 *               HTML5::CDATA) when the tokeniser must switch, the result
 *               of afterHead() when the token is reprocessed there,
 *               otherwise null.
 */
private function inHead($token) {
    $type = $token['type'];
    $raw_text_tags = array('title', 'style', 'script');

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE.

    THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
    or script element, append the character to the current node regardless
    of its content. */
    if($type === HTML5::CHARACTR && (
    preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']) ||
    in_array(end($this->stack)->nodeName, $raw_text_tags, true))) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data attribute
        set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "title", "style" or "script" */
    } elseif($type === HTML5::ENDTAG &&
    in_array($token['name'], $raw_text_tags, true)) {
        /* Close the element and switch the tokeniser back to PCDATA. */
        array_pop($this->stack);
        return HTML5::PCDATA;

    /* A start tag with the tag name "title", "style" or "script" */
    } elseif($type === HTML5::STARTTAG &&
    in_array($token['name'], $raw_text_tags, true)) {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag: RCDATA for title,
        CDATA for style and script. */
        return ($token['name'] === 'title')
            ? HTML5::RCDATA
            : HTML5::CDATA;

    /* A start tag with the tag name "base", "link", or "meta" */
    } elseif($type === HTML5::STARTTAG && in_array($token['name'],
    array('base', 'link', 'meta'), true)) {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
            array_pop($this->stack);

        } else {
            $this->insertElement($token);
        }

    /* An end tag with the tag name "head" */
    } elseif($type === HTML5::ENDTAG && $token['name'] === 'head') {
        /* If the current node is a head element, pop the current node off
        the stack of open elements. Otherwise, this is a parse error. */
        if($this->head_pointer !== null &&
        $this->head_pointer->isSameNode(end($this->stack))) {
            array_pop($this->stack);
        }

        /* Change the insertion mode to "after head". */
        $this->mode = self::AFTER_HEAD;

    /* A start tag with the tag name "head" or an end tag except "html". */
    } elseif(($type === HTML5::STARTTAG && $token['name'] === 'head') ||
    ($type === HTML5::ENDTAG && $token['name'] !== 'html')) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* If the current node is a head element, act as if an end tag
        token with the tag name "head" had been seen. */
        if($this->head_pointer !== null &&
        $this->head_pointer->isSameNode(end($this->stack))) {
            $this->inHead(array(
                'name' => 'head',
                'type' => HTML5::ENDTAG
            ));

        /* Otherwise, change the insertion mode to "after head". */
        } else {
            $this->mode = self::AFTER_HEAD;
        }

        /* Then, reprocess the current token. */
        return $this->afterHead($token);
    }
}
*/ 01617 $this->mode = self::IN_HEAD; 01618 return $this->inHead($token); 01619 01620 /* Anything else */ 01621 } else { 01622 /* Act as if a start tag token with the tag name "body" and no 01623 attributes had been seen, and then reprocess the current token. */ 01624 $this->afterHead(array( 01625 'name' => 'body', 01626 'type' => HTML5::STARTTAG, 01627 'attr' => array() 01628 )); 01629 01630 return $this->inBody($token); 01631 } 01632 } 01633 01634 private function inBody($token) { 01635 /* Handle the token as follows: */ 01636 01637 switch($token['type']) { 01638 /* A character token */ 01639 case HTML5::CHARACTR: 01640 /* Reconstruct the active formatting elements, if any. */ 01641 $this->reconstructActiveFormattingElements(); 01642 01643 /* Append the token's character to the current node. */ 01644 $this->insertText($token['data']); 01645 break; 01646 01647 /* A comment token */ 01648 case HTML5::COMMENT: 01649 /* Append a Comment node to the current node with the data 01650 attribute set to the data given in the comment token. */ 01651 $this->insertComment($token['data']); 01652 break; 01653 01654 case HTML5::STARTTAG: 01655 switch($token['name']) { 01656 /* A start tag token whose tag name is one of: "script", 01657 "style" */ 01658 case 'script': case 'style': 01659 /* Process the token as if the insertion mode had been "in 01660 head". */ 01661 return $this->inHead($token); 01662 break; 01663 01664 /* A start tag token whose tag name is one of: "base", "link", 01665 "meta", "title" */ 01666 case 'base': case 'link': case 'meta': case 'title': 01667 /* Parse error. Process the token as if the insertion mode 01668 had been "in head". */ 01669 return $this->inHead($token); 01670 break; 01671 01672 /* A start tag token with the tag name "body" */ 01673 case 'body': 01674 /* Parse error. If the second element on the stack of open 01675 elements is not a body element, or, if the stack of open 01676 elements has only one node on it, then ignore the token. 
01677 (innerHTML case) */ 01678 if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { 01679 // Ignore 01680 01681 /* Otherwise, for each attribute on the token, check to see 01682 if the attribute is already present on the body element (the 01683 second element) on the stack of open elements. If it is not, 01684 add the attribute and its corresponding value to that 01685 element. */ 01686 } else { 01687 foreach($token['attr'] as $attr) { 01688 if(!$this->stack[1]->hasAttribute($attr['name'])) { 01689 $this->stack[1]->setAttribute($attr['name'], $attr['value']); 01690 } 01691 } 01692 } 01693 break; 01694 01695 /* A start tag whose tag name is one of: "address", 01696 "blockquote", "center", "dir", "div", "dl", "fieldset", 01697 "listing", "menu", "ol", "p", "ul" */ 01698 case 'address': case 'blockquote': case 'center': case 'dir': 01699 case 'div': case 'dl': case 'fieldset': case 'listing': 01700 case 'menu': case 'ol': case 'p': case 'ul': 01701 /* If the stack of open elements has a p element in scope, 01702 then act as if an end tag with the tag name p had been 01703 seen. */ 01704 if($this->elementInScope('p')) { 01705 $this->emitToken(array( 01706 'name' => 'p', 01707 'type' => HTML5::ENDTAG 01708 )); 01709 } 01710 01711 /* Insert an HTML element for the token. */ 01712 $this->insertElement($token); 01713 break; 01714 01715 /* A start tag whose tag name is "form" */ 01716 case 'form': 01717 /* If the form element pointer is not null, ignore the 01718 token with a parse error. */ 01719 if($this->form_pointer !== null) { 01720 // Ignore. 01721 01722 /* Otherwise: */ 01723 } else { 01724 /* If the stack of open elements has a p element in 01725 scope, then act as if an end tag with the tag name p 01726 had been seen. 
*/ 01727 if($this->elementInScope('p')) { 01728 $this->emitToken(array( 01729 'name' => 'p', 01730 'type' => HTML5::ENDTAG 01731 )); 01732 } 01733 01734 /* Insert an HTML element for the token, and set the 01735 form element pointer to point to the element created. */ 01736 $element = $this->insertElement($token); 01737 $this->form_pointer = $element; 01738 } 01739 break; 01740 01741 /* A start tag whose tag name is "li", "dd" or "dt" */ 01742 case 'li': case 'dd': case 'dt': 01743 /* If the stack of open elements has a p element in scope, 01744 then act as if an end tag with the tag name p had been 01745 seen. */ 01746 if($this->elementInScope('p')) { 01747 $this->emitToken(array( 01748 'name' => 'p', 01749 'type' => HTML5::ENDTAG 01750 )); 01751 } 01752 01753 $stack_length = count($this->stack) - 1; 01754 01755 for($n = $stack_length; 0 <= $n; $n--) { 01756 /* 1. Initialise node to be the current node (the 01757 bottommost node of the stack). */ 01758 $stop = false; 01759 $node = $this->stack[$n]; 01760 $cat = $this->getElementCategory($node->tagName); 01761 01762 /* 2. If node is an li, dd or dt element, then pop all 01763 the nodes from the current node up to node, including 01764 node, then stop this algorithm. */ 01765 if($token['name'] === $node->tagName || ($token['name'] !== 'li' 01766 && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { 01767 for($x = $stack_length; $x >= $n ; $x--) { 01768 array_pop($this->stack); 01769 } 01770 01771 break; 01772 } 01773 01774 /* 3. If node is not in the formatting category, and is 01775 not in the phrasing category, and is not an address or 01776 div element, then stop this algorithm. */ 01777 if($cat !== self::FORMATTING && $cat !== self::PHRASING && 01778 $node->tagName !== 'address' && $node->tagName !== 'div') { 01779 break; 01780 } 01781 } 01782 01783 /* Finally, insert an HTML element with the same tag 01784 name as the token's. 
*/ 01785 $this->insertElement($token); 01786 break; 01787 01788 /* A start tag token whose tag name is "plaintext" */ 01789 case 'plaintext': 01790 /* If the stack of open elements has a p element in scope, 01791 then act as if an end tag with the tag name p had been 01792 seen. */ 01793 if($this->elementInScope('p')) { 01794 $this->emitToken(array( 01795 'name' => 'p', 01796 'type' => HTML5::ENDTAG 01797 )); 01798 } 01799 01800 /* Insert an HTML element for the token. */ 01801 $this->insertElement($token); 01802 01803 return HTML5::PLAINTEXT; 01804 break; 01805 01806 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", 01807 "h5", "h6" */ 01808 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 01809 /* If the stack of open elements has a p element in scope, 01810 then act as if an end tag with the tag name p had been seen. */ 01811 if($this->elementInScope('p')) { 01812 $this->emitToken(array( 01813 'name' => 'p', 01814 'type' => HTML5::ENDTAG 01815 )); 01816 } 01817 01818 /* If the stack of open elements has in scope an element whose 01819 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then 01820 this is a parse error; pop elements from the stack until an 01821 element with one of those tag names has been popped from the 01822 stack. */ 01823 while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { 01824 array_pop($this->stack); 01825 } 01826 01827 /* Insert an HTML element for the token. 
*/ 01828 $this->insertElement($token); 01829 break; 01830 01831 /* A start tag whose tag name is "a" */ 01832 case 'a': 01833 /* If the list of active formatting elements contains 01834 an element whose tag name is "a" between the end of the 01835 list and the last marker on the list (or the start of 01836 the list if there is no marker on the list), then this 01837 is a parse error; act as if an end tag with the tag name 01838 "a" had been seen, then remove that element from the list 01839 of active formatting elements and the stack of open 01840 elements if the end tag didn't already remove it (it 01841 might not have if the element is not in table scope). */ 01842 $leng = count($this->a_formatting); 01843 01844 for($n = $leng - 1; $n >= 0; $n--) { 01845 if($this->a_formatting[$n] === self::MARKER) { 01846 break; 01847 01848 } elseif($this->a_formatting[$n]->nodeName === 'a') { 01849 $this->emitToken(array( 01850 'name' => 'a', 01851 'type' => HTML5::ENDTAG 01852 )); 01853 break; 01854 } 01855 } 01856 01857 /* Reconstruct the active formatting elements, if any. */ 01858 $this->reconstructActiveFormattingElements(); 01859 01860 /* Insert an HTML element for the token. */ 01861 $el = $this->insertElement($token); 01862 01863 /* Add that element to the list of active formatting 01864 elements. */ 01865 $this->a_formatting[] = $el; 01866 break; 01867 01868 /* A start tag whose tag name is one of: "b", "big", "em", "font", 01869 "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ 01870 case 'b': case 'big': case 'em': case 'font': case 'i': 01871 case 'nobr': case 's': case 'small': case 'strike': 01872 case 'strong': case 'tt': case 'u': 01873 /* Reconstruct the active formatting elements, if any. */ 01874 $this->reconstructActiveFormattingElements(); 01875 01876 /* Insert an HTML element for the token. */ 01877 $el = $this->insertElement($token); 01878 01879 /* Add that element to the list of active formatting 01880 elements. 
*/ 01881 $this->a_formatting[] = $el; 01882 break; 01883 01884 /* A start tag token whose tag name is "button" */ 01885 case 'button': 01886 /* If the stack of open elements has a button element in scope, 01887 then this is a parse error; act as if an end tag with the tag 01888 name "button" had been seen, then reprocess the token. (We don't 01889 do that. Unnecessary.) */ 01890 if($this->elementInScope('button')) { 01891 $this->inBody(array( 01892 'name' => 'button', 01893 'type' => HTML5::ENDTAG 01894 )); 01895 } 01896 01897 /* Reconstruct the active formatting elements, if any. */ 01898 $this->reconstructActiveFormattingElements(); 01899 01900 /* Insert an HTML element for the token. */ 01901 $this->insertElement($token); 01902 01903 /* Insert a marker at the end of the list of active 01904 formatting elements. */ 01905 $this->a_formatting[] = self::MARKER; 01906 break; 01907 01908 /* A start tag token whose tag name is one of: "marquee", "object" */ 01909 case 'marquee': case 'object': 01910 /* Reconstruct the active formatting elements, if any. */ 01911 $this->reconstructActiveFormattingElements(); 01912 01913 /* Insert an HTML element for the token. */ 01914 $this->insertElement($token); 01915 01916 /* Insert a marker at the end of the list of active 01917 formatting elements. */ 01918 $this->a_formatting[] = self::MARKER; 01919 break; 01920 01921 /* A start tag token whose tag name is "xmp" */ 01922 case 'xmp': 01923 /* Reconstruct the active formatting elements, if any. */ 01924 $this->reconstructActiveFormattingElements(); 01925 01926 /* Insert an HTML element for the token. */ 01927 $this->insertElement($token); 01928 01929 /* Switch the content model flag to the CDATA state. */ 01930 return HTML5::CDATA; 01931 break; 01932 01933 /* A start tag whose tag name is "table" */ 01934 case 'table': 01935 /* If the stack of open elements has a p element in scope, 01936 then act as if an end tag with the tag name p had been seen. 
*/ 01937 if($this->elementInScope('p')) { 01938 $this->emitToken(array( 01939 'name' => 'p', 01940 'type' => HTML5::ENDTAG 01941 )); 01942 } 01943 01944 /* Insert an HTML element for the token. */ 01945 $this->insertElement($token); 01946 01947 /* Change the insertion mode to "in table". */ 01948 $this->mode = self::IN_TABLE; 01949 break; 01950 01951 /* A start tag whose tag name is one of: "area", "basefont", 01952 "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ 01953 case 'area': case 'basefont': case 'bgsound': case 'br': 01954 case 'embed': case 'img': case 'param': case 'spacer': 01955 case 'wbr': 01956 /* Reconstruct the active formatting elements, if any. */ 01957 $this->reconstructActiveFormattingElements(); 01958 01959 /* Insert an HTML element for the token. */ 01960 $this->insertElement($token); 01961 01962 /* Immediately pop the current node off the stack of open elements. */ 01963 array_pop($this->stack); 01964 break; 01965 01966 /* A start tag whose tag name is "hr" */ 01967 case 'hr': 01968 /* If the stack of open elements has a p element in scope, 01969 then act as if an end tag with the tag name p had been seen. */ 01970 if($this->elementInScope('p')) { 01971 $this->emitToken(array( 01972 'name' => 'p', 01973 'type' => HTML5::ENDTAG 01974 )); 01975 } 01976 01977 /* Insert an HTML element for the token. */ 01978 $this->insertElement($token); 01979 01980 /* Immediately pop the current node off the stack of open elements. */ 01981 array_pop($this->stack); 01982 break; 01983 01984 /* A start tag whose tag name is "image" */ 01985 case 'image': 01986 /* Parse error. Change the token's tag name to "img" and 01987 reprocess it. (Don't ask.) */ 01988 $token['name'] = 'img'; 01989 return $this->inBody($token); 01990 break; 01991 01992 /* A start tag whose tag name is "input" */ 01993 case 'input': 01994 /* Reconstruct the active formatting elements, if any. 
*/ 01995 $this->reconstructActiveFormattingElements(); 01996 01997 /* Insert an input element for the token. */ 01998 $element = $this->insertElement($token, false); 01999 02000 /* If the form element pointer is not null, then associate the 02001 input element with the form element pointed to by the form 02002 element pointer. */ 02003 $this->form_pointer !== null 02004 ? $this->form_pointer->appendChild($element) 02005 : end($this->stack)->appendChild($element); 02006 02007 /* Pop that input element off the stack of open elements. */ 02008 array_pop($this->stack); 02009 break; 02010 02011 /* A start tag whose tag name is "isindex" */ 02012 case 'isindex': 02013 /* Parse error. */ 02014 // w/e 02015 02016 /* If the form element pointer is not null, 02017 then ignore the token. */ 02018 if($this->form_pointer === null) { 02019 /* Act as if a start tag token with the tag name "form" had 02020 been seen. */ 02021 $this->inBody(array( 02022 'name' => 'body', 02023 'type' => HTML5::STARTTAG, 02024 'attr' => array() 02025 )); 02026 02027 /* Act as if a start tag token with the tag name "hr" had 02028 been seen. */ 02029 $this->inBody(array( 02030 'name' => 'hr', 02031 'type' => HTML5::STARTTAG, 02032 'attr' => array() 02033 )); 02034 02035 /* Act as if a start tag token with the tag name "p" had 02036 been seen. */ 02037 $this->inBody(array( 02038 'name' => 'p', 02039 'type' => HTML5::STARTTAG, 02040 'attr' => array() 02041 )); 02042 02043 /* Act as if a start tag token with the tag name "label" 02044 had been seen. */ 02045 $this->inBody(array( 02046 'name' => 'label', 02047 'type' => HTML5::STARTTAG, 02048 'attr' => array() 02049 )); 02050 02051 /* Act as if a stream of character tokens had been seen. */ 02052 $this->insertText('This is a searchable index. '. 
02053 'Insert your search keywords here: '); 02054 02055 /* Act as if a start tag token with the tag name "input" 02056 had been seen, with all the attributes from the "isindex" 02057 token, except with the "name" attribute set to the value 02058 "isindex" (ignoring any explicit "name" attribute). */ 02059 $attr = $token['attr']; 02060 $attr[] = array('name' => 'name', 'value' => 'isindex'); 02061 02062 $this->inBody(array( 02063 'name' => 'input', 02064 'type' => HTML5::STARTTAG, 02065 'attr' => $attr 02066 )); 02067 02068 /* Act as if a stream of character tokens had been seen 02069 (see below for what they should say). */ 02070 $this->insertText('This is a searchable index. '. 02071 'Insert your search keywords here: '); 02072 02073 /* Act as if an end tag token with the tag name "label" 02074 had been seen. */ 02075 $this->inBody(array( 02076 'name' => 'label', 02077 'type' => HTML5::ENDTAG 02078 )); 02079 02080 /* Act as if an end tag token with the tag name "p" had 02081 been seen. */ 02082 $this->inBody(array( 02083 'name' => 'p', 02084 'type' => HTML5::ENDTAG 02085 )); 02086 02087 /* Act as if a start tag token with the tag name "hr" had 02088 been seen. */ 02089 $this->inBody(array( 02090 'name' => 'hr', 02091 'type' => HTML5::ENDTAG 02092 )); 02093 02094 /* Act as if an end tag token with the tag name "form" had 02095 been seen. */ 02096 $this->inBody(array( 02097 'name' => 'form', 02098 'type' => HTML5::ENDTAG 02099 )); 02100 } 02101 break; 02102 02103 /* A start tag whose tag name is "textarea" */ 02104 case 'textarea': 02105 $this->insertElement($token); 02106 02107 /* Switch the tokeniser's content model flag to the 02108 RCDATA state. */ 02109 return HTML5::RCDATA; 02110 break; 02111 02112 /* A start tag whose tag name is one of: "iframe", "noembed", 02113 "noframes" */ 02114 case 'iframe': case 'noembed': case 'noframes': 02115 $this->insertElement($token); 02116 02117 /* Switch the tokeniser's content model flag to the CDATA state. 
*/ 02118 return HTML5::CDATA; 02119 break; 02120 02121 /* A start tag whose tag name is "select" */ 02122 case 'select': 02123 /* Reconstruct the active formatting elements, if any. */ 02124 $this->reconstructActiveFormattingElements(); 02125 02126 /* Insert an HTML element for the token. */ 02127 $this->insertElement($token); 02128 02129 /* Change the insertion mode to "in select". */ 02130 $this->mode = self::IN_SELECT; 02131 break; 02132 02133 /* A start or end tag whose tag name is one of: "caption", "col", 02134 "colgroup", "frame", "frameset", "head", "option", "optgroup", 02135 "tbody", "td", "tfoot", "th", "thead", "tr". */ 02136 case 'caption': case 'col': case 'colgroup': case 'frame': 02137 case 'frameset': case 'head': case 'option': case 'optgroup': 02138 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': 02139 case 'tr': 02140 // Parse error. Ignore the token. 02141 break; 02142 02143 /* A start or end tag whose tag name is one of: "event-source", 02144 "section", "nav", "article", "aside", "header", "footer", 02145 "datagrid", "command" */ 02146 case 'event-source': case 'section': case 'nav': case 'article': 02147 case 'aside': case 'header': case 'footer': case 'datagrid': 02148 case 'command': 02149 // Work in progress! 02150 break; 02151 02152 /* A start tag token not covered by the previous entries */ 02153 default: 02154 /* Reconstruct the active formatting elements, if any. */ 02155 $this->reconstructActiveFormattingElements(); 02156 02157 $this->insertElement($token, true, true); 02158 break; 02159 } 02160 break; 02161 02162 case HTML5::ENDTAG: 02163 switch($token['name']) { 02164 /* An end tag with the tag name "body" */ 02165 case 'body': 02166 /* If the second element in the stack of open elements is 02167 not a body element, this is a parse error. Ignore the token. 02168 (innerHTML case) */ 02169 if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { 02170 // Ignore. 
02171 02172 /* If the current node is not the body element, then this 02173 is a parse error. */ 02174 } elseif(end($this->stack)->nodeName !== 'body') { 02175 // Parse error. 02176 } 02177 02178 /* Change the insertion mode to "after body". */ 02179 $this->mode = self::AFTER_BODY; 02180 break; 02181 02182 /* An end tag with the tag name "html" */ 02183 case 'html': 02184 /* Act as if an end tag with tag name "body" had been seen, 02185 then, if that token wasn't ignored, reprocess the current 02186 token. */ 02187 $this->inBody(array( 02188 'name' => 'body', 02189 'type' => HTML5::ENDTAG 02190 )); 02191 02192 return $this->afterBody($token); 02193 break; 02194 02195 /* An end tag whose tag name is one of: "address", "blockquote", 02196 "center", "dir", "div", "dl", "fieldset", "listing", "menu", 02197 "ol", "pre", "ul" */ 02198 case 'address': case 'blockquote': case 'center': case 'dir': 02199 case 'div': case 'dl': case 'fieldset': case 'listing': 02200 case 'menu': case 'ol': case 'pre': case 'ul': 02201 /* If the stack of open elements has an element in scope 02202 with the same tag name as that of the token, then generate 02203 implied end tags. */ 02204 if($this->elementInScope($token['name'])) { 02205 $this->generateImpliedEndTags(); 02206 02207 /* Now, if the current node is not an element with 02208 the same tag name as that of the token, then this 02209 is a parse error. */ 02210 // w/e 02211 02212 /* If the stack of open elements has an element in 02213 scope with the same tag name as that of the token, 02214 then pop elements from this stack until an element 02215 with that tag name has been popped from the stack. 
*/ 02216 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02217 if($this->stack[$n]->nodeName === $token['name']) { 02218 $n = -1; 02219 } 02220 02221 array_pop($this->stack); 02222 } 02223 } 02224 break; 02225 02226 /* An end tag whose tag name is "form" */ 02227 case 'form': 02228 /* If the stack of open elements has an element in scope 02229 with the same tag name as that of the token, then generate 02230 implied end tags. */ 02231 if($this->elementInScope($token['name'])) { 02232 $this->generateImpliedEndTags(); 02233 02234 } 02235 02236 if(end($this->stack)->nodeName !== $token['name']) { 02237 /* Now, if the current node is not an element with the 02238 same tag name as that of the token, then this is a parse 02239 error. */ 02240 // w/e 02241 02242 } else { 02243 /* Otherwise, if the current node is an element with 02244 the same tag name as that of the token pop that element 02245 from the stack. */ 02246 array_pop($this->stack); 02247 } 02248 02249 /* In any case, set the form element pointer to null. */ 02250 $this->form_pointer = null; 02251 break; 02252 02253 /* An end tag whose tag name is "p" */ 02254 case 'p': 02255 /* If the stack of open elements has a p element in scope, 02256 then generate implied end tags, except for p elements. */ 02257 if($this->elementInScope('p')) { 02258 $this->generateImpliedEndTags(array('p')); 02259 02260 /* If the current node is not a p element, then this is 02261 a parse error. */ 02262 // k 02263 02264 /* If the stack of open elements has a p element in 02265 scope, then pop elements from this stack until the stack 02266 no longer has a p element in scope. 
*/ 02267 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02268 if($this->elementInScope('p')) { 02269 array_pop($this->stack); 02270 02271 } else { 02272 break; 02273 } 02274 } 02275 } 02276 break; 02277 02278 /* An end tag whose tag name is "dd", "dt", or "li" */ 02279 case 'dd': case 'dt': case 'li': 02280 /* If the stack of open elements has an element in scope 02281 whose tag name matches the tag name of the token, then 02282 generate implied end tags, except for elements with the 02283 same tag name as the token. */ 02284 if($this->elementInScope($token['name'])) { 02285 $this->generateImpliedEndTags(array($token['name'])); 02286 02287 /* If the current node is not an element with the same 02288 tag name as the token, then this is a parse error. */ 02289 // w/e 02290 02291 /* If the stack of open elements has an element in scope 02292 whose tag name matches the tag name of the token, then 02293 pop elements from this stack until an element with that 02294 tag name has been popped from the stack. */ 02295 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02296 if($this->stack[$n]->nodeName === $token['name']) { 02297 $n = -1; 02298 } 02299 02300 array_pop($this->stack); 02301 } 02302 } 02303 break; 02304 02305 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", 02306 "h5", "h6" */ 02307 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 02308 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); 02309 02310 /* If the stack of open elements has in scope an element whose 02311 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then 02312 generate implied end tags. */ 02313 if($this->elementInScope($elements)) { 02314 $this->generateImpliedEndTags(); 02315 02316 /* Now, if the current node is not an element with the same 02317 tag name as that of the token, then this is a parse error. 
*/ 02318 // w/e 02319 02320 /* If the stack of open elements has in scope an element 02321 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or 02322 "h6", then pop elements from the stack until an element 02323 with one of those tag names has been popped from the stack. */ 02324 while($this->elementInScope($elements)) { 02325 array_pop($this->stack); 02326 } 02327 } 02328 break; 02329 02330 /* An end tag whose tag name is one of: "a", "b", "big", "em", 02331 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ 02332 case 'a': case 'b': case 'big': case 'em': case 'font': 02333 case 'i': case 'nobr': case 's': case 'small': case 'strike': 02334 case 'strong': case 'tt': case 'u': 02335 /* 1. Let the formatting element be the last element in 02336 the list of active formatting elements that: 02337 * is between the end of the list and the last scope 02338 marker in the list, if any, or the start of the list 02339 otherwise, and 02340 * has the same tag name as the token. 02341 */ 02342 while(true) { 02343 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { 02344 if($this->a_formatting[$a] === self::MARKER) { 02345 break; 02346 02347 } elseif($this->a_formatting[$a]->tagName === $token['name']) { 02348 $formatting_element = $this->a_formatting[$a]; 02349 $in_stack = in_array($formatting_element, $this->stack, true); 02350 $fe_af_pos = $a; 02351 break; 02352 } 02353 } 02354 02355 /* If there is no such node, or, if that node is 02356 also in the stack of open elements but the element 02357 is not in scope, then this is a parse error. Abort 02358 these steps. The token is ignored. */ 02359 if(!isset($formatting_element) || ($in_stack && 02360 !$this->elementInScope($token['name']))) { 02361 break; 02362 02363 /* Otherwise, if there is such a node, but that node 02364 is not in the stack of open elements, then this is a 02365 parse error; remove the element from the list, and 02366 abort these steps. 
*/ 02367 } elseif(isset($formatting_element) && !$in_stack) { 02368 unset($this->a_formatting[$fe_af_pos]); 02369 $this->a_formatting = array_merge($this->a_formatting); 02370 break; 02371 } 02372 02373 /* 2. Let the furthest block be the topmost node in the 02374 stack of open elements that is lower in the stack 02375 than the formatting element, and is not an element in 02376 the phrasing or formatting categories. There might 02377 not be one. */ 02378 $fe_s_pos = array_search($formatting_element, $this->stack, true); 02379 $length = count($this->stack); 02380 02381 for($s = $fe_s_pos + 1; $s < $length; $s++) { 02382 $category = $this->getElementCategory($this->stack[$s]->nodeName); 02383 02384 if($category !== self::PHRASING && $category !== self::FORMATTING) { 02385 $furthest_block = $this->stack[$s]; 02386 } 02387 } 02388 02389 /* 3. If there is no furthest block, then the UA must 02390 skip the subsequent steps and instead just pop all 02391 the nodes from the bottom of the stack of open 02392 elements, from the current node up to the formatting 02393 element, and remove the formatting element from the 02394 list of active formatting elements. */ 02395 if(!isset($furthest_block)) { 02396 for($n = $length - 1; $n >= $fe_s_pos; $n--) { 02397 array_pop($this->stack); 02398 } 02399 02400 unset($this->a_formatting[$fe_af_pos]); 02401 $this->a_formatting = array_merge($this->a_formatting); 02402 break; 02403 } 02404 02405 /* 4. Let the common ancestor be the element 02406 immediately above the formatting element in the stack 02407 of open elements. */ 02408 $common_ancestor = $this->stack[$fe_s_pos - 1]; 02409 02410 /* 5. If the furthest block has a parent node, then 02411 remove the furthest block from its parent node. */ 02412 if($furthest_block->parentNode !== null) { 02413 $furthest_block->parentNode->removeChild($furthest_block); 02414 } 02415 02416 /* 6. 
Let a bookmark note the position of the 02417 formatting element in the list of active formatting 02418 elements relative to the elements on either side 02419 of it in the list. */ 02420 $bookmark = $fe_af_pos; 02421 02422 /* 7. Let node and last node be the furthest block. 02423 Follow these steps: */ 02424 $node = $furthest_block; 02425 $last_node = $furthest_block; 02426 02427 while(true) { 02428 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { 02429 /* 7.1 Let node be the element immediately 02430 prior to node in the stack of open elements. */ 02431 $node = $this->stack[$n]; 02432 02433 /* 7.2 If node is not in the list of active 02434 formatting elements, then remove node from 02435 the stack of open elements and then go back 02436 to step 1. */ 02437 if(!in_array($node, $this->a_formatting, true)) { 02438 unset($this->stack[$n]); 02439 $this->stack = array_merge($this->stack); 02440 02441 } else { 02442 break; 02443 } 02444 } 02445 02446 /* 7.3 Otherwise, if node is the formatting 02447 element, then go to the next step in the overall 02448 algorithm. */ 02449 if($node === $formatting_element) { 02450 break; 02451 02452 /* 7.4 Otherwise, if last node is the furthest 02453 block, then move the aforementioned bookmark to 02454 be immediately after the node in the list of 02455 active formatting elements. */ 02456 } elseif($last_node === $furthest_block) { 02457 $bookmark = array_search($node, $this->a_formatting, true) + 1; 02458 } 02459 02460 /* 7.5 If node has any children, perform a 02461 shallow clone of node, replace the entry for 02462 node in the list of active formatting elements 02463 with an entry for the clone, replace the entry 02464 for node in the stack of open elements with an 02465 entry for the clone, and let node be the clone. 
*/ 02466 if($node->hasChildNodes()) { 02467 $clone = $node->cloneNode(); 02468 $s_pos = array_search($node, $this->stack, true); 02469 $a_pos = array_search($node, $this->a_formatting, true); 02470 02471 $this->stack[$s_pos] = $clone; 02472 $this->a_formatting[$a_pos] = $clone; 02473 $node = $clone; 02474 } 02475 02476 /* 7.6 Insert last node into node, first removing 02477 it from its previous parent node if any. */ 02478 if($last_node->parentNode !== null) { 02479 $last_node->parentNode->removeChild($last_node); 02480 } 02481 02482 $node->appendChild($last_node); 02483 02484 /* 7.7 Let last node be node. */ 02485 $last_node = $node; 02486 } 02487 02488 /* 8. Insert whatever last node ended up being in 02489 the previous step into the common ancestor node, 02490 first removing it from its previous parent node if 02491 any. */ 02492 if($last_node->parentNode !== null) { 02493 $last_node->parentNode->removeChild($last_node); 02494 } 02495 02496 $common_ancestor->appendChild($last_node); 02497 02498 /* 9. Perform a shallow clone of the formatting 02499 element. */ 02500 $clone = $formatting_element->cloneNode(); 02501 02502 /* 10. Take all of the child nodes of the furthest 02503 block and append them to the clone created in the 02504 last step. */ 02505 while($furthest_block->hasChildNodes()) { 02506 $child = $furthest_block->firstChild; 02507 $furthest_block->removeChild($child); 02508 $clone->appendChild($child); 02509 } 02510 02511 /* 11. Append that clone to the furthest block. */ 02512 $furthest_block->appendChild($clone); 02513 02514 /* 12. Remove the formatting element from the list 02515 of active formatting elements, and insert the clone 02516 into the list of active formatting elements at the 02517 position of the aforementioned bookmark. 
*/ 02518 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); 02519 unset($this->a_formatting[$fe_af_pos]); 02520 $this->a_formatting = array_merge($this->a_formatting); 02521 02522 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); 02523 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); 02524 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); 02525 02526 /* 13. Remove the formatting element from the stack 02527 of open elements, and insert the clone into the stack 02528 of open elements immediately after (i.e. in a more 02529 deeply nested position than) the position of the 02530 furthest block in that stack. */ 02531 $fe_s_pos = array_search($formatting_element, $this->stack, true); 02532 $fb_s_pos = array_search($furthest_block, $this->stack, true); 02533 unset($this->stack[$fe_s_pos]); 02534 02535 $s_part1 = array_slice($this->stack, 0, $fb_s_pos); 02536 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); 02537 $this->stack = array_merge($s_part1, array($clone), $s_part2); 02538 02539 /* 14. Jump back to step 1 in this series of steps. */ 02540 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); 02541 } 02542 break; 02543 02544 /* An end tag token whose tag name is one of: "button", 02545 "marquee", "object" */ 02546 case 'button': case 'marquee': case 'object': 02547 /* If the stack of open elements has an element in scope whose 02548 tag name matches the tag name of the token, then generate implied 02549 tags. */ 02550 if($this->elementInScope($token['name'])) { 02551 $this->generateImpliedEndTags(); 02552 02553 /* Now, if the current node is not an element with the same 02554 tag name as the token, then this is a parse error. 
*/ 02555 // k 02556 02557 /* Now, if the stack of open elements has an element in scope 02558 whose tag name matches the tag name of the token, then pop 02559 elements from the stack until that element has been popped from 02560 the stack, and clear the list of active formatting elements up 02561 to the last marker. */ 02562 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02563 if($this->stack[$n]->nodeName === $token['name']) { 02564 $n = -1; 02565 } 02566 02567 array_pop($this->stack); 02568 } 02569 02570 $marker = end(array_keys($this->a_formatting, self::MARKER, true)); 02571 02572 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { 02573 array_pop($this->a_formatting); 02574 } 02575 } 02576 break; 02577 02578 /* Or an end tag whose tag name is one of: "area", "basefont", 02579 "bgsound", "br", "embed", "hr", "iframe", "image", "img", 02580 "input", "isindex", "noembed", "noframes", "param", "select", 02581 "spacer", "table", "textarea", "wbr" */ 02582 case 'area': case 'basefont': case 'bgsound': case 'br': 02583 case 'embed': case 'hr': case 'iframe': case 'image': 02584 case 'img': case 'input': case 'isindex': case 'noembed': 02585 case 'noframes': case 'param': case 'select': case 'spacer': 02586 case 'table': case 'textarea': case 'wbr': 02587 // Parse error. Ignore the token. 02588 break; 02589 02590 /* An end tag token not covered by the previous entries */ 02591 default: 02592 for($n = count($this->stack) - 1; $n >= 0; $n--) { 02593 /* Initialise node to be the current node (the bottommost 02594 node of the stack). */ 02595 $node = end($this->stack); 02596 02597 /* If node has the same tag name as the end tag token, 02598 then: */ 02599 if($token['name'] === $node->nodeName) { 02600 /* Generate implied end tags. */ 02601 $this->generateImpliedEndTags(); 02602 02603 /* If the tag name of the end tag token does not 02604 match the tag name of the current node, this is a 02605 parse error. 
*/ 02606 // k 02607 02608 /* Pop all the nodes from the current node up to 02609 node, including node, then stop this algorithm. */ 02610 for($x = count($this->stack) - $n; $x >= $n; $x--) { 02611 array_pop($this->stack); 02612 } 02613 02614 } else { 02615 $category = $this->getElementCategory($node); 02616 02617 if($category !== self::SPECIAL && $category !== self::SCOPING) { 02618 /* Otherwise, if node is in neither the formatting 02619 category nor the phrasing category, then this is a 02620 parse error. Stop this algorithm. The end tag token 02621 is ignored. */ 02622 return false; 02623 } 02624 } 02625 } 02626 break; 02627 } 02628 break; 02629 } 02630 } 02631 02632 private function inTable($token) { 02633 $clear = array('html', 'table'); 02634 02635 /* A character token that is one of one of U+0009 CHARACTER TABULATION, 02636 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), 02637 or U+0020 SPACE */ 02638 if($token['type'] === HTML5::CHARACTR && 02639 preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { 02640 /* Append the character to the current node. */ 02641 $text = $this->dom->createTextNode($token['data']); 02642 end($this->stack)->appendChild($text); 02643 02644 /* A comment token */ 02645 } elseif($token['type'] === HTML5::COMMENT) { 02646 /* Append a Comment node to the current node with the data 02647 attribute set to the data given in the comment token. */ 02648 $comment = $this->dom->createComment($token['data']); 02649 end($this->stack)->appendChild($comment); 02650 02651 /* A start tag whose tag name is "caption" */ 02652 } elseif($token['type'] === HTML5::STARTTAG && 02653 $token['name'] === 'caption') { 02654 /* Clear the stack back to a table context. */ 02655 $this->clearStackToTableContext($clear); 02656 02657 /* Insert a marker at the end of the list of active 02658 formatting elements. 
*/ 02659 $this->a_formatting[] = self::MARKER; 02660 02661 /* Insert an HTML element for the token, then switch the 02662 insertion mode to "in caption". */ 02663 $this->insertElement($token); 02664 $this->mode = self::IN_CAPTION; 02665 02666 /* A start tag whose tag name is "colgroup" */ 02667 } elseif($token['type'] === HTML5::STARTTAG && 02668 $token['name'] === 'colgroup') { 02669 /* Clear the stack back to a table context. */ 02670 $this->clearStackToTableContext($clear); 02671 02672 /* Insert an HTML element for the token, then switch the 02673 insertion mode to "in column group". */ 02674 $this->insertElement($token); 02675 $this->mode = self::IN_CGROUP; 02676 02677 /* A start tag whose tag name is "col" */ 02678 } elseif($token['type'] === HTML5::STARTTAG && 02679 $token['name'] === 'col') { 02680 $this->inTable(array( 02681 'name' => 'colgroup', 02682 'type' => HTML5::STARTTAG, 02683 'attr' => array() 02684 )); 02685 02686 $this->inColumnGroup($token); 02687 02688 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ 02689 } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], 02690 array('tbody', 'tfoot', 'thead'))) { 02691 /* Clear the stack back to a table context. */ 02692 $this->clearStackToTableContext($clear); 02693 02694 /* Insert an HTML element for the token, then switch the insertion 02695 mode to "in table body". */ 02696 $this->insertElement($token); 02697 $this->mode = self::IN_TBODY; 02698 02699 /* A start tag whose tag name is one of: "td", "th", "tr" */ 02700 } elseif($token['type'] === HTML5::STARTTAG && 02701 in_array($token['name'], array('td', 'th', 'tr'))) { 02702 /* Act as if a start tag token with the tag name "tbody" had been 02703 seen, then reprocess the current token. 
*/ 02704 $this->inTable(array( 02705 'name' => 'tbody', 02706 'type' => HTML5::STARTTAG, 02707 'attr' => array() 02708 )); 02709 02710 return $this->inTableBody($token); 02711 02712 /* A start tag whose tag name is "table" */ 02713 } elseif($token['type'] === HTML5::STARTTAG && 02714 $token['name'] === 'table') { 02715 /* Parse error. Act as if an end tag token with the tag name "table" 02716 had been seen, then, if that token wasn't ignored, reprocess the 02717 current token. */ 02718 $this->inTable(array( 02719 'name' => 'table', 02720 'type' => HTML5::ENDTAG 02721 )); 02722 02723 return $this->mainPhase($token); 02724 02725 /* An end tag whose tag name is "table" */ 02726 } elseif($token['type'] === HTML5::ENDTAG && 02727 $token['name'] === 'table') { 02728 /* If the stack of open elements does not have an element in table 02729 scope with the same tag name as the token, this is a parse error. 02730 Ignore the token. (innerHTML case) */ 02731 if(!$this->elementInScope($token['name'], true)) { 02732 return false; 02733 02734 /* Otherwise: */ 02735 } else { 02736 /* Generate implied end tags. */ 02737 $this->generateImpliedEndTags(); 02738 02739 /* Now, if the current node is not a table element, then this 02740 is a parse error. */ 02741 // w/e 02742 02743 /* Pop elements from this stack until a table element has been 02744 popped from the stack. */ 02745 while(true) { 02746 $current = end($this->stack)->nodeName; 02747 array_pop($this->stack); 02748 02749 if($current === 'table') { 02750 break; 02751 } 02752 } 02753 02754 /* Reset the insertion mode appropriately. 
*/ 02755 $this->resetInsertionMode(); 02756 } 02757 02758 /* An end tag whose tag name is one of: "body", "caption", "col", 02759 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ 02760 } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], 02761 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 02762 'tfoot', 'th', 'thead', 'tr'))) { 02763 // Parse error. Ignore the token. 02764 02765 /* Anything else */ 02766 } else { 02767 /* Parse error. Process the token as if the insertion mode was "in 02768 body", with the following exception: */ 02769 02770 /* If the current node is a table, tbody, tfoot, thead, or tr 02771 element, then, whenever a node would be inserted into the current 02772 node, it must instead be inserted into the foster parent element. */ 02773 if(in_array(end($this->stack)->nodeName, 02774 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { 02775 /* The foster parent element is the parent element of the last 02776 table element in the stack of open elements, if there is a 02777 table element and it has such a parent element. If there is no 02778 table element in the stack of open elements (innerHTML case), 02779 then the foster parent element is the first element in the 02780 stack of open elements (the html element). Otherwise, if there 02781 is a table element in the stack of open elements, but the last 02782 table element in the stack of open elements has no parent, or 02783 its parent node is not an element, then the foster parent 02784 element is the element before the last table element in the 02785 stack of open elements. 
/**
 * Tree-construction handler for the "in caption" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'name' is read only for start/end tag tokens.
 * @return mixed Result of inTable() when the token is reprocessed there,
 *               otherwise null.
 */
private function inCaption($token) {
    $isStartTag = $token['type'] === HTML5::STARTTAG;
    $isEndTag   = $token['type'] === HTML5::ENDTAG;

    /* An end tag whose tag name is "caption" */
    if ($isEndTag && $token['name'] === 'caption') {
        /* Without a caption in table scope this is a parse error and the
        token is ignored (innerHTML case). */
        if ($this->elementInScope($token['name'], true)) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Pop elements until a caption element has been popped. */
            do {
                $popped = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($popped !== 'caption');

            /* Clear the list of active formatting elements up to the last
            marker, then switch the insertion mode to "in table". */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif (
        ($isStartTag && in_array($token['name'], array('caption', 'col',
            'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'), true))
        || ($isEndTag && $token['name'] === 'table')
    ) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then reprocess the current token. */
        $this->inCaption(array(
            'name' => 'caption',
            'type' => HTML5::ENDTAG
        ));

        return $this->inTable($token);

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr".
    "td" was missing from this list, so a stray </td> was forwarded to
    the "in body" rules instead of being ignored. */
    } elseif ($isEndTag && in_array($token['name'], array('body', 'col',
        'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'), true)) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
*/ 02884 $text = $this->dom->createTextNode($token['data']); 02885 end($this->stack)->appendChild($text); 02886 02887 /* A comment token */ 02888 } elseif($token['type'] === HTML5::COMMENT) { 02889 /* Append a Comment node to the current node with the data 02890 attribute set to the data given in the comment token. */ 02891 $comment = $this->dom->createComment($token['data']); 02892 end($this->stack)->appendChild($comment); 02893 02894 /* A start tag whose tag name is "col" */ 02895 } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { 02896 /* Insert a col element for the token. Immediately pop the current 02897 node off the stack of open elements. */ 02898 $this->insertElement($token); 02899 array_pop($this->stack); 02900 02901 /* An end tag whose tag name is "colgroup" */ 02902 } elseif($token['type'] === HTML5::ENDTAG && 02903 $token['name'] === 'colgroup') { 02904 /* If the current node is the root html element, then this is a 02905 parse error, ignore the token. (innerHTML case) */ 02906 if(end($this->stack)->nodeName === 'html') { 02907 // Ignore 02908 02909 /* Otherwise, pop the current node (which will be a colgroup 02910 element) from the stack of open elements. Switch the insertion 02911 mode to "in table". */ 02912 } else { 02913 array_pop($this->stack); 02914 $this->mode = self::IN_TABLE; 02915 } 02916 02917 /* An end tag whose tag name is "col" */ 02918 } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { 02919 /* Parse error. Ignore the token. */ 02920 02921 /* Anything else */ 02922 } else { 02923 /* Act as if an end tag with the tag name "colgroup" had been seen, 02924 and then, if that token wasn't ignored, reprocess the current token. 
/**
 * Tree-construction handler for the "in table body" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'name' is read only for start/end tag tokens.
 * @return mixed Result of the handler the token was reprocessed with
 *               (inRow() or mainPhase()), otherwise null.
 */
private function inTableBody($token) {
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    $isStartTag = $token['type'] === HTML5::STARTTAG;
    $isEndTag   = $token['type'] === HTML5::ENDTAG;

    /* A start tag whose tag name is "tr" */
    if ($isStartTag && $token['name'] === 'tr') {
        /* Clear the stack back to a table body context. */
        $this->clearStackToTableContext($clear);

        /* Insert a tr element for the token, then switch the insertion
        mode to "in row". */
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* A start tag whose tag name is one of: "th", "td" */
    } elseif ($isStartTag &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Parse error. Act as if a start tag with the tag name "tr" had
        been seen, then reprocess the current token. */
        $this->inTableBody(array(
            'name' => 'tr',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        ));

        return $this->inRow($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($isEndTag &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
        /* Ignore the token (parse error) unless an element with the same
        tag name is in table scope. */
        if ($this->elementInScope($token['name'], true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node and switch to "in table". */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
    The original list misspelled "tfoot" as "tfoor" and tested for a
    *start* tag named "table"; both contradicted this rule. */
    } elseif (
        ($isStartTag && in_array($token['name'], array('caption', 'col',
            'colgroup', 'tbody', 'tfoot', 'thead'), true))
        || ($isEndTag && $token['name'] === 'table')
    ) {
        /* Ignore the token (parse error, innerHTML case) unless a tbody,
        thead or tfoot element is in table scope. */
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag with the same tag name as the current
            node ("tbody", "tfoot", or "thead") had been seen, then
            reprocess the current token. */
            $this->inTableBody(array(
                'name' => end($this->stack)->nodeName,
                'type' => HTML5::ENDTAG
            ));

            return $this->mainPhase($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th", "tr" */
    } elseif ($isEndTag && in_array($token['name'], array('body', 'caption',
        'col', 'colgroup', 'html', 'td', 'th', 'tr'), true)) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}

/**
 * Tree-construction handler for the "in row" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'name' is read only for start/end tag tokens.
 * @return mixed Result of inTableBody() when the token is reprocessed
 *               there, otherwise null.
 */
private function inRow($token) {
    $clear = array('tr', 'html');

    $isStartTag = $token['type'] === HTML5::STARTTAG;
    $isEndTag   = $token['type'] === HTML5::ENDTAG;

    /* A start tag whose tag name is one of: "th", "td" */
    if ($isStartTag &&
        ($token['name'] === 'th' || $token['name'] === 'td')) {
        /* Clear the stack back to a table row context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in cell". */
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Insert a marker at the end of the list of active formatting
        elements. */
        $this->a_formatting[] = self::MARKER;

    /* An end tag whose tag name is "tr" */
    } elseif ($isEndTag && $token['name'] === 'tr') {
        /* Ignore the token (parse error, innerHTML case) unless a tr
        element is in table scope. */
        if ($this->elementInScope($token['name'], true)) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node (which will be a tr element) and
            switch the insertion mode to "in table body". */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table".
    The end-tag half of this rule was previously missing. */
    } elseif (
        ($isStartTag && in_array($token['name'], array('caption', 'col',
            'colgroup', 'tbody', 'tfoot', 'thead', 'tr'), true))
        || ($isEndTag && $token['name'] === 'table')
    ) {
        /* Act as if an end tag with the tag name "tr" had been seen, then,
        if that token wasn't ignored, reprocess the current token. The
        implied </tr> is ignored exactly when no tr is in table scope. */
        if ($this->elementInScope('tr', true)) {
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            /* Closing the row switched the mode to "in table body", so
            that is the handler that must see the token again. Handing it
            to inCell() (as before) made the token get dropped. */
            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($isEndTag &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
        /* Ignore the token (parse error) unless an element with the same
        tag name is in table scope. */
        if ($this->elementInScope($token['name'], true)) {
            /* Act as if an end tag with the tag name "tr" had been seen,
            then reprocess the current token in "in table body". */
            $this->inRow(array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            ));

            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th" */
    } elseif ($isEndTag && in_array($token['name'], array('body', 'caption',
        'col', 'colgroup', 'html', 'td', 'th'), true)) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
*/ 03119 $this->generateImpliedEndTags(array($token['name'])); 03120 03121 /* Now, if the current node is not an element with the same tag 03122 name as the token, then this is a parse error. */ 03123 // k 03124 03125 /* Pop elements from this stack until an element with the same 03126 tag name as the token has been popped from the stack. */ 03127 while(true) { 03128 $node = end($this->stack)->nodeName; 03129 array_pop($this->stack); 03130 03131 if($node === $token['name']) { 03132 break; 03133 } 03134 } 03135 03136 /* Clear the list of active formatting elements up to the last 03137 marker. */ 03138 $this->clearTheActiveFormattingElementsUpToTheLastMarker(); 03139 03140 /* Switch the insertion mode to "in row". (The current node 03141 will be a tr element at this point.) */ 03142 $this->mode = self::IN_ROW; 03143 } 03144 03145 /* A start tag whose tag name is one of: "caption", "col", "colgroup", 03146 "tbody", "td", "tfoot", "th", "thead", "tr" */ 03147 } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], 03148 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 03149 'thead', 'tr'))) { 03150 /* If the stack of open elements does not have a td or th element 03151 in table scope, then this is a parse error; ignore the token. 03152 (innerHTML case) */ 03153 if(!$this->elementInScope(array('td', 'th'), true)) { 03154 // Ignore. 03155 03156 /* Otherwise, close the cell (see below) and reprocess the current 03157 token. 
*/ 03158 } else { 03159 $this->closeCell(); 03160 return $this->inRow($token); 03161 } 03162 03163 /* A start tag whose tag name is one of: "caption", "col", "colgroup", 03164 "tbody", "td", "tfoot", "th", "thead", "tr" */ 03165 } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], 03166 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 03167 'thead', 'tr'))) { 03168 /* If the stack of open elements does not have a td or th element 03169 in table scope, then this is a parse error; ignore the token. 03170 (innerHTML case) */ 03171 if(!$this->elementInScope(array('td', 'th'), true)) { 03172 // Ignore. 03173 03174 /* Otherwise, close the cell (see below) and reprocess the current 03175 token. */ 03176 } else { 03177 $this->closeCell(); 03178 return $this->inRow($token); 03179 } 03180 03181 /* An end tag whose tag name is one of: "body", "caption", "col", 03182 "colgroup", "html" */ 03183 } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], 03184 array('body', 'caption', 'col', 'colgroup', 'html'))) { 03185 /* Parse error. Ignore the token. */ 03186 03187 /* An end tag whose tag name is one of: "table", "tbody", "tfoot", 03188 "thead", "tr" */ 03189 } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], 03190 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { 03191 /* If the stack of open elements does not have an element in table 03192 scope with the same tag name as that of the token (which can only 03193 happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), 03194 then this is a parse error and the token must be ignored. */ 03195 if(!$this->elementInScope($token['name'], true)) { 03196 // Ignore. 03197 03198 /* Otherwise, close the cell (see below) and reprocess the current 03199 token. */ 03200 } else { 03201 $this->closeCell(); 03202 return $this->inRow($token); 03203 } 03204 03205 /* Anything else */ 03206 } else { 03207 /* Process the token as if the insertion mode was "in body". 
/**
 * Tree-construction handler for the "in select" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'data' is read for character and comment
 *                     tokens, 'name' only for start/end tag tokens.
 * @return void
 */
private function inSelect($token) {
    $type       = $token['type'];
    $isStartTag = $type === HTML5::STARTTAG;
    $isEndTag   = $type === HTML5::ENDTAG;

    /* A character token */
    if ($type === HTML5::CHARACTR) {
        /* Append the token's character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag token whose tag name is "option" */
    } elseif ($isStartTag && $token['name'] === 'option') {
        /* If the current node is an option element, act as if an end tag
        with the tag name "option" had been seen. */
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(array(
                'name' => 'option',
                'type' => HTML5::ENDTAG
            ));
        }

        /* Insert an HTML element for the token. */
        $this->insertElement($token);

    /* A start tag token whose tag name is "optgroup" */
    } elseif ($isStartTag && $token['name'] === 'optgroup') {
        /* If the current node is an option element, act as if an end tag
        with the tag name "option" had been seen. */
        if (end($this->stack)->nodeName === 'option') {
            $this->inSelect(array(
                'name' => 'option',
                'type' => HTML5::ENDTAG
            ));
        }

        /* If the current node is an optgroup element, act as if an end tag
        with the tag name "optgroup" had been seen. */
        if (end($this->stack)->nodeName === 'optgroup') {
            $this->inSelect(array(
                'name' => 'optgroup',
                'type' => HTML5::ENDTAG
            ));
        }

        /* Insert an HTML element for the token. */
        $this->insertElement($token);

    /* An end tag token whose tag name is "optgroup" */
    } elseif ($isEndTag && $token['name'] === 'optgroup') {
        /* First, if the current node is an option element, and the node
        immediately before it in the stack of open elements is an optgroup
        element, then act as if an end tag with the tag name "option" had
        been seen. The depth guard avoids reading index -1 on a stack
        holding a single element. */
        $depth = count($this->stack);

        if ($depth >= 2 &&
            $this->stack[$depth - 1]->nodeName === 'option' &&
            $this->stack[$depth - 2]->nodeName === 'optgroup') {
            $this->inSelect(array(
                'name' => 'option',
                'type' => HTML5::ENDTAG
            ));
        }

        /* If the current node is an optgroup element, then pop that node
        from the stack of open elements. Otherwise, this is a parse error,
        ignore the token.
        The current node is re-read here because the implied </option>
        above may have popped the stack; the node's name (not the node
        object itself) is what has to be compared with the string. */
        if (end($this->stack)->nodeName === 'optgroup') {
            array_pop($this->stack);
        }

    /* An end tag token whose tag name is "option" */
    } elseif ($isEndTag && $token['name'] === 'option') {
        /* If the current node is an option element, then pop that node
        from the stack of open elements. Otherwise, this is a parse error,
        ignore the token. */
        if (end($this->stack)->nodeName === 'option') {
            array_pop($this->stack);
        }

    /* An end tag whose tag name is "select" */
    } elseif ($isEndTag && $token['name'] === 'select') {
        /* Ignore the token (parse error, innerHTML case) unless a select
        element is in table scope. */
        if ($this->elementInScope($token['name'], true)) {
            /* Pop elements from the stack of open elements until a select
            element has been popped from the stack. */
            do {
                $popped = end($this->stack)->nodeName;
                array_pop($this->stack);
            } while ($popped !== 'select');

            /* Reset the insertion mode appropriately. */
            $this->resetInsertionMode();
        }

    /* A start tag whose tag name is "select" */
    } elseif ($isStartTag && $token['name'] === 'select') {
        /* Parse error. Act as if the token had been an end tag with the
        tag name "select" instead. */
        $this->inSelect(array(
            'name' => 'select',
            'type' => HTML5::ENDTAG
        ));

    /* An end tag whose tag name is one of: "caption", "table", "tbody",
    "tfoot", "thead", "tr", "td", "th" */
    } elseif ($isEndTag && in_array($token['name'], array('caption', 'table',
        'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'), true)) {
        /* Parse error. If the stack of open elements has an element in
        table scope with the same tag name as that of the token, then act
        as if an end tag with the tag name "select" had been seen, and
        reprocess the token. Otherwise, ignore the token. */
        if ($this->elementInScope($token['name'], true)) {
            $this->inSelect(array(
                'name' => 'select',
                'type' => HTML5::ENDTAG
            ));

            $this->mainPhase($token);
        }

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
/**
 * Tree-construction handler for the "in frameset" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'data' is read for character and comment
 *                     tokens, 'name' only for start/end tag tokens.
 * @return void
 */
private function inFrameset($token) {
    $type       = $token['type'];
    $isStartTag = $type === HTML5::STARTTAG;
    $isEndTag   = $type === HTML5::ENDTAG;

    /* A character token consisting only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
    or U+0020 SPACE */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag with the tag name "frameset" */
    } elseif ($isStartTag && $token['name'] === 'frameset') {
        $this->insertElement($token);

    /* An end tag with the tag name "frameset" */
    } elseif ($isEndTag && $token['name'] === 'frameset') {
        /* If the current node is the root html element, then this is a
        parse error; ignore the token. (innerHTML case) */
        if (end($this->stack)->nodeName !== 'html') {
            /* Otherwise, pop the current node from the stack of open
            elements. */
            array_pop($this->stack);

            /* Change the insertion mode to "after frameset" only when the
            current node is no longer a frameset element; inside nested
            framesets the outer one is still open and must keep accepting
            frame/frameset children. */
            $current = end($this->stack);
            if ($current === false || $current->nodeName !== 'frameset') {
                $this->mode = self::AFTR_FRAME;
            }
        }

    /* A start tag with the tag name "frame" */
    } elseif ($isStartTag && $token['name'] === 'frame') {
        /* Insert an HTML element for the token, then immediately pop the
        current node off the stack of open elements. */
        $this->insertElement($token);
        array_pop($this->stack);

    /* A start tag with the tag name "noframes" */
    } elseif ($isStartTag && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}

/**
 * Tree-construction handler for the "after frameset" insertion mode.
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'data' is read for character and comment
 *                     tokens, 'name' only for start/end tag tokens.
 * @return void
 */
private function afterFrameset($token) {
    $type = $token['type'];

    /* A character token consisting only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
    or U+0020 SPACE */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "html". The type is tested first so
    that tokens without a 'name' key are never indexed by it. */
    } elseif ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        /* Switch to the trailing end phase. */
        $this->phase = self::END_PHASE;

    /* A start tag with the tag name "noframes" */
    } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}

/**
 * Handles tokens emitted after the main phase has finished (the
 * "trailing end" phase).
 *
 * @param array $token Token emitted by the tokeniser. 'type' is always
 *                     read; 'data' is read for character and comment
 *                     tokens.
 * @return mixed Result of mainPhase() when the parser falls back to the
 *               main phase, otherwise null.
 */
private function trailingEndPhase($token) {
    $type = $token['type'];

    /* A DOCTYPE token */
    if ($type === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the Document object with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    /* A character token consisting only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
    or U+0020 SPACE */
    } elseif ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
        /* Process the token as it would be processed in the main phase. */
        $this->mainPhase($token);

    /* A character token that is NOT whitespace-only. Or a start tag
    token. Or an end tag token.
    The pattern test must be negated here: without the negation this
    branch could never match a character token (whitespace-only data is
    consumed by the branch above), so text following </html> was lost. */
    } elseif (
        ($type === HTML5::CHARACTR &&
            !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
        || $type === HTML5::STARTTAG
        || $type === HTML5::ENDTAG
    ) {
        /* Parse error. Switch back to the main phase and reprocess the
        token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);

    /* An end-of-file token */
    } elseif ($type === HTML5::EOF) {
        /* Nothing left to do. */
    }
}

/**
 * Creates an element for a start tag token, attaches it to the tree and
 * pushes it onto the stack of open elements.
 *
 * @param array $token  Start tag token; 'name' and 'attr' are read. Each
 *                      entry of 'attr' is read as an array with 'name'
 *                      and 'value' keys.
 * @param bool  $append Not used by this method; kept so existing callers
 *                      keep working.
 * @param bool  $check  When true, the tag name is reduced to ASCII
 *                      letters, digits and hyphens before the element is
 *                      created.
 * @return DOMElement The newly created element.
 */
private function insertElement($token, $append = true, $check = false) {
    // Proprietary workaround for libxml2's limitations with tag names
    if ($check) {
        // Slightly modified HTML5 tag-name modification,
        // removing anything that's not an ASCII letter, digit, or hyphen
        $name = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
        // Remove leading hyphens and numbers
        $name = ltrim($name, '-0..9');
        // In theory this should never be needed, but just in case
        if ($name === '') {
            $name = 'span'; // arbitrary generic choice
        }
        $token['name'] = $name;
    }

    $el = $this->dom->createElement($token['name']);

    // The first occurrence of a duplicated attribute wins.
    foreach ($token['attr'] as $attr) {
        if (!$el->hasAttribute($attr['name'])) {
            $el->setAttribute($attr['name'], $attr['value']);
        }
    }

    $this->appendToRealParent($el);
    $this->stack[] = $el;

    return $el;
}

/**
 * Creates a text node and attaches it via appendToRealParent().
 *
 * @param string $data Character data for the new text node.
 * @return void
 */
private function insertText($data) {
    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}

/**
 * Creates a comment node and attaches it via appendToRealParent().
 *
 * @param string $data Content of the new comment node.
 * @return void
 */
private function insertComment($data) {
    $comment = $this->dom->createComment($data);
    $this->appendToRealParent($comment);
}
*/ 03585 for($n = count($this->stack) - 1; $n >= 0; $n--) { 03586 if($this->stack[$n]->nodeName === 'table' && 03587 $this->stack[$n]->parentNode !== null) { 03588 $table = $this->stack[$n]; 03589 break; 03590 } 03591 } 03592 03593 if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) 03594 $this->foster_parent->insertBefore($node, $table); 03595 else 03596 $this->foster_parent->appendChild($node); 03597 03598 $this->foster_parent = null; 03599 } 03600 } 03601 03602 private function elementInScope($el, $table = false) { 03603 if(is_array($el)) { 03604 foreach($el as $element) { 03605 if($this->elementInScope($element, $table)) { 03606 return true; 03607 } 03608 } 03609 03610 return false; 03611 } 03612 03613 $leng = count($this->stack); 03614 03615 for($n = 0; $n < $leng; $n++) { 03616 /* 1. Initialise node to be the current node (the bottommost node of 03617 the stack). */ 03618 $node = $this->stack[$leng - 1 - $n]; 03619 03620 if($node->tagName === $el) { 03621 /* 2. If node is the target node, terminate in a match state. */ 03622 return true; 03623 03624 } elseif($node->tagName === 'table') { 03625 /* 3. Otherwise, if node is a table element, terminate in a failure 03626 state. */ 03627 return false; 03628 03629 } elseif($table === true && in_array($node->tagName, array('caption', 'td', 03630 'th', 'button', 'marquee', 'object'))) { 03631 /* 4. Otherwise, if the algorithm is the "has an element in scope" 03632 variant (rather than the "has an element in table scope" variant), 03633 and node is one of the following, terminate in a failure state. */ 03634 return false; 03635 03636 } elseif($node === $node->ownerDocument->documentElement) { 03637 /* 5. Otherwise, if node is an html element (root element), terminate 03638 in a failure state. (This can only happen if the node is the topmost 03639 node of the stack of open elements, and prevents the next step from 03640 being invoked if there are no more elements in the stack.) 
*/ 03641 return false; 03642 } 03643 03644 /* Otherwise, set node to the previous entry in the stack of open 03645 elements and return to step 2. (This will never fail, since the loop 03646 will always terminate in the previous step if the top of the stack 03647 is reached.) */ 03648 } 03649 } 03650 03651 private function reconstructActiveFormattingElements() { 03652 /* 1. If there are no entries in the list of active formatting elements, 03653 then there is nothing to reconstruct; stop this algorithm. */ 03654 $formatting_elements = count($this->a_formatting); 03655 03656 if($formatting_elements === 0) { 03657 return false; 03658 } 03659 03660 /* 3. Let entry be the last (most recently added) element in the list 03661 of active formatting elements. */ 03662 $entry = end($this->a_formatting); 03663 03664 /* 2. If the last (most recently added) entry in the list of active 03665 formatting elements is a marker, or if it is an element that is in the 03666 stack of open elements, then there is nothing to reconstruct; stop this 03667 algorithm. */ 03668 if($entry === self::MARKER || in_array($entry, $this->stack, true)) { 03669 return false; 03670 } 03671 03672 for($a = $formatting_elements - 1; $a >= 0; true) { 03673 /* 4. If there are no entries before entry in the list of active 03674 formatting elements, then jump to step 8. */ 03675 if($a === 0) { 03676 $step_seven = false; 03677 break; 03678 } 03679 03680 /* 5. Let entry be the entry one earlier than entry in the list of 03681 active formatting elements. */ 03682 $a--; 03683 $entry = $this->a_formatting[$a]; 03684 03685 /* 6. If entry is neither a marker nor an element that is also in 03686 thetack of open elements, go to step 4. */ 03687 if($entry === self::MARKER || in_array($entry, $this->stack, true)) { 03688 break; 03689 } 03690 } 03691 03692 while(true) { 03693 /* 7. Let entry be the element one later than entry in the list of 03694 active formatting elements. 
*/ 03695 if(isset($step_seven) && $step_seven === true) { 03696 $a++; 03697 $entry = $this->a_formatting[$a]; 03698 } 03699 03700 /* 8. Perform a shallow clone of the element entry to obtain clone. */ 03701 $clone = $entry->cloneNode(); 03702 03703 /* 9. Append clone to the current node and push it onto the stack 03704 of open elements so that it is the new current node. */ 03705 end($this->stack)->appendChild($clone); 03706 $this->stack[] = $clone; 03707 03708 /* 10. Replace the entry for entry in the list with an entry for 03709 clone. */ 03710 $this->a_formatting[$a] = $clone; 03711 03712 /* 11. If the entry for clone in the list of active formatting 03713 elements is not the last entry in the list, return to step 7. */ 03714 if(end($this->a_formatting) !== $clone) { 03715 $step_seven = true; 03716 } else { 03717 break; 03718 } 03719 } 03720 } 03721 03722 private function clearTheActiveFormattingElementsUpToTheLastMarker() { 03723 /* When the steps below require the UA to clear the list of active 03724 formatting elements up to the last marker, the UA must perform the 03725 following steps: */ 03726 03727 while(true) { 03728 /* 1. Let entry be the last (most recently added) entry in the list 03729 of active formatting elements. */ 03730 $entry = end($this->a_formatting); 03731 03732 /* 2. Remove entry from the list of active formatting elements. */ 03733 array_pop($this->a_formatting); 03734 03735 /* 3. If entry was a marker, then stop the algorithm at this point. 03736 The list has been cleared up to the last marker. 
*/ 03737 if($entry === self::MARKER) { 03738 break; 03739 } 03740 } 03741 } 03742 03743 private function generateImpliedEndTags($exclude = array()) { 03744 /* When the steps below require the UA to generate implied end tags, 03745 then, if the current node is a dd element, a dt element, an li element, 03746 a p element, a td element, a th element, or a tr element, the UA must 03747 act as if an end tag with the respective tag name had been seen and 03748 then generate implied end tags again. */ 03749 $node = end($this->stack); 03750 $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); 03751 03752 while(in_array(end($this->stack)->nodeName, $elements)) { 03753 array_pop($this->stack); 03754 } 03755 } 03756 03757 private function getElementCategory($node) { 03758 $name = $node->tagName; 03759 if(in_array($name, $this->special)) 03760 return self::SPECIAL; 03761 03762 elseif(in_array($name, $this->scoping)) 03763 return self::SCOPING; 03764 03765 elseif(in_array($name, $this->formatting)) 03766 return self::FORMATTING; 03767 03768 else 03769 return self::PHRASING; 03770 } 03771 03772 private function clearStackToTableContext($elements) { 03773 /* When the steps above require the UA to clear the stack back to a 03774 table context, it means that the UA must, while the current node is not 03775 a table element or an html element, pop elements from the stack of open 03776 elements. If this causes any elements to be popped from the stack, then 03777 this is a parse error. */ 03778 while(true) { 03779 $node = end($this->stack)->nodeName; 03780 03781 if(in_array($node, $elements)) { 03782 break; 03783 } else { 03784 array_pop($this->stack); 03785 } 03786 } 03787 } 03788 03789 private function resetInsertionMode() { 03790 /* 1. Let last be false. */ 03791 $last = false; 03792 $leng = count($this->stack); 03793 03794 for($n = $leng - 1; $n >= 0; $n--) { 03795 /* 2. Let node be the last node in the stack of open elements. 
*/ 03796 $node = $this->stack[$n]; 03797 03798 /* 3. If node is the first node in the stack of open elements, then 03799 set last to true. If the element whose innerHTML attribute is being 03800 set is neither a td element nor a th element, then set node to the 03801 element whose innerHTML attribute is being set. (innerHTML case) */ 03802 if($this->stack[0]->isSameNode($node)) { 03803 $last = true; 03804 } 03805 03806 /* 4. If node is a select element, then switch the insertion mode to 03807 "in select" and abort these steps. (innerHTML case) */ 03808 if($node->nodeName === 'select') { 03809 $this->mode = self::IN_SELECT; 03810 break; 03811 03812 /* 5. If node is a td or th element, then switch the insertion mode 03813 to "in cell" and abort these steps. */ 03814 } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { 03815 $this->mode = self::IN_CELL; 03816 break; 03817 03818 /* 6. If node is a tr element, then switch the insertion mode to 03819 "in row" and abort these steps. */ 03820 } elseif($node->nodeName === 'tr') { 03821 $this->mode = self::IN_ROW; 03822 break; 03823 03824 /* 7. If node is a tbody, thead, or tfoot element, then switch the 03825 insertion mode to "in table body" and abort these steps. */ 03826 } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { 03827 $this->mode = self::IN_TBODY; 03828 break; 03829 03830 /* 8. If node is a caption element, then switch the insertion mode 03831 to "in caption" and abort these steps. */ 03832 } elseif($node->nodeName === 'caption') { 03833 $this->mode = self::IN_CAPTION; 03834 break; 03835 03836 /* 9. If node is a colgroup element, then switch the insertion mode 03837 to "in column group" and abort these steps. (innerHTML case) */ 03838 } elseif($node->nodeName === 'colgroup') { 03839 $this->mode = self::IN_CGROUP; 03840 break; 03841 03842 /* 10. If node is a table element, then switch the insertion mode 03843 to "in table" and abort these steps. 
*/ 03844 } elseif($node->nodeName === 'table') { 03845 $this->mode = self::IN_TABLE; 03846 break; 03847 03848 /* 11. If node is a head element, then switch the insertion mode 03849 to "in body" ("in body"! not "in head"!) and abort these steps. 03850 (innerHTML case) */ 03851 } elseif($node->nodeName === 'head') { 03852 $this->mode = self::IN_BODY; 03853 break; 03854 03855 /* 12. If node is a body element, then switch the insertion mode to 03856 "in body" and abort these steps. */ 03857 } elseif($node->nodeName === 'body') { 03858 $this->mode = self::IN_BODY; 03859 break; 03860 03861 /* 13. If node is a frameset element, then switch the insertion 03862 mode to "in frameset" and abort these steps. (innerHTML case) */ 03863 } elseif($node->nodeName === 'frameset') { 03864 $this->mode = self::IN_FRAME; 03865 break; 03866 03867 /* 14. If node is an html element, then: if the head element 03868 pointer is null, switch the insertion mode to "before head", 03869 otherwise, switch the insertion mode to "after head". In either 03870 case, abort these steps. (innerHTML case) */ 03871 } elseif($node->nodeName === 'html') { 03872 $this->mode = ($this->head_pointer === null) 03873 ? self::BEFOR_HEAD 03874 : self::AFTER_HEAD; 03875 03876 break; 03877 03878 /* 15. If last is true, then set the insertion mode to "in body" 03879 and abort these steps. (innerHTML case) */ 03880 } elseif($last) { 03881 $this->mode = self::IN_BODY; 03882 break; 03883 } 03884 } 03885 } 03886 03887 private function closeCell() { 03888 /* If the stack of open elements has a td or th element in table scope, 03889 then act as if an end tag token with that tag name had been seen. */ 03890 foreach(array('td', 'th') as $cell) { 03891 if($this->elementInScope($cell, true)) { 03892 $this->inCell(array( 03893 'name' => $cell, 03894 'type' => HTML5::ENDTAG 03895 )); 03896 03897 break; 03898 } 03899 } 03900 } 03901 03902 public function save() { 03903 return $this->dom; 03904 } 03905 } 03906 ?>
| Copyright © 2003 - 2009 MyOOS [Shopsystem]. All rights reserved. MyOOS [Shopsystem] is Free Software released under the GNU/GPL License. Webmaster: info@r23.de (Impressum) |
|
