<?php

/**
 * Strategy that rewrites a token stream so that it is well-formed:
 * start/empty tags are normalised against the HTML definition, parents
 * that cannot contain a child are auto-closed, unclosed tags are closed at
 * document end, and stray end tags are removed or escaped to text.
 *
 * Injectors (configured through the AutoFormat directives or supplied by the
 * HTML definition) get a chance to inspect and replace each token as it is
 * processed.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Token stream being processed. Bound by reference to the local
     * $tokens array in execute() so helper methods can splice it.
     */
    protected $tokens;

    /**
     * Index of the current token in $tokens. Bound by reference to the
     * local $t in execute().
     */
    protected $t;

    /**
     * Stack of currently open start tokens (the current nesting). Bound by
     * reference to the local $stack in execute().
     */
    protected $stack;

    /**
     * List of active injector objects, keyed 0..n-1. The keys are used as
     * injector identifiers in each token's skip/rewind bookkeeping.
     */
    protected $injectors;

    /**
     * Configuration object passed to execute().
     */
    protected $config;

    /**
     * Context object passed to execute().
     */
    protected $context;

    /**
     * Runs the strategy over a token stream.
     *
     * @param array  $tokens  List of token objects to process.
     * @param object $config  Configuration object; must provide
     *                        getHTMLDefinition(), get() and getBatch().
     * @param object $context Context object; the variables CurrentNesting,
     *                        InputIndex, InputTokens and CurrentToken are
     *                        registered for the duration of the call.
     * @return array The well-formed token list.
     * @throws HTMLPurifier_Exception If a tag token of an unexpected type is
     *                                encountered or an injector hands back an
     *                                invalid replacement.
     */
    public function execute($tokens, $config, $context) {

        $definition = $config->getHTMLDefinition();

        // local variables
        $generator = new HTMLPurifier_Generator($config, $context);
        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $e = $context->get('ErrorCollector', true);
        $t = false; // token index
        $i = false; // injector index
        $token = false; // the current token
        $reprocess = false; // whether or not to reprocess the same token
        $stack = array();

        // member variables (references, so helpers see live state)
        $this->stack =& $stack;
        $this->t =& $t;
        $this->tokens =& $tokens;
        $this->config = $config;
        $this->context = $context;

        // context variables
        $context->register('CurrentNesting', $stack);
        $context->register('InputIndex', $t);
        $context->register('InputTokens', $tokens);
        $context->register('CurrentToken', $token);

        // -- begin INJECTOR --

        $this->injectors = array();

        $injectors = $config->getBatch('AutoFormat');
        $def_injectors = $definition->info_injector;
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            // XXX: Fix with a legitimate lookup table of enabled filters
            if (strpos($injector, '.') !== false) continue;
            $injector = "HTMLPurifier_Injector_$injector";
            if (!$b) continue;
            $this->injectors[] = new $injector;
        }
        foreach ($def_injectors as $injector) {
            // assumed to be objects
            $this->injectors[] = $injector;
        }
        foreach ($custom_injectors as $injector) {
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }

        // Give the injectors references to the definition and context
        // variables for performance reasons; prepare() returns a truthy
        // value naming what is missing when the injector cannot be enabled.
        //
        // The surviving injectors are collected into a fresh list rather
        // than spliced out of $this->injectors mid-iteration: splicing
        // renumbers the list while foreach keeps yielding the old keys, so
        // a second rejected injector would remove the wrong entry.
        $enabled = array();
        foreach ($this->injectors as $injector) {
            $error = $injector->prepare($config, $context);
            if (!$error) {
                $enabled[] = $injector;
                continue;
            }
            trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
        }
        $this->injectors = $enabled;

        // -- end INJECTOR --

        // a note on punting:
        //      In order to reduce code duplication, whenever some code needs
        //      to make HTML changes in order to make things "correct", the
        //      new HTML gets sent through the purifier, regardless of its
        //      status. This means that if we add a start token, because it
        //      was totally necessary, we don't have to update nesting; we just
        //      punt ($reprocess = true; continue;) and it does that for us.

        // isset is in loop because $tokens size changes during loop exec
        for (
            $t = 0;
            $t == 0 || isset($tokens[$t - 1]);
            // only increment if we don't need to reprocess
            $reprocess ? $reprocess = false : $t++
        ) {

            // check for a rewind
            if (is_int($i) && $i >= 0) {
                // possibility: disable rewinding if the current token has a
                // rewind set on it already. This would offer protection from
                // infinite loop, but might hinder some advanced rewinding.
                $rewind_to = $this->injectors[$i]->getRewind();
                if (is_int($rewind_to) && $rewind_to < $t) {
                    if ($rewind_to < 0) $rewind_to = 0;
                    while ($t > $rewind_to) {
                        $t--;
                        $prev = $tokens[$t];
                        // indicate that other injectors should not process this token,
                        // but we need to reprocess it
                        unset($prev->skip[$i]);
                        $prev->rewind = $i;
                        // undo the nesting changes the rewound tokens caused
                        if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack);
                        elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start;
                    }
                }
                $i = false;
            }

            // handle case of document end
            if (!isset($tokens[$t])) {
                // kill processing if stack is empty
                if (empty($this->stack)) break;

                // peek
                $top_nesting = array_pop($this->stack);
                $this->stack[] = $top_nesting;

                // send error
                if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
                }

                // append, don't splice, since this is the end
                $tokens[] = new HTMLPurifier_Token_End($top_nesting->name);

                // punt!
                $reprocess = true;
                continue;
            }

            $token = $tokens[$t];

            // quick-check: if it's not a tag, no need to process
            if (empty($token->is_tag)) {
                if ($token instanceof HTMLPurifier_Token_Text) {
                    // only the first eligible injector handles the token per
                    // pass; the token is then reprocessed for the others
                    foreach ($this->injectors as $i => $injector) {
                        if (isset($token->skip[$i])) continue;
                        if ($token->rewind !== null && $token->rewind !== $i) continue;
                        // NOTE(review): presumably handleText() takes $token by
                        // reference and may replace it - confirm in the injector API
                        $injector->handleText($token);
                        $this->processToken($token, $i);
                        $reprocess = true;
                        break;
                    }
                }
                // another possibility is a comment
                continue;
            }

            if (isset($definition->info[$token->name])) {
                $type = $definition->info[$token->name]->child->type;
            } else {
                $type = false; // Type is unknown, treat accordingly
            }

            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
                // claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
                // claims to be empty but really is a start tag
                $this->swap(new HTMLPurifier_Token_End($token->name));
                $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
                // punt (since we had to modify the input stream in a non-trivial way)
                $reprocess = true;
                continue;
            } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                // real empty token
                $ok = true;
            } elseif ($token instanceof HTMLPurifier_Token_Start) {
                // start tag

                // ...unless they also have to close their parent
                if (!empty($this->stack)) {

                    // peek at the innermost open element
                    $parent = array_pop($this->stack);
                    $this->stack[] = $parent;

                    if (isset($definition->info[$parent->name])) {
                        $elements = $definition->info[$parent->name]->child->getAllowedElements($config);
                        $autoclose = !isset($elements[$token->name]);
                    } else {
                        $autoclose = false;
                    }

                    // formatting parents are re-opened after the new element
                    $carryover = false;
                    if ($autoclose && $definition->info[$parent->name]->formatting) {
                        $carryover = true;
                    }

                    if ($autoclose) {
                        // errors need to be updated
                        $new_token = new HTMLPurifier_Token_End($parent->name);
                        $new_token->start = $parent;
                        if ($carryover) {
                            $element = clone $parent;
                            $element->armor['MakeWellFormed_TagClosedError'] = true;
                            $element->carryover = true;
                            $this->processToken(array($new_token, $token, $element));
                        } else {
                            $this->insertBefore($new_token);
                        }
                        if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                            if (!$carryover) {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                            } else {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                            }
                        }
                        $reprocess = true;
                        continue;
                    }

                }
                $ok = true;
            }

            if ($ok) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleElement($token);
                    $this->processToken($token, $i);
                    $reprocess = true;
                    break;
                }
                if (!$reprocess) {
                    // ah, nothing interesting happened; do normal processing
                    $this->swap($token);
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        $this->stack[] = $token;
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed');
                    }
                }
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if (!$token instanceof HTMLPurifier_Token_End) {
                throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
            }

            // make sure that we have something open
            if (empty($this->stack)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // first, check for the simplest case: everything closes neatly.
            // Eventually, everything passes through here; if there are problems
            // we modify the input stream accordingly and then punt, so that
            // the tokens get processed again.
            $current_parent = array_pop($this->stack);
            if ($current_parent->name == $token->name) {
                $token->start = $current_parent;
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) continue;
                    if ($token->rewind !== null && $token->rewind !== $i) continue;
                    $injector->handleEnd($token);
                    $this->processToken($token, $i);
                    // the token will be reprocessed, so the element is
                    // still considered open for now
                    $this->stack[] = $current_parent;
                    $reprocess = true;
                    break;
                }
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $this->stack[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->stack);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($j = $size - 2; $j >= 0; $j--) {
                if ($this->stack[$j]->name == $token->name) {
                    $skipped_tags = array_slice($this->stack, $j);
                    break;
                }
            }

            // we didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $this->swap(new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    ));
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } else {
                    $this->remove();
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                $reprocess = true;
                continue;
            }

            // do errors, in REVERSE $j order: a,b,c with </a></b></c>
            $c = count($skipped_tags);
            if ($e) {
                for ($j = $c - 1; $j > 0; $j--) {
                    // notice we exclude $j == 0, i.e. the current ending tag, from
                    // the errors...
                    if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                        $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                    }
                }
            }

            // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
            $replace = array($token);
            for ($j = 1; $j < $c; $j++) {
                // ...as well as from the insertions
                $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
                $new_token->start = $skipped_tags[$j];
                array_unshift($replace, $new_token);
                if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                    // re-open formatting elements after the closed tag
                    $element = clone $skipped_tags[$j];
                    $element->carryover = true;
                    $element->armor['MakeWellFormed_TagClosedError'] = true;
                    $replace[] = $element;
                }
            }
            $this->processToken($replace);
            $reprocess = true;
            continue;
        }

        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');

        // drop the by-reference bindings to the locals of this call
        unset($this->injectors, $this->stack, $this->tokens, $this->t);
        return $tokens;
    }

    /**
     * Applies a replacement at the current position of the token stream.
     *
     * Accepted forms of $token, all normalised to
     * array(number of tokens to delete, new token 1, new token 2, ...):
     *  - token object:        replace the current token with it;
     *  - integer n:           delete n tokens, insert nothing;
     *  - false:               delete the current token;
     *  - array of tokens:     replace the current token with the list;
     *  - array(n, tokens...): delete n tokens and insert the rest.
     *
     * @param mixed $token    Replacement in one of the forms above.
     * @param int   $injector Index of the injector that produced the
     *                        replacement, or -1 if none. When given, every
     *                        inserted token inherits the skip list of the
     *                        first removed token and is additionally marked
     *                        to be skipped by that injector.
     * @throws HTMLPurifier_Exception If $token has an unsupported type or
     *                                asks for zero tokens to be deleted.
     */
    protected function processToken($token, $injector = -1) {

        // normalize forms of token
        if (is_object($token)) $token = array(1, $token);
        if (is_int($token))    $token = array($token);
        if ($token === false)  $token = array(1);
        if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector');
        if (!is_int($token[0])) array_unshift($token, 1);
        if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');

        // $token is now an array with the following form:
        //      array(number nodes to delete, new node 1, new node 2, ...)

        $delete = array_shift($token);
        $old = array_splice($this->tokens, $this->t, $delete, $token);

        if ($injector > -1) {
            // determine appropriate skips
            $oldskip = isset($old[0]) ? $old[0]->skip : array();
            foreach ($token as $object) {
                $object->skip = $oldskip;
                $object->skip[$injector] = true;
            }
        }

    }

    /**
     * Inserts a token before the current token; the inserted token then
     * occupies the current index.
     */
    private function insertBefore($token) {
        array_splice($this->tokens, $this->t, 0, array($token));
    }

    /**
     * Removes the current token; the following token then occupies the
     * current index.
     */
    private function remove() {
        array_splice($this->tokens, $this->t, 1);
    }

    /**
     * Replaces the current token with another one.
     */
    private function swap($token) {
        $this->tokens[$this->t] = $token;
    }

}

// vim: et sw=4 sts=4
| Copyright © 2003 - 2009 MyOOS [Shopsystem]. All rights reserved. MyOOS [Shopsystem] is Free Software released under the GNU/GPL License. Webmaster: info@r23.de (Impressum) |
|
