HTMLPurifier/Strategy/MakeWellFormed.php Quellcode

MakeWellFormed.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
15 {
16 
21  protected $tokens;
22 
27  protected $token;
28 
33  protected $zipper;
34 
39  protected $stack;
40 
45  protected $injectors;
46 
51  protected $config;
52 
57  protected $context;
58 
/**
 * Rewrites a token stream so that its tags are properly nested and closed.
 *
 * The method walks the stream with a zipper, keeping a stack of currently
 * open start tags. Malformed input is repaired by editing the stream
 * (inserting, converting or removing tokens) and then re-processing the
 * edited position, so that every change passes through the same checks.
 * Enabled injectors are given each text, element and end token and may
 * replace it.
 *
 * While running, the context variables 'CurrentNesting', 'InputZipper' and
 * 'CurrentToken' are registered; they are destroyed again before returning.
 * A warning (E_USER_WARNING) is triggered for every injector whose
 * prepare() call reports a problem, and that injector is not used.
 *
 * @param array $tokens  Token objects to process.
 * @param object $config  Configuration; must provide getHTMLDefinition(),
 *                        get() and getBatch().
 * @param object $context Context; must provide get(), register() and
 *                        destroy().
 * @return array The repaired token list as produced by the zipper's
 *               toArray(), or an empty array if there were no tokens.
 * @throws HTMLPurifier_Exception If a tag token of an unexpected kind is
 *               encountered, or an end tag reaches the start-tag code path.
 */
public function execute($tokens, $config, $context)
{
    $definition = $config->getHTMLDefinition();

    // local variables
    $generator = new HTMLPurifier_Generator($config, $context);
    $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
    // used for autoclose early abortion
    $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config);
    $e = $context->get('ErrorCollector', true);
    $i = false; // injector index
    // Build the zipper over the input and position $token on its first
    // token. Without this, $zipper and $token would be undefined below.
    list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens);
    if ($token === null) {
        return array();
    }
    $reprocess = false; // whether or not to reprocess the same token
    $stack = array();

    // member variables (bound by reference so that helper methods see the
    // same state as the local variables used in this method)
    $this->stack =& $stack;
    $this->tokens =& $tokens;
    $this->token =& $token;
    $this->zipper =& $zipper;
    $this->config = $config;
    $this->context = $context;

    // context variables
    $context->register('CurrentNesting', $stack);
    $context->register('InputZipper', $zipper);
    $context->register('CurrentToken', $token);

    // -- begin INJECTOR --

    $this->injectors = array();

    $injectors = $config->getBatch('AutoFormat');
    $def_injectors = $definition->info_injector;
    $custom_injectors = $injectors['Custom'];
    unset($injectors['Custom']); // special case
    foreach ($injectors as $injector => $b) {
        // XXX: Fix with a legitimate lookup table of enabled filters
        if (strpos($injector, '.') !== false) {
            continue;
        }
        $injector = "HTMLPurifier_Injector_$injector";
        if (!$b) {
            continue;
        }
        $this->injectors[] = new $injector;
    }
    foreach ($def_injectors as $injector) {
        // assumed to be objects
        $this->injectors[] = $injector;
    }
    foreach ($custom_injectors as $injector) {
        if (!$injector) {
            continue;
        }
        if (is_string($injector)) {
            $injector = "HTMLPurifier_Injector_$injector";
            $injector = new $injector;
        }
        $this->injectors[] = $injector;
    }

    // give the injectors references to the definition and context
    // variables for performance reasons.
    // Injectors whose prepare() reports an error are dropped. The surviving
    // list is rebuilt rather than spliced in place: splicing reindexes the
    // array, so the keys seen by the loop would go stale after the first
    // removal and later failing injectors would not be removed correctly.
    $enabled_injectors = array();
    foreach ($this->injectors as $injector) {
        $error = $injector->prepare($config, $context);
        if (!$error) {
            $enabled_injectors[] = $injector;
            continue;
        }
        trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
    }
    $this->injectors = $enabled_injectors;

    // -- end INJECTOR --

    // a note on reprocessing:
    //      In order to reduce code duplication, whenever some code needs
    //      to make HTML changes in order to make things "correct", the
    //      new HTML gets sent through the purifier, regardless of its
    //      status. This means that if we add a start token, because it
    //      was totally necessary, we don't have to update nesting; we just
    //      punt ($reprocess = true; continue;) and it does that for us.

    // The loop has no termination condition in its header; it ends via
    // "break" once the document end is reached with an empty stack.
    for (;;
         // only increment if we don't need to reprocess
         $reprocess ? $reprocess = false : $token = $zipper->next($token)) {

        // check for a rewind
        if (is_int($i)) {
            // possibility: disable rewinding if the current token has a
            // rewind set on it already. This would offer protection from
            // infinite loop, but might hinder some advanced rewinding.
            $rewind_offset = $this->injectors[$i]->getRewindOffset();
            if (is_int($rewind_offset)) {
                for ($j = 0; $j < $rewind_offset; $j++) {
                    if (empty($zipper->front)) break;
                    $token = $zipper->prev($token);
                    // indicate that other injectors should not process this token,
                    // but we need to reprocess it
                    unset($token->skip[$i]);
                    $token->rewind = $i;
                    // undo the effect the token had on the nesting stack
                    if ($token instanceof HTMLPurifier_Token_Start) {
                        array_pop($this->stack);
                    } elseif ($token instanceof HTMLPurifier_Token_End) {
                        $this->stack[] = $token->start;
                    }
                }
            }
            $i = false;
        }

        // handle case of document end
        if ($token === null) {
            // kill processing if stack is empty
            if (empty($this->stack)) {
                break;
            }

            // peek
            $top_nesting = array_pop($this->stack);
            $this->stack[] = $top_nesting;

            // send error [TagClosedSuppress]
            if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
            }

            // append, don't splice, since this is the end
            $token = new HTMLPurifier_Token_End($top_nesting->name);

            // punt!
            $reprocess = true;
            continue;
        }

        // quick-check: if it's not a tag, no need to process
        if (empty($token->is_tag)) {
            if ($token instanceof HTMLPurifier_Token_Text) {
                foreach ($this->injectors as $i => $injector) {
                    if (isset($token->skip[$i])) {
                        continue;
                    }
                    if ($token->rewind !== null && $token->rewind !== $i) {
                        continue;
                    }
                    // XXX: the injector works on a separate handle ($r);
                    // whatever it leaves there is spliced into the stream.
                    $r = $token;
                    $injector->handleText($r);
                    $token = $this->processToken($r, $i);
                    $reprocess = true;
                    break;
                }
            }
            // another possibility is a comment
            continue;
        }

        if (isset($definition->info[$token->name])) {
            $type = $definition->info[$token->name]->child->type;
        } else {
            $type = false; // Type is unknown, treat accordingly
        }

        // quick tag checks: anything that's *not* an end tag
        $ok = false;
        if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
            // claims to be a start tag but is empty
            $token = new HTMLPurifier_Token_Empty(
                $token->name,
                $token->attr,
                $token->line,
                $token->col,
                $token->armor
            );
            $ok = true;
        } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
            // claims to be empty but really is a start tag
            // NB: this assignment is required
            $old_token = $token;
            $token = new HTMLPurifier_Token_End($token->name);
            $token = $this->insertBefore(
                new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor)
            );
            // punt (since we had to modify the input stream in a non-trivial way)
            $reprocess = true;
            continue;
        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            // real empty token
            $ok = true;
        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            // start tag

            // ...unless they also have to close their parent
            if (!empty($this->stack)) {

                // Performance note: you might think that it's rather
                // inefficient, recalculating the autoclose information
                // for every tag that a token closes (since when we
                // do an autoclose, we push a new token into the
                // stream and then /process/ that, before
                // re-processing this token.)  But this is
                // necessary, because an injector can make
                // arbitrary transformations to the autoclosing
                // tokens we introduce, so things may have changed
                // in the meantime.  Also, doing the inefficient thing is
                // "easy" to reason about (for certain perverse definitions
                // of "easy")

                $parent = array_pop($this->stack);
                $this->stack[] = $parent;

                $parent_def = null;
                $parent_elements = null;
                $autoclose = false;
                if (isset($definition->info[$parent->name])) {
                    $parent_def = $definition->info[$parent->name];
                    $parent_elements = $parent_def->child->getAllowedElements($config);
                    $autoclose = !isset($parent_elements[$token->name]);
                }

                if ($autoclose && $definition->info[$token->name]->wrap) {
                    // Check if an element can be wrapped by another
                    // element to make it valid in a context (for
                    // example, <ul><ul> needs a <li> in between)
                    $wrapname = $definition->info[$token->name]->wrap;
                    $wrapdef = $definition->info[$wrapname];
                    $elements = $wrapdef->child->getAllowedElements($config);
                    if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
                        $newtoken = new HTMLPurifier_Token_Start($wrapname);
                        $token = $this->insertBefore($newtoken);
                        $reprocess = true;
                        continue;
                    }
                }

                $carryover = false;
                if ($autoclose && $parent_def->formatting) {
                    $carryover = true;
                }

                if ($autoclose) {
                    // check if this autoclose is doomed to fail
                    // (this rechecks $parent, which is harmless)
                    $autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
                    if (!$autoclose_ok) {
                        foreach ($this->stack as $ancestor) {
                            $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
                            if (isset($elements[$token->name])) {
                                $autoclose_ok = true;
                                break;
                            }
                            if ($definition->info[$token->name]->wrap) {
                                $wrapname = $definition->info[$token->name]->wrap;
                                $wrapdef = $definition->info[$wrapname];
                                $wrap_elements = $wrapdef->child->getAllowedElements($config);
                                if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
                                    $autoclose_ok = true;
                                    break;
                                }
                            }
                        }
                    }
                    if ($autoclose_ok) {
                        // errors need to be updated
                        $new_token = new HTMLPurifier_Token_End($parent->name);
                        $new_token->start = $parent;
                        // [TagClosedSuppress]
                        if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
                            if (!$carryover) {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                            } else {
                                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
                            }
                        }
                        if ($carryover) {
                            // close the formatting parent, emit the current
                            // token, then reopen a copy of the parent
                            $element = clone $parent;
                            // [TagClosedAuto]
                            $element->armor['MakeWellFormed_TagClosedError'] = true;
                            $element->carryover = true;
                            $token = $this->processToken(array($new_token, $token, $element));
                        } else {
                            $token = $this->insertBefore($new_token);
                        }
                    } else {
                        $token = $this->remove();
                    }
                    $reprocess = true;
                    continue;
                }

            }
            $ok = true;
        }

        if ($ok) {
            foreach ($this->injectors as $i => $injector) {
                if (isset($token->skip[$i])) {
                    continue;
                }
                if ($token->rewind !== null && $token->rewind !== $i) {
                    continue;
                }
                $r = $token;
                $injector->handleElement($r);
                $token = $this->processToken($r, $i);
                $reprocess = true;
                break;
            }
            if (!$reprocess) {
                // ah, nothing interesting happened; do normal processing
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $this->stack[] = $token;
                } elseif ($token instanceof HTMLPurifier_Token_End) {
                    throw new HTMLPurifier_Exception(
                        'Improper handling of end tag in start code; possible error in MakeWellFormed'
                    );
                }
            }
            continue;
        }

        // sanity check: we should be dealing with a closing tag
        if (!$token instanceof HTMLPurifier_Token_End) {
            throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier');
        }

        // make sure that we have something open
        if (empty($this->stack)) {
            if ($escape_invalid_tags) {
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                }
                $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
            } else {
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                $token = $this->remove();
            }
            $reprocess = true;
            continue;
        }

        // first, check for the simplest case: everything closes neatly.
        // Eventually, everything passes through here; if there are problems
        // we modify the input stream accordingly and then punt, so that
        // the tokens get processed again.
        $current_parent = array_pop($this->stack);
        if ($current_parent->name == $token->name) {
            $token->start = $current_parent;
            foreach ($this->injectors as $i => $injector) {
                if (isset($token->skip[$i])) {
                    continue;
                }
                if ($token->rewind !== null && $token->rewind !== $i) {
                    continue;
                }
                $r = $token;
                $injector->handleEnd($r);
                $token = $this->processToken($r, $i);
                // the end tag will be reprocessed, so its parent must be
                // back on the stack
                $this->stack[] = $current_parent;
                $reprocess = true;
                break;
            }
            continue;
        }

        // okay, so we're trying to close the wrong tag

        // undo the previous pop
        $this->stack[] = $current_parent;

        // scroll back the entire nest, trying to find our tag.
        // (feature could be to specify how far you'd like to go)
        $size = count($this->stack);
        // -2 because -1 is the last element, but we already checked that
        $skipped_tags = false;
        for ($j = $size - 2; $j >= 0; $j--) {
            if ($this->stack[$j]->name == $token->name) {
                $skipped_tags = array_slice($this->stack, $j);
                break;
            }
        }

        // we didn't find the tag, so remove
        if ($skipped_tags === false) {
            if ($escape_invalid_tags) {
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                }
                $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token));
            } else {
                if ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                $token = $this->remove();
            }
            $reprocess = true;
            continue;
        }

        // do errors, in REVERSE $j order: a,b,c with </a></b></c>
        $c = count($skipped_tags);
        if ($e) {
            for ($j = $c - 1; $j > 0; $j--) {
                // notice we exclude $j == 0, i.e. the current ending tag, from
                // the errors... [TagClosedSuppress]
                if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
                }
            }
        }

        // insert tags, in FORWARD $j order: c,b,a with </a></b></c>
        $replace = array($token);
        for ($j = 1; $j < $c; $j++) {
            // ...as well as from the insertions
            $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name);
            $new_token->start = $skipped_tags[$j];
            array_unshift($replace, $new_token);
            if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
                // [TagClosedAuto]
                // formatting elements are reopened after the end tag
                $element = clone $skipped_tags[$j];
                $element->carryover = true;
                $element->armor['MakeWellFormed_TagClosedError'] = true;
                $replace[] = $element;
            }
        }
        $token = $this->processToken($replace);
        $reprocess = true;
        continue;
    }

    $context->destroy('CurrentToken');
    $context->destroy('CurrentNesting');
    $context->destroy('InputZipper');

    unset($this->injectors, $this->stack, $this->tokens);
    return $zipper->toArray($token);
}
513 
/**
 * Applies a replacement specification at the current zipper position.
 *
 * Accepted forms of $token:
 *  - an object: replace the current token with that object;
 *  - an integer: delete that many tokens;
 *  - false: delete the current token;
 *  - an array: either array(delete count, new tokens...) or just a list
 *    of new tokens, in which case one token is deleted.
 *
 * @param mixed $token    Replacement specification (see above).
 * @param int   $injector Index of the injector that produced the
 *                        replacement, or -1 (the default) for none. When
 *                        given, every inserted token is marked to be
 *                        skipped by that injector.
 * @return mixed The second element of the zipper's splice() result, which
 *               callers assign as the new current token.
 * @throws HTMLPurifier_Exception If the specification has an unsupported
 *               type or asks for zero tokens to be deleted.
 */
protected function processToken($token, $injector = -1)
{
    // Bring every accepted form into the array shape
    // array(number of nodes to delete, new node 1, new node 2, ...).
    if (is_object($token)) {
        $spec = array(1, $token);
    } elseif (is_int($token)) {
        $spec = array($token);
    } elseif ($token === false) {
        $spec = array(1);
    } else {
        $spec = $token;
    }

    if (!is_array($spec)) {
        throw new HTMLPurifier_Exception('Invalid token type from injector');
    }
    if (!is_int($spec[0])) {
        // a bare list of tokens replaces exactly one token
        array_unshift($spec, 1);
    }
    if ($spec[0] === 0) {
        throw new HTMLPurifier_Exception('Deleting zero tokens is not valid');
    }

    // Split the count off; what remains in $spec are the tokens to insert.
    $delete_count = array_shift($spec);
    $spliced = $this->zipper->splice($this->token, $delete_count, $spec);
    $removed = $spliced[0];
    $current = $spliced[1];

    if ($injector > -1) {
        // Inserted tokens inherit the skip map of the first removed token
        // and are additionally skipped by the originating injector.
        $skip = isset($removed[0]) ? $removed[0]->skip : array();
        $skip[$injector] = true;
        foreach ($spec as $inserted) {
            $inserted->skip = $skip;
        }
    }

    return $current;
}
575 
/**
 * Inserts a token at the current position without removing anything.
 *
 * @param object $token Token to insert.
 * @return mixed The second element of the zipper's splice() result, which
 *               callers assign as the new current token.
 */
private function insertBefore($token)
{
    // NB: deliberately a zero-length splice rather than
    // $this->zipper->insertBefore(), due to positioning differences.
    list(, $current) = $this->zipper->splice($this->token, 0, array($token));

    return $current;
}
589 
/**
 * Deletes at the zipper's current position.
 *
 * @return mixed Whatever the zipper's delete() returns; callers assign it
 *               as the new current token.
 */
private function remove()
{
    $current = $this->zipper->delete();

    return $current;
}
598 }
599 
600 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.