library/SimplePie/Sanitize.php Quellcode

Sanitize.php
gehe zur Dokumentation dieser Datei
1 <?php
55 {
// Private vars
// Base URL used by replace_urls(); assigned inside sanitize().
var $base;
// Registry object supplied through set_registry(); declared here so it is
// not created as an undeclared (dynamic) property on first assignment.
var $registry;

// Options
var $remove_div = true;
var $image_handler = '';
var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
// Read by do_strip_htmltags() and strip_tag(); needs a declared default so
// those reads are defined even when encode_instead_of_strip() was never called.
var $encode_instead_of_strip = false;
var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
var $strip_comments = false;
var $output_encoding = 'UTF-8';
var $enable_cache = true;
var $cache_location = './cache';
var $cache_name_function = 'md5';
var $timeout = 10;
var $useragent = '';
var $force_fsockopen = false;
// Element => attribute(s) map; populated by set_url_replacements(), which
// the constructor calls with null to install the defaults.
var $replace_url_attributes = null;
74 
/**
 * Set up the sanitizer with its default URL-replacement map.
 */
public function __construct()
{
    // A null argument makes set_url_replacements() install its built-in map.
    $use_defaults = null;
    $this->set_url_replacements($use_defaults);
}
80 
/**
 * Store the remove_div option, which sanitize() consults when deciding
 * whether to drop the leading <div ...> and trailing </div> from its output.
 *
 * @param mixed $enable Any value; it is reduced to a boolean before storing.
 */
public function remove_div($enable = true)
{
    $flag = $enable ? true : false;
    $this->remove_div = $flag;
}
85 
/**
 * Set the prefix that sanitize() puts in front of cached image names when
 * it rewrites <img src> attributes.
 *
 * @param mixed $page A truthy value is stored as a string; any falsy value
 *                    stores false, which turns image rewriting off.
 */
public function set_image_handler($page = false)
{
    $this->image_handler = $page ? (string) $page : false;
}
97 
/**
 * Inject the registry this object uses for its `call()` and `create()`
 * lookups (URL absolutizing, cache handlers, file fetching, re-encoding).
 *
 * @param SimplePie_Registry $registry Registry instance to keep.
 */
public function set_registry(SimplePie_Registry $registry)
{
    $this->registry = $registry;
}
102 
/**
 * Copy cache-related settings onto this object.
 *
 * Each setting is only overwritten when a usable value is supplied, so
 * earlier configuration survives a call that passes empty values.
 *
 * @param mixed  $enable_cache        Stored as a boolean unless null.
 * @param string $cache_location      Stored as a string when truthy.
 * @param string $cache_name_function Stored as a string when truthy.
 * @param string $cache_class         Accepted for signature compatibility; not used here.
 */
public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
{
    if (isset($enable_cache))
    {
        $this->enable_cache = (bool) $enable_cache;
    }

    if ($cache_location)
    {
        $this->cache_location = (string) $cache_location;
    }

    // Guarded like the settings above: without this check an empty argument
    // would replace the configured function name with an empty string, which
    // sanitize() later hands to call_user_func().
    if ($cache_name_function)
    {
        $this->cache_name_function = (string) $cache_name_function;
    }
}
120 
/**
 * Copy file-fetching settings onto this object.
 *
 * Only truthy arguments are stored, each one cast to string; falsy
 * arguments leave the current setting untouched.
 *
 * @param string $file_class      Accepted for signature compatibility; not used here.
 * @param mixed  $timeout         Timeout later passed to the File object.
 * @param mixed  $useragent       User agent later passed to the File object.
 * @param mixed  $force_fsockopen Flag later passed to the File object.
 */
public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
{
    // Property name => supplied value, in the same order the settings
    // were originally applied.
    $supplied = array(
        'timeout' => $timeout,
        'useragent' => $useragent,
        'force_fsockopen' => $force_fsockopen,
    );

    foreach ($supplied as $property => $value)
    {
        if ($value)
        {
            $this->$property = (string) $value;
        }
    }
}
138 
/**
 * Choose which HTML tags sanitize() strips.
 *
 * @param mixed $tags An array of tag names, a comma-separated string of
 *                    tag names (split on ',' without trimming), or a falsy
 *                    value to disable tag stripping.
 */
public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
{
    // Nothing to strip: store false so sanitize() skips the step.
    if (!$tags)
    {
        $this->strip_htmltags = false;
        return;
    }

    $this->strip_htmltags = is_array($tags) ? $tags : explode(',', $tags);
}
157 
/**
 * Store the flag that makes strip_tag() and do_strip_htmltags() emit
 * disallowed tags as escaped text instead of dropping them.
 *
 * @param mixed $encode Any value; it is reduced to a boolean before storing.
 */
public function encode_instead_of_strip($encode = false)
{
    $flag = $encode ? true : false;
    $this->encode_instead_of_strip = $flag;
}
162 
/**
 * Choose which attributes sanitize() removes from every element.
 *
 * @param mixed $attribs An array of attribute names, a comma-separated
 *                       string of names (split on ',' without trimming),
 *                       or a falsy value to disable attribute stripping.
 */
public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
{
    // Nothing to strip: store false so sanitize() skips the step.
    if (!$attribs)
    {
        $this->strip_attributes = false;
        return;
    }

    $this->strip_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
}
181 
/**
 * Store the flag that makes sanitize() delete comment nodes from HTML.
 *
 * @param mixed $strip Any value; it is reduced to a boolean before storing.
 */
public function strip_comments($strip = false)
{
    $flag = $strip ? true : false;
    $this->strip_comments = $flag;
}
186 
/**
 * Set the encoding of the strings sanitize() returns. sanitize() only
 * re-encodes its result when this differs from 'UTF-8'.
 *
 * @param mixed $encoding Encoding name; stored as a string.
 */
public function set_output_encoding($encoding = 'UTF-8')
{
    $name = (string) $encoding;
    $this->output_encoding = $name;
}
191 
/**
 * Define which attributes of which elements hold URLs that sanitize()
 * should resolve against the base URL.
 *
 * @param array|null $element_attribute Map of element name to an attribute
 *                                      name or a list of attribute names.
 *                                      Null installs the built-in map below;
 *                                      any other value is cast to array.
 */
public function set_url_replacements($element_attribute = null)
{
    // Caller supplied a map: store it (as an array) and stop.
    if ($element_attribute !== null)
    {
        $this->replace_url_attributes = (array) $element_attribute;
        return;
    }

    // Built-in element => URL attribute(s) map.
    $this->replace_url_attributes = array(
        'a' => 'href',
        'area' => 'href',
        'blockquote' => 'cite',
        'del' => 'cite',
        'form' => 'action',
        'img' => array(
            'longdesc',
            'src'
        ),
        'input' => 'src',
        'ins' => 'cite',
        'q' => 'cite'
    );
}
224 
/**
 * Sanitize a piece of feed content according to its construct type.
 *
 * Steps, in order: trim; resolve MAYBE_HTML into HTML or TEXT; base64
 * decode; for HTML/XHTML parse into a DOM, strip comments/tags/attributes,
 * absolutize URL attributes, optionally cache and rewrite images, and
 * serialize back; absolutize IRIs; escape TEXT and IRI values; finally
 * convert to the configured output encoding.
 *
 * @param string $data Content to sanitize.
 * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
 * @param string $base Base URL used to resolve relative URLs.
 * @return string The sanitized content. Empty input is returned unchanged
 *                (after trimming) unless the IRI flag is set.
 */
public function sanitize($data, $type, $base = '')
{
    $data = trim($data);

    // Empty content needs no work, except for IRIs, where an empty
    // reference is still resolved against the base below.
    if ($data === '' && !($type & SIMPLEPIE_CONSTRUCT_IRI))
    {
        return $data;
    }

    if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
    {
        // Treat the content as HTML when it contains something that looks
        // like an entity/character reference or a closing tag.
        if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
        {
            $type |= SIMPLEPIE_CONSTRUCT_HTML;
        }
        else
        {
            $type |= SIMPLEPIE_CONSTRUCT_TEXT;
        }
    }

    if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
    {
        $data = base64_decode($data);
    }

    // DOM-based cleaning applies to markup only. Without this guard plain
    // text and IRIs would also be pushed through the HTML parser.
    if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
    {
        $document = new DOMDocument();
        $document->encoding = 'UTF-8';
        $data = $this->preprocess($data, $type);

        // Parser diagnostics are routed to SimplePie_Misc::silence_errors
        // for the duration of the load only.
        set_error_handler(array('SimplePie_Misc', 'silence_errors'));
        $document->loadHTML($data);
        restore_error_handler();

        // Strip comments
        if ($this->strip_comments)
        {
            $xpath = new DOMXPath($document);
            $comments = $xpath->query('//comment()');

            foreach ($comments as $comment)
            {
                $comment->parentNode->removeChild($comment);
            }
        }

        // Strip out HTML tags and attributes that might cause various security problems.
        // Based on recommendations by Mark Pilgrim at:
        // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
        if ($this->strip_htmltags)
        {
            foreach ($this->strip_htmltags as $tag)
            {
                $this->strip_tag($tag, $document, $type);
            }
        }

        if ($this->strip_attributes)
        {
            foreach ($this->strip_attributes as $attrib)
            {
                $this->strip_attr($attrib, $document);
            }
        }

        // Replace relative URLs
        $this->base = $base;
        foreach ($this->replace_url_attributes as $element => $attributes)
        {
            $this->replace_urls($document, $element, $attributes);
        }

        // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
        if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
        {
            // REMOTE_ADDR is absent outside a web request (e.g. CLI); fall
            // back to an empty string instead of reading an undefined key.
            $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

            $images = $document->getElementsByTagName('img');
            foreach ($images as $img)
            {
                if (!$img->hasAttribute('src'))
                {
                    continue;
                }

                // $img is a DOMElement, so the source URL must be read with
                // getAttribute(); array-style access on it is a fatal error.
                $src = $img->getAttribute('src');
                $image_url = call_user_func($this->cache_name_function, $src);
                $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

                if ($cache->load())
                {
                    $img->setAttribute('src', $this->image_handler . $image_url);
                    continue;
                }

                $file = $this->registry->create('File', array($src, $this->timeout, 5, array('X-FORWARDED-FOR' => $remote_addr), $this->useragent, $this->force_fsockopen));

                // The bitmask test is parenthesised on purpose: `===` binds
                // tighter than `&`, so the unparenthesised form compares the
                // constant with 0 first and the test can never succeed.
                if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
                {
                    if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
                    {
                        $img->setAttribute('src', $this->image_handler . $image_url);
                    }
                    else
                    {
                        trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                    }
                }
            }
        }

        // Remove the DOCTYPE
        // Seems to cause segfaulting if we don't do this
        if ($document->firstChild instanceof DOMDocumentType)
        {
            $document->removeChild($document->firstChild);
        }

        // Move everything from the body to the root
        $real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
        $document->replaceChild($real_body, $document->firstChild);

        // Finally, convert to a HTML string
        $data = trim($document->saveHTML());

        if ($this->remove_div)
        {
            // Drop the wrapping <div ...> ... </div> entirely.
            $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
            $data = preg_replace('/<\/div>$/', '', $data);
        }
        else
        {
            // Keep the wrapper but reduce its opening tag to a bare <div>.
            $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
        }
    }

    if ($type & SIMPLEPIE_CONSTRUCT_IRI)
    {
        $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
        if ($absolute !== false)
        {
            $data = $absolute;
        }
    }

    if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
    {
        $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
    }

    if ($this->output_encoding !== 'UTF-8')
    {
        $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
    }

    return $data;
}
378 
/**
 * Wrap a markup fragment in a complete document so it can be handed to
 * DOMDocument::loadHTML().
 *
 * @param string $html Markup fragment.
 * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags.
 * @return string Doctype + <html><head>(charset meta)</head><body>fragment</body></html>.
 */
protected function preprocess($html, $type)
{
    if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
    {
        // Any flag other than XHTML is set: use the HTML doctype and add
        // the <div> wrapper ourselves. A stray </div> inside $html is not
        // protected against.
        $doctype = '<!DOCTYPE html>';
        $content_type = 'text/html';
        $html = '<div>' . $html . '</div>';
    }
    else
    {
        // Only the XHTML flag is set: the fragment is used as-is.
        $doctype = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
        $content_type = 'application/xhtml+xml';
    }

    $head = '<html><head>'
        . '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />'
        . '</head>';
    $body = '<body>' . $html . '</body></html>';

    return $doctype . $head . $body;
}
401 
/**
 * Resolve URL-bearing attributes of every $tag element against $this->base.
 *
 * Tags that are on the strip list are skipped. An attribute is left
 * untouched when the absolutizer returns false.
 *
 * @param DOMDocument  $document   Document to modify in place.
 * @param string       $tag        Element name to look for.
 * @param string|array $attributes One attribute name or a list of names.
 */
public function replace_urls($document, $tag, $attributes)
{
    $attribute_names = is_array($attributes) ? $attributes : array($attributes);

    // Elements of this kind are going to be stripped; nothing to rewrite.
    if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
    {
        return;
    }

    foreach ($document->getElementsByTagName($tag) as $node)
    {
        foreach ($attribute_names as $name)
        {
            if (!$node->hasAttribute($name))
            {
                continue;
            }

            $resolved = $this->registry->call('Misc', 'absolutize_url', array($node->getAttribute($name), $this->base));
            if ($resolved !== false)
            {
                $node->setAttribute($name, $resolved);
            }
        }
    }
}
428 
/**
 * Produce the replacement text for one matched tag.
 *
 * Reads $match[0] (whole match), $match[1] (tag name), $match[2] and
 * $match[3] (emitted after the name and between the tags respectively)
 * and $match[4] (returned as the kept content when stripping).
 *
 * @param array $match Regex match array as described above.
 * @return string Escaped markup when encode_instead_of_strip is on;
 *                otherwise $match[4] for non-script/style tags that have
 *                it, or an empty string.
 */
public function do_strip_htmltags($match)
{
    // True when the match carries group 4 and the tag is not script/style.
    $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

    if (!$this->encode_instead_of_strip)
    {
        return $keeps_content ? $match[4] : '';
    }

    if (!$keeps_content)
    {
        return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
    }

    $tag_name = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
    $tag_rest = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');

    return '&lt;' . $tag_name . $tag_rest . '&gt;' . $match[3] . '&lt;/' . $tag_name . '&gt;';
}
453 
/**
 * Remove every $tag element found under <body>.
 *
 * - With encode_instead_of_strip on, the element is replaced by its
 *   children, surrounded (except for script/style) by text nodes spelling
 *   out the opening and closing tag.
 * - Otherwise script/style elements are deleted together with their
 *   content, and any other element is replaced by its children.
 *
 * @param string      $tag      Element name to strip.
 * @param DOMDocument $document Document to modify in place.
 * @param int         $type     Bitmask of SIMPLEPIE_CONSTRUCT_* flags; used
 *                              to pick how empty attributes are written.
 */
protected function strip_tag($tag, $document, $type)
{
    $xpath = new DOMXPath($document);
    $elements = $xpath->query('body//' . $tag);

    $is_script_or_style = in_array($tag, array('script', 'style'), true);
    $encode = (bool) $this->encode_instead_of_strip;

    // script/style content is never kept when stripping.
    if (!$encode && $is_script_or_style)
    {
        foreach ($elements as $element)
        {
            $element->parentNode->removeChild($element);
        }

        return;
    }

    // The literal tag text is only written in encode mode, and never for
    // script/style.
    $write_tag_text = $encode && !$is_script_or_style;

    foreach ($elements as $element)
    {
        $fragment = $document->createDocumentFragment();

        if ($write_tag_text)
        {
            $text = '<' . $tag;
            if ($element->hasAttributes())
            {
                $attrs = array();
                foreach ($element->attributes as $name => $attr)
                {
                    $value = $attr->value;

                    // Compare against '' explicitly: empty() would also
                    // treat a real value of "0" as missing.
                    if ($value === '')
                    {
                        if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
                        {
                            // In XHTML, empty values should never exist, so we repeat the name
                            $value = $name;
                        }
                        elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
                        {
                            // For HTML, a bare attribute name is fine
                            $attrs[] = $name;
                            continue;
                        }
                    }

                    // Standard attribute text. Uses $value (not $attr->value)
                    // so the XHTML substitution above actually takes effect.
                    $attrs[] = $name . '="' . $value . '"';
                }
                $text .= ' ' . implode(' ', $attrs);
            }
            $text .= '>';
            $fragment->appendChild(new DOMText($text));
        }

        // Move the children over one by one; appendChild() detaches each
        // node from $element, so firstChild advances on its own.
        while ($element->firstChild !== null)
        {
            $fragment->appendChild($element->firstChild);
        }

        if ($write_tag_text)
        {
            $fragment->appendChild(new DOMText('</' . $tag . '>'));
        }

        if ($fragment->hasChildNodes())
        {
            $element->parentNode->replaceChild($fragment, $element);
        }
        else
        {
            // Nothing to keep in its place: just drop the element.
            $element->parentNode->removeChild($element);
        }
    }
}
538 
/**
 * Remove the attribute $attrib from every element in the document that
 * carries it.
 *
 * @param string      $attrib   Attribute name; inserted verbatim into the
 *                              XPath expression.
 * @param DOMDocument $document Document to modify in place.
 */
protected function strip_attr($attrib, $document)
{
    $finder = new DOMXPath($document);
    $expression = '//*[@' . $attrib . ']';

    foreach ($finder->query($expression) as $node)
    {
        $node->removeAttribute($attrib);
    }
}
549 }




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.