<?php

/**
 * Lexer that builds its token list from the callbacks of an XML_HTMLSax3
 * parser.
 *
 * tokenizeHTML() registers this object as the parser's handler object; the
 * parser then invokes openHandler(), closeHandler(), dataHandler() and
 * escapeHandler(), each of which appends the matching HTMLPurifier_Token_*
 * instance to $tokens.
 */
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
{

    /**
     * Tokens accumulated by the handler callbacks during the current parse.
     */
    protected $tokens = array();

    /**
     * Name of the self-closed tag reported by the immediately preceding
     * callback, or null when the preceding callback was anything else.
     * Used by closeHandler() to recognise the single redundant close event
     * that follows a self-closed tag.
     */
    protected $pendingEmptyClose = null;

    /**
     * Tokenizes a string of HTML.
     *
     * @param $string  HTML to tokenize; it is passed through normalize() first.
     * @param $config  Configuration object, forwarded to normalize().
     * @param $context Context object, forwarded to normalize().
     * @return Array of tokens produced by the handler callbacks.
     */
    public function tokenizeHTML($string, $config, $context) {

        // start from a clean slate so repeated calls do not leak state
        $this->tokens = array();
        $this->pendingEmptyClose = null;

        $string = $this->normalize($string, $config, $context);

        $parser = new XML_HTMLSax3();
        $parser->set_object($this);
        $parser->set_element_handler('openHandler', 'closeHandler');
        $parser->set_data_handler('dataHandler');
        $parser->set_escape_handler('escapeHandler');

        // doesn't seem to work correctly for attributes
        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

        $parser->parse($string);

        return $this->tokens;

    }

    /**
     * Open tag event handler; appends a start or empty token.
     *
     * @param $parser Parser instance issuing the callback (unused).
     * @param $name   Tag name.
     * @param $attrs  Attribute array; every value is run through parseData().
     * @param $closed Truthy when the tag was self-closed.
     * @return Always true.
     */
    public function openHandler(&$parser, $name, $attrs, $closed) {
        // entities are not resolved in attrs
        foreach ($attrs as $key => $attr) {
            $attrs[$key] = $this->parseData($attr);
        }
        if ($closed) {
            $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
            // remember the tag so the redundant close that follows is skipped
            $this->pendingEmptyClose = $name;
        } else {
            $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
            $this->pendingEmptyClose = null;
        }
        return true;
    }

    /**
     * Close tag event handler; appends an end token unless the event is the
     * redundant close of a self-closed tag.
     *
     * @param $parser Parser instance issuing the callback (unused).
     * @param $name   Tag name.
     * @return Always true.
     */
    public function closeHandler(&$parser, $name) {
        // HTMLSax3 seems to always send empty tags an extra close tag;
        // ignore it, but only when it directly follows the self-closed tag
        // of the same name. Inspecting the last stored token instead would
        // fail on an empty token list and would also swallow every later
        // close tag (e.g. the </div> in <div><br /></div>), because ignoring
        // a close never changes what the last stored token is.
        $redundant = $this->pendingEmptyClose !== null
            && $this->pendingEmptyClose === $name;
        $this->pendingEmptyClose = null;
        if ($redundant) {
            return true;
        }
        $this->tokens[] = new HTMLPurifier_Token_End($name);
        return true;
    }

    /**
     * Data event handler; appends a text token.
     *
     * @param $parser Parser instance issuing the callback (unused).
     * @param $data   Character data reported by the parser.
     * @return Always true.
     */
    public function dataHandler(&$parser, $data) {
        $this->pendingEmptyClose = null;
        $this->tokens[] = new HTMLPurifier_Token_Text($data);
        return true;
    }

    /**
     * Escaped text handler; appends a comment token when the data starts
     * with '--', and otherwise appends nothing.
     *
     * @param $parser Parser instance issuing the callback (unused).
     * @param $data   Escaped content reported by the parser.
     * @return Always true.
     */
    public function escapeHandler(&$parser, $data) {
        $this->pendingEmptyClose = null;
        if (strpos($data, '--') === 0) {
            $this->tokens[] = new HTMLPurifier_Token_Comment($data);
        }
        // CDATA is handled elsewhere, but if it was handled here:
        //if (strpos($data, '[CDATA[') === 0) {
        //    $this->tokens[] = new HTMLPurifier_Token_Text(
        //        substr($data, 7, strlen($data) - 9) );
        //}
        return true;
    }

}

// vim: et sw=4 sts=4
| Copyright © 2003 - 2009 MyOOS [Shopsystem]. All rights reserved. MyOOS [Shopsystem] is Free Software released under the GNU/GPL License. Webmaster: info@r23.de (Impressum) |
|
