HTMLPurifier/Filter/ExtractStyleBlocks.php Quellcode

ExtractStyleBlocks.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
3 // why is this a top level function? Because PHP 5.2.0 doesn't seem to
4 // understand how to interpret this filter if it's a static method.
5 // It's all really silly, but if we go this route it might be reasonable
6 // to coalesce all of these methods into one.
8 {
9 }
10 
26 {
/**
 * Name of this filter; initialised to 'ExtractStyleBlocks'.
 */
30  public $name = 'ExtractStyleBlocks';
31 
/**
 * Scratch list of <style> block contents. styleCallback() appends to it
 * while preFilter() runs its regex; preFilter() copies it out and resets
 * it to an empty array afterwards.
 */
35  private $_styleMatches = array();
36 
/**
 * CSS parser/printer used by cleanCSS(). The constructor sets it to a
 * csstidy instance; preFilter() replaces it when the
 * Filter.ExtractStyleBlocks.TidyImpl directive is not null. preFilter()
 * skips cleaning entirely when this value is falsy.
 */
40  private $_tidy;
41 
/**
 * Validator applied by cleanCSS() to the part of a selector that follows
 * a '#' (an HTMLPurifier_AttrDef_HTML_ID created in the constructor).
 */
45  private $_id_attrdef;
46 
/**
 * Validator applied by cleanCSS() to the part of a selector that follows
 * a '.' (an HTMLPurifier_AttrDef_CSS_Ident created in the constructor).
 */
50  private $_class_attrdef;
51 
/**
 * Validator applied by cleanCSS() to the part of a selector that follows
 * a ':' (an HTMLPurifier_AttrDef_Enum over the names listed in the
 * constructor: first-child, link, visited, active, hover, focus).
 */
55  private $_enum_attrdef;
56 
/**
 * Sets up the default CSS parser and the three validators that
 * cleanCSS() uses for the '#', '.' and ':' parts of a selector.
 */
public function __construct()
{
    // Default tidy implementation; preFilter() may swap it out later
    // based on the Filter.ExtractStyleBlocks.TidyImpl directive.
    $tidy = new csstidy();
    $tidy->set_cfg('lowercase_s', false);
    $this->_tidy = $tidy;

    $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
    $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();

    // The only names accepted after a ':' in a selector.
    $allowed_pseudo = array(
        'first-child',
        'link',
        'visited',
        'active',
        'hover',
        'focus'
    );
    $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum($allowed_pseudo);
}
74 
/**
 * Regex callback used by preFilter(): remembers the captured contents of
 * one <style> block.
 *
 * Nothing is returned, so when used with preg_replace_callback() the
 * matched block is replaced by an empty string.
 *
 * @param array $matches regex match; index 1 holds the block contents
 */
protected function styleCallback($matches)
{
    $contents = $matches[1];
    array_push($this->_styleMatches, $contents);
}
83 
/**
 * Strips <style> blocks out of the HTML and publishes their contents in
 * the context under the name 'StyleBlocks'.
 *
 * When a tidy implementation is available, each extracted block is run
 * through cleanCSS() before this method returns; otherwise the raw block
 * contents are what gets registered.
 *
 * @param string $html document to filter
 * @param object $config configuration; read via get() here and passed on
 *                       to cleanCSS() (presumably an HTMLPurifier_Config)
 * @param object $context context that receives 'StyleBlocks' via
 *                        register() (presumably an HTMLPurifier_Context)
 * @return string the HTML with all matched <style> blocks removed, or the
 *                unmodified input if the regular expression engine fails
 */
public function preFilter($html, $config, $context)
{
    $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl');
    if ($tidy !== null) {
        $this->_tidy = $tidy;
    }
    // The match must be non-greedy (U modifier): given
    //   <style>foo</style> <style>bar</style>
    // we must not capture "foo</style> <style>bar". For the same reason
    // the body is (.*) and not (.+): an empty <style></style> has to
    // match on its own instead of running on to the next </style> and
    // swallowing the markup in between.
    $stripped = preg_replace_callback(
        '#<style(?:\s.*)?>(.*)</style>#isU',
        array($this, 'styleCallback'),
        $html
    );
    $style_blocks = $this->_styleMatches;
    $this->_styleMatches = array(); // reset
    if ($stripped === null) {
        // PCRE error (e.g. backtrack limit hit). Returning null would
        // wipe the whole document, so keep the input as it was and
        // discard whatever was captured before the failure.
        $style_blocks = array();
    } else {
        $html = $stripped;
    }
    // NOTE(review): the blocks are cleaned *after* registration, so this
    // relies on register() binding $style_blocks by reference -- confirm
    // against the context class before reordering these statements.
    $context->register('StyleBlocks', $style_blocks); // $context must not be reused
    if ($this->_tidy) {
        foreach ($style_blocks as &$style) {
            $style = $this->cleanCSS($style, $config, $context);
        }
        unset($style); // break the reference left behind by foreach
    }
    return $html;
}
109 
/**
 * Sanitizes the contents of a single style block.
 *
 * The CSS is parsed with the tidy implementation, every selector is
 * rebuilt from validated pieces only (prefixed with each configured
 * scope, if any), every declaration is validated against the CSS
 * definition, and the result is printed back out with tidy's plain
 * printer. Imports, charset and namespace are discarded.
 *
 * Side effect: the css, import, charset and namespace properties of the
 * tidy object are overwritten.
 *
 * @param string $css raw contents of a style block
 * @param object $config configuration; read via get() and getDefinition()
 *                       (presumably an HTMLPurifier_Config)
 * @param object $context context handed to the validators
 *                        (presumably an HTMLPurifier_Context)
 * @return string the cleaned CSS
 * @throws HTMLPurifier_Exception if selector splitting breaks its own
 *                                invariant (should not happen)
 */
public function cleanCSS($css, $config, $context)
{
    // prepare scope; blank fragments (empty directive, stray commas) are
    // dropped, otherwise they would yield selectors with a leading space
    // and no scope at all
    $scopes = array();
    $scope = $config->get('Filter.ExtractStyleBlocks.Scope');
    if ($scope !== null) {
        foreach (explode(',', $scope) as $fragment) {
            $fragment = trim($fragment);
            if ($fragment !== '') {
                $scopes[] = $fragment;
            }
        }
    }

    // remove the HTML comment wrapper (<!-- ... -->) from the CSS
    $css = trim($css);
    if (strncmp('<!--', $css, 4) === 0) {
        $css = substr($css, 4);
    }
    if (strlen($css) > 3 && substr($css, -3) === '-->') {
        $css = substr($css, 0, -3);
    }
    $css = trim($css);

    // parse with errors muted; make sure the previous error handler is
    // put back even if the parser throws
    set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
    try {
        $this->_tidy->parse($css);
    } catch (Exception $e) {
        restore_error_handler();
        throw $e;
    }
    restore_error_handler();

    $css_definition = $config->getDefinition('CSS');
    $html_definition = $config->getDefinition('HTML');
    $new_css = array();
    foreach ($this->_tidy->css as $k => $decls) {
        // $decls are all CSS declarations inside an @ selector
        $new_decls = array();
        foreach ($decls as $selector => $style) {
            $selector = trim($selector);
            if ($selector === '') {
                continue; // should not happen
            }

            // handle ruleset: sanitize each comma-separated selector
            $new_selectors = array();
            foreach (explode(',', $selector) as $sel) {
                $nsel = $this->sanitizeSelector(trim($sel), $html_definition, $config, $context);
                if ($nsel === null) {
                    continue;
                }
                if (empty($scopes)) {
                    $new_selectors[] = $nsel;
                    continue;
                }
                foreach ($scopes as $s) {
                    $new_selectors[] = $s . ' ' . $nsel;
                }
            }
            if (empty($new_selectors)) {
                continue;
            }
            $selector = implode(', ', $new_selectors);

            // keep only known properties whose values validate
            foreach ($style as $name => $value) {
                if (!isset($css_definition->info[$name])) {
                    unset($style[$name]);
                    continue;
                }
                $def = $css_definition->info[$name];
                $ret = $def->validate($value, $config, $context);
                if ($ret === false) {
                    unset($style[$name]);
                } else {
                    $style[$name] = $ret;
                }
            }
            $new_decls[$selector] = $style;
        }
        $new_css[$k] = $new_decls;
    }

    // remove stuff that shouldn't be used, could be reenabled
    // after security risks are analyzed
    $this->_tidy->css = $new_css;
    $this->_tidy->import = array();
    $this->_tidy->charset = null;
    $this->_tidy->namespace = null;
    $css = $this->_tidy->print->plain();

    // we are going to escape any special characters <>& to ensure
    // that no funny business occurs (i.e. </style> in a font-family prop).
    if ($config->get('Filter.ExtractStyleBlocks.Escaping')) {
        $css = str_replace(
            array('<', '>', '&'),
            array('\3C ', '\3E ', '\26 '),
            $css
        );
    }
    return $css;
}

/**
 * Rebuilds one selector (no commas) from its valid simple selectors.
 *
 * @param string $sel trimmed selector
 * @param object $html_definition HTML definition; its info array is used
 *                                to check element names
 * @param object $config passed through to the validators
 * @param object $context passed through to the validators
 * @return string|null the sanitized selector, or null if nothing in it
 *                     was valid
 */
private function sanitizeSelector($sel, $html_definition, $config, $context)
{
    // Here is the relevant part of the CSS grammar:
    //
    // ruleset
    //   : selector [ ',' S* selector ]* '{' ...
    // selector
    //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
    // combinator
    //   : '+' S*
    //   | '>' S*
    // simple_selector
    //   : element_name [ HASH | class | attrib | pseudo ]*
    //   | [ HASH | class | attrib | pseudo ]+
    // element_name
    //   : IDENT | '*'
    //   ;
    // class
    //   : '.' IDENT
    //   ;
    // attrib
    //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
    //     [ IDENT | STRING ] S* ]? ']'
    //   ;
    // pseudo
    //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
    //   ;
    //
    // For reference, here are the relevant tokens:
    //
    // HASH         #{name}
    // IDENT        {ident}
    // INCLUDES     ~=
    // DASHMATCH    |=
    // STRING       {string}
    // FUNCTION     {ident}\(
    //
    // And the lexical scanner tokens
    //
    // name         {nmchar}+
    // nmchar       [_a-z0-9-]|{nonascii}|{escape}
    // nonascii     [\240-\377]
    // escape       {unicode}|\\[^\r\n\f0-9a-f]
    // unicode      \\{h}{1,6}(\r\n|[ \t\r\n\f])?
    // ident        -?{nmstart}{nmchar}*
    // nmstart      [_a-z]|{nonascii}|{escape}
    // string       {string1}|{string2}
    // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
    // string2      \'([^\n\r\f\\']|\\{nl}|{escape})*\'
    //
    // We'll implement a subset (in order to reduce attack
    // surface); in particular:
    //
    //      - No Unicode support
    //      - No escapes support
    //      - No string support (by proxy no attrib support)
    //      - element_name is matched against allowed
    //        elements (some people might find this
    //        annoying...)
    //      - Pseudo-classes one of :first-child, :link,
    //        :visited, :active, :hover, :focus

    // split on +, > and spaces; even indices are simple selectors,
    // odd indices are the combinators between them
    $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($basic_selectors === false) {
        return null; // PCRE failure: treat the selector as invalid
    }
    $nsel = null;
    $delim = null; // always set before it is used: it is only read once
                   // $nsel is non-null, i.e. from index 2 onwards
    foreach ($basic_selectors as $i => $x) {
        if ($i % 2) {
            // combinator, normalized to single surrounding spaces
            $delim = ($x === ' ') ? ' ' : ' ' . $x . ' ';
            continue;
        }
        $nx = $this->sanitizeSimpleSelector($x, $html_definition, $config, $context);
        if ($nx === null) {
            // invalid simple selector: skip it, and with it the
            // combinator to its left
            continue;
        }
        $nsel = ($nsel === null) ? $nx : $nsel . $delim . $nx;
    }
    return $nsel;
}

/**
 * Rebuilds one simple selector (element name followed by any number of
 * #id, .class and :pseudo parts), keeping only the parts that validate.
 *
 * @param string $x simple selector, without combinators or whitespace
 * @param object $html_definition HTML definition; its info array is used
 *                                to check the element name
 * @param object $config passed through to the validators
 * @param object $context passed through to the validators
 * @return string|null the sanitized simple selector, or null if no part
 *                     of it was valid
 * @throws HTMLPurifier_Exception if a part follows an unknown delimiter
 */
private function sanitizeSimpleSelector($x, $html_definition, $config, $context)
{
    // index 0 is the element name (possibly empty); after that, odd
    // indices are the delimiters # . : and even indices what follows them
    $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($components === false) {
        return null; // PCRE failure: treat the selector as invalid
    }
    $sdelim = null;
    $nx = null;
    foreach ($components as $j => $y) {
        if ($j === 0) {
            if ($y === '*') {
                $nx = $y;
            } else {
                $y = strtolower($y);
                if (isset($html_definition->info[$y])) {
                    $nx = $y;
                }
                // otherwise $nx stays null; this matters if we don't
                // manage to find any valid selector content at all
            }
            continue;
        }
        if ($j % 2) {
            // remember the delimiter for the component that follows
            $sdelim = $y;
            continue;
        }
        if ($sdelim === '#') {
            $attrdef = $this->_id_attrdef;
        } elseif ($sdelim === '.') {
            $attrdef = $this->_class_attrdef;
        } elseif ($sdelim === ':') {
            $attrdef = $this->_enum_attrdef;
        } else {
            throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
        }
        $r = $attrdef->validate($y, $config, $context);
        if ($r === false) {
            continue;
        }
        if ($r !== true) {
            // the validator handed back a replacement value
            $y = $r;
        }
        if ($nx === null) {
            $nx = '';
        }
        $nx .= $sdelim . $y;
    }
    return $nx;
}
336 }
337 
338 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.