HTMLPurifier/Generator.php Quellcode

Generator.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
11 {
12 
/**
 * Whether output is serialized XML-style. Defaults to true; the
 * constructor overwrites it with the doctype's `xml` flag. Controls the
 * ' /' on empty tags, the Tidy 'output-xhtml' option, and whether
 * namespaced attributes are dropped / attributes are minimized.
 */
17  private $_xhtml = true;
18 
/**
 * Value of the 'Output.CommentScriptContents' setting; when truthy,
 * generateFromTokens() routes script contents through
 * generateScriptFromToken().
 */
23  private $_scriptFix = false;
24 
/**
 * HTML definition object returned by $config->getHTMLDefinition(); read
 * for `doctype->xml` and for per-element attribute `minimized` flags.
 */
30  private $_def;
31 
/**
 * Value of the 'Output.SortAttr' setting; when truthy, attributes are
 * sorted by key (ksort) before being serialized.
 */
36  private $_sortAttr;
37 
/**
 * Value of the 'Output.FlashCompat' setting; when truthy, object/param
 * tokens are tracked on the flash stack.
 */
42  private $_flashCompat;
43 
/**
 * Value of the 'Output.FixInnerHTML' setting; when truthy, attribute
 * values containing a backtick may get a trailing space appended.
 */
48  private $_innerHTMLFix;
49 
/**
 * Stack of stdclass records (fields: attr, param), one pushed for each
 * "object" start token seen while flash compatibility is enabled.
 */
55  private $_flashStack = array();
56 
/**
 * Configuration object handed to the constructor; queried later for
 * 'Output.TidyFormat', 'Core.NormalizeNewlines' and 'Output.Newline'.
 */
61  protected $config;
62 
/**
 * Stores the configuration and caches the output-related settings this
 * generator consults while serializing tokens.
 *
 * @param object $config  Configuration object; must provide get() and
 *                        getHTMLDefinition().
 * @param object $context Part of the constructor's interface; it is not
 *                        read here.
 */
public function __construct($config, $context)
{
    $this->config = $config;

    // Output switches, looked up once and kept on the instance.
    $this->_scriptFix    = $config->get('Output.CommentScriptContents');
    $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
    $this->_sortAttr     = $config->get('Output.SortAttr');
    $this->_flashCompat  = $config->get('Output.FlashCompat');

    // The definition's doctype decides whether we emit XML-style markup.
    $definition   = $config->getHTMLDefinition();
    $this->_def   = $definition;
    $this->_xhtml = $definition->doctype->xml;
}
77 
/**
 * Serializes a list of tokens into an HTML string.
 *
 * After concatenating the per-token output, the result is optionally
 * re-formatted through Tidy ('Output.TidyFormat', only when the tidy
 * extension is loaded) and its "\n" newlines are optionally replaced by
 * the configured newline ('Core.NormalizeNewlines' / 'Output.Newline',
 * falling back to PHP_EOL).
 *
 * @param array $tokens List of HTMLPurifier_Token objects.
 * @return string Generated HTML; '' when $tokens is empty.
 */
public function generateFromTokens($tokens)
{
    if (!$tokens) {
        return '';
    }

    $html = '';
    for ($i = 0, $size = count($tokens); $i < $size; $i++) {
        // Script special case: <script>, ONE content token, </script>.
        // The opener must really be a start tag; otherwise an end
        // </script> followed by text and some other end tag would match
        // and the unrelated text would be wrapped in the script guard.
        // Testing instanceof first also avoids reading ->name on tokens
        // that do not carry that property.
        if ($this->_scriptFix
            && $tokens[$i] instanceof HTMLPurifier_Token_Start
            && $tokens[$i]->name === 'script'
            && $i + 2 < $size
            && $tokens[$i + 2] instanceof HTMLPurifier_Token_End
        ) {
            $html .= $this->generateFromToken($tokens[$i++]);
            $html .= $this->generateScriptFromToken($tokens[$i++]);
            // $i now points at the end tag, emitted by the line below.
        }
        $html .= $this->generateFromToken($tokens[$i]);
    }

    // Tidy cleanup
    if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
        $tidy = new Tidy();
        $tidy->parseString(
            $html,
            array(
                'indent' => true,
                'output-xhtml' => $this->_xhtml,
                'show-body-only' => true,
                'indent-spaces' => 2,
                'wrap' => 68,
            ),
            'utf8'
        );
        $tidy->cleanRepair();
        $html = (string) $tidy; // explicit cast necessary
    }

    // Normalize newlines to system defined value
    if ($this->config->get('Core.NormalizeNewlines')) {
        $nl = $this->config->get('Output.Newline');
        if ($nl === null) {
            $nl = PHP_EOL;
        }
        if ($nl !== "\n") {
            $html = str_replace("\n", $nl, $html);
        }
    }
    return $html;
}
133 
/**
 * Serializes a single token into its HTML representation.
 *
 * Start, end and empty tags are rendered with their attributes, text is
 * escaped without touching quotes, and comments are wrapped in
 * <!-- -->. Any other token type yields ''. A value that is not an
 * HTMLPurifier_Token raises an E_USER_WARNING and yields ''.
 *
 * With flash compatibility enabled, a record is pushed on the flash
 * stack for every "object" start tag, "param" tags are noted on the
 * innermost record, and the record is popped again on the matching end
 * tag.
 *
 * @param mixed $token Token to serialize.
 * @return string Generated HTML for the token.
 */
public function generateFromToken($token)
{
    if (!$token instanceof HTMLPurifier_Token) {
        trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
        return '';
    }

    if ($token instanceof HTMLPurifier_Token_Start) {
        $attr = $this->generateAttributes($token->attr, $token->name);
        if ($this->_flashCompat && $token->name === 'object') {
            $flash = new stdclass();
            $flash->attr = $token->attr;
            $flash->param = array();
            $this->_flashStack[] = $flash;
        }
        return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
    }

    if ($token instanceof HTMLPurifier_Token_End) {
        if ($this->_flashCompat && $token->name === 'object' && !empty($this->_flashStack)) {
            // Leave the object's scope. Without this the stack grows for
            // every <object> ever generated and later <param> tokens get
            // attributed to an object that is already closed.
            array_pop($this->_flashStack);
        }
        return '</' . $token->name . '>';
    }

    if ($token instanceof HTMLPurifier_Token_Empty) {
        if ($this->_flashCompat
            && $token->name === 'param'
            && !empty($this->_flashStack)
            && isset($token->attr['name'])
        ) {
            // Guard the lookups: a param without a value must not raise
            // an undefined-index notice while output is being built.
            $top = count($this->_flashStack) - 1;
            $this->_flashStack[$top]->param[$token->attr['name']] =
                isset($token->attr['value']) ? $token->attr['value'] : null;
        }
        $attr = $this->generateAttributes($token->attr, $token->name);
        return '<' . $token->name . ($attr ? ' ' : '') . $attr .
            ($this->_xhtml ? ' /' : '') // <br /> v. <br>
            . '>';
    }

    if ($token instanceof HTMLPurifier_Token_Text) {
        return $this->escape($token->data, ENT_NOQUOTES);
    }

    if ($token instanceof HTMLPurifier_Token_Comment) {
        return '<!--' . $token->data . '-->';
    }

    return '';
}
185 
/**
 * Serializes the contents of a script element, wrapped in a combined
 * comment/CDATA guard.
 *
 * A trailing "//" (optionally followed by whitespace) is removed from
 * the text and the remainder is trimmed before wrapping. Tokens that
 * are not text tokens are passed to generateFromToken() unchanged.
 *
 * @param mixed $token Token holding the script contents.
 * @return string Generated HTML.
 */
public function generateScriptFromToken($token)
{
    if ($token instanceof HTMLPurifier_Token_Text) {
        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
        $script = trim(preg_replace('#//\s*$#', '', $token->data));
        return '<!--//--><![CDATA[//><!--' . "\n" . $script . "\n" . '//--><!]]>';
    }

    return $this->generateFromToken($token);
}
202 
/**
 * Serializes an attribute map into the string that goes inside a tag.
 *
 * @param array  $assoc_array_of_attributes Attribute name => value map.
 * @param string $element Name of the owning element; used to look up
 *                        which attributes may be minimized. Optional.
 * @return string Space-separated attributes without a trailing space;
 *                '' when nothing is emitted.
 */
public function generateAttributes($assoc_array_of_attributes, $element = '')
{
    if ($this->_sortAttr) {
        ksort($assoc_array_of_attributes);
    }

    $htmlMode = !$this->_xhtml;
    $pieces = array();

    foreach ($assoc_array_of_attributes as $name => $val) {
        if ($htmlMode) {
            // Namespaced attributes are dropped outside XML output.
            if (strpos($name, ':') !== false) {
                continue;
            }
            // Minimizable attribute: val="val" is written as just val.
            if ($element && !empty($this->_def->info[$element]->attr[$name]->minimized)) {
                $pieces[] = $name;
                continue;
            }
        }

        // Internet Explorer innerHTML workaround. IE omits the quotes
        // around a value when computing innerHTML unless the value
        // contains a quote, an angle bracket or a space, yet it treats
        // backticks as quotes when parsing. So <img alt="``" /> turns
        // into <img alt=`` /> and then <img alt='' />. Appending one
        // space forces quoting; this is only needed when the value has
        // a backtick and none of the characters that already trigger it.
        if ($this->_innerHTMLFix
            && strpos($val, '`') !== false
            && strcspn($val, '"\' <>') === strlen($val)
        ) {
            $val .= ' ';
        }

        $pieces[] = $name . '="' . $this->escape($val) . '"';
    }

    return rtrim(implode(' ', $pieces));
}
264 
/**
 * Escapes a string for HTML output using htmlspecialchars() with the
 * UTF-8 charset.
 *
 * @param string   $string String to escape.
 * @param int|null $quote  htmlspecialchars() quote flag; null selects
 *                         ENT_COMPAT.
 * @return string Escaped string.
 */
public function escape($string, $quote = null)
{
    // The default is null rather than ENT_COMPAT in the signature as a
    // workaround for an APC bug on Mac Leopard reported by sidepodcast:
    // http://htmlpurifier.org/phorum/read.php?3,4823,4846
    $flags = ($quote === null) ? ENT_COMPAT : $quote;

    return htmlspecialchars($string, $flags, 'UTF-8');
}
284 }
285 
286 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht, zu melden, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Wählen Sie dazu oben im Menü „Seite“ den Eintrag „Support Forum“. Um Anmerkungen zu posten, ist eine kostenlose Anmeldung erforderlich. Unpassende Postings, Spam usw. werden kommentarlos entfernt.