library/SimplePie/Parser.php Quellcode

Parser.php
gehe zur Dokumentation dieser Datei
1 <?php
55 {
61  var $separator = ' ';
62  var $namespace = array('');
63  var $element = array('');
64  var $xml_base = array('');
65  var $xml_base_explicit = array(false);
66  var $xml_lang = array('');
67  var $data = array();
68  var $datas = array(array());
70  var $encoding;
71  protected $registry;
72 
74  {
75  $this->registry = $registry;
76  }
77 
/**
 * Parse an XML document, feeding every element and text node to
 * tag_open(), tag_close() and cdata().
 *
 * A leading byte order mark is removed and, when the XML declaration can
 * be parsed, it is rewritten to carry the caller-supplied encoding. Both
 * changes are applied to $data in place.
 *
 * @param string $data     XML source; modified by reference
 * @param string $encoding Encoding of $data ('US-ASCII' is handled as 'UTF-8')
 * @return bool True on success; false on failure, with details available
 *              through get_error_code(), get_error_string() and the
 *              get_current_*() accessors
 */
public function parse(&$data, $encoding)
{
    // Every US-ASCII character is also a UTF-8 character, so parse as UTF-8.
    $this->encoding = (strtoupper($encoding) === 'US-ASCII') ? 'UTF-8' : $encoding;

    // Strip a leading BOM. Order matters: the UTF-32 LE mark starts with the
    // same two bytes as the UTF-16 LE mark, so the four-byte marks go first.
    $boms = array(
        "\x00\x00\xFE\xFF", // UTF-32 Big Endian
        "\xFF\xFE\x00\x00", // UTF-32 Little Endian
        "\xFE\xFF",         // UTF-16 Big Endian
        "\xFF\xFE",         // UTF-16 Little Endian
        "\xEF\xBB\xBF",     // UTF-8
    );
    foreach ($boms as $bom)
    {
        $bom_length = strlen($bom);
        if (substr($data, 0, $bom_length) === $bom)
        {
            $data = substr($data, $bom_length);
            break;
        }
    }

    // Rewrite the XML declaration so that it states the encoding we were given.
    if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
    {
        $declaration = $this->registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
        if (!$declaration->parse())
        {
            $this->error_string = 'SimplePie bug! Please report this!';
            return false;
        }
        $standalone = $declaration->standalone ? 'yes' : 'no';
        $data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . $standalone . '"?>' . substr($data, $pos + 2);
    }

    // Probe the xml extension once: parse a tiny document containing an
    // entity and see whether the first struct entry carries a value.
    static $xml_is_sane = null;
    if ($xml_is_sane === null)
    {
        $parser_check = xml_parser_create();
        xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
        xml_parser_free($parser_check);
        $xml_is_sane = isset($values[0]['value']);
    }

    if ($xml_is_sane)
    {
        // Create the parser
        $xml = xml_parser_create_ns($this->encoding, $this->separator);
        xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
        xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
        xml_set_object($xml, $this);
        xml_set_character_data_handler($xml, 'cdata');
        xml_set_element_handler($xml, 'tag_open', 'tag_close');

        // Parse!
        $success = true;
        if (!xml_parse($xml, $data, true))
        {
            $this->error_code = xml_get_error_code($xml);
            $this->error_string = xml_error_string($this->error_code);
            $success = false;
        }
        $this->current_line = xml_get_current_line_number($xml);
        $this->current_column = xml_get_current_column_number($xml);
        $this->current_byte = xml_get_current_byte_index($xml);
        xml_parser_free($xml);
        return $success;
    }

    // Fallback: walk the document with XMLReader and call the handlers by hand.
    libxml_clear_errors();
    $xml = new XMLReader();
    // Remember whether the document was accepted at all; reading from a
    // reader that never loaded must not be reported as a successful parse.
    $loaded = $xml->xml($data);
    while ($loaded && @$xml->read())
    {
        switch ($xml->nodeType)
        {
            case XMLReader::END_ELEMENT:
                if ($xml->namespaceURI !== '')
                {
                    $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                }
                else
                {
                    $tagName = $xml->localName;
                }
                $this->tag_close(null, $tagName);
                break;

            case XMLReader::ELEMENT:
                // Read these before the cursor is moved onto the attributes.
                $empty = $xml->isEmptyElement;
                if ($xml->namespaceURI !== '')
                {
                    $tagName = $xml->namespaceURI . $this->separator . $xml->localName;
                }
                else
                {
                    $tagName = $xml->localName;
                }
                $attributes = array();
                while ($xml->moveToNextAttribute())
                {
                    if ($xml->namespaceURI !== '')
                    {
                        $attrName = $xml->namespaceURI . $this->separator . $xml->localName;
                    }
                    else
                    {
                        $attrName = $xml->localName;
                    }
                    $attributes[$attrName] = $xml->value;
                }
                $this->tag_open(null, $tagName, $attributes);
                // An empty element produces no END_ELEMENT node, so close it here.
                if ($empty)
                {
                    $this->tag_close(null, $tagName);
                }
                break;

            case XMLReader::TEXT:
            case XMLReader::CDATA:
                $this->cdata(null, $xml->value);
                break;
        }
    }
    // Release the reader; the last libxml error stays available afterwards.
    $xml->close();

    if ($error = libxml_get_last_error())
    {
        $this->error_code = $error->code;
        $this->error_string = $error->message;
        $this->current_line = $error->line;
        $this->current_column = $error->column;
        return false;
    }
    if (!$loaded)
    {
        $this->error_string = 'Unable to load the XML data into XMLReader';
        return false;
    }
    return true;
}
237 
/**
 * Get the error code stored by parse() when the XML could not be parsed.
 *
 * @return mixed Value of the error_code property
 */
public function get_error_code()
{
    return $this->error_code;
}
242 
/**
 * Get the error message stored by parse() when it failed.
 *
 * @return mixed Value of the error_string property
 */
public function get_error_string()
{
    return $this->error_string;
}
247 
/**
 * Get the line number recorded by parse().
 *
 * @return mixed Value of the current_line property
 */
public function get_current_line()
{
    return $this->current_line;
}
252 
/**
 * Get the column number recorded by parse().
 *
 * @return mixed Value of the current_column property
 */
public function get_current_column()
{
    return $this->current_column;
}
257 
/**
 * Get the byte index recorded by parse().
 *
 * parse() only records this value on its xml_parse() code path.
 *
 * @return mixed Value of the current_byte property
 */
public function get_current_byte()
{
    return $this->current_byte;
}
262 
/**
 * Get the data array built up by the tag and character-data handlers.
 *
 * @return array Value of the data property
 */
public function get_data()
{
    return $this->data;
}
267 
/**
 * Start-element handler.
 *
 * Pushes the element's namespace, local name, xml:base and xml:lang onto
 * their stacks, then either appends the start tag to the text of the
 * enclosing XHTML construct or opens a new child node in the data tree.
 *
 * Exactly one entry is pushed onto every stack per call, because
 * tag_close() pops exactly one entry from each of them.
 *
 * @param mixed  $parser     Not used here (parse() passes null on its XMLReader path)
 * @param string $tag        Element name as understood by split_ns()
 * @param array  $attributes Attribute values keyed by attribute name
 */
public function tag_open($parser, $tag, $attributes)
{
    $name_parts = $this->split_ns($tag);
    $namespace = $name_parts[0];
    $element = $name_parts[1];
    $this->namespace[] = $namespace;
    $this->element[] = $element;

    // Group the attributes by namespace.
    $attribs = array();
    foreach ($attributes as $name => $value)
    {
        $attrib_parts = $this->split_ns($name);
        $attribs[$attrib_parts[0]][$attrib_parts[1]] = $value;
    }

    // xml:base: use the resolved URL when there is one, otherwise inherit the
    // parent's entry. This covers a missing attribute as well as one that
    // could not be absolutized, so the stacks never fall out of step with
    // the element nesting.
    $parent_base = end($this->xml_base);
    $parent_base_explicit = end($this->xml_base_explicit);
    $base = false;
    if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
    {
        $base = $this->registry->call('Misc', 'absolutize_url', array($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], $parent_base));
    }
    if ($base !== false)
    {
        $this->xml_base[] = $base;
        $this->xml_base_explicit[] = true;
    }
    else
    {
        $this->xml_base[] = $parent_base;
        $this->xml_base_explicit[] = $parent_base_explicit;
    }

    // xml:lang: own value if present, otherwise inherited.
    if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
    {
        $this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
    }
    else
    {
        $this->xml_lang[] = end($this->xml_lang);
    }

    if ($this->current_xhtml_construct >= 0)
    {
        // Inside an XHTML construct: track the depth and serialise XHTML
        // elements, with their un-namespaced attributes, back into markup.
        $this->current_xhtml_construct++;
        if ($namespace === SIMPLEPIE_NAMESPACE_XHTML)
        {
            $markup = '<' . $element;
            if (isset($attribs['']))
            {
                foreach ($attribs[''] as $name => $value)
                {
                    $markup .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
                }
            }
            $this->data['data'] .= $markup . '>';
        }
        return;
    }

    // Regular element: remember the parent node and descend into a new child.
    $this->datas[] =& $this->data;
    $this->data =& $this->data['child'][$namespace][$element][];
    $this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));

    // Elements whose content is collected as markup text from here on.
    $atom_03_xml = $namespace === SIMPLEPIE_NAMESPACE_ATOM_03
        && in_array($element, array('title', 'tagline', 'copyright', 'info', 'summary', 'content'), true)
        && isset($attribs['']['mode'])
        && $attribs['']['mode'] === 'xml';
    $atom_10_xhtml = $namespace === SIMPLEPIE_NAMESPACE_ATOM_10
        && in_array($element, array('rights', 'subtitle', 'summary', 'info', 'title', 'content'), true)
        && isset($attribs['']['type'])
        && $attribs['']['type'] === 'xhtml';
    $rss_title = $element === 'title'
        && ($namespace === SIMPLEPIE_NAMESPACE_RSS_20
            || $namespace === SIMPLEPIE_NAMESPACE_RSS_090
            || $namespace === SIMPLEPIE_NAMESPACE_RSS_10);

    if ($atom_03_xml || $atom_10_xhtml || $rss_title)
    {
        $this->current_xhtml_construct = 0;
    }
}
334 
/**
 * Character-data handler: append text to the current node.
 *
 * While an XHTML construct is open the text is passed through
 * htmlspecialchars() first; otherwise it is appended unchanged.
 *
 * @param mixed  $parser Not used here (parse() passes null on its XMLReader path)
 * @param string $cdata  Text to append
 */
public function cdata($parser, $cdata)
{
    $inside_xhtml = ($this->current_xhtml_construct >= 0);
    $text = $inside_xhtml ? htmlspecialchars($cdata, ENT_QUOTES, $this->encoding) : $cdata;
    $this->data['data'] .= $text;
}
346 
/**
 * End-element handler.
 *
 * Inside an XHTML construct it lowers the nesting depth and appends the end
 * tag for XHTML elements that are not in the list of tags written without
 * one. Once no construct is open it steps back to the parent data node.
 * Finally it pops one entry from each per-element stack.
 *
 * @param mixed  $parser Not used here (parse() passes null on its XMLReader path)
 * @param string $tag    Not used here; the element is taken from the stacks
 */
public function tag_close($parser, $tag)
{
    if ($this->current_xhtml_construct >= 0)
    {
        $this->current_xhtml_construct--;

        // Elements that are emitted without a closing tag.
        $no_end_tag = array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param');
        $is_xhtml = (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML);
        if ($is_xhtml && !in_array(end($this->element), $no_end_tag))
        {
            $this->data['data'] .= '</' . end($this->element) . '>';
        }
    }

    if ($this->current_xhtml_construct === -1)
    {
        // Re-point the current node at the most recently saved parent.
        $parent_index = count($this->datas) - 1;
        $this->data =& $this->datas[$parent_index];
        array_pop($this->datas);
    }

    // Drop this element's entry from every stack.
    array_pop($this->xml_lang);
    array_pop($this->xml_base_explicit);
    array_pop($this->xml_base);
    array_pop($this->namespace);
    array_pop($this->element);
}
369 
370  public function split_ns($string)
371  {
372  static $cache = array();
373  if (!isset($cache[$string]))
374  {
375  if ($pos = strpos($string, $this->separator))
376  {
377  static $separator_length;
378  if (!$separator_length)
379  {
380  $separator_length = strlen($this->separator);
381  }
382  $namespace = substr($string, 0, $pos);
383  $local_name = substr($string, $pos + $separator_length);
384  if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
385  {
387  }
388 
389  // Normalize the Media RSS namespaces
395  {
397  }
398  $cache[$string] = array($namespace, $local_name);
399  }
400  else
401  {
402  $cache[$string] = array('', $string);
403  }
404  }
405  return $cache[$string];
406  }
407 }




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.