Öffentliche Methoden | |
Auth_Yadis_ParseHTML () | |
replaceEntities ($str) | |
removeQuotes ($str) | |
tagPattern ($tag_names, $close, $self_close) | |
getMetaTags ($html_string) | |
getHTTPEquiv ($html_string) | |
Datenfelder | |
$_re_flags = "si" | |
$_removed_re | |
$_tag_expr = "<%s%s(?:\s.*?)?%s>" | |
$_attr_find = '\b([-\w]+)=(".*?"|\'.*?\'|.+?)[\/\s>]' |
Definiert in Zeile 23 der Datei ParseHTML.php.
Definiert in Zeile 46 der Datei ParseHTML.php.
/**
 * Constructor: turns the raw pattern fragments stored on the object into
 * complete PCRE patterns and builds the named-entity lookup table.
 *
 * NOTE(review): this is a PHP 4-style constructor (method named after the
 * class, presumably Auth_Yadis_ParseHTML). Confirm how it is invoked on
 * PHP 8, where such methods are no longer called automatically.
 */
function Auth_Yadis_ParseHTML()
{
    // Wrap the bare expressions in "/" delimiters and append the shared
    // modifier flags so they can be passed directly to the preg_* functions.
    $this->_attr_find = sprintf("/%s/%s",
                                $this->_attr_find,
                                $this->_re_flags);

    $this->_removed_re = sprintf("/%s/%s",
                                 $this->_removed_re,
                                 $this->_re_flags);

    // Entity name => replacement character.
    $this->_entity_replacements = array(
                                        'amp' => '&',
                                        'lt' => '<',
                                        'gt' => '>',
                                        'quot' => '"'
                                        );

    // The alternation must list the entity NAMES (the array keys).
    // Imploding the array itself joined the replacement characters and
    // produced "&(&|<|>|\");", which can never match an entity reference.
    $this->_ent_replace =
        sprintf("&(%s);", implode("|",
                                  array_keys($this->_entity_replacements)));
}
getHTTPEquiv | ( | $ | html_string | ) |
Looks for a META tag with an "http-equiv" attribute whose value is one of ("x-xrds-location", "x-yadis-location"), ignoring case. If such a META tag is found, its "content" attribute value is returned.
string | $html_string An HTML document in string format |
Definiert in Zeile 240 der Datei ParseHTML.php.
/**
 * Return the "content" attribute of the first META tag whose "http-equiv"
 * attribute is "x-xrds-location" or "x-yadis-location" (compared
 * case-insensitively).
 *
 * @param string $html_string An HTML document in string format
 * @return mixed The content attribute value, or null if no such tag exists
 */
function getHTTPEquiv($html_string)
{
    $wanted_headers = array('x-xrds-location', 'x-yadis-location');

    $tags = $this->getMetaTags($html_string);
    if (!$tags) {
        return null;
    }

    foreach ($tags as $attrs) {
        if (!array_key_exists('http-equiv', $attrs)) {
            continue;
        }
        if (!in_array(strtolower($attrs['http-equiv']), $wanted_headers)) {
            continue;
        }
        if (!array_key_exists('content', $attrs)) {
            continue;
        }

        // First tag that satisfies all three conditions wins.
        return $attrs['content'];
    }

    return null;
}
getMetaTags | ( | $ | html_string | ) |
Given an HTML document string, this finds all the META tags in the document, provided they are found in the <HTML><HEAD>...</HEAD> section of the document. The <HTML> tag may be missing.
private
string | $html_string An HTML document string |
Definiert in Zeile 157 der Datei ParseHTML.php.
/**
 * Collect the attributes of every META tag located in the HEAD section of
 * an HTML document. The surrounding HTML tag may be missing.
 *
 * @access private
 * @param string $html_string An HTML document string
 * @return array One associative array (lower-cased attribute name =>
 * decoded value) per META tag; empty if no usable HEAD section or no
 * META tag is found
 */
function getMetaTags($html_string)
{
    // Drop everything matched by $_removed_re (comments, CDATA sections
    // and script elements) so tags inside them are not considered.
    $html_string = preg_replace($this->_removed_re, "", $html_string);

    // Landmark tags, in this order: <html>, <head>, </head>, </html>,
    // and the first opening or closing tag that cannot belong to a head.
    $landmark_patterns = array(
        $this->tagPattern('html', false, false),
        $this->tagPattern('head', false, false),
        $this->tagPattern('head', true, false),
        $this->tagPattern('html', true, false),
        $this->tagPattern(array(
                                'body', 'frameset', 'frame', 'p', 'div',
                                'table','span','a'), 'maybe', 'maybe')
    );

    // Byte offset of the first match of each landmark, or null if absent.
    $offsets = array();
    foreach ($landmark_patterns as $pattern) {
        $found = array();
        preg_match($pattern, $html_string, $found, PREG_OFFSET_CAPTURE);
        $offsets[] = $found ? $found[0][1] : null;
    }

    $html_open  = $offsets[0];
    $head_open  = $offsets[1];
    $head_end   = $offsets[2];
    $html_close = $offsets[3];
    $body_like  = $offsets[4];

    // No opening head tag: nothing to scan.
    if (is_null($head_open)) {
        return array();
    }

    // The effective end of the head is the earliest of </head>, </html>,
    // the first body-level tag, or the end of the document.
    if (is_null($head_end)) {
        $head_end = strlen($html_string);
    }
    foreach (array($html_close, $body_like) as $candidate) {
        if (!is_null($candidate) && $candidate < $head_end) {
            $head_end = $candidate;
        }
    }

    // The head would end before it starts.
    if ($head_open > $head_end) {
        return array();
    }

    // An opening html tag, when present, must precede the opening head tag.
    if (!is_null($html_open) && $head_open < $html_open) {
        return array();
    }

    $head_section = substr($html_string, $head_open,
                           ($head_end - $head_open));

    $meta_matches = array();
    if (!preg_match_all($this->tagPattern('meta', false, 'maybe'),
                        $head_section, $meta_matches)) {
        return array();
    }

    $meta_tags = array();
    foreach ($meta_matches[0] as $meta_tag) {
        $attr_matches = array();
        preg_match_all($this->_attr_find, $meta_tag, $attr_matches);

        $attrs = array();
        foreach (array_keys($attr_matches[0]) as $i) {
            $raw_value = $this->removeQuotes($attr_matches[2][$i]);
            $attrs[strtolower($attr_matches[1][$i])] =
                $this->replaceEntities($raw_value);
        }
        $meta_tags[] = $attrs;
    }

    return $meta_tags;
}
removeQuotes | ( | $ | str | ) |
Strip single and double quotes off of a string, if they are present.
private
string | $str The original string |
Definiert in Zeile 100 der Datei ParseHTML.php.
/**
 * Remove one pair of enclosing quotes from a string: double quotes are
 * tried first, then single quotes. A string that is not wrapped in a
 * matching pair is returned unchanged.
 *
 * @access private
 * @param string $str The original string
 * @return string The string without its enclosing quotes
 */
function removeQuotes($str)
{
    $quoted_forms = array(
        '/^"(.*)"$/',
        "/^\'(.*)\'$/"
    );

    foreach ($quoted_forms as $pattern) {
        $captured = array();
        if (preg_match($pattern, $str, $captured)) {
            return $captured[1];
        }
    }

    return $str;
}
replaceEntities | ( | $ | str | ) |
Replace HTML entities (amp, lt, gt, and quot) as well as numeric entities (e.g. &#x9f;) with their actual values and return the new string.
private
string | $str The string in which to look for entities |
Definiert in Zeile 77 der Datei ParseHTML.php.
/**
 * Replace the named entities listed in $this->_entity_replacements and
 * numeric entities (decimal "&#65;" and hexadecimal "&#x41;") with their
 * actual values and return the new string.
 *
 * All entities are decoded in a single pass, so text produced by one
 * replacement is never decoded a second time ("&amp;lt;" yields "&lt;",
 * not "<").
 *
 * The previous implementation used the "e" (eval) pattern modifier, which
 * PHP 7 no longer supports. It also evaluated decimal references as PHP
 * source, so a leading zero turned the number into an octal literal.
 *
 * @access private
 * @param string $str The string in which to look for entities
 * @return string The string with entities replaced
 */
function replaceEntities($str)
{
    $names = array();
    foreach (array_keys($this->_entity_replacements) as $name) {
        $names[] = preg_quote($name, '~');
    }

    // Group 1: hex digits, group 2: decimal digits, group 3: entity name.
    // Names stay case-sensitive; only the hex form ignores case.
    $pattern = sprintf('~&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|(%s));~',
                       implode('|', $names));

    return preg_replace_callback($pattern,
                                 array($this, '_replaceEntityMatch'),
                                 $str);
}

/**
 * preg_replace_callback() helper for replaceEntities(): maps one matched
 * entity reference to its replacement text.
 *
 * @access private
 * @param array $match Match array for a single entity reference
 * @return string The decoded character, or the reference itself when the
 * entity name is not in the replacement table
 */
function _replaceEntityMatch($match)
{
    if (isset($match[3]) && $match[3] !== '') {
        if (array_key_exists($match[3], $this->_entity_replacements)) {
            return $this->_entity_replacements[$match[3]];
        }
        return $match[0];
    }

    // Numeric references are decoded with chr(), i.e. to a single byte,
    // as before.
    if (isset($match[2]) && $match[2] !== '') {
        return chr((int) $match[2]);
    }

    if (isset($match[1]) && $match[1] !== '') {
        return chr(hexdec($match[1]));
    }

    return $match[0];
}
tagPattern | ( | $ | tag_names, | |
$ | close, | |||
$ | self_close | |||
) |
Create a regular expression that will match an opening or closing tag from a set of names.
private
mixed | $tag_names Tag names to match | |
mixed | $close false/0 = no, true/1 = yes, other = maybe | |
mixed | $self_close false/0 = no, true/1 = yes, other = maybe |
Definiert in Zeile 126 der Datei ParseHTML.php.
/**
 * Build a regular expression that matches an opening and/or closing tag
 * for one tag name or any of a set of tag names.
 *
 * @access private
 * @param mixed $tag_names A tag name, or an array of alternative names
 * @param mixed $close false/0 = opening tag only, true/1 = closing tag
 * only, anything else = the leading slash is optional
 * @param mixed $self_close false/0 = no trailing slash, true/1 = trailing
 * slash required, anything else = trailing slash optional
 * @return string A delimited pattern including the flags in $_re_flags
 */
function tagPattern($tag_names, $close, $self_close)
{
    // Several names become a non-capturing alternation.
    if (is_array($tag_names)) {
        $tag_names = '(?:'.implode('|',$tag_names).')';
    }

    $leading_slash = '';
    if ($close) {
        $leading_slash = '\/';
        if ($close != 1) {
            // "maybe": the slash may or may not be present.
            $leading_slash .= '?';
        }
    }

    $trailing_slash = '';
    if ($self_close) {
        $trailing_slash = '(?:\/\s*)';
        if ($self_close != 1) {
            $trailing_slash .= '?';
        }
    }

    $expr = sprintf($this->_tag_expr,
                    $leading_slash, $tag_names, $trailing_slash);

    return sprintf("/%s/%s", $expr, $this->_re_flags);
}
$_attr_find = '\b([-\w]+)=(".*?"|\'.*?\'|.+?)[\/\s>]' |
private
Definiert in Zeile 44 der Datei ParseHTML.php.
$_re_flags = "si" |
private
Definiert in Zeile 28 der Datei ParseHTML.php.
$_removed_re |
Initialisierung:
"<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>"
Definiert in Zeile 33 der Datei ParseHTML.php.
$_tag_expr = "<%s%s(?:\s.*?)?%s>" |
private
Definiert in Zeile 39 der Datei ParseHTML.php.
Copyright © 2003 - 2009 MyOOS [Shopsystem]. All rights reserved. MyOOS [Shopsystem] is Free Software released under the GNU/GPL License. Webmaster: info@r23.de (Impressum) |
|