HTMLPurifier/AttrDef/URI/Host.php Quellcode

Host.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
7 {
8 
/**
 * Sub-validator used for dotted IPv4 hosts; instantiated in the constructor.
 * @type HTMLPurifier_AttrDef_URI_IPv4
 */
protected $ipv4;

/**
 * Sub-validator used for bracketed IPv6 hosts; instantiated in the constructor.
 * @type HTMLPurifier_AttrDef_URI_IPv6
 */
protected $ipv6;

/**
 * Creates the IPv4 and IPv6 sub-validators that validate() delegates to.
 */
public function __construct()
{
    $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
    $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
}
26 
/**
 * Validates the host subcomponent of a URI.
 *
 * Accepted forms, tried in this order:
 *   1. the empty string,
 *   2. a bracketed literal "[...]" whose contents pass the IPv6 sub-validator,
 *   3. anything the IPv4 sub-validator accepts,
 *   4. an ASCII domain name matching the hostname grammar below,
 *   5. when Core.EnableIDNA is set and Net_IDNA2 is loadable, a name whose
 *      labels match that grammar after being Punycode-encoded.
 *
 * @param string $string  Host to validate.
 * @param object $config  Configuration object; read through
 *                        $config->get('Core.AllowHostnameUnderscore') and
 *                        $config->get('Core.EnableIDNA').
 * @param object $context Passed through unchanged to the IP sub-validators.
 * @return bool|string The (possibly normalized) host, or false if invalid.
 */
public function validate($string, $config, $context)
{
    // empty hostname is OK; it's usually semantically equivalent:
    // the default host as defined by a URI scheme is used:
    //
    //      If the URI scheme defines a default for host, then that
    //      default applies when the host subcomponent is undefined
    //      or when the registered name is empty (zero length).
    if ($string === '') {
        return '';
    }

    $length = strlen($string);
    if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
        // IPv6 literal: strip the brackets, validate the inside, re-wrap.
        $ip = substr($string, 1, $length - 2);
        $valid = $this->ipv6->validate($ip, $config, $context);
        if ($valid === false) {
            return false;
        }
        return '[' . $valid . ']';
    }

    // need to do checks on unusual encodings too
    $ipv4 = $this->ipv4->validate($string, $config, $context);
    if ($ipv4 !== false) {
        return $ipv4;
    }

    // A regular domain name.

    // This doesn't match I18N domain names, but we don't have proper IRI
    // support, so force users to insert Punycode.

    // There is not a good sense in which underscores should be
    // allowed, since it's technically not! (And if you go as
    // far to allow everything as specified by the DNS spec...
    // well, that's literally everything, modulo some space limits
    // for the components and the overall name (which, by the way,
    // we are NOT checking!). So we (arbitrarily) decide this:
    // let's allow underscores wherever we would have allowed
    // hyphens, if they are enabled. This is a pretty good match
    // for browser behavior, for example, a large number of browsers
    // cannot handle foo_.example.com, but foo_bar.example.com is
    // fairly well supported.
    $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

    // The productions describing this are:
    $a = '[a-z]'; // alpha
    $an = '[a-z0-9]'; // alphanum
    // alphanum | "-" (plus "_" when enabled). The hyphen is kept last in
    // the class so it can never be read as a range operator.
    $and = "[a-z0-9$underscore-]";
    // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
    $domainlabel = "$an($and*$an)?";
    // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
    $toplabel = "$a($and*$an)?";
    // hostname = *( domainlabel "." ) toplabel [ "." ]
    // The D modifier makes "$" match only at the very end of the subject;
    // without it a host with a trailing newline would be accepted.
    $hostname = "/^($domainlabel\\.)*$toplabel\\.?$/iD";
    if (preg_match($hostname, $string)) {
        return $string;
    }

    // If we have Net_IDNA2 support, we can support IRIs by
    // punycoding them. (This is the most portable thing to do,
    // since otherwise we have to assume browsers support IRIs
    // natively.)
    // The class_exists() guard keeps a missing Net_IDNA2 library from
    // raising a fatal "class not found" error; the host is then simply
    // rejected like any other non-matching name.
    if ($config->get('Core.EnableIDNA') && class_exists('Net_IDNA2')) {
        $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
        // we need to encode each period separately
        $parts = explode('.', $string);
        try {
            $new_parts = array();
            foreach ($parts as $part) {
                // Only labels containing a byte above 'z' (0x7a) are
                // handed to the encoder; all other labels pass through.
                $encodable = false;
                for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                    if (ord($part[$i]) > 0x7a) {
                        $encodable = true;
                        break;
                    }
                }
                if (!$encodable) {
                    $new_parts[] = $part;
                } else {
                    $new_parts[] = $idna->encode($part);
                }
            }
            $string = implode('.', $new_parts);
            if (preg_match($hostname, $string)) {
                return $string;
            }
        } catch (Exception $e) {
            // XXX error reporting
            // An exception raised while encoding falls through to the
            // final "return false" below.
        }
    }
    return false;
}
126 }
127 
128 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.