HTMLPurifier/AttrDef/CSS/FontFamily.php Quellcode

FontFamily.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
7 {
8 
9  protected $mask = null;
10 
/**
 * Builds the byte mask of characters permitted in a font name.
 *
 * The mask holds '_', '-', space, the ASCII letters and digits, and
 * every byte from 0x80 to 0xFF (the bytes that make up multi-byte
 * UTF-8 sequences). It is stored in $this->mask for use with strspn().
 */
public function __construct()
{
    // Each character class is produced with range(), which yields exactly
    // one entry per character. A loop of the form
    // `for ($c = 'a'; $c <= 'z'; $c++)` does NOT stop at 'z': PHP string
    // increment turns 'z' into 'aa', and 'aa' <= 'z' is still true as a
    // string comparison, so such a loop keeps going through 'yz' and
    // appends well over a thousand redundant bytes per alphabet.
    $mask = '_- '
        . implode('', range('a', 'z'))
        . implode('', range('A', 'Z'))
        . implode('', range(0, 9));

    // Special bytes used by UTF-8. Invalid bytes in this range are not
    // excluded; the original author relies on the input already being
    // restricted to well-formed UTF-8, so they should never occur.
    for ($byte = 0x80; $byte <= 0xFF; $byte++) {
        $mask .= chr($byte);
    }

    $this->mask = $mask;

    // Performance note carried over from the original author: PHP's
    // internal str(c)spn implementation is O(length of string * length
    // of mask), which is why the mask must stay as short as possible;
    // it was nevertheless found to be faster than preg_match.
    // Possible optimization: invert the mask.
}
48 
/**
 * Validates a CSS font-family list and returns a normalized copy.
 *
 * The value is split on commas (font names are assumed not to contain
 * commas). Each entry is checked on its own and silently dropped when it
 * is not acceptable; the surviving entries are joined with ', '.
 *
 * @param string $string Raw font-family value.
 * @param object $config Configuration object; this method only calls
 *                       get('CSS.AllowedFonts') on it.
 * @param mixed $context Not used by this method.
 * @return string|false The normalized list, or false when no entry survives.
 */
public function validate($string, $config, $context)
{
    // CSS generic family keywords; these are emitted without quotes.
    static $genericFamilies = array(
        'serif' => true,
        'sans-serif' => true,
        'monospace' => true,
        'fantasy' => true,
        'cursive' => true
    );

    // null means "no whitelist"; otherwise it is probed with isset() as a
    // lookup keyed by font name.
    $whitelist = $config->get('CSS.AllowedFonts');

    $accepted = array();
    foreach (explode(',', $string) as $candidate) {
        $name = trim($candidate);
        if ($name === '') {
            continue;
        }

        // Generic keyword: pass through verbatim if permitted.
        if (isset($genericFamilies[$name])) {
            if ($whitelist === null || isset($whitelist[$name])) {
                $accepted[] = $name;
            }
            continue;
        }

        // Quoted name: it must be longer than the two quote characters
        // and must close with the same quote it opened with.
        $opening = $name[0];
        if ($opening === '"' || $opening === "'") {
            $byteCount = strlen($name);
            if ($byteCount <= 2 || $name[$byteCount - 1] !== $opening) {
                continue;
            }
            $name = substr($name, 1, $byteCount - 2);
        }

        $name = $this->expandCSSEscape($name);

        // From here on $name is the bare font name.

        if ($whitelist !== null && !isset($whitelist[$name])) {
            continue;
        }

        // Purely alphanumeric names are safe to emit unquoted.
        if ($name !== '' && ctype_alnum($name)) {
            $accepted[] = $name;
            continue;
        }

        // Fold newline, tab, carriage return and form feed into plain
        // spaces (form feed really should not show up at all).
        $name = strtr($name, "\n\t\r\x0C", '    ');

        // Character classes and why they are (not) allowed:
        // - Alphanumerics are essentially safe and were handled above.
        // - Spaces require quoting, though most parsers cope as long as
        //   nothing else can be misinterpreted.
        // - Dashes are rare but unproblematic for parsing/rendering.
        //   Together these cover the majority of Western font names.
        // - Non-ASCII Unicode: parsers treat these codepoints basically
        //   uniformly, even punctuation-like ones. They occur on
        //   non-Western pages and are supported by most major browsers,
        //   e.g. "MS 明朝" <http://ja.wikipedia.org/wiki/MS_明朝>; see
        //   <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
        //   and <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>.
        //   Most such fonts also have ASCII names ('MS Mincho'), which
        //   are considered the professional choice. (Information
        //   provided by Takeshi Terada.)
        // Characters not known to be used in font names:
        // - Single quote: Internet Explorer's cssText translation
        //   (believed to be invoked by innerHTML) normalizes quoting to
        //   single quotes without escaping embedded ones. This is
        //   special-casing for font-family, not general IE behavior,
        //   and Firefox 3.x does it too. A single quote therefore cannot
        //   be used safely wherever an innerHTML/cssText round trip may
        //   happen.
        // - Double quote: IE normalizes these to single quotes whatever
        //   the encoding, so they share the single-quote problem.
        //   Firefox 3.x behaves the same way.
        // - Backslash: reduced by one (\\ -> \) on every iteration in
        //   IE7, IE8 and FF3, so it cannot be used safely.
        // - Semicolons, commas, backticks and the remaining ASCII
        //   punctuation are handled properly when quoted. Unquoted
        //   behavior was not checked, which does not matter as long as
        //   the browser does not remove the surrounding quotes.
        //
        // Resulting safety levels:
        // - Paranoid: alphanumeric, spaces and dashes(?)
        // - International: Paranoid + non-ASCII Unicode
        // - Edgy: everything except quotes and backslashes
        // - NoJS: standards compliance, ignoring IE; theoretically fine
        //   with careful escaping provided innerHTML/cssText is never
        //   called.
        //
        // What is enforced below is the International level plus the
        // underscore: every byte must be in $this->mask. Using strspn()
        // on bytes assumes the string is already well-formed Unicode.
        if (strlen($name) !== strspn($name, $this->mask)) {
            continue;
        }

        // Historical note: escaping backslash, double quote and single
        // quote as \5C, \22 and \27 was once considered instead of
        // rejecting them. Without innerHTML/cssText those transforms are
        // not a security risk, but they were never enabled.

        // The name may contain spaces, so it is always emitted in
        // single quotes.
        $accepted[] = "'" . $name . "'";
    }

    if (count($accepted) === 0) {
        return false;
    }
    return implode(', ', $accepted);
}
216 
217 }
218 
219 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht und melden Sie, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte, was fehlt usw. Dazu bitte oben aus dem Menü Seite den Eintrag Support Forum wählen. Es ist eine kostenlose Anmeldung erforderlich, um Anmerkungen zu posten. Unpassende Postings, Spam usw. werden kommentarlos entfernt.