HTMLPurifier/Injector/AutoParagraph.php Quellcode

AutoParagraph.php
gehe zur Dokumentation dieser Datei
1 <?php
2 
10 {
/**
 * Name of this injector.
 * NOTE(review): presumably the identifier the injector framework uses to
 * refer to this injector -- confirm against the base injector class.
 */
14  public $name = 'AutoParagraph';
15 
/**
 * Element names this injector relies on; here only 'p'.
 * NOTE(review): presumably the injector is disabled when these elements
 * are not allowed by the HTML definition -- confirm against the base class.
 */
19  public $needed = array('p');
20 
/**
 * Builds a fresh <p> start token for an auto-inserted paragraph.
 *
 * The token's armor entry 'MakeWellFormed_TagClosedError' is set to true.
 * NOTE(review): presumably this keeps MakeWellFormed from reporting a
 * "tag closed" error for paragraphs this injector opened -- confirm.
 *
 * @return HTMLPurifier_Token_Start
 */
private function _pStart()
{
    $start = new HTMLPurifier_Token_Start('p');
    $start->armor['MakeWellFormed_TagClosedError'] = true;

    return $start;
}
30 
/**
 * Handles a text token, rewriting it in place when paragraph tags have to
 * be introduced around (or inside) the text.
 *
 * @param object $token Token whose text is read from ->data; passed by
 *                      reference. It is either left untouched or
 *                      overwritten with an array of replacement tokens.
 */
public function handleText(&$token)
{
    $text = $token->data;

    if (!$this->allowsElement('p')) {
        // The current parent does not accept <p>. The only case still of
        // interest is text sitting directly inside a <p> element.
        //
        // State 4.1: ...<b>PAR1
        // State 4.2: ...<b>PAR1\n\nPAR2
        //   -> nothing to do.
        if (empty($this->currentNesting)) {
            return;
        }

        $parent = $this->currentNesting[count($this->currentNesting) - 1];
        if ($parent->name == 'p') {
            // State 3.1: ...<p>PAR1
            // State 3.2: ...<p>PAR1\n\nPAR2
            // Start from an empty list: _splitText() reads an empty result
            // as "no paragraph was just opened by this injector".
            $token = array();
            $this->_splitText($text, $token);
        }

        return;
    }

    // Text in the anonymous root node and text that has a double newline
    // are treated the same way; other text inside the document only gets
    // a paragraph if the look-ahead finds a reason for one.
    $in_document = !empty($this->currentNesting);
    if ($in_document && strpos($text, "\n\n") === false) {
        // State 2: <div>PAR1... (similar to State 1.4)
        // We are inside an element that allows <p>, but we do not yet
        // know whether paragraphs are needed.
        if ($this->_pLookAhead()) {
            // State 2.1: <div>PAR1<b>PAR1\n\nPAR2
            // This is always the first child: an earlier inline element
            // would have run this same routine and found the double
            // newline (a comment is a possible exception).
            $token = array($this->_pStart(), $token);
        }
        // Otherwise:
        // State 2.2.1: <div>PAR1<div>
        // State 2.2.2: <div>PAR1<b>PAR1</b></div>
        return;
    }

    $i = $nesting = null;
    $has_next = $this->forwardUntilEndToken($i, $current, $nesting);

    if (!$has_next && $token->is_whitespace) {
        // State 1.1: whitespace followed by the end of the document.
        // Degenerate case, leave the token alone.
        return;
    }

    if (!$token->is_whitespace || $this->_isInline($current)) {
        // State 1.2: PAR1
        // State 1.3: PAR1\n\nPAR2
        // State 1.4: <div>PAR1\n\nPAR2 (see State 2)
        $token = array($this->_pStart());
        $this->_splitText($text, $token);
    }
    // Otherwise State 1.5: \n<hr /> -- whitespace before a non-inline
    // element, nothing to wrap.
}
108 
/**
 * Handles an element token, prepending a <p> start token (and, at the
 * root, possibly a "\n\n" text token) when the element should begin a new
 * paragraph.
 *
 * Block tokens need no "already inside <p>" check here; per the original
 * note, MakeWellFormed would have auto-closed such a paragraph.
 *
 * @param object $token Element token, passed by reference. Left untouched
 *                      or overwritten with an array of tokens that ends
 *                      with the original token.
 */
public function handleElement(&$token)
{
    if (!$this->allowsElement('p')) {
        // State 2.2: <ul><li>
        // State 2.4: <p><b>
        return;
    }

    if (empty($this->currentNesting)) {
        // We are in the root node.
        if ($this->_isInline($token)) {
            // State 3.1: <b>
            // The {p} tag is inserted here; it is not yet reflected in
            // inputTokens at this point.
            $token = array($this->_pStart(), $token);
        }
        // Otherwise State 3.2: <div>

        $i = null;
        if ($this->backward($i, $prev) && !$prev instanceof HTMLPurifier_Token_Text) {
            // State 3.1.1: ...</p>{p}<b>
            // State 3.2.1: ...</p><div>
            // Separate the element from the preceding non-text token.
            if (!is_array($token)) {
                $token = array($token);
            }
            array_unshift($token, new HTMLPurifier_Token_Text("\n\n"));
        }
        // When the previous token is text:
        // State 3.1.2: ...</p>\n\n{p}<b>
        // State 3.2.2: ...</p>\n\n<div>
        // (PAR<ELEM> cannot occur, PAR would already be wrapped in <p>.)
        return;
    }

    if (!$this->_isInline($token)) {
        // State 2.3: ...<div>
        return;
    }

    // State 1: <div>...<b>
    // Look at the token immediately before this one to see whether the
    // inline element is adjacent to its parent's start tag.
    $i = null;
    $this->backward($i, $prev);

    if ($prev instanceof HTMLPurifier_Token_Start) {
        // State 1.2.1: <div><b> -- look ahead to see if <p> is needed.
        // State 1.3.1: <div><b>PAR1\n\nPAR2          -> wrap
        // State 1.3.2: <div><b>PAR1</b></div>        -> leave
        // State 1.3.3: <div><b>PAR1</b><div></div>\n\n</div> -> leave
        $wrap = $this->_pLookAhead();
    } else {
        // Not adjacent to the parent.
        // State 1.1.4: <div><p>PAR1</p>\n\n<b>       -> wrap
        //   (arguably something _splitText should take care of)
        // State 1.1.1: <div><p>PAR1</p><b>           -> leave
        // State 1.1.2: <div><br /><b>                -> leave
        // State 1.1.3: <div>PAR<b>                   -> leave
        $wrap = $prev instanceof HTMLPurifier_Token_Text
            && substr($prev->data, -2) === "\n\n";
    }

    if ($wrap) {
        $token = array($this->_pStart(), $token);
    }
}
203 
/**
 * Splits text on double newlines and appends the resulting text, </p>,
 * "\n\n" and <p> tokens to $result.
 *
 * An empty $result on entry is taken to mean that the injector did not
 * just open a paragraph, i.e. the text continues an existing one.
 *
 * @param string $data   Text to split.
 * @param array  $result Token list to extend, passed by reference.
 */
private function _splitText($data, &$result)
{
    $chunks = explode("\n\n", $data);
    $last = count($chunks) - 1;

    if ($last === 0) {
        // No double newline at all: pass the text through as one token.
        $result[] = new HTMLPurifier_Token_Text($data);
        return;
    }

    $needs_start = false;
    $needs_end = false;
    $paragraphs = array(); // chunks that are not pure whitespace

    foreach ($chunks as $idx => $chunk) {
        if (trim($chunk) !== '') {
            $paragraphs[] = $chunk;
            continue;
        }

        if ($idx === 0) {
            // The text starts with a double newline.
            if (empty($result)) {
                // We have been inside a paragraph for a while and the
                // newline asks for a new one: close the current one now.
                $result[] = new HTMLPurifier_Token_End('p');
                $result[] = new HTMLPurifier_Token_Text("\n\n");
                // The matching start tag is only added further down, and
                // only if real paragraphs follow in this text; otherwise
                // the next call to the injector takes care of it.
                $needs_start = true;
            } else {
                // A paragraph was just opened; put the double newline
                // from the source back in front of it for presentation.
                array_unshift($result, new HTMLPurifier_Token_Text("\n\n"));
            }
        } elseif ($idx === $last) {
            // The text ends with a double newline, so the final
            // paragraph must be closed explicitly.
            $needs_end = true;
        }
    }

    if (empty($paragraphs)) {
        // Nothing but whitespace.
        return;
    }

    if ($needs_start) {
        // Start tag requested by the leading double newline.
        $result[] = $this->_pStart();
    }

    foreach ($paragraphs as $paragraph) {
        $result[] = new HTMLPurifier_Token_Text($paragraph);
        $result[] = new HTMLPurifier_Token_End('p');
        $result[] = new HTMLPurifier_Token_Text("\n\n");
        $result[] = $this->_pStart();
    }

    // Always drop the trailing start token (the injector adds it later if
    // it turns out to be needed, trading a look-ahead for a look-behind).
    // Without a trailing double newline also drop the final "\n\n" and
    // </p>, leaving it to MakeWellFormed to close the paragraph.
    for ($drop = $needs_end ? 1 : 3; $drop > 0; $drop--) {
        array_pop($result);
    }
}
291 
/**
 * Tells whether a token's element may appear as a child of <p>, which is
 * what this class treats as "inline".
 *
 * The whole lookup stays inside one isset(), so a missing definition
 * entry or a token without a name simply yields false.
 *
 * @param object $token Token whose ->name is looked up.
 * @return bool
 */
private function _isInline($token)
{
    $allowed_in_p = isset($this->htmlDefinition->info['p']->child->elements[$token->name]);

    return $allowed_in_p;
}
302 
/**
 * Scans the upcoming tokens to decide whether a <p> should be opened at
 * the current position.
 *
 * Each token delivered by forwardUntilEndToken() is passed to
 * _checkNeedsP(); the first non-null verdict is returned. If the scan
 * ends without a verdict, the answer is false.
 *
 * @return bool
 */
private function _pLookAhead()
{
    // Start one level deep when the current token is itself a start tag.
    // NOTE(review): presumably so the scan runs past that tag's own end
    // tag -- confirm against forwardUntilEndToken().
    $nesting = ($this->currentToken instanceof HTMLPurifier_Token_Start) ? 1 : 0;

    $i = null;
    while ($this->forwardUntilEndToken($i, $current, $nesting)) {
        $verdict = $this->_checkNeedsP($current);
        if ($verdict !== null) {
            return $verdict;
        }
    }

    return false;
}
326 
/**
 * Judges a single look-ahead token.
 *
 * @param object $current Token being inspected.
 * @return bool|null false when a non-inline start tag is hit, true when a
 *                   text token contains a double newline, null when the
 *                   token does not settle the question.
 */
private function _checkNeedsP($current)
{
    if ($current instanceof HTMLPurifier_Token_Start) {
        // <div>PAR1<div>  -- a block element ends the search early;
        // an inline start tag leaves the question open.
        return $this->_isInline($current) ? null : false;
    }

    if ($current instanceof HTMLPurifier_Token_Text
        && strpos($current->data, "\n\n") !== false
    ) {
        // <div>PAR1<b>PAR1\n\nPAR2  -- a paragraph break is coming.
        return true;
    }

    // <div>PAR1<b>PAR1...  -- nothing conclusive yet.
    return null;
}
354 }
355 
356 // vim: et sw=4 sts=4




Korrekturen, Hinweise und Ergänzungen

Bitte scheuen Sie sich nicht zu melden, was auf dieser Seite sachlich falsch oder irreführend ist, was ergänzt werden sollte oder was fehlt. Wählen Sie dazu oben im Menü „Seite“ den Eintrag „Support-Forum“. Um Anmerkungen zu posten, ist eine kostenlose Anmeldung erforderlich. Unpassende Postings, Spam usw. werden kommentarlos entfernt.