01.
02.
03.
04.
05.
06.
07.
08.
09.
10.
11.
12.
13.
14.
15.
16.
17.
18.
19.
20.
21.
22.
23.
24.
25.
26.
27.
28.
29.
30.
31.
32.
33.
34.
35.
36.
37.
38.
39.
40.
41.
42.
43.
44.
45.
46.
47.
48.
49.
50.
51.
52.
53.
54.
55.
56.
57.
58.
59.
60.
61.
62.
63.
64.
65.
66.
67.
68.
69.
70.
71.
72.
73.
74.
75.
76.
77.
78.
79.
80.
81.
82.
83.
84.
85.
86.
87.
88.
89.
90.
91.
92.
93.
94.
95.
96.
97.
98.
99.
100.
101.
102.
103.
104.
105.
106.
107.
108.
109.
110.
111.
112.
113.
114.
115.
116.
117.
118.
119.
120.
121.
122.
123.
124.
125.
126.
127.
128.
129.
130.
131.
132.
133.
134.
135.
136.
137.
138.
139.
140.
141.
142.
143.
144.
145.
146.
147.
148.
149.
150.
151.
152.
153.
154.
155.
156.
157.
158.
159.
160.
161.
162.
163.
164.
165.
166.
167.
168.
169.
170.
171.
172.
173.
174.
175.
176.
177.
178.
179.
180.
181.
182.
183.
184.
185.
186.
187.
188.
189.
190.
191.
192.
193.
194.
195.
196.
197.
198.
199.
200.
201.
202.
203.
204.
205.
206.
207.
208.
209.
210.
211.
212.
213.
214.
215.
216.
217.
218.
219.
220.
221.
222.
223.
224.
225.
226.
227.
228.
229.
230.
231.
232.
233.
234.
235.
236.
237.
238.
239.
240.
241.
242.
243.
244.
245.
246.
247.
248.
249.
250.
251.
252.
253.
254.
255.
256.
257.
258.
259.
260.
261.
262.
263.
264.
265.
266.
267.
268.
269.
270.
271.
272.
273.
274.
275.
276.
277.
278.
279.
280.
281.
282.
283.
284.
285.
286.
287.
288.
289.
290.
291.
292.
293.
294.
295.
296.
297.
298.
299.
300.
301.
302.
303.
304.
305.
306.
307.
308.
309.
310.
311.
312.
313.
314.
315.
316.
317.
318.
319.
320.
321.
322.
323.
324.
325.
326.
327.
328.
329.
330.
331.
332.
333.
334.
335.
336.
337.
338.
339.
340.
341.
342.
343.
344.
345.
346.
347.
348.
349.
350.
351.
352.
353.
354.
355.
356.
357.
358.
359.
360.
361.
362.
363.
364.
365.
366.
367.
368.
369.
370.
371.
372.
373.
374.
375.
376.
377.
378.
379.
380.
381.
382.
383.
384.
385.
386.
387.
388.
389.
390.
391.
392.
393.
394.
395.
396.
397.
398.
399.
400.
401.
402.
403.
404.
405.
406.
407.
408.
409.
410.
411.
412.
413.
414.
415.
416.
417.
418.
419.
420.
421.
422.
423.
424.
425.
426.
427.
428.
429.
430.
431.
432.
433.
434.
435.
436.
437.
438.
439.
440.
441.
442.
443.
444.
445.
446.
447.
448.
449.
450.
451.
452.
453.
454.
455.
456.
457.
458.
459.
460.
461.
462.
463.
464.
465.
466.
467.
468.
469.
470.
471.
472.
473.
474.
475.
476.
477.
478.
479.
480.
481.
482.
483.
484.
485.
486.
487.
488.
489.
|
|
<?php
/******************************************************************************/
/* */
/* __ ____ */
/* ___ / / ___ / __/__ __ _____________ ___ */
/* / _ \/ _ \/ _ \_\ \/ _ \/ // / __/ __/ -_|_-< */
/* / .__/_//_/ .__/___/\___/\_,_/_/ \__/\__/___/ */
/* /_/ /_/ */
/* */
/* */
/******************************************************************************/
/* */
/* Titre : Tronquer une chaîne de caractères HTML */
/* */
/* URL : http://www.phpsources.org/scripts391-PHP.htm */
/* Auteur : forty */
/* Date édition : 21 Mai 2008 */
/* Website auteur : http://www.toplien.fr/ */
/* */
/******************************************************************************/
/*
 * This script is based on the HTML parser available at:
 * http://php-html.sourceforge.net/
 */
// Node kinds reported by HtmlParser through its $iNodeType field.

// Not assigned anywhere in this file; presumably the state before the first
// parse() call -- TODO confirm.
define('NODE_TYPE_START', 0);
// An opening tag such as <p> was read.
define('NODE_TYPE_ELEMENT', 1);
// A closing tag such as </p> was read.
define('NODE_TYPE_ENDELEMENT', 2);
// A run of text (or a "<" that does not start a valid tag) was read.
define('NODE_TYPE_TEXT', 3);
// A <!-- ... --> comment was read.
define('NODE_TYPE_COMMENT', 4);
// The end of the input was reached; parse() returned false.
define('NODE_TYPE_DONE', 5);
// A tag that needs no closing tag was read (ends with "/>" or is listed in
// HtmlParser::$BalisesSimples).
define('NODE_TYPE_ELEMENT_END', 6);
/**
 * Class HtmlParser.
 *
 * A small forward-only HTML tokenizer. Create an instance with the HTML
 * text, then call parse() repeatedly until it returns false. Every time
 * parse() returns true, $iNodeType, $iNodeName, $iNodeValue,
 * $iNodeAttributes, $iNodeStart and $iNodeEnd describe the node just read.
 *
 * All positions are byte offsets into the text given to the constructor.
 *
 * Instances may also be created with the convenience functions
 * HtmlParser_ForFile and HtmlParser_ForURL.
 */
class HtmlParser {
    /**
     * Field iNodeType.
     * One of the NODE_TYPE_* constants; updated by parse().
     */
    public $iNodeType;
    /**
     * Field iNodeName.
     * For elements, the tag name as written in the source (case preserved).
     * "Text" for text nodes and "Comment" for comments.
     */
    public $iNodeName = "";
    /**
     * Field iNodeValue.
     * For text nodes, the text; for comments, the whole comment including
     * its delimiters; empty for elements.
     */
    public $iNodeValue = "";
    /**
     * Field iNodeAttributes.
     * A string-indexed array containing attribute values
     * of the current node. Indexes are always lowercase.
     */
    public $iNodeAttributes;
    /**
     * Field iNodeStart.
     * Offset of the first char of the node (set by parse()).
     */
    public $iNodeStart;
    /**
     * Field iNodeEnd.
     * Offset just past the last char of the node.
     */
    public $iNodeEnd;

    // The following fields should be considered private:
    public $iHtmlText;
    public $iHtmlTextLength;
    public $iHtmlTextIndex = 0;
    // Declared by the original code but never read or written; kept so that
    // existing code touching it does not create a dynamic property.
    public $iHtmlCurrentChar;
    // Character at $iHtmlTextIndex, or the integer -1 past the end of text.
    public $iCurrentChar = -1;
    public $BOE_ARRAY;
    public $B_ARRAY;
    public $BOS_ARRAY;
    // True while inside <script>...</script>: "<!--" is then not a comment.
    public $no_comment = false;
    // Elements that never take a closing tag (names in lowercase).
    public $BalisesSimples = array('hr', 'br', 'input', 'meta', 'link', 'img',
        'area', 'param');

    /**
     * Constructor.
     * Constructs an HtmlParser instance with the HTML text given.
     *
     * @param string $aHtmlText HTML to tokenize; cast to string.
     */
    public function __construct($aHtmlText) {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();
        $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "=");
        $this->B_ARRAY = array(" ", "\t", "\r", "\n");
        $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/");
        $this->setTextIndex(0);
    }

    /**
     * PHP 4 style constructor name, kept as a plain method so that legacy
     * code calling it explicitly (e.g. parent::HtmlParser($text)) still works.
     * Declared after __construct() on purpose.
     */
    public function HtmlParser($aHtmlText) {
        $this->__construct($aHtmlText);
    }

    /**
     * Method parse.
     * Parses the next node. Returns false only if
     * the end of the HTML text has been reached.
     * Updates values of iNode* fields.
     */
    public function parse() {
        $this->iNodeStart = $this->iHtmlTextIndex;
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            $this->iNodeEnd = $this->iHtmlTextIndex;
            return true;
        }
        return $this->readTag();
    }

    /** Resets the attribute map of the current node. */
    public function clearAttributes() {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads the construct starting at the current "<": a closing tag, a
     * comment, an opening/self-closing tag, or -- when what follows "<" is
     * not a valid tag name -- a text node made of "<" plus that invalid name.
     *
     * @return bool false (node type NODE_TYPE_DONE) when the current char is
     *              not "<", i.e. at the end of the input; true otherwise.
     */
    public function readTag() {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $tagStart = $this->iHtmlTextIndex;
        $this->clearAttributes();
        $this->moveNext(); // consume "<"

        // Closing tag: </name ...>
        if ($this->iCurrentChar === '/') {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            if (strtolower($name) === 'script') {
                $this->no_comment = false;
            }
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            $this->iNodeEnd = $this->iHtmlTextIndex;
            return true;
        }

        $name = $this->skipToBlanksOrSlashInTag();

        if (!$this->isValidTagIdentifier($name)) {
            if (!$this->no_comment && strpos($name, "!--") === 0) {
                // Look for the terminator from just after "<!--" instead of
                // from the current index: the name scan may already have run
                // up to (or past the dashes of) the closing "-->".
                $close = strpos($this->iHtmlText, "-->", $tagStart + 4);
                if ($close !== false) {
                    $end = $close + 3;
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = substr($this->iHtmlText, $tagStart,
                        $end - $tagStart);
                    $this->setTextIndex($end);
                    $this->iNodeEnd = $end;
                    return true;
                }
            }
            // Not a tag and not a (terminated) comment: hand it back as text.
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = "<" . $name;
            $this->iNodeEnd = $this->iHtmlTextIndex;
            return true;
        }

        // Opening (or self-closing) tag.
        if (strtolower($name) === 'script') {
            $this->no_comment = true;
        }
        $this->iNodeType = NODE_TYPE_ELEMENT;
        $this->iNodeValue = "";
        $this->iNodeName = $name;
        while ($this->skipBlanksInTag()) {
            $attrName = $this->skipToBlanksOrEqualsInTag();
            if ($attrName !== "" && $attrName !== "/") {
                $skippedBlanks = $this->skipBlanksInTag();
                if ($this->iCurrentChar === "=") {
                    $this->skipEqualsInTag();
                    $this->skipBlanksInTag();
                    $value = $this->readValueInTag();
                    $this->iNodeAttributes[strtolower($attrName)] = $value;
                } else {
                    // Attribute without a value.
                    $this->iNodeAttributes[strtolower($attrName)] = "";
                    if ($skippedBlanks) {
                        // Step back onto the last blank so the loop condition
                        // sees a separator before the next attribute.
                        $this->setTextIndex($this->iHtmlTextIndex - 1);
                    }
                }
            }
        }

        $closed = $this->skipEndOfTag();
        $end = $this->iHtmlTextIndex;
        $this->iNodeEnd = $end;
        // Last char inside the tag: the one before ">" when the tag is
        // terminated, otherwise the last char of the input.
        $lastInside = $closed ? $end - 2 : $end - 1;
        if ($this->iHtmlText[$lastInside] === '/'
            || in_array(strtolower($name), $this->BalisesSimples, true)) {
            $this->iNodeType = NODE_TYPE_ELEMENT_END;
        }
        return true;
    }

    /**
     * Tells whether $name consists only of letters, digits, "_" and "-".
     *
     * @return bool
     */
    public function isValidTagIdentifier($name) {
        return preg_match('/^[A-Za-z0-9_\-]+\z/', $name) === 1;
    }

    /** Skips blanks; returns true when at least one blank was skipped. */
    public function skipBlanksInTag() {
        return "" !== $this->skipInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "=" or ">". */
    public function skipToBlanksOrEqualsInTag() {
        return $this->skipToInTag($this->BOE_ARRAY);
    }

    /** Reads up to the next blank or ">". */
    public function skipToBlanksInTag() {
        return $this->skipToInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "/" or ">". */
    public function skipToBlanksOrSlashInTag() {
        return $this->skipToInTag($this->BOS_ARRAY);
    }

    /** Consumes a single "=" if it is the current char. */
    public function skipEqualsInTag() {
        return $this->skipMaxInTag("=", 1);
    }

    /**
     * Reads an attribute value: quoted with " or ', or unquoted up to the
     * next blank. Reading always stops at ">", so a quoted value containing
     * ">" is cut short there.
     */
    public function readValueInTag() {
        $ch = $this->iCurrentChar;
        if ($ch === "\"" || $ch === "'") {
            $this->skipMaxInTag($ch, 1);          // opening quote
            $value = $this->skipToInTag($ch);
            $this->skipMaxInTag($ch, 1);          // closing quote, if any
            return $value;
        }
        return $this->skipToBlanksInTag();
    }

    /**
     * Moves to $index and refreshes $iCurrentChar (-1 past the end).
     */
    public function setTextIndex($index) {
        $this->iHtmlTextIndex = $index;
        if ($index >= $this->iHtmlTextLength) {
            $this->iCurrentChar = -1;
        } else {
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
    }

    /** Advances one char; returns false when already at the end. */
    public function moveNext() {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /**
     * Moves just past the next ">" (or to the end of the text when there is
     * none).
     *
     * @return bool true when a ">" was found.
     */
    public function skipEndOfTag() {
        if ($this->iCurrentChar === -1) {
            return false;
        }
        $pos = strpos($this->iHtmlText, ">", $this->iHtmlTextIndex);
        if ($pos === false) {
            $this->setTextIndex($this->iHtmlTextLength);
            return false;
        }
        $this->setTextIndex($pos + 1);
        return true;
    }

    /**
     * Tells whether the single char $ch belongs to $chars, which may be an
     * array of chars or a string used as a set of chars.
     */
    private function isOneOf($ch, $chars) {
        if (is_array($chars)) {
            return in_array($ch, $chars, true);
        }
        $chars = (string) $chars;
        return $chars !== '' && strpos($chars, $ch) !== false;
    }

    /**
     * Consumes chars while they belong to $chars; never passes ">".
     * Returns the consumed chars.
     */
    public function skipInTag($chars) {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || !$this->isOneOf($ch, $chars)) {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Same as skipInTag() but consumes at most $maxChars chars.
     */
    public function skipMaxInTag($chars, $maxChars) {
        $sb = "";
        $count = 0;
        while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
            if ($ch === ">" || !$this->isOneOf($ch, $chars)) {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes chars until one belonging to $chars, or ">", is reached.
     * Returns the consumed chars; the stop char is not consumed.
     */
    public function skipToInTag($chars) {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || $this->isOneOf($ch, $chars)) {
                break;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes and returns everything up to the next "<" (or the end of
     * the text).
     */
    public function skipToElement() {
        if ($this->iCurrentChar === -1) {
            return "";
        }
        $start = $this->iHtmlTextIndex;
        $pos = strpos($this->iHtmlText, "<", $start);
        if ($pos === false) {
            $pos = $this->iHtmlTextLength;
        }
        if ($pos === $start) {
            return "";
        }
        $this->setTextIndex($pos);
        return substr($this->iHtmlText, $start, $pos - $start);
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     */
    public function skipToStringInTag($needle) {
        if ($this->iCurrentChar === -1) {
            return "";
        }
        $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex,
            $top - $this->iHtmlTextIndex);
        $this->setTextIndex($top);
        return $retvalue;
    }
}
/**
 * Convenience factory: builds an HtmlParser for the contents of a file.
 * Delegates to HtmlParser_ForURL(), passing $fileName through unchanged.
 */
function HtmlParser_ForFile ($fileName) {
    $parser = HtmlParser_ForURL($fileName);
    return $parser;
}
/**
 * Convenience factory: reads everything available at $url (any path or URL
 * the PHP stream wrappers accept) and returns an HtmlParser over it.
 *
 * When the source cannot be read, PHP emits its usual warning and the
 * returned parser is built over an empty string, so its first parse() call
 * returns false.
 *
 * @param string $url Path or URL to read.
 * @return HtmlParser
 */
function HtmlParser_ForURL ($url) {
    // file_get_contents() reads until EOF and returns false on failure,
    // which avoids calling fread()/fclose() on a failed fopen() handle.
    $content = file_get_contents($url);
    if ($content === false) {
        $content = "";
    }
    return new HtmlParser ($content);
}
/**
 * Truncates an HTML fragment to about $max bytes of text (markup is not
 * counted) while keeping the markup well-formed: every element still open
 * at the cut point gets its closing tag appended after $suffix.
 *
 * The cut is made at the last occurrence of $separateur that fits in the
 * limit inside the text node where the limit is reached; when that node
 * holds no such separator it is kept whole. Lengths are measured with
 * strlen(), i.e. in bytes, not in multibyte characters.
 *
 * @param string $chaine     HTML fragment to truncate.
 * @param int    $max        Maximum text length, tags excluded.
 * @param string $separateur Boundary the cut must fall on; an empty string
 *                           allows a cut at any byte.
 * @param string $suffix     Text appended at the cut point.
 * @return string The truncated fragment, or $chaine unchanged when its text
 *                already fits.
 */
function TronqueHtml($chaine, $max, $separateur = ' ', $suffix = ' ...') {
    // Nothing to do when the visible text already fits.
    if (strlen(strip_tags($chaine)) <= $max) {
        return $chaine;
    }

    $tabElements = array(); // stack of the names of the elements still open
    $cur_len = 0;           // text length accumulated so far
    $coupe = null;          // offset in $chaine where the cut is made
    $parser = new HtmlParser($chaine);

    while ($coupe === null && $parser->parse()) {
        if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
            array_push($tabElements, $parser->iNodeName);
        } elseif ($parser->iNodeType == NODE_TYPE_ENDELEMENT) {
            // Find the nearest open element with that name, ignoring case.
            $cherche = strtolower($parser->iNodeName);
            $idx = count($tabElements) - 1;
            while ($idx >= 0 && strtolower($tabElements[$idx]) !== $cherche) {
                $idx--;
            }
            if ($idx < 0) {
                // Stray closing tag: report it once and leave the stack
                // untouched (popping until a match would never terminate).
                echo 'Erreur : pas de balise ouvrante pour ' . $parser->
                    iNodeName;
            } else {
                // Close the matched element and everything opened inside it.
                while (count($tabElements) > $idx) {
                    array_pop($tabElements);
                }
            }
        } elseif ($parser->iNodeType == NODE_TYPE_TEXT) {
            $longueur = $parser->iNodeEnd - $parser->iNodeStart;
            $cur_max = $cur_len + $longueur;
            if ($cur_max < $max) {
                $cur_len = $cur_max;
            } elseif ($cur_max == $max) {
                // The limit falls exactly at the end of this text node.
                $coupe = $parser->iNodeEnd;
            } else {
                // The limit falls inside this node: look for the last
                // separator that starts at or before the limit.
                $fenetre = substr($parser->iNodeValue, 0,
                    $max - $cur_len + strlen($separateur));
                if ($separateur === '') {
                    $pos = strlen($fenetre);
                } else {
                    $pos = strrpos($fenetre, $separateur);
                }
                if ($pos !== false) {
                    $coupe = $parser->iNodeStart + $pos;
                } else {
                    $coupe = $parser->iNodeEnd;
                }
            }
        }
    }

    if ($coupe === null) {
        // The parser never reached the limit: return the input unchanged.
        return $chaine;
    }

    $resultat = substr($chaine, 0, $coupe) . $suffix;
    // Close the open elements, innermost first.
    while (($balise = array_pop($tabElements)) !== null) {
        $resultat .= '</' . $balise . '>';
    }
    return $resultat;
}
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>Test tronque html</title>
</head>
<body>
<?php
// Demo: a sample HTML fragment with nested inline elements and <br /> tags,
// used to exercise TronqueHtml(). The literal is split with "." only to keep
// source lines short; the line breaks inside the quotes are part of the text.
$chaine = '<h2>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. </h2>
<p>
<span class="Style1">Praesent tortor purus, <strong>commodo</strong> quis,' .
' interdum et, tincidunt ut, lacus.
Etiam condimentum volutpat dolor. Proin faucibus libero eu lectus.<br />
<em>Lorem ipsum dolor sit amet, consectetuer adipiscing elit. <strong>Nam' .
' aliquam tellus eget ligula</strong>. Phasellus pretium neque ut felis.' .
' Quisque lacinia congue ante. Lorem ipsum dolor sit amet, consectetuer' .
' adipiscing elit. Quisque at metus quis tortor auctor faucibus.</em></span><b' .
'r />
Curabitur quis lectus. Integer felis est, congue id, luctus quis, congue' .
' volutpat, lacus. Curabitur malesuada felis semper nisl. Aenean laoreet. Nunc' .
' vitae nisi. <br />
</p>';
// Print the fragment truncated to a text length of 280 (tags not counted),
// cutting on a space and appending " ...".
echo "Tronqué à 280 caractères (hors balises) :<br>\n";
echo TronqueHtml($chaine, 280, ' ', ' ...');
echo "\n<hr>\n";
// Then print the untruncated fragment for comparison.
echo $chaine;
?>
</body>
</html>
|