I don't know exactly where this code originally came from, but I have been using it for a while; its comments explain how it works:
/**
 * Cut a plain-text or HTML string to roughly the requested length without
 * breaking the markup.
 *
 * Only characters outside of tags count towards the length. The text is cut
 * at the first position where
 *   - more than ($approxLength - $lengthOffset) visible characters were seen,
 *   - every element opened so far has been closed again, and
 *   - the next character is whitespace (or $cutWords is enabled).
 * If no such position exists the text is returned unchanged, so the result
 * can be longer than $approxLength.
 *
 * Note: the function sets the mbstring internal encoding to UTF-8 as a side
 * effect (kept for backward compatibility with existing callers).
 *
 * Known limitation: HTML entities (e.g. "&amp;") are counted character by
 * character and may be split when $cutWords is enabled.
 *
 * @param string $text         Text to cut (plain text or HTML, UTF-8).
 * @param int    $approxLength Approximate desired length in visible characters.
 * @param int    $lengthOffset Optional. How far below $approxLength the cut may happen.
 * @param bool   $cutWords     Optional. Whether the cut may happen in the middle of a word.
 * @param bool   $dotsAtEnd    Optional. Whether to append "..." when the text was shortened.
 * @return string The (possibly shortened) text.
 */
function htmlSubstr($text, $approxLength, $lengthOffset = 20, $cutWords = false, $dotsAtEnd = true) {
    mb_internal_encoding('UTF-8');

    // Nothing to do for missing/empty input (also avoids passing null to mb_strlen).
    if ($text === null || $text === '') {
        return $text;
    }

    $totalLength = mb_strlen($text);
    if ($totalLength <= $approxLength) {
        return $text;
    }

    // Split into characters once instead of calling mb_substr() per position.
    $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false || count($chars) !== $totalLength) {
        // Invalid UTF-8: fall back to the mbstring view of the string.
        $chars = array();
        for ($i = 0; $i < $totalLength; $i++) {
            $chars[] = mb_substr($text, $i, 1);
        }
    }

    // Elements that never have a closing tag, even when written without "/>".
    $voidElements = array(
        'area' => true, 'base' => true, 'br' => true, 'col' => true,
        'embed' => true, 'hr' => true, 'img' => true, 'input' => true,
        'link' => true, 'meta' => true, 'param' => true, 'source' => true,
        'track' => true, 'wbr' => true,
    );

    $minVisible   = $approxLength - $lengthOffset;
    $visibleCount = 0;      // characters seen outside of markup
    $openElements = 0;      // currently open (not yet closed) elements
    $state        = 'text'; // 'text', 'tag' or 'comment'
    $quoteChar    = '';     // active attribute quote while inside a tag
    $tagMarkup    = '';     // unquoted content of the tag being read

    for ($i = 0; $i < $totalLength; $i++) {
        $char     = $chars[$i];
        $nextChar = ($i + 1 < $totalLength) ? $chars[$i + 1] : '';

        if ($state === 'comment') {
            // A comment only ends at "-->".
            if ($char === '>' && $i >= 2 && $chars[$i - 1] === '-' && $chars[$i - 2] === '-') {
                $state = 'text';
            }
        } elseif ($state === 'tag') {
            if ($quoteChar !== '') {
                // Inside a quoted attribute value: ignore everything up to the closing quote.
                if ($char === $quoteChar) {
                    $quoteChar = '';
                }
            } elseif ($char === '"' || $char === "'") {
                $quoteChar = $char;
            } elseif ($char === '>') {
                $state  = 'text';
                $markup = trim($tagMarkup);
                if ($markup !== '' && $markup[0] === '/') {
                    // Closing tag; stray closers must not push the depth below zero.
                    if ($openElements > 0) {
                        $openElements--;
                    }
                } elseif ($markup !== ''
                    && $markup[0] !== '!'
                    && $markup[0] !== '?'
                    && substr($markup, -1) !== '/'
                ) {
                    // Opening tag, unless it is a void element such as <br>.
                    $name = strtolower(substr($markup, 0, strcspn($markup, " \t\r\n\f/")));
                    if (!isset($voidElements[$name])) {
                        $openElements++;
                    }
                }
                // Declarations (<!...>), processing instructions (<?...>) and
                // self-closing tags (<.../>) leave the depth untouched.
            } else {
                $tagMarkup .= $char;
            }
        } elseif ($char === '<' && $nextChar !== '' && preg_match('/^[A-Za-z\/!?]/', $nextChar) === 1) {
            // A "<" only starts markup when followed by a name, "/", "!" or "?";
            // otherwise (e.g. "a < b") it is ordinary text and handled below.
            if ($nextChar === '!'
                && $i + 3 < $totalLength
                && $chars[$i + 2] === '-'
                && $chars[$i + 3] === '-'
            ) {
                $state = 'comment';
            } else {
                $state     = 'tag';
                $tagMarkup = '';
                $quoteChar = '';
            }
        } else {
            $visibleCount++;
        }

        $atWordBoundary = ($nextChar === ' ' || $nextChar === "\n" || $nextChar === "\t" || $nextChar === "\r");

        // Cut only outside of markup, with all elements closed, and only when
        // something would actually be removed (otherwise no dots are needed).
        if ($state === 'text'
            && $openElements === 0
            && $visibleCount > $minVisible
            && $i + 1 < $totalLength
            && ($cutWords || $atWordBoundary)
        ) {
            $cut = mb_substr($text, 0, $i + 1);
            return $dotsAtEnd ? $cut . '...' : $cut;
        }
    }

    return $text;
}