Many programs sidestep this problem by having someone decide manually where to truncate the text, which is tedious.
What this implements: truncating a piece of text that contains HTML markup without leaving any enclosing tag unclosed.
A PHP version is also available: see "Automatically generate article abstracts [PHP version]".
The core part is as follows:
function Generate_Brief(text, length){
if(text.length < length) return text;
var Foremost = text.substr(0,length);
var re = /<(/?)(BODY|SCRIPT |P|DIV|H1|H2|H3|H4|H5|H6|ADDRESS|PRE|TABLE|TR|TD|TH|INPUT|SELECT|TEXTAREA|OBJECT|A|UL|OL|LI|BASE|META|LINK |HR|BR|PARAM|IMG|AREA|INPUT|SPAN)[^>]*(>?)/ig;
var Singlable = /BASE|META|LINK|HR|BR|PARAM|IMG| AREA|INPUT/i
var Stack = new Array(), posStack = new Array();
while(true){
var newone = re.exec(Foremost);
if(newone == null) break;
if(newone[1] == ""){
var Elem = newone[2];
if(Elem.match(Singlable) && newone[3]!= "") { Continue;
}
stack.push (newone [2] .touppercase ());
posstack.push (newone.index); ] == "") break;
}else{
var StackTop = Stack[Stack.length-1];
var End = newone[2].toUpperCase();
if(StackTop == End){
Stack.pop();
posStack.pop();
if(newone[3] == ""){
Foremost = Foremost ">";
} emost;
}