http://www.mizuiren.com/485.html
最近在用node重写我的博客(憔悴了许多),既然从wordpress转过来的,wordpress基本功能还是不能变的,无形中加大了开发难度。比如说加密算法,数据表关系串联,数据库高并发IO性能优化……等等一大堆需要解决的问题,要做就要做最好的,用最优的方法,最快的算法,最安全的code来与太阳肩并肩。起初以为增删改查就完事了,其实没那么简单。就连读取文章看似很简单的事情其实并不是想象那样,直接从数据库里面读取出来的文章内容是不会换行的,尤其是没有标签的内容但是又换行了,这着实有点尴尬。
起初在分析换行的种种情况,思来想去突然明白了一个真理,直接看wordpress实现过程吧~,翻到它的源码,发现wordpress作者对文章内容处理做了孜孜不倦的字符串算法~,没错,就加一个p标签小小的事情,可是并不简单。它要考虑到各种标签,把它切割重组抛光打蜡,最后给你return一个完美的内容。以下是我把那个php处理函数转成javascript的代码:
/**
 * Converts plain-text line breaks into HTML paragraphs (a JavaScript port of
 * WordPress's `wpautop`).
 *
 * Runs of two or more newlines become paragraph boundaries and each paragraph
 * is wrapped in `<p>…</p>`. Block-level tags are kept out of the generated
 * paragraphs, and the contents of `<pre>…</pre>` are set aside before
 * processing and restored untouched at the end.
 *
 * @param {string} $pee - Text to convert. `null`/`undefined` and
 *   whitespace-only input yield an empty string.
 * @param {boolean} [$br] - When truthy, remaining single newlines inside
 *   paragraphs are turned into `<br />` (newlines inside `<script>`/`<style>`
 *   are preserved). Defaults to off.
 * @returns {string} The text with paragraph markup added.
 */
function autop($pee, $br) {
  if ($pee == null) {
    return '';
  }

  let text = String($pee);
  if (text.trim() === '') {
    return '';
  }

  const allBlocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|details|menu|summary)';
  // placeholder tag -> original "<pre ...>...</pre>" markup
  const preTags = new Map();

  text += '\n'; // just to make things a little easier, pad the end

  if (text.includes('<pre')) {
    // Swap every <pre> element for a placeholder so none of the rewriting
    // below can touch pre-formatted content.
    const parts = text.split('</pre>');
    const lastPart = parts.pop();
    text = '';
    parts.forEach((part, index) => {
      const start = part.indexOf('<pre');
      if (start === -1) {
        // Malformed HTML: a closing </pre> without an opening tag.
        text += part;
        return;
      }
      const placeholder = `<pre wp-pre-tag-${index}></pre>`;
      preTags.set(placeholder, `${part.slice(start)}</pre>`);
      text += part.slice(0, start) + placeholder;
    });
    text += lastPart;
  }

  text = text.replace(/<br \/>\s*<br \/>/gi, '\n\n');
  // Put block-level tags on their own lines so they become split points.
  text = text.replace(new RegExp(`(<${allBlocks}[^>]*>)`, 'gi'), '\n$1');
  text = text.replace(new RegExp(`(</${allBlocks}>)`, 'gi'), '$1\n\n');
  text = text.replace(/\r\n|\r/g, '\n'); // cross-platform newlines

  if (text.includes('<option')) {
    // no P/BR around option
    text = text.replace(/\s*<option/gi, '<option');
    text = text.replace(/<\/option>\s*/gi, '</option>');
  }

  if (text.includes('</object>')) {
    // no P/BR around param and embed
    text = text.replace(/(<object[^>]*>)\s*/gi, '$1');
    text = text.replace(/\s*<\/object>/g, '</object>');
    text = text.replace(/\s*(<\/?(?:param|embed)[^>]*>)\s*/gi, '$1');
  }

  if (text.includes('<source') || text.includes('<track')) {
    // no P/BR around source and track
    text = text.replace(/([<\[](?:audio|video)[^>\]]*[>\]])\s*/gi, '$1');
    text = text.replace(/\s*([<\[]\/(?:audio|video)[>\]])/gi, '$1');
    text = text.replace(/\s*(<(?:source|track)[^>]*>)\s*/gi, '$1');
  }

  text = text.replace(/\n\n+/g, '\n\n'); // take care of duplicates

  // Make paragraphs, including one at the end. Only blank chunks are dropped:
  // a loose `!= false` test would also discard a paragraph whose text is "0".
  text = text
    .split(/\n\s*\n/)
    .filter((chunk) => chunk.trim() !== '')
    .map((chunk) => `<p>${chunk.trim()}</p>\n`)
    .join('');

  // under certain strange conditions it could create a P of entirely whitespace
  text = text.replace(/<p>\s*<\/p>/gi, '');
  text = text.replace(/<p>([^<]+)<\/(div|address|form)>/gi, '<p>$1</p></$2>');
  // NOTE: "\\s" is required here. Inside a string/template literal a single
  // backslash is dropped and the pattern would match the letter "s" instead
  // of whitespace.
  text = text.replace(new RegExp(`<p>\\s*(</?${allBlocks}[^>]*>)\\s*</p>`, 'gi'), '$1'); // don't pee all over a tag
  text = text.replace(/<p>(<li.+?)<\/p>/gi, '$1'); // problem with nested lists
  text = text.replace(/<p><blockquote([^>]*)>/gi, '<blockquote$1><p>');
  text = text.replace(/<\/blockquote><\/p>/g, '</p></blockquote>');
  text = text.replace(new RegExp(`<p>\\s*(</?${allBlocks}[^>]*>)`, 'gi'), '$1');
  text = text.replace(new RegExp(`(</?${allBlocks}[^>]*>)\\s*</p>`, 'gi'), '$1');

  if ($br) {
    // Shield newlines inside <script>/<style> from the <br /> conversion.
    text = text.replace(/<(script|style)[\s\S]*?<\/\1>/g, (element) =>
      element.replace(/\n/g, '<WPPreserveNewline />'));

    // Optionally make line breaks. Equivalent to WordPress's
    // `(?<!<br />)\s*\n` => "<br />\n", written without a lookbehind:
    // a newline that directly follows an existing <br /> is left alone.
    text = text.replace(/(<br \/>)?(\s*\n)/g, (match, existingBreak, gap) => {
      if (!existingBreak) {
        return '<br />\n';
      }
      return gap.length === 1 ? match : `<br />${gap[0]}<br />\n`;
    });

    text = text.replace(/<WPPreserveNewline \/>/g, '\n');
  }

  text = text.replace(new RegExp(`(</?${allBlocks}[^>]*>)\\s*<br />`, 'gi'), '$1');
  text = text.replace(/<br \/>(\s*<\/?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)/gi, '$1');
  // PHP's `$` also matches just before a final newline; mirror that here
  // because the text always ends with "\n" at this point.
  text = text.replace(/\n<\/p>(\n?)$/, '</p>$1');

  // Restore the <pre> elements. A replacer function is used so that "$&",
  // "$$" etc. inside code samples are inserted literally.
  for (const [placeholder, original] of preTags) {
    text = text.replace(placeholder, () => original);
  }

  return text;
}
// Public API of this module: the paragraph formatter.
module.exports = { autop: autop };
使用的话
autop(content);
content为需要处理的内容。
转载请注明:有爱前端 » 用Javascript给文本段落加上p标签实现换行
