
便捷蛙
文章
背景:
网站接受官方审查时,需要对页面中的敏感、暴恐、广告、涉黄等不良关键词进行检索并过滤。
代码(未优化版本):
/**
 * Masks blocked keywords inside the text nodes of a DOM subtree.
 *
 * All match keywords are compiled into one alternation RegExp (longest keyword
 * first). A match is left untouched when any exclude keyword occurs in the few
 * characters surrounding it.
 *
 * @param {Array<string>} matchKeywords - Keywords to mask.
 * @param {Array<string>} [excludeKeywords] - Keywords that protect a nearby
 *   match, e.g. a longer word that contains a match keyword.
 * @param {Object} [options] - Configuration.
 * @param {boolean} [options.caseSensitive=false] - Match case-sensitively.
 * @param {boolean} [options.useRegExp=true] - Stored on `this.options`; not
 *   read anywhere in this class.
 * @param {string} [options.replacement='[已屏蔽]'] - Text that replaces a match.
 * @param {number} [options.contextRadius=2] - Characters inspected on each side
 *   of a match when looking for exclude keywords.
 * @throws {Error} If `matchKeywords` is not an array.
 */
class KeywordFilter {
  constructor(matchKeywords, excludeKeywords, options = {}) {
    if (!Array.isArray(matchKeywords)) {
      throw new Error('matchKeywords must be an array');
    }
    this.matchKeywords = this.prepareKeywords(matchKeywords);
    this.excludeKeywords = this.prepareKeywords(excludeKeywords || []);
    this.options = {
      caseSensitive: false,
      useRegExp: true,
      replacement: '[已屏蔽]',
      contextRadius: 2,
      ...options
    };
    // Compile once up front; the same RegExps are reused for every text node.
    this.prepareRegExps();
    // Counters and timestamps of the most recent filter() run.
    this.performance = {
      startTime: 0,
      endTime: 0,
      nodesProcessed: 0,
      replacements: 0
    };
  }

  /**
   * De-duplicates keywords, drops blank and non-string entries, and sorts the
   * rest longest-first so longer keywords win in the alternation.
   *
   * @param {Iterable<*>} keywords - Raw keyword list.
   * @returns {Array<string>} Cleaned keywords, longest first.
   */
  prepareKeywords(keywords) {
    return [...new Set(keywords)]
      .filter((kw) => typeof kw === 'string' && kw.trim() !== '')
      .sort((a, b) => b.length - a.length);
  }

  /**
   * Builds `matchRegExp` and `excludeRegExp` from the prepared keyword lists.
   * Both are left as `null` when there are no match keywords;
   * `excludeRegExp` is also `null` when there are no exclude keywords.
   */
  prepareRegExps() {
    // Clear first so a re-run never leaves a stale pattern behind.
    this.matchRegExp = null;
    this.excludeRegExp = null;
    if (this.matchKeywords.length === 0) return;

    const flags = this.options.caseSensitive ? 'g' : 'gi';
    const toAlternation = (keywords) =>
      new RegExp(keywords.map((kw) => this.escapeRegExp(kw)).join('|'), flags);

    this.matchRegExp = toAlternation(this.matchKeywords);
    if (this.excludeKeywords.length > 0) {
      this.excludeRegExp = toAlternation(this.excludeKeywords);
    }
  }

  /**
   * Escapes RegExp metacharacters so a keyword is matched literally.
   *
   * @param {string} string - Raw keyword.
   * @returns {string} Keyword safe to embed in a RegExp source.
   */
  escapeRegExp(string) {
    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Filters every eligible text node under `rootNode` and logs a summary.
   *
   * @param {Node} [rootNode] - Subtree root; `document.body` when omitted.
   * @returns {{success: boolean, nodesProcessed?: number, replacements?: number,
   *   time?: number, error?: string}} Counters and elapsed milliseconds on
   *   success, or the error message on failure.
   */
  filter(rootNode) {
    this.performance.startTime = performance.now();
    this.performance.nodesProcessed = 0;
    this.performance.replacements = 0;
    try {
      // Resolve the default inside the try so a missing document is reported
      // through the result object rather than thrown.
      this.processNode(rootNode == null ? document.body : rootNode);
      this.performance.endTime = performance.now();
      this.logPerformance();
      return {
        success: true,
        nodesProcessed: this.performance.nodesProcessed,
        replacements: this.performance.replacements,
        time: this.performance.endTime - this.performance.startTime
      };
    } catch (error) {
      console.error('Keyword filtering error:', error);
      return {
        success: false,
        error: error.message
      };
    }
  }

  /**
   * Collects the text nodes under `rootNode` that could contain a keyword and
   * runs `processTextNode` on each of them.
   *
   * @param {Node} rootNode - Subtree root to walk.
   */
  processNode(rootNode) {
    // Without match keywords there is nothing to look for.
    if (!this.matchRegExp) return;

    // Keywords are sorted longest-first, so the last one is the shortest.
    const shortestKeyword = this.matchKeywords[this.matchKeywords.length - 1].length;
    const skippedParents = new Set(['SCRIPT', 'STYLE', 'IFRAME']);

    const treeWalker = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT, {
      acceptNode(node) {
        const parent = node.parentNode;
        // nodeName is not upper-cased for non-HTML elements (e.g. SVG <style>).
        if (parent && skippedParents.has(String(parent.nodeName).toUpperCase())) {
          return NodeFilter.FILTER_REJECT;
        }
        const text = node.textContent;
        // Skip blank text and text too short to contain even the shortest keyword.
        if (!text || text.trim() === '' || text.length < shortestKeyword) {
          return NodeFilter.FILTER_REJECT;
        }
        return NodeFilter.FILTER_ACCEPT;
      }
    });

    // Collect first, then mutate, so the walk never sees edited nodes.
    const nodesToProcess = [];
    let currentNode = treeWalker.nextNode();
    while (currentNode) {
      nodesToProcess.push(currentNode);
      currentNode = treeWalker.nextNode();
    }

    nodesToProcess.forEach((node) => {
      this.processTextNode(node);
      this.performance.nodesProcessed++;
    });
  }

  /**
   * Replaces every non-excluded keyword occurrence in one text node.
   *
   * @param {{textContent: string}} textNode - Node whose text is rewritten in
   *   place; `textContent` is only assigned when something was replaced.
   */
  processTextNode(textNode) {
    const matchRegExp = this.matchRegExp;
    if (!matchRegExp) return;

    const replacement = String(this.options.replacement);
    let text = textNode.textContent;
    let modified = false;

    // The RegExp is global and shared between nodes: always start at 0.
    matchRegExp.lastIndex = 0;
    let match = matchRegExp.exec(text);
    while (match !== null) {
      const matchedText = match[0];
      if (!this.shouldExclude(matchedText, text, match.index)) {
        text =
          text.substring(0, match.index) +
          replacement +
          text.substring(match.index + matchedText.length);
        // Resume right after the inserted text so it is never rescanned.
        matchRegExp.lastIndex = match.index + replacement.length;
        modified = true;
        this.performance.replacements++;
      }
      match = matchRegExp.exec(text);
    }

    if (modified) {
      textNode.textContent = text;
    }
  }

  /**
   * Decides whether a match is protected by an exclude keyword.
   *
   * @param {string} matchedText - The matched keyword text.
   * @param {string} fullText - Text the match was found in.
   * @param {number} matchIndex - Start index of the match in `fullText`.
   * @returns {boolean} True when an exclude keyword occurs within
   *   `options.contextRadius` characters around the match.
   */
  shouldExclude(matchedText, fullText, matchIndex) {
    const excludeRegExp = this.excludeRegExp;
    if (!excludeRegExp) return false;

    const radius = this.options.contextRadius;
    const contextStart = Math.max(0, matchIndex - radius);
    const contextEnd = Math.min(fullText.length, matchIndex + matchedText.length + radius);
    const context = fullText.substring(contextStart, contextEnd);

    // A global RegExp keeps lastIndex between test() calls; reset it so a
    // previous hit cannot make this call start mid-string and miss.
    excludeRegExp.lastIndex = 0;
    const excluded = excludeRegExp.test(context);
    excludeRegExp.lastIndex = 0;
    return excluded;
  }

  /**
   * Logs node count, replacement count and elapsed time of the last run.
   */
  logPerformance() {
    const { startTime, endTime, nodesProcessed, replacements } = this.performance;
    const time = endTime - startTime;
    console.log(
      [
        '关键词过滤完成:',
        `- 处理节点: ${nodesProcessed}`,
        `- 替换次数: ${replacements}`,
        `- 耗时: ${time.toFixed(2)}ms`
      ].join('\n')
    );
  }
}
// 示例用法: (example usage)
// Example usage: filter the page once the DOM is ready, and wire an optional
// "runTest" button that benchmarks the filter on generated content.
document.addEventListener('DOMContentLoaded', function() {
  const matchKeywords = ["首", "NO.1", "100%", "百度"];
  const excludeKeywords = ["首页", "%"];

  // Initial pass over the current page content.
  const filter = new KeywordFilter(matchKeywords, excludeKeywords);
  filter.filter();

  // The benchmark button is optional; pages without it only get the initial pass.
  const runTestButton = document.getElementById('runTest');
  if (!runTestButton) {
    return;
  }

  runTestButton.addEventListener('click', function() {
    // Generate a large amount of text containing the keywords.
    const testContent = document.createElement('div');
    testContent.innerHTML = `
<p>欢迎来到首页,我们提供NO.1的服务,100%的满意度保证。通过百度搜索可以找到我们。</p>
<p>这是首段测试文本,包含多个关键词如NO.1和100%等。</p>
<p>百度是中国最大的搜索引擎,首页设计简洁大方。</p>
<p>我们的产品质量达到100%合格率,服务排名NO.1。</p>
`.repeat(50); // repeated 50 times to enlarge the test data
    document.body.appendChild(testContent);

    try {
      // Time construction plus a full filter run over document.body.
      const startTime = performance.now();
      const testFilter = new KeywordFilter(matchKeywords, excludeKeywords);
      const testResult = testFilter.filter();
      const endTime = performance.now();
      alert(`性能测试完成!\n处理节点: ${testResult.nodesProcessed}\n替换次数: ${testResult.replacements}\n耗时: ${(endTime - startTime).toFixed(2)}ms`);
    } finally {
      // Always remove the generated content, even if the run above throws.
      document.body.removeChild(testContent);
    }
  });
});
// 下一篇> ("next article" link text carried over from the source page)
没有下一篇咯~
点击登录,秀出你的神评!