首页 > 编程知识 正文

敏感词检测 java,java敏感词汇过滤代码

时间:2023-05-04 18:25:45 阅读:275004 作者:3678

[Java]代码

import java.io.File;

import java.io.IOException;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import org.apache.commons.io.FileUtils;

import org.apache.commons.lang.StringUtils;

public class BadWordsUtil {

public static final int WORDS_MAX_LENGTH = 10;

public static final String BAD_WORDS_LIB_FILE_NAME = "badWords.txt";

//敏感词列表

public static Map[] badWordsList = null;

//敏感词索引

public static Map wordIndex = new HashMap();

/*

* 初始化敏感词库

*/

public static void initbadWordsList() throws IOException {

if (badWordsList == null) {

badWordsList = new Map[WORDS_MAX_LENGTH];

for (int i = 0; i < badWordsList.length; i++) {

badWordsList[i] = new HashMap();

}

}

//敏感词词库所在目录,这里为txt文本,一个敏感词一行

String path = BadWordsUtil.class.getClassLoader()

.getResource(BAD_WORDS_LIB_FILE_NAME)

.getPath();

System.out.println(path);

List words = FileUtils.readLines(new File(path),"UTF-8");

for (String w : words) {

if (StringUtils.isNotBlank(w)) {

//将敏感词按长度存入map

badWordsList[w.length()].put(w.toLowerCase(), "");

Integer index = wordIndex.get(w.substring(0, 1));

//生成敏感词索引,存入map

if (index == null) {

index = 0;

}

int x = (int) Math.pow(2, w.length());

index = (index | x);

wordIndex.put(w.substring(0, 1), index);

}

}

}

/**

* 检索敏感词

* @param content

* @return

*/

public static List searchBanWords(String content) {

if (badWordsList == null) {

try {

initbadWordsList();

} catch (IOException e) {

throw new RuntimeException(e);

}

}

List result = new ArrayList();

for (int i = 0; i < content.length(); i++) {

Integer index = wordIndex.get(content.substring(i, i + 1));

int p = 0;

while ((index != null) && (index > 0)) {

p++;

index = index >> 1;

String sub = "";

if ((i + p) < (content.length() - 1)) {

sub = content.substring(i, i + p);

} else {

sub = content.substring(i);

}

if (((index % 2) == 1) && badWordsList[p].containsKey(sub)) {

result.add(content.substring(i, i + p));

}

}

}

return result;

}

public static void main(String[] args) throws IOException {

String content = "含有敏感词的测试";

BadWordsUtil.initbadWordsList();

List badWordList = BadWordsUtil.searchBanWords(content);

if (badWordList.size() == 0){

System.out.println("没有找到敏感词!");

}else{

for(String s : badWordList){

System.out.println("找到敏感词:"+s);

}

}

}

}

版权申明:本站文章部分自网络,如有侵权,请联系:west999com@outlook.com

特别注意:本站所有转载文章言论不代表本站观点!

本站所提供的图片等素材,版权归原作者所有,如需使用,请与原作者联系。

版权声明:该文观点仅代表作者本人。处理文章:请发送邮件至 三1五14八八95#扣扣.com 举报,一经查实,本站将立刻删除。