Efficient .NET dirty word filtering algorithm and application examples

高洛峰
Release: 2017-01-21 15:20:39
Original
1560 people have browsed it

The example in this article describes the efficient .NET dirty word filtering algorithm. Share it with everyone for your reference, the details are as follows:

BadWordsFilter.cs class

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Collections;
using System.Data;
namespace WNF
{
  public class BadWordsFilter
  {
    private HashSet<string> hash = new HashSet<string>(); //关键字
    private byte[] fastCheck = new byte[char.MaxValue];
    private byte[] fastLength = new byte[char.MaxValue];
    private BitArray charCheck = new BitArray(char.MaxValue);
    private BitArray endCheck = new BitArray(char.MaxValue);
    private int maxWordLength = 0;
    private int minWordLength = int.MaxValue;
    public BadWordsFilter()
    {
    }
    //初始化关键字
    public void Init(DataTable badwords)
    {
      for (int j = 0; j < badwords.Rows.Count; j++)
      {
        string word = badwords.Rows[j][0].ToString();
        maxWordLength = Math.Max(maxWordLength, word.Length);
        minWordLength = Math.Min(minWordLength, word.Length);
        for (int i = 0; i < 7 && i < word.Length; i++)
        {
          fastCheck[word[i]] |= (byte)(1 << i);
        }
        for (int i = 7; i < word.Length; i++)
        {
          fastCheck[word[i]] |= 0x80;
        }
        if (word.Length == 1)
        {
          charCheck[word[0]] = true;
        }
        else
        {
          fastLength[word[0]] |= (byte)(1 << (Math.Min(7, word.Length - 2)));
          endCheck[word[word.Length - 1]] = true;
          hash.Add(word);
        }
      }
    }
    public string Filter(string text, string mask)
    {
      throw new NotImplementedException();
    }
    //检查是否有关键字
    public bool HasBadWord(string text)
    {
      int index = 0;
      while (index < text.Length)
      {
        int count = 1;
        if (index > 0 || (fastCheck[text[index]] & 1) == 0)
        {
          while (index < text.Length - 1 && (fastCheck[text[++index]] & 1) == 0) ;
        }
        char begin = text[index];
        if (minWordLength == 1 && charCheck[begin])
        {
          return true;
        }
        for (int j = 1; j <= Math.Min(maxWordLength, text.Length - index - 1); j++)
        {
          char current = text[index + j];
          if ((fastCheck[current] & 1) == 0)
          {
            ++count;
          }
          if ((fastCheck[current] & (1 << Math.Min(j, 7))) == 0)
          {
            break;
          }
          if (j + 1 >= minWordLength)
          {
            if ((fastLength[begin] & (1 << Math.Min(j - 1, 7))) > 0 && endCheck[current])
            {
              string sub = text.Substring(index, j + 1);
              if (hash.Contains(sub))
              {
                return true;
              }
            }
          }
        }
        index += count;
      }
      return false;
    }
  }
}
Copy after login

Quote:

string sql = "select keywords from tb_keyword";
BadWordsFilter badwordfilter = new BadWordsFilter();
//初始化关键字
badwordfilter.Init(oEtb.GetDataSet(sql).Tables[0]);
//检查是否有存在关键字
bool a = badwordfilter.HasBadWord(TextBox1.Text);
if (a == true)
{
    Page.RegisterClientScriptBlock("a", "<script>alert(&#39;该评论含有不合法文字!&#39;)</script>");
}
else
{
    PingLun();//写入评论表
}
Copy after login

I hope this article will be helpful to everyone in asp.net programming .

For more efficient .NET dirty word filtering algorithms and application examples, please pay attention to the PHP Chinese website!

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template
About us Disclaimer Sitemap
php.cn:Public welfare online PHP training,Help PHP learners grow quickly!