add badword filter version 1

This commit is contained in:
NguyenVanDat
2026-03-23 10:46:10 +07:00
parent a91faac68f
commit f87dcbd7f8
10 changed files with 1671 additions and 2 deletions
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,7 @@
fileFormatVersion: 2
guid: 6ab087813f7c53349baf2a476e6904f1
TextScriptImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
@@ -1,5 +1,6 @@
using UnityEngine;
using System.Collections;
using BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter;
using UnityEngine.SceneManagement;
namespace BrewMonster
@@ -10,6 +11,7 @@ namespace BrewMonster
IEnumerator Start()
{
ChatFilterService.Init();
// Load the next scene after 1 second
yield return new WaitForSeconds(1f);
LoadNextScene();
@@ -20,8 +22,8 @@ namespace BrewMonster
#if TESTFAST
SceneManager.LoadSceneAsync(_nextSceneName,LoadSceneMode.Additive);
#else
SceneManager.LoadScene(_nextSceneName);
SceneManager.LoadScene(_nextSceneName);
#endif
}
}
}
}
@@ -0,0 +1,8 @@
fileFormatVersion: 2
guid: 89107298c9450ab4fae2d12e2426713e
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
@@ -0,0 +1,183 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
{
/// <summary>
/// Masks bad words in chat messages.
/// The message is split into normalized tokens (lower-cased, diacritics removed,
/// common leet substitutions folded to letters). Each token, alone or joined with
/// the tokens that follow it, is looked up in the bad-word set, and every hit is
/// replaced with '*' in the original text.
/// </summary>
public class ChatFilter
{
    // Upper bound on how many consecutive tokens are joined when probing the
    // bad-word set, so spaced-out spellings such as "d*m*m" are still caught.
    private const int MaxJoinedTokens = 3;

    private readonly HashSet<string> badWordSet = new HashSet<string>();

    // One normalized token plus the inclusive range of ORIGINAL input indices it
    // came from. Keeping both ends lets a hit be masked in full.
    private struct Token
    {
        public string Text;
        public int Start;
        public int End;
    }

    // =========================
    // BUILD
    // =========================

    /// <summary>
    /// Replaces the bad-word set with the given entries.
    /// Null, empty and whitespace-only entries are ignored; the rest are trimmed
    /// and stored as-is (they are not normalized here).
    /// </summary>
    /// <param name="words">Entries to load; <c>null</c> simply clears the set.</param>
    public void Build(string[] words)
    {
        badWordSet.Clear();
        if (words == null)
            return;

        foreach (var w in words)
        {
            if (!string.IsNullOrWhiteSpace(w))
                badWordSet.Add(w.Trim());
        }
    }

    // =========================
    // NORMALIZE
    // =========================

    /// <summary>
    /// Splits <paramref name="input"/> into normalized tokens, recording for each
    /// token the first and last index it occupies in the original string.
    /// </summary>
    private static List<Token> Tokenize(string input)
    {
        var tokens = new List<Token>();
        var text = new StringBuilder();
        int start = 0;
        int end = 0;

        for (int i = 0; i < input.Length; i++)
        {
            char original = input[i];

            // Decompose one character at a time so every produced piece maps back
            // to index i of the input. Normalizing the whole string first would
            // change its length and break the index mapping. ASCII needs no
            // decomposition, and surrogate halves cannot be normalized alone
            // (they end up as separators below).
            string decomposed = null;
            if (original >= 0x80 && !char.IsSurrogate(original))
                decomposed = original.ToString().Normalize(NormalizationForm.FormD);

            int pieces = decomposed == null ? 1 : decomposed.Length;
            for (int j = 0; j < pieces; j++)
            {
                char piece = decomposed == null ? original : decomposed[j];

                if (char.GetUnicodeCategory(piece) == UnicodeCategory.NonSpacingMark)
                {
                    // A diacritic belongs to the token in progress, so a mask
                    // over that token must cover it as well.
                    if (text.Length > 0)
                        end = i;
                    continue;
                }

                char normalized = NormalizeChar(piece);
                if (normalized == '\0')
                {
                    // Separator: close the token in progress, if any.
                    FlushToken(tokens, text, start, end);
                    continue;
                }

                if (text.Length == 0)
                    start = i;
                text.Append(normalized);
                end = i;
            }
        }

        FlushToken(tokens, text, start, end);
        return tokens;
    }

    /// <summary>Appends the token accumulated in <paramref name="text"/> (if any) and resets the buffer.</summary>
    private static void FlushToken(List<Token> tokens, StringBuilder text, int start, int end)
    {
        if (text.Length == 0)
            return;

        tokens.Add(new Token { Text = text.ToString(), Start = start, End = end });
        text.Clear();
    }

    /// <summary>
    /// Maps one character to its normalized form: lower case, 'đ' to 'd', and the
    /// leet symbols @ 4 0 1 ! $ 3 to letters.
    /// </summary>
    /// <returns>The normalized character, or '\0' when the character is a separator.</returns>
    private static char NormalizeChar(char c)
    {
        // Invariant casing keeps matching independent of the device locale
        // (culture-sensitive lowering turns 'I' into a dotless 'ı' under Turkish).
        c = char.ToLowerInvariant(c);

        switch (c)
        {
            case 'đ': return 'd';
            case '@': return 'a';
            case '4': return 'a';
            case '0': return 'o';
            case '1': return 'i';
            case '!': return 'i';
            case '$': return 's';
            case '3': return 'e';
        }

        return char.IsLetterOrDigit(c) ? c : '\0';
    }

    // =========================
    // MATCH JOINED TOKENS
    // =========================

    /// <summary>
    /// Checks whether the token at <paramref name="index"/>, alone or joined with
    /// up to <see cref="MaxJoinedTokens"/> - 1 following tokens, is a bad word.
    /// The shortest matching run wins.
    /// </summary>
    /// <param name="tokens">Tokens of the message.</param>
    /// <param name="index">Index of the first token to test.</param>
    /// <param name="length">Number of tokens in the matching run, or 0 when nothing matched.</param>
    private bool MatchJoinedTokens(List<Token> tokens, int index, out int length)
    {
        length = 0;
        var merged = new StringBuilder();

        // Try joining up to MaxJoinedTokens tokens, growing the candidate one token at a time.
        for (int size = 1; size <= MaxJoinedTokens && index + size <= tokens.Count; size++)
        {
            merged.Append(tokens[index + size - 1].Text);
            if (badWordSet.Contains(merged.ToString()))
            {
                length = size;
                return true;
            }
        }

        return false;
    }

    // =========================
    // FILTER
    // =========================

    /// <summary>
    /// Returns <paramref name="input"/> with every detected bad word replaced by '*'.
    /// A hit is masked from the first character of its first token to the last
    /// character of its last token, including any separators in between.
    /// </summary>
    /// <param name="input">Message to filter; null or empty is returned unchanged.</param>
    /// <param name="isValidWord">
    /// <c>true</c> when the message is non-empty and nothing was masked;
    /// <c>false</c> when it is null/empty or at least one bad word was found.
    /// </param>
    /// <returns>The masked message, with the same length as the input.</returns>
    public string Filter(string input, out bool isValidWord)
    {
        isValidWord = false;
        if (string.IsNullOrEmpty(input))
            return input;

        isValidWord = true;
        List<Token> tokens = Tokenize(input);
        char[] result = null; // allocated only once something has to be masked

        for (int i = 0; i < tokens.Count; i++)
        {
            if (!MatchJoinedTokens(tokens, i, out int len))
                continue;

            if (result == null)
                result = input.ToCharArray();

            int startChar = tokens[i].Start;
            int endChar = tokens[i + len - 1].End;
            for (int k = startChar; k <= endChar; k++)
                result[k] = '*';

            isValidWord = false;
            i += len - 1; // resume after the run that was just masked
        }

        return result == null ? input : new string(result);
    }
}
}
@@ -0,0 +1,2 @@
fileFormatVersion: 2
guid: 436ad91ec808a7d4d80d00ab164d42b7
@@ -0,0 +1,68 @@
using System.IO;
using UnityEngine;
namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
{
/// <summary>
/// Static entry point to a single shared <see cref="ChatFilter"/> whose word list
/// is read from a text file under <c>Application.dataPath</c>.
/// Call <see cref="Init"/> before <see cref="Filter"/> or <see cref="ContainsBadWord"/>.
/// </summary>
public static class ChatFilterService
{
    private static ChatFilter _filter;
    private static bool _initialized = false;

    // =========================
    // INIT
    // =========================

    /// <summary>
    /// Loads the word list and builds the filter. Does nothing once a previous
    /// call has succeeded. If the file is missing or cannot be read, an error is
    /// logged and the service stays uninitialized, so a later call can retry.
    /// </summary>
    public static void Init()
    {
        if (_initialized) return;

        // NOTE(review): this reads a loose file below Application.dataPath; confirm
        // that the file exists at this location in player builds on every target platform.
        string path = Path.Combine(Application.dataPath, "PerfectWorld/Data/clean_words.txt");

        if (!File.Exists(path))
        {
            BMLogger.LogError("ChatFilter file not found: " + path);
            return;
        }

        string[] lines;
        try
        {
            lines = File.ReadAllLines(path);
        }
        catch (System.Exception e) when (e is IOException || e is System.UnauthorizedAccessException)
        {
            // The file can still vanish or be locked between the Exists check and
            // the read. Treat that like a missing file instead of letting the
            // exception escape into the caller.
            BMLogger.LogError("ChatFilter file could not be read: " + path + " (" + e.Message + ")");
            return;
        }

        // Build into a local first so the shared field never holds a half-built filter.
        var filter = new ChatFilter();
        filter.Build(lines);

        _filter = filter;
        _initialized = true;

        Debug.Log("ChatFilterV2 initialized");
    }

    // =========================
    // FILTER
    // =========================

    /// <summary>
    /// Masks bad words in <paramref name="input"/> using the shared filter.
    /// </summary>
    /// <param name="input">Message to filter.</param>
    /// <param name="isValidWord">
    /// Result flag forwarded from <see cref="ChatFilter.Filter"/>;
    /// always <c>false</c> when the service is not initialized.
    /// </param>
    /// <returns>The filtered message, or <paramref name="input"/> unchanged when the service is not initialized.</returns>
    public static string Filter(string input, out bool isValidWord)
    {
        isValidWord = false;

        if (!_initialized)
        {
            BMLogger.LogError("ChatFilter not initialized!");
            return input;
        }

        return _filter.Filter(input, out isValidWord);
    }

    // =========================
    // CHECK ONLY (no masking)
    // =========================

    /// <summary>
    /// Reports whether <paramref name="input"/> contains a bad word, without
    /// returning the masked text.
    /// </summary>
    /// <returns>
    /// <c>true</c> when a bad word was found; <c>false</c> otherwise, including
    /// when the service is not initialized or the input is null or empty.
    /// </returns>
    public static bool ContainsBadWord(string input)
    {
        if (!_initialized)
        {
            BMLogger.LogError("ChatFilter not initialized!");
            return false;
        }

        // ChatFilter.Filter flags null/empty input as "not valid"; that must not be
        // read as "contains a bad word".
        if (string.IsNullOrEmpty(input))
            return false;

        _filter.Filter(input, out var isValidWord);
        return !isValidWord;
    }
}
}
@@ -0,0 +1,2 @@
fileFormatVersion: 2
guid: b09bed44157324f46b0f980aa4da0720
@@ -0,0 +1,21 @@
using BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter;
using UnityEngine;
namespace BrewMonster
{
/// <summary>
/// Manual test harness for the chat filter: runs <see cref="input"/> through
/// <see cref="ChatFilterService"/> and logs the outcome.
/// </summary>
public class TestFilter : MonoBehaviour
{
    // Text to run through the filter.
    public string input;

    /// <summary>
    /// Initializes the filter service, filters <see cref="input"/> and logs the
    /// validity flag followed by the filtered text. Exposed through the "Filter"
    /// context-menu entry declared by the attribute below.
    /// </summary>
    [ContextMenu("Filter")]
    void Test()
    {
        ChatFilterService.Init();

        // Sample input to try: "d*m*m mày"
        bool isValidWord;
        string filtered = ChatFilterService.Filter(input, out isValidWord);

        Debug.Log("Is valid: " + isValidWord + ": " + filtered);
    }
}
}
@@ -0,0 +1,2 @@
fileFormatVersion: 2
guid: c235df647ffffd54a84fdae3527eedff