update filter logic

This commit is contained in:
NguyenVanDat
2026-03-23 15:51:22 +07:00
parent 93519b7f2e
commit 83f339bd70
2 changed files with 133 additions and 23 deletions
@@ -7,6 +7,8 @@ namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
{
public class ChatFilter
{
private const string BadWordReplacement = "**";
private HashSet<string> badWordSet = new HashSet<string>();
// =========================
@@ -26,14 +28,16 @@ namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
// =========================
// NORMALIZE
// =========================
private string NormalizeRuntime(string input, out List<int> map)
private string NormalizeRuntime(string input, out List<int> charMap, out List<(int start, int end)> tokenRanges)
{
map = new List<int>();
charMap = new List<int>();
tokenRanges = new List<(int, int)>();
string formD = input.Normalize(NormalizationForm.FormD);
StringBuilder sb = new StringBuilder();
bool lastWasSpace = false;
bool inToken = false;
int tokenStart = -1;
for (int i = 0; i < formD.Length; i++)
{
@@ -47,22 +51,32 @@ namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
if (n == '\0')
{
if (!lastWasSpace)
if (inToken)
{
sb.Append(' ');
map.Add(i);
lastWasSpace = true;
tokenRanges.Add((tokenStart, charMap.Count - 1));
inToken = false;
}
sb.Append(' ');
charMap.Add(i);
}
else
{
if (!inToken)
{
tokenStart = charMap.Count;
inToken = true;
}
sb.Append(n);
map.Add(i);
lastWasSpace = false;
charMap.Add(i);
}
}
return sb.ToString().Trim();
if (inToken)
tokenRanges.Add((tokenStart, charMap.Count - 1));
return sb.ToString();
}
private char NormalizeChar(char c)
@@ -138,21 +152,75 @@ namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
return false;
}
/// <summary>
/// Collapses overlapping spans into a list of disjoint spans ordered by start index.
/// </summary>
/// <remarks>
/// Spans are inclusive [start, end] ranges. A span is folded into the previous one when it starts
/// at or before the previous span's end; spans that merely touch (start == previous end + 1)
/// are kept separate. Sorting is done on a private copy so the caller's list is never reordered.
/// </remarks>
/// <param name="spans">Inclusive spans in any order. May be null or empty.</param>
/// <returns>
/// The <paramref name="spans"/> reference itself when it is null or empty; otherwise a new list
/// holding the merged spans sorted by start.
/// </returns>
private static List<(int start, int end)> MergeOverlappingSpans(List<(int start, int end)> spans)
{
    if (spans == null || spans.Count == 0)
    {
        return spans;
    }

    // Sort a copy: List<T>.Sort works in place and would otherwise reorder the caller's list.
    var ordered = new List<(int start, int end)>(spans);
    ordered.Sort((a, b) => a.start.CompareTo(b.start));

    var merged = new List<(int start, int end)>(ordered.Count);
    foreach (var span in ordered)
    {
        int lastIndex = merged.Count - 1;
        if (lastIndex >= 0 && span.start <= merged[lastIndex].end)
        {
            // Overlap: extend the previous span instead of adding a new one.
            var last = merged[lastIndex];
            merged[lastIndex] = (last.start, Math.Max(last.end, span.end));
        }
        else
        {
            merged.Add(span);
        }
    }

    return merged;
}
/// <summary>
/// Rebuilds <paramref name="input"/> with each span replaced by <paramref name="replacement"/>.
/// </summary>
/// <remarks>
/// Spans are inclusive [start, end] character ranges and are expected to be sorted by start and
/// non-overlapping. Text between spans is copied through unchanged, and each span contributes
/// exactly one copy of <paramref name="replacement"/> regardless of the span's length.
/// Span bounds are clamped to the input and the copy cursor never moves backwards, so indices
/// that fall outside <paramref name="input"/> cannot make the underlying Append call throw and
/// malformed spans cannot duplicate text.
/// </remarks>
/// <param name="input">Text to rebuild. Returned unchanged when null.</param>
/// <param name="mergedSpans">Inclusive spans to replace. Null or empty returns <paramref name="input"/> unchanged.</param>
/// <param name="replacement">Text emitted once per span.</param>
/// <returns>The rebuilt string, or <paramref name="input"/> itself when there is nothing to replace.</returns>
private static string BuildFilteredString(string input, List<(int start, int end)> mergedSpans, string replacement)
{
    if (input == null || mergedSpans == null || mergedSpans.Count == 0)
    {
        return input;
    }

    var sb = new StringBuilder(input.Length);
    int cursor = 0;
    foreach (var (start, end) in mergedSpans)
    {
        // Keep the span start inside [cursor, input.Length] so the copy below is always in range.
        int clampedStart = Math.Min(Math.Max(start, cursor), input.Length);
        if (clampedStart > cursor)
        {
            sb.Append(input, cursor, clampedStart - cursor);
        }

        sb.Append(replacement);

        // Resume after the span, but never before the current position and never past the end.
        int resumeAt = end < input.Length ? end + 1 : input.Length;
        cursor = Math.Max(clampedStart, resumeAt);
    }

    if (cursor < input.Length)
    {
        sb.Append(input, cursor, input.Length - cursor);
    }

    return sb.ToString();
}
// =========================
// FILTER
// =========================
public string Filter(string input, out bool isValidWord)
{
isValidWord = false;
if (string.IsNullOrEmpty(input))
return input;
isValidWord = true;
List<int> map;
string normalized = NormalizeRuntime(input, out map);
List<int> charMap;
List<(int start, int end)> tokenRanges;
string normalized = NormalizeRuntime(input, out charMap, out tokenRanges);
var tokens = normalized.Split(' ', StringSplitOptions.RemoveEmptyEntries);
char[] result = input.ToCharArray();
var matchSpans = new List<(int start, int end)>();
for (int i = 0; i < tokens.Length; i++)
{
@@ -161,23 +229,27 @@ namespace BrewMonster.PerfectWorld.Scripts.Utility.ChatFilter
int startToken = i;
int endToken = i + len - 1;
int startChar = FindCharIndex(normalized, startToken, map);
int endChar = FindCharIndex(normalized, endToken, map);
if (startChar >= 0 && endChar >= 0)
if (startToken < tokenRanges.Count && endToken < tokenRanges.Count)
{
for (int k = startChar; k <= endChar && k < result.Length; k++)
{
result[k] = '*';
isValidWord = false;
}
int normStart = tokenRanges[startToken].start;
int normEnd = tokenRanges[endToken].end;
int realStart = charMap[normStart];
int realEnd = charMap[normEnd];
matchSpans.Add((realStart, realEnd));
isValidWord = false;
}
i += len - 1;
}
}
return new string(result);
if (matchSpans.Count == 0)
return input;
var merged = MergeOverlappingSpans(matchSpans);
return BuildFilteredString(input, merged, BadWordReplacement);
}
}
}
@@ -47,6 +47,11 @@ namespace BrewMonster.PerfectWorld.Editor.ChatFilter
RunTest();
}
if (GUILayout.Button("Run plan examples", GUILayout.Height(28)))
{
RunPlanExamplesFromCleanWords();
}
if (GUILayout.Button("Clear", GUILayout.Width(72), GUILayout.Height(28)))
{
_input = "";
@@ -78,5 +83,38 @@ namespace BrewMonster.PerfectWorld.Editor.ChatFilter
_hasRun = true;
Repaint();
}
/// <summary>
/// Initializes the chat filter service and checks it against a fixed list of sample messages
/// (per the original note: samples use entries from clean_words.txt; matches become "**" and
/// spaces are preserved). Logs one line per sample, a warning for each mismatch, and a summary
/// line when every sample matched.
/// </summary>
private static void RunPlanExamplesFromCleanWords()
{
    ChatFilterService.Init();

    // Each sample: raw message, expected filtered text, expected validity flag.
    (string Input, string Filtered, bool Valid)[] samples =
    {
        ("con chó", "con **", false),
        ("bitch cho", "** **", false),
        ("hello world", "hello world", true),
    };

    int failed = 0;
    for (int index = 0; index < samples.Length; index++)
    {
        string input = samples[index].Input;
        string expectedFiltered = samples[index].Filtered;
        bool expectedValid = samples[index].Valid;

        string got = ChatFilterService.Filter(input, out bool isValid);

        if (got == expectedFiltered && isValid == expectedValid)
        {
            Debug.Log($"[ChatFilter plan check] OK: \"{input}\" -> \"{got}\"");
            continue;
        }

        failed++;
        Debug.LogWarning(
            $"[ChatFilter plan check] FAIL\n in: {input}\n expected: {expectedFiltered} (valid={expectedValid})\n got: {got} (valid={isValid})");
    }

    if (failed == 0)
    {
        Debug.Log("[ChatFilter plan check] All examples passed.");
    }
}
}
}