// Matcher that performs case-insensitive, whole-word redaction using
// an Aho–Corasick automaton for efficient multi-pattern search.
// Half-open range [start, end) of UTF-16 code-unit offsets into the text being
// scanned; `text.substring(start, end)` yields the matched span.
interface IMatch {
// Offset of the first matched code unit (inclusive).
start: number;
// Offset one past the last matched code unit (exclusive).
end: number;
}
// Short alias for IMatch.
type Match = IMatch;
/**
 * Case-insensitive, whole-word redactor for a fixed dictionary of terms.
 *
 * Every occurrence of a dictionary term that is not directly preceded or
 * followed by an ASCII word character (`[a-zA-Z0-9_]`) is replaced with the
 * literal `[REDACTED]`. Overlapping or touching occurrences collapse into a
 * single placeholder.
 *
 * When the optional `ahocorasick` package can be loaded it is used for the
 * multi-pattern search; otherwise a sequential `indexOf` scan is used.
 */
export class Matcher {
  /** Matches a single ASCII word character; hoisted so it is compiled once. */
  private static readonly WORD_CHAR = /[a-zA-Z0-9_]/;

  /** De-duplicated, lower-cased, non-empty dictionary terms. */
  private readonly lowerPatterns: string[];

  /** Aho–Corasick automaton over `lowerPatterns`, or `null` when unavailable. */
  private readonly ac: { search(text: string): unknown } | null;

  private constructor(dictionary: string[]) {
    // Normalize patterns once for case-insensitive matching. Each term is
    // registered both as `toLowerCase()` and in the length-preserving form
    // used for the haystack, so terms containing a character whose lowercase
    // expands (e.g. U+0130) still match either spelling. For ordinary terms
    // both forms are identical and the Set keeps a single copy.
    const dedup = new Set<string>();
    for (const entry of dictionary) {
      for (const form of [entry.toLowerCase(), Matcher.lowerPreservingLength(entry)]) {
        if (form.length > 0) {
          dedup.add(form);
        }
      }
    }
    this.lowerPatterns = Array.from(dedup);
    this.ac = Matcher.loadAutomaton(this.lowerPatterns);
  }

  /**
   * Creates a matcher for the given dictionary.
   *
   * @param dictionary Terms to redact; compared case-insensitively.
   * @returns A promise resolving to the ready-to-use matcher.
   * @throws Error if `dictionary` is empty.
   */
  static async build(dictionary: string[]): Promise<Matcher> {
    if (dictionary.length === 0) {
      throw new Error('Empty dictionary for matcher');
    }
    return new Matcher(dictionary);
  }

  /** Exposes whether the Aho–Corasick automaton is active (useful for tests/diagnostics). */
  public isAutomatonEnabled(): boolean {
    return this.ac !== null;
  }

  /**
   * Replaces every whole-word dictionary hit in `text` with `[REDACTED]`.
   *
   * @param text Input text; it is not modified.
   * @returns The redacted text, or `text` itself when nothing matched.
   */
  redact(text: string): string {
    const matches = this.findAllMatches(text);
    if (matches.length === 0) {
      return text;
    }

    // Sort by start and merge overlapping/adjacent ranges so that each merged
    // range produces exactly one placeholder.
    matches.sort((a, b) => a.start - b.start);
    const merged: Match[] = [];
    for (const m of matches) {
      const last = merged[merged.length - 1];
      if (last === undefined || m.start > last.end) {
        merged.push({ start: m.start, end: m.end });
      } else if (m.end > last.end) {
        last.end = m.end;
      }
    }

    // Stitch the untouched gaps and the placeholders back together.
    const parts: string[] = [];
    let cursor = 0;
    for (const m of merged) {
      parts.push(text.substring(cursor, m.start), '[REDACTED]');
      cursor = m.end;
    }
    parts.push(text.substring(cursor));
    return parts.join('');
  }

  /**
   * Lower-cases `text` while guaranteeing the result has the same number of
   * UTF-16 code units, so offsets found in the result are valid in `text`.
   *
   * `String.prototype.toLowerCase` can lengthen a string (U+0130 becomes two
   * code units). When that happens, each code point is lower-cased on its own
   * and any code point whose lowercase has a different length is kept as-is.
   */
  private static lowerPreservingLength(text: string): string {
    const lowered = text.toLowerCase();
    if (lowered.length === text.length) {
      return lowered;
    }
    let out = '';
    for (const ch of text) {
      const lc = ch.toLowerCase();
      out += lc.length === ch.length ? lc : ch;
    }
    return out;
  }

  /**
   * Tries to build an Aho–Corasick automaton from the optional `ahocorasick`
   * package. Any failure (package missing, unexpected export shape, the
   * constructor throwing, no `search` method) yields `null` so the caller
   * falls back to the `indexOf` scan instead of crashing.
   */
  private static loadAutomaton(patterns: string[]): { search(text: string): unknown } | null {
    try {
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      const mod: unknown = require('ahocorasick');
      // Accept both `module.exports = Ctor` and an interop `{ default: Ctor }`.
      const ctor =
        typeof mod === 'function' ? mod : (mod as { default?: unknown } | null | undefined)?.default;
      if (typeof ctor !== 'function') {
        return null;
      }
      const instance: unknown = new (ctor as new (words: string[]) => unknown)(patterns);
      const candidate = instance as { search?: unknown } | null | undefined;
      return candidate != null && typeof candidate.search === 'function'
        ? (candidate as { search(text: string): unknown })
        : null;
    } catch {
      return null;
    }
  }

  /**
   * Finds every whole-word occurrence of a dictionary term in `text`.
   * Results are unsorted and may overlap or repeat.
   */
  private findAllMatches(text: string): Match[] {
    // Search a lower-cased copy whose offsets line up with `text`; whole-word
    // checks and slicing are then done against the original text.
    const lowerText = Matcher.lowerPreservingLength(text);
    return this.ac !== null
      ? this.findWithAutomaton(this.ac, text, lowerText)
      : this.findWithIndexOf(text, lowerText);
  }

  /** Collects matches reported by the Aho–Corasick automaton. */
  private findWithAutomaton(
    automaton: { search(text: string): unknown },
    text: string,
    lowerText: string,
  ): Match[] {
    const matches: Match[] = [];
    // The ahocorasick package returns an array of results. Different versions
    // expose slightly different shapes; the common ones are handled and
    // anything unrecognised is skipped.
    const raw = automaton.search(lowerText);
    const results: unknown[] = Array.isArray(raw) ? raw : [];
    for (const r of results) {
      let endIdxRaw: unknown;
      let outputs: unknown;
      if (Array.isArray(r)) {
        // Common shape: [endIndex, outputs[]]
        endIdxRaw = r[0];
        outputs = r[1] || [];
      } else {
        const rec = r as Record<string, unknown> | null | undefined;
        endIdxRaw = rec?.index ?? rec?.end ?? null;
        outputs = rec?.outputs || rec?.matches || rec?.result || [];
      }
      if (typeof endIdxRaw !== 'number' || !Array.isArray(outputs)) {
        continue;
      }
      // The reported index is treated as the inclusive end of the match.
      const end = endIdxRaw + 1;
      for (const w of outputs) {
        if (typeof w !== 'string' || w.length === 0) continue;
        const start = end - w.length;
        if (start >= 0 && end <= lowerText.length && this.isWholeWord(text, start, end)) {
          matches.push({ start, end });
        }
      }
    }
    return matches;
  }

  /** Fallback: sequential indexOf search (still case-insensitive & whole-word). */
  private findWithIndexOf(text: string, lowerText: string): Match[] {
    const matches: Match[] = [];
    for (const pattern of this.lowerPatterns) {
      let index = lowerText.indexOf(pattern);
      while (index !== -1) {
        const end = index + pattern.length;
        if (this.isWholeWord(text, index, end)) {
          matches.push({ start: index, end });
        }
        // Advance by one so overlapping occurrences are also found.
        index = lowerText.indexOf(pattern, index + 1);
      }
    }
    return matches;
  }

  /**
   * Returns true when `[start, end)` in `text` is not directly preceded or
   * followed by a word character; the text edges count as boundaries.
   */
  private isWholeWord(text: string, start: number, end: number): boolean {
    if (start > 0 && this.isWordChar(text.charAt(start - 1))) {
      return false;
    }
    if (end < text.length && this.isWordChar(text.charAt(end))) {
      return false;
    }
    return true;
  }

  /**
   * ASCII-only word-character test (`[a-zA-Z0-9_]`). Non-ASCII letters are
   * treated as boundaries, which keeps terms matchable inside scripts that are
   * written without spaces.
   */
  private isWordChar(char: string): boolean {
    return Matcher.WORD_CHAR.test(char);
  }
}
|