All files / src/redaction matcher.ts

79.74% Statements 63/79
65.57% Branches 40/61
100% Functions 8/8
81.08% Lines 60/74

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148                      9x           48x 48x 1177x 1177x 1177x     48x     48x 48x   48x       48x       49x 1x   48x         25x       80x 80x 20x       60x 60x 60x 70x 70x 69x 1x           60x 60x 60x 69x 27x   69x 69x   60x 29x   60x       80x 80x   80x     80x 80x   80x 80x 80x     80x 81x 81x   81x 81x 81x 81x 70x       80x                                         81x 36x 36x   77x 46x 46x   70x       82x        
// Matcher that performs case-insensitive, whole-word redaction using
// an Aho–Corasick automaton for efficient multi-pattern search.
 
// A half-open span [start, end) of character offsets into the text being
// redacted. Spans are produced by the matcher's search step and consumed by
// redact(), which slices the original text around them.
interface IMatch {
  // Offset of the first character covered by the span (inclusive).
  start: number;
  // Offset one past the last character covered by the span (exclusive).
  end: number;
}
// Short alias used throughout the matcher implementation.
type Match = IMatch;
 
/**
 * Case-insensitive, whole-word redactor over a fixed dictionary of terms.
 *
 * Instances are created through {@link Matcher.build}. When the optional
 * `ahocorasick` package can be loaded, all terms are searched in one pass
 * with its automaton; otherwise a sequential `indexOf` scan per term is used.
 * Both paths report the same spans for the same input.
 *
 * "Whole word" means the characters immediately before and after a match are
 * not ASCII word characters (`[a-zA-Z0-9_]`); non-ASCII letters are treated
 * as word boundaries.
 */
export class Matcher {
  /** ASCII word-character test used for the whole-word boundary check. */
  private static readonly WORD_CHAR = /[a-zA-Z0-9_]/;
  /** Text substituted for every redacted span. */
  private static readonly PLACEHOLDER = '[REDACTED]';

  /** Lower-cased, de-duplicated, non-empty dictionary terms. */
  private readonly lowerPatterns: string[];
  /** Aho–Corasick automaton instance, or null when the package is unavailable. */
  private readonly ac: { search(text: string): unknown } | null;

  private constructor(dictionary: string[]) {
    // Normalize patterns once for case-insensitive matching; empty terms are
    // dropped because they would match at every position.
    const dedup = new Set<string>();
    for (const term of dictionary) {
      const lowered = term.toLowerCase();
      if (lowered.length > 0) {
        dedup.add(lowered);
      }
    }
    this.lowerPatterns = Array.from(dedup);

    // The automaton is an optional dependency: a failed load is not an error,
    // it only selects the slower fallback search in findAllMatches.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let AhoCtor: any = null;
    try {
      // eslint-disable-next-line @typescript-eslint/no-var-requires
      AhoCtor = require('ahocorasick');
    } catch {
      AhoCtor = null;
    }
    this.ac = AhoCtor ? new AhoCtor(this.lowerPatterns) : null;
  }

  /**
   * Creates a matcher for the given dictionary.
   *
   * @param dictionary Terms to redact; matching ignores case.
   * @returns A promise resolving to the ready-to-use matcher.
   * @throws Error (as a rejected promise) when `dictionary` is empty.
   */
  static async build(dictionary: string[]): Promise<Matcher> {
    if (dictionary.length === 0) {
      throw new Error('Empty dictionary for matcher');
    }
    return new Matcher(dictionary);
  }

  /** Reports whether the Aho–Corasick automaton is active (useful for tests/diagnostics). */
  public isAutomatonEnabled(): boolean {
    return !!this.ac;
  }

  /**
   * Replaces every whole-word occurrence of a dictionary term with
   * `[REDACTED]`. Overlapping or touching occurrences collapse into a single
   * placeholder.
   *
   * @param text Input text; it is not modified.
   * @returns The redacted text, or `text` itself when nothing matched.
   */
  redact(text: string): string {
    const matches = this.findAllMatches(text);
    if (matches.length === 0) {
      return text;
    }

    // Sort by start, then fold each span into the previous one when they
    // overlap or touch (start <= previous end).
    matches.sort((a, b) => a.start - b.start);
    const merged: Match[] = [];
    for (const m of matches) {
      const last = merged[merged.length - 1];
      if (!last || m.start > last.end) {
        merged.push({ start: m.start, end: m.end });
      } else if (m.end > last.end) {
        last.end = m.end;
      }
    }

    // Stitch together the untouched gaps and one placeholder per merged span.
    const parts: string[] = [];
    let cursor = 0;
    for (const m of merged) {
      parts.push(text.substring(cursor, m.start), Matcher.PLACEHOLDER);
      cursor = m.end;
    }
    parts.push(text.substring(cursor));
    return parts.join('');
  }

  /**
   * Finds every whole-word occurrence of every dictionary term.
   *
   * @param text Text to search.
   * @returns Spans expressed as offsets into `text` (not into its lower-cased
   *   form), unsorted and possibly overlapping.
   */
  private findAllMatches(text: string): Match[] {
    const matches: Match[] = [];
    const folded = Matcher.foldWithOffsets(text);
    const lowerText = folded.lower;

    // Translates a span found in lowerText back to offsets in `text` and
    // records it when it sits on word boundaries. The positive form of the
    // bounds test also rejects NaN indices.
    const record = (lowerStart: number, lowerEnd: number): void => {
      if (!(lowerStart >= 0 && lowerEnd <= lowerText.length && lowerStart < lowerEnd)) {
        return;
      }
      const start = folded.starts?.[lowerStart] ?? lowerStart;
      const end = folded.ends?.[lowerEnd - 1] ?? lowerEnd;
      if (this.isWholeWord(text, start, end)) {
        matches.push({ start, end });
      }
    };

    if (this.ac) {
      // The ahocorasick package returns an array of results. Different versions
      // may expose slightly different shapes, so each entry is probed
      // conservatively and skipped when it cannot be interpreted.
      const raw = this.ac.search(lowerText);
      const results: unknown[] = Array.isArray(raw) ? raw : [];
      for (const r of results) {
        let endIdx: unknown = null;
        let outputs: unknown = [];
        if (Array.isArray(r)) {
          // Common shape: [endIndex, outputs[]]
          endIdx = r[0];
          outputs = r[1] || [];
        } else if (r !== null && typeof r === 'object') {
          const rec = r as Record<string, unknown>;
          endIdx = rec.index ?? rec.end ?? null;
          outputs = rec.outputs || rec.matches || rec.result || [];
        }
        if (typeof endIdx !== 'number' || !Number.isInteger(endIdx) || !Array.isArray(outputs)) {
          continue;
        }
        for (const w of outputs) {
          if (typeof w !== 'string' || w.length === 0) continue;
          // The reported index is treated as the inclusive end of the match;
          // convert to a half-open [start, end) span.
          const lowerEnd = endIdx + 1;
          record(lowerEnd - w.length, lowerEnd);
        }
      }
      return matches;
    }

    // Fallback: sequential indexOf search (still case-insensitive & whole-word).
    // Advancing by one after each hit keeps overlapping occurrences.
    for (const pattern of this.lowerPatterns) {
      let from = 0;
      for (;;) {
        const index = lowerText.indexOf(pattern, from);
        if (index === -1) break;
        record(index, index + pattern.length);
        from = index + 1;
      }
    }
    return matches;
  }

  /**
   * Lower-cases `text` and, when lower-casing changed its length, also
   * returns per-code-unit maps from the lower-cased string back to `text`.
   *
   * Some characters lower-case to more UTF-16 code units than they started
   * with (for example U+0130), which would otherwise shift every later
   * offset. When the lengths agree the plain `toLowerCase()` result is used
   * and the maps are null, meaning offsets are assumed to line up one-to-one.
   *
   * @returns `lower` — the searchable text; `starts[i]` / `ends[i]` — the
   *   offsets in `text` where the code point that produced `lower[i]` begins
   *   and ends (exclusive).
   */
  private static foldWithOffsets(
    text: string,
  ): { lower: string; starts: number[] | null; ends: number[] | null } {
    const lowered = text.toLowerCase();
    if (lowered.length === text.length) {
      return { lower: lowered, starts: null, ends: null };
    }

    // Slow path: lower-case one code point at a time so each output code unit
    // can be attributed to the input code point it came from.
    const pieces: string[] = [];
    const starts: number[] = [];
    const ends: number[] = [];
    let pos = 0;
    for (const ch of text) {
      const lc = ch.toLowerCase();
      const next = pos + ch.length;
      for (let i = 0; i < lc.length; i++) {
        starts.push(pos);
        ends.push(next);
      }
      pieces.push(lc);
      pos = next;
    }
    return { lower: pieces.join(''), starts, ends };
  }

  /**
   * Checks that the span `[start, end)` of `text` is not glued to a word
   * character on either side. The start and end of the text count as
   * boundaries.
   */
  private isWholeWord(text: string, start: number, end: number): boolean {
    if (start > 0 && this.isWordChar(text.charAt(start - 1))) {
      return false;
    }
    if (end < text.length && this.isWordChar(text.charAt(end))) {
      return false;
    }
    return true;
  }

  /** Returns true when `char` contains an ASCII letter, digit or underscore. */
  private isWordChar(char: string): boolean {
    return Matcher.WORD_CHAR.test(char);
  }
}