All files / src/redaction redaction.service.ts

91.3% Statements 105/115
88.52% Branches 54/61
87.5% Functions 14/16
91.81% Lines 101/110

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 2118x   8x     8x 8x     8x 39x 39x 39x 39x     28x 1x     1x     27x   27x 27x   24x           24x 24x     24x 24x     24x     3x 3x         8x 2x   8x       23x 6x     17x 10x     7x       54x 23x 23x   31x 11x   25x 19x 19x 31x   19x   6x       7x   7x 31x 7x 7x   24x 6x   21x 15x 15x 30x 12x 11x 11x   1x     18x     15x   6x     7x       27x 27x 2x       25x 25x       25x       25x 25x 1x     24x       24x   24x 48x 48x 48x           48x 48x 48x   144x 144x 48x       24x 24x 48x 48x     24x       25x 25x 1x 1x               24x 24x 1x       23x 23x 23x 23x 23x 1x     23x                  
import { Storage } from '@google-cloud/storage';
 
import { Injectable, Logger } from '@nestjs/common';
 
import { RedactionOptions } from '../config/types';
import { Matcher } from './matcher';
import { redactGeneric } from './scanner-generic';
 
/**
 * Redacts sensitive terms from response payloads.
 *
 * The term dictionary is downloaded from a Google Cloud Storage bucket during
 * {@link RedactionService.initialize} and compiled into a {@link Matcher}.
 * Every string that gets redacted is first passed through `redactGeneric` and
 * then through the dictionary matcher.
 *
 * Environment variables read by this class:
 * - `MCP_PROXY_GCS_BUCKET` (required): bucket holding the dictionary files.
 * - `MCP_PROXY_GCS_FILES` (optional): comma-separated object names; defaults
 *   to `names.txt,emails.txt`.
 * - `MCP_PROXY_SERVICE_ACCOUNT_B64` or `MCP_PROXY_SERVICE_ACCOUNT`: service
 *   account JSON, base64-encoded or raw. The base64 variant wins when both
 *   are set.
 */
@Injectable()
export class RedactionService {
  private readonly logger = new Logger(RedactionService.name);
  private matcher: Matcher | null = null;
  private initError: Error | null = null;
  private initialized = false;
  /** In-flight (or completed) initialization, shared by overlapping callers. */
  private initPromise: Promise<void> | null = null;

  /**
   * Loads the dictionary and builds the matcher. Only the first call does the
   * work; the outcome is kept in `matcher` / `initError`.
   *
   * The first call never rejects: a failure is logged and recorded. Later
   * calls wait for the initialization to settle and then rethrow the recorded
   * error, if any.
   *
   * @throws The recorded initialization error, on repeat calls after a failure.
   */
  async initialize(): Promise<void> {
    if (this.initialized) {
      // `initialized` flips before loading finishes, so wait for the in-flight
      // load instead of returning while `matcher` and `initError` are unset.
      if (this.initPromise) {
        await this.initPromise;
      }
      if (this.initError) {
        throw this.initError;
      }
      return;
    }

    this.initialized = true;
    this.initPromise = this.loadMatcher();
    await this.initPromise;
  }

  /**
   * Returns the matcher together with the recorded initialization error,
   * triggering (or waiting for) initialization first.
   *
   * @returns `matcher` is `null` when initialization failed; `error` is `null`
   * when it succeeded.
   */
  async getService(): Promise<{ matcher: Matcher | null; error: Error | null }> {
    if (!this.initialized) {
      await this.initialize();
    } else if (this.initPromise) {
      // Another caller started initialization; do not report a half-built state.
      await this.initPromise;
    }
    return { matcher: this.matcher, error: this.initError };
  }

  /**
   * Redacts the strings inside `data` according to `config`.
   *
   * `data` is returned untouched when `config` is missing or disabled, or when
   * no matcher has been built. Otherwise a redacted copy is returned; the
   * input structure itself is not mutated.
   *
   * @param data Payload to redact: a string, array, object, or any nesting of them.
   * @param config Redaction options; an empty or missing `keys` list redacts every string.
   * @returns The redacted payload, or `data` itself when redaction does not apply.
   */
  redactResponse(data: any, config: RedactionOptions | null | undefined): any {
    if (!config || !config.enabled || !this.matcher) {
      return data;
    }

    if (!config.keys || config.keys.length === 0) {
      return this.redactAllStrings(data);
    }

    return this.redactByKeys(data, config.keys);
  }

  /**
   * Performs the actual initialization work. Never rejects: every failure is
   * logged and stored in `initError`.
   */
  private async loadMatcher(): Promise<void> {
    try {
      const dictionary = await this.readPIIListsFromGCS();

      if (dictionary.length === 0) {
        this.initError = new Error('Empty dictionary for matcher');
        this.logger.error('Redaction service init failed: empty dictionary');
        return;
      }

      this.matcher = await Matcher.build(dictionary);
      this.logger.log(
        [ 'Redaction service initialized with ', String(dictionary.length), ' terms' ].join('')
      );
      try {
        const engine = this.matcher.isAutomatonEnabled() ?
          'Aho–Corasick automaton' :
          'sequential scan';
        this.logger.log([ 'Redaction engine: ', engine ].join(''));
      } catch {
        // Deliberately best-effort: the engine name is diagnostic output only
        // and must not turn a successfully built matcher into an init failure.
      }
    } catch (error) {
      // Anything can be thrown; make sure what we store really is an Error.
      this.initError = error instanceof Error ? error : new Error(String(error));
      this.logger.error([ 'Redaction service init failed: ', String(error) ].join(''));
    }
  }

  /**
   * Runs one string through the generic scanner and then the dictionary matcher.
   *
   * @throws Error when no matcher has been built.
   */
  private redactString(value: string): string {
    if (!this.matcher) {
      throw new Error('Redaction matcher is not initialized');
    }
    return this.matcher.redact(redactGeneric(value));
  }

  /**
   * Recursively redacts every string found in `data`. Arrays and objects are
   * rebuilt; all other values are returned as they are.
   */
  private redactAllStrings(data: unknown): unknown {
    if (typeof data === 'string') {
      return this.redactString(data);
    }
    if (Array.isArray(data)) {
      return data.map((item) => this.redactAllStrings(item));
    }
    if (data && typeof data === 'object') {
      // Object.fromEntries defines own properties, so a payload key such as
      // "__proto__" is copied as data instead of replacing the prototype.
      return Object.fromEntries(
        Object.entries(data as Record<string, unknown>)
          .map(([ key, value ]): [string, unknown] => [ key, this.redactAllStrings(value) ])
      );
    }
    return data;
  }

  /**
   * Walks `data` and fully redacts the value stored under any key in `keys`.
   *
   * NOTE(review): the walker also redacts every string it reaches outside the
   * listed keys, so the output currently matches `redactAllStrings`. That
   * behaviour is preserved here because narrowing it could expose data that
   * callers get redacted today; confirm whether key-scoped redaction was the
   * intent.
   */
  private redactByKeys(data: unknown, keys: string[]): unknown {
    const keySet = new Set(keys);

    const walk = (node: unknown): unknown => {
      if (typeof node === 'string') {
        return this.redactString(node);
      }
      if (Array.isArray(node)) {
        return node.map((item) => walk(item));
      }
      if (node && typeof node === 'object') {
        return Object.fromEntries(
          Object.entries(node as Record<string, unknown>)
            .map(([ key, value ]): [string, unknown] => [
              key,
              keySet.has(key) ? this.redactAllStrings(value) : walk(value)
            ])
        );
      }
      return node;
    };

    return walk(data);
  }

  /**
   * Downloads the configured dictionary objects from GCS and returns their
   * non-empty, trimmed lines, in file order.
   *
   * @throws Error when the bucket, file list or credentials are missing, or
   * when an object does not exist in the bucket.
   */
  private async readPIIListsFromGCS(): Promise<string[]> {
    const bucketName = process.env.MCP_PROXY_GCS_BUCKET?.trim();
    if (!bucketName) {
      throw new Error('MCP_PROXY_GCS_BUCKET environment variable is not set');
    }

    // Get file names from environment variable, default to names.txt,emails.txt
    const filesEnv = process.env.MCP_PROXY_GCS_FILES?.trim();
    const fileNames = filesEnv ?
      filesEnv.split(',').map((f) => f.trim()).filter((f) => f.length > 0) :
      [ 'names.txt', 'emails.txt' ];

    if (fileNames.length === 0) {
      throw new Error('MCP_PROXY_GCS_FILES must contain at least one file name');
    }

    const serviceAccountJSON = this.getServiceAccountJSON();
    if (!serviceAccountJSON) {
      throw new Error('MCP_PROXY_SERVICE_ACCOUNT(_B64) is not set or empty');
    }

    const storage = new Storage({
      credentials: JSON.parse(serviceAccountJSON)
    });

    const bucket = storage.bucket(bucketName);

    const readObject = async (objectName: string): Promise<string[]> => {
      const file = bucket.file(objectName);
      const [ exists ] = await file.exists();
      if (!exists) {
        throw new Error(
          [ 'Object ', objectName, ' does not exist in bucket ', bucketName ].join('')
        );
      }

      const [ contents ] = await file.download();
      return contents
        .toString('utf-8')
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    };

    // Read all files and combine into a single dictionary
    const allTerms: string[] = [];
    for (const fileName of fileNames) {
      const terms = await readObject(fileName);
      // Append one by one: spreading a very large list into push() can exceed
      // the engine's argument limit and throw a RangeError.
      for (const term of terms) {
        allTerms.push(term);
      }
    }

    return allTerms;
  }

  /**
   * Resolves the service account credentials from the environment.
   *
   * `MCP_PROXY_SERVICE_ACCOUNT_B64` takes precedence over
   * `MCP_PROXY_SERVICE_ACCOUNT`. Both go through the same validation and
   * `private_key` normalization.
   *
   * @returns The credentials re-serialized as JSON, or `null` when neither
   * variable holds a value.
   * @throws Error when the base64 value decodes to nothing or the JSON is invalid.
   */
  private getServiceAccountJSON(): string | null {
    const b64 = process.env.MCP_PROXY_SERVICE_ACCOUNT_B64?.trim();
    if (b64) {
      // Buffer.from(..., 'base64') does not throw on malformed input; it skips
      // what it cannot decode, so an empty result is the failure signal.
      const decoded = Buffer.from(b64, 'base64').toString('utf-8').trim();
      if (!decoded) {
        throw new Error(
          'Failed to base64-decode MCP_PROXY_SERVICE_ACCOUNT_B64: decoded value is empty'
        );
      }
      return this.normalizeServiceAccountJSON(decoded);
    }

    const raw = process.env.MCP_PROXY_SERVICE_ACCOUNT?.trim();
    if (!raw) {
      return null;
    }

    return this.normalizeServiceAccountJSON(raw);
  }

  /**
   * Validates that `raw` is a JSON object and repairs a `private_key` whose
   * newlines arrived as the two characters `\` and `n`.
   *
   * @throws Error when `raw` is not valid JSON or is not a JSON object.
   */
  private normalizeServiceAccountJSON(raw: string): string {
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch (error) {
      throw new Error(
        [ 'Invalid service account JSON: ', String(error) ].join('')
      );
    }

    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Invalid service account JSON: expected a JSON object');
    }

    const credentials = parsed as Record<string, unknown>;
    const pk = credentials.private_key;
    // Try to normalize private_key newlines when JSON is embedded via shell
    if (typeof pk === 'string' && pk.includes('\\n') && !pk.includes('\n')) {
      credentials.private_key = pk.replace(/\\n/g, '\n');
    }
    return JSON.stringify(credentials);
  }
}