/* 
The PII Redaction Module will have the following functionality:

Named entity recognition: It will have a function that detects 
named entities in a given text. This can be implemented using 
Natural Language Processing (NLP) libraries like compromise.js, 
or simply by checking against a list of common names if we only 
want to detect and remove first names.

Replacement of PII: It will replace detected named entities with 
generic nicknames. We can use a list of generic nicknames for replacement.
*/

import nlp from 'compromise';
import maleNames from 'datasets-male-first-names-en';
import femaleNames from 'datasets-female-first-names-en';

import PiiNicknames from './PiiNicknames.js';

// Combine both male and female first-name datasets into one array.
// redactPii tests the input text against every entry of this list, in
// addition to the people reported by compromise. Entries that occur in both
// datasets are kept here; redactPii de-duplicates its matches later.
const firstNames = [...maleNames, ...femaleNames];

// List of words that should not be replaced.
// redactPii swaps every occurrence of these words (matched case-insensitively,
// on word boundaries) for a placeholder before name detection runs, and puts
// them back once the nicknames have been substituted.
// May add historical figures and public personalities in the future.
const doNotReplace = ["See", "Will", "Say", "Way", "Any", "My", "Son"];

// List of very common English words that are unlikely to be names.
// Any entry of firstNames that appears in this list is skipped when redactPii
// scans the text against the first-name datasets. The lookup uses
// Array.prototype.includes, so it is an exact, case-sensitive comparison
// (NOTE(review): this presumes the dataset entries are capitalized the same
// way as the words below; confirm against the datasets).
const commonWords = [
  "Any", "My", "Son", "See", "Will", "Say",
  "Ace", "Aid", "Ally", "Amp", "Angel", "Ant", "Arch", "Arm", "Art", "Ash",
  "Bake", "Ball", "Band", "Bar", "Bark", "Bass", "Bat", "Bath", "Bear", "Belle",
  "Bill", "Bird", "Blaze", "Bolt", "Book", "Buck", "Bud", "Cane", "Cap", "Chip",
  "Clay", "Clove", "Coal", "Cove", "Dale", "Dash", "Dawn", "Dean", "Dice", "Dill",
  "Don", "Dove", "Drake", "Drew", "Duke", "Dusk", "Dust", "Fawn", "Fern", "Finn",
  "Fleet", "Flip", "Flow", "Gale", "Gem", "Gill", "Grace", "Grant", "Hale",
  "Hall", "Hart", "Herb", "Hope", "Hunt", "Ivy", "Jade", "Jazz",
  "Jet", "Jill", "Job", "Joy", "Lane", "Lark", "Lash", "Leaf", "Link",
  "Mace", "Mead", "Mint", "Moss", "Pace", "Palm", "Pat", "Penny"
];


/**
 * Build the replacement string for a detected name.
 *
 * @param {string} name - The detected name that is being redacted.
 * @returns {string} A nickname picked at random from PiiNicknames, preceded by
 *   whatever remains of `name` after every character matched by `.` is removed.
 */
const replaceNameWithNickname = (name) => {
  const randomIndex = Math.floor(Math.random() * PiiNicknames.length);
  // `.` matches every character except line terminators, so this strips the
  // whole name unless it contains line breaks (which are kept as a prefix).
  const leftover = name.replace(/./g, '');
  return leftover + PiiNicknames[randomIndex];
};

/**
 * Escape regular-expression metacharacters so `value` is matched literally
 * when it is embedded in a dynamically built RegExp.
 */
const escapeForRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

/**
 * Detect personal names in `text` and replace each one with a generic nickname.
 *
 * Names come from two sources: the people reported by compromise, and every
 * entry of `firstNames` that occurs in the text as a whole word (single-letter
 * names and entries listed in `commonWords` are ignored). Words listed in
 * `doNotReplace` are never redacted and keep the casing they had in the input.
 *
 * Known limitation: input that already contains text of the form `__TEMP<n>__`
 * can collide with the internal placeholders.
 *
 * @param {string} text - The text to redact.
 * @returns {{text: string, replacements: Array<{original: string, replacement: string}>, nicknameMap: Object<string, string>}}
 *   The redacted text, the list of substitutions in the order they were
 *   applied, and the same substitutions as an original -> nickname map.
 */
export const redactPii = (text) => {
  // Mask every protected word with a per-occurrence placeholder. The exact
  // matched spelling is remembered so that "will" is restored as "will",
  // not as the canonical "Will" from the doNotReplace list.
  const protectedWords = [];
  let redacted = text;
  for (const word of doNotReplace) {
    const wordPattern = new RegExp(`\\b${escapeForRegExp(word)}\\b`, 'gi');
    redacted = redacted.replace(wordPattern, (match) => {
      protectedWords.push(match);
      return `__TEMP${protectedWords.length - 1}__`;
    });
  }

  // Detection runs on a copy in which punctuation is turned into spaces; the
  // text that is returned keeps its punctuation. Placeholders contain none of
  // these characters, so masking first gives the same detection input as
  // masking afterwards.
  const detectionText = redacted.replace(/[.,;:!?(){}'"“”‘’—-]/g, ' ');

  // Candidates reported by compromise...
  const candidates = nlp(detectionText).people().out('array');

  // ...plus every dataset first name that occurs as a whole word.
  for (const name of firstNames) {
    if (name.length === 1 || commonWords.includes(name)) continue;
    const namePattern = new RegExp(`\\b${escapeForRegExp(name)}\\b`, 'i');
    if (namePattern.test(detectionText)) {
      candidates.push(name);
    }
  }

  // De-duplicate case-insensitively, keeping the first spelling encountered.
  const firstSpelling = new Map();
  for (const candidate of candidates) {
    const key = candidate.toLowerCase();
    if (!firstSpelling.has(key)) {
      firstSpelling.set(key, candidate);
    }
  }

  const replacements = [];
  const nicknameMap = {};
  for (const person of firstSpelling.values()) {
    // A blank entity would produce a pattern that matches at every word
    // boundary, so it is skipped along with the protected words.
    if (person.trim() === '' || doNotReplace.includes(person)) continue;

    const nickname = replaceNameWithNickname(person);
    const personPattern = new RegExp(`\\b${escapeForRegExp(person)}\\b`, 'gi');
    // A replacer function keeps `$` sequences in the nickname literal.
    redacted = redacted.replace(personPattern, () => nickname);
    replacements.push({ original: person, replacement: nickname });
    nicknameMap[person] = nickname;
  }

  // Put the protected words back exactly as they were written.
  redacted = redacted.replace(/__TEMP(\d+)__/g, (placeholder, index) => {
    const word = protectedWords[Number(index)];
    return word === undefined ? placeholder : word;
  });

  return {
    text: redacted,
    replacements: replacements,
    nicknameMap: nicknameMap,
  };
};