// ES6 because this runs inside the React app (in the buzzer)

import RIFFWAVE from './riffwave'

// this code very much gains inspiration from https://github.com/Acedio/animalese.js

// ---- voice recording / output layout ----

// offset added to every read from the voice data (see toSpeech);
// presumably the size of the recording's WAV header -- TODO confirm
const HEADER_LENGTH = 44;

// samples per second; also written to the generated output's header
const HERTZ = 8000;

// how long (seconds) and how many samples each phonetic occupies in the voice data
const VOICE_LETTER_DURATION = 0.15;
const VOICE_SAMPLES_PER_LETTER = Math.floor(HERTZ * VOICE_LETTER_DURATION);

// how long (seconds) and how many samples each character occupies in the output
export const OUTPUT_LETTER_DURATION = 0.075;
const OUTPUT_SAMPLES_PER_LETTER = Math.floor(HERTZ * OUTPUT_LETTER_DURATION);

// Estimated average delay between Alex finishing reading the clue and the
// assistant turning on the buzzers.
// NOTE(review): the unit is not stated here (presumably milliseconds) --
// confirm against the code that consumes this value.
export const POST_CLUE_DELAY = 130;

// Single-character "signals" that stand in for phonetics which are not a plain
// a-z letter. Uppercase letters and digits are used because replaceChars
// lowercases the text and strips everything except a-z and spaces, so neither
// can appear in the text before replacePhonetics inserts them.

// long vowels are represented by their uppercase letter
const LONG_A = 'A';
const LONG_E = 'E';
const LONG_I = 'I';
const LONG_O = 'O';

// char code the remaining signals are numbered from (48 is the code of '0')
const SIGNAL_START = 48;

// the remaining sounds take consecutive char codes SIGNAL_START + 1 .. + 8,
// assigned in exactly this order
const [OO, SH, CH, TH, WH, OW, OI, AW] = Array.from(
  { length: 8 },
  (_, position) => String.fromCharCode(SIGNAL_START + 1 + position)
);

// Layout of the voice file: the phonetics for a-z come first, followed by the
// long vowels, then some digraphs and diphthongs. Each signal char below is
// assigned the slot that follows the previous one, starting right after 'z'.
//
// NOTE(review): CH is listed before SH here, although SH is declared before CH
// among the signal constants. The order below is what decides which slot of
// the recording is played -- confirm it matches the actual voice file.
const Z_INDEX = inputCharIndex('z');
const charToVoiceIndex = new Map(
  [LONG_A, LONG_E, LONG_I, LONG_O, OO, CH, SH, TH, WH, OW, OI, AW]
    .map((signal, position) => [signal, Z_INDEX + 1 + position])
);

/**
 * Lowercases the text and rewrites everything that is not a plain letter into
 * something pronounceable (or removes it), leaving only a-z and spaces.
 *
 * @param {string} text - raw input text
 * @returns {string} text consisting solely of the characters a-z and ' '
 */
function replaceChars(text) {
  // Applied strictly top to bottom; the final rule must stay last because it
  // deletes every character the earlier rules did not rewrite.
  const substitutions = [
    // numbers get read one digit at a time
    [/1/g, 'wun '],
    [/2/g, 'too '],
    [/3/g, 'three '],
    [/4/g, 'foar '],
    [/5/g, 'fiev '],
    [/6/g, 'six '],
    [/7/g, 'seven '],
    [/8/g, 'ayt '],
    [/9/g, 'nien '],
    [/0/g, 'oh '],
    // symbols that are spoken as words
    [/&/g, 'and '],
    [/%/g, 'persent '],
    [/\+/g, 'plus '],
    [/_/g, 'blank '],
    // a parenthesized prefix at the very start of the text is ignored
    [/^\(.*?\)/, ''],
    // these punctuation marks get a longer pause
    [/,/g, '   '],
    [/:/g, '   '],
    [/;/g, '   '],
    // clear out everything else
    [/[^a-z ]/g, ''],
  ];

  return substitutions.reduce(
    (spoken, [pattern, replacement]) => spoken.replace(pattern, replacement),
    text.toLowerCase()
  );
}

/**
 * Collapses multi-character spellings into a single character: either the
 * 'signal' char that represents the sound, or a plain letter that sounds the
 * same.
 *
 * @param {string} text - lowercase text, as produced by replaceChars
 * @returns {string} text in which each matched sequence is one character
 */
function replacePhonetics(text) {
  // Order matters: rules run top to bottom over the result of the previous
  // one, so e.g. 'eigh' has to be handled before 'igh'.
  const rules = [
    // long vowels
    [/eigh/g, LONG_A],
    [/igh/g, LONG_I],
    [/ai/g, LONG_A],
    [/ay/g, LONG_A],
    [/ee/g, LONG_E],
    [/ie/g, LONG_I],
    [/oa/g, LONG_O],
    [/oh/g, LONG_O],
    // digraphs and diphthongs that have their own sound
    [/oo/g, OO],
    [/sh/g, SH],
    [/ch/g, CH],
    [/th/g, TH],
    [/wh/g, WH],
    [/ou/g, OW],
    [/ow/g, OW],
    [/oi/g, OI],
    [/oy/g, OI],
    [/au/g, AW],
    [/aw/g, AW],
    // pairs that reduce to an ordinary letter
    [/ph/g, 'f'],
    [/kn/g, 'n'],
    [/gn/g, 'n'],
    [/ck/g, 'k'],
  ];

  let spoken = text;
  for (const [pattern, sound] of rules) {
    spoken = spoken.replace(pattern, sound);
  }
  return spoken;
}

/**
 * Turns raw input text into the character sequence that gets voiced: first
 * replaceChars, then replacePhonetics, then surrounding whitespace is trimmed.
 *
 * @param {string} text - raw input text
 * @returns {string} preprocessed text, one character per output sound
 */
export function preprocessInput(text) {
  const lettersOnly = replaceChars(text);
  const phonetic = replacePhonetics(lettersOnly);
  return phonetic.trim();
}

/**
 * Zero-based distance of a character from 'a' ('a' -> 0, 'z' -> 25), computed
 * from the char code of the first character of `ch`.
 *
 * Kept as a function declaration: it is called at module level above this
 * definition and relies on hoisting.
 *
 * @param {string} ch - string whose first character is examined
 * @returns {number} char code of ch[0] minus the char code of 'a'
 */
function inputCharIndex(ch) {
  const code = ch.charCodeAt(0);
  const base = 'a'.charCodeAt(0);
  return code - base;
}

/**
 * Finds which slot of the voice recording holds the sound for one character
 * of preprocessed text.
 *
 * @param {string} c - a single character of preprocessed text
 * @returns {number} slot index: a-z map to 0..25 via inputCharIndex, signal
 *   chars map through charToVoiceIndex, anything else yields -1 (no sound)
 */
function voiceIndexForChar(c) {
  if (c >= 'a' && c <= 'z') {
    // "normal" single letter
    return inputCharIndex(c);
  }
  if (charToVoiceIndex.has(c)) {
    // "special" char that originally represented a multi-char sequence
    return charToVoiceIndex.get(c);
  }
  return -1;
}

/**
 * Renders text as speech by stitching together per-character sounds copied
 * out of a voice recording.
 *
 * The input is run through preprocessInput; every resulting character then
 * contributes exactly OUTPUT_SAMPLES_PER_LETTER samples to the output.
 * Characters without a sound (e.g. spaces) contribute silence.
 *
 * @param {ArrayLike<number>} voice - indexable sample data of the voice
 *   recording; reads start HEADER_LENGTH entries in, and each sound occupies
 *   VOICE_SAMPLES_PER_LETTER entries (presumably the raw bytes of a WAV
 *   file -- TODO confirm with the caller)
 * @param {string} input - raw text to speak
 * @param {number} [pitch=1] - step used when walking a sound's samples
 *   (output sample i reads source sample floor(i * pitch)). Defaults to 1 so
 *   that omitting it no longer yields NaN indices. Note that once
 *   (OUTPUT_SAMPLES_PER_LETTER - 1) * pitch reaches VOICE_SAMPLES_PER_LETTER,
 *   reads continue into the following sound's samples.
 * @returns {RIFFWAVE} wave object with sampleRate HERTZ and one channel,
 *   built from the assembled samples
 */
export default function toSpeech(voice, input, pitch = 1) {
  // sample value written for characters that have no sound
  const SILENCE = 127;

  const text = preprocessInput(input);
  const data = [];

  for (let inputIndex = 0; inputIndex < text.length; inputIndex++) {
    const voiceIndex = voiceIndexForChar(text[inputIndex]);
    const voiceStart = HEADER_LENGTH + VOICE_SAMPLES_PER_LETTER * voiceIndex;

    for (let i = 0; i < OUTPUT_SAMPLES_PER_LETTER; i++) {
      const sample = voiceIndex === -1
        ? undefined
        : voice[voiceStart + Math.floor(i * pitch)];

      // A read outside the recording (large or invalid pitch) yields
      // undefined; emit silence instead of putting a hole in the sample data.
      data.push(sample === undefined ? SILENCE : sample);
    }
  }

  const output = new RIFFWAVE();
  output.header.sampleRate = HERTZ;
  output.header.numChannels = 1;
  output.Make(data);

  return output;
}
