import {
  getTranscriptWordsFromString,
  normalizeSpecialChars,
} from '@masala-lib/misc/editorial-string-utils';
import { NO_INDEX } from '@tikka/basic-types';
import { splitLines } from '@tikka/misc/string-utils';

/**
 * One entry of the scanner's match list: a script line together with the
 * tags that classify it and its position in the flattened word list.
 */
export type MatchElement = {
  /** Classification labels; ScriptScanner.doScan seeds this with the line's LINE_KIND. */
  tags: string[];
  /** The script line this element was created from. */
  line: string;
  /** Number of words already collected by the scanner when this line was reached. */
  wordPos: number;
};

/** Classification of a single script line, as returned by getLineKind. */
export type LINE_KIND = 'EMPTY_LINE' | 'PARAGRAPH' | 'SENTENCE';

/**
 * Classifies one script line.
 *
 * - zero-length line     -> 'EMPTY_LINE'
 * - line starting with @ -> 'PARAGRAPH'
 * - anything else        -> 'SENTENCE' (including whitespace-only lines)
 */
export function getLineKind(line: string): LINE_KIND {
  if (line.length === 0) {
    return 'EMPTY_LINE';
  }
  return line.startsWith('@') ? 'PARAGRAPH' : 'SENTENCE';
}

/**
 * Breaks a script line into its words by delegating to
 * getTranscriptWordsFromString.
 */
export function splitWords(line: string): string[] {
  const words = getTranscriptWordsFromString(line);
  return words;
}

/** Returns true when `tag` is present in the element's tag list. */
export function hasTag(melement: MatchElement, tag: string): boolean {
  const { tags } = melement;
  return tags.includes(tag);
}

/**
 * Returns true when the element carries at least one of the given tags.
 * An empty `tags` list never matches.
 */
export function hasTags(melement: MatchElement, tags: string[]): boolean {
  return tags.some(candidate => hasTag(melement, candidate));
}

/** Appends `tag` to the element's tag list in place (duplicates are not filtered). */
export function addTag(melement: MatchElement, tag: string) {
  const { tags } = melement;
  tags.push(tag);
}

/**
 * Returns a new array holding, in their original order, the elements that
 * carry at least one of the given tags.
 */
export function filterMElementsWithTags(
  melements: MatchElement[],
  tags: string[]
) {
  const matching: MatchElement[] = [];
  for (const candidate of melements) {
    if (hasTags(candidate, tags)) {
      matching.push(candidate);
    }
  }
  return matching;
}

/**
 * Cleans raw script text before it is scanned line by line.
 *
 * Steps, in order:
 *  1. remove every U+FEFF (byte-order mark) character;
 *  2. cut the text at the first `STOP-HERE` marker (the marker and everything
 *     after it are discarded);
 *  3. run the result through `normalizeSpecialChars`;
 *  4. strip one to four leading spaces/tabs sitting directly in front of an
 *     `@` at the start of a line;
 *  5. remove line-leading `//!` markers together with the token that follows;
 *  6. trim trailing spaces/tabs from every line;
 *  7. glue a standalone `--` to the preceding word with a zero-width space
 *     (U+200B) so the pair is not split apart;
 *  8. remove every bracketed marker made only of letters/whitespace
 *     (e.g. `[a]`).
 *
 * @param text raw script text
 * @returns the normalized text
 * @throws Error when the text contains no `STOP-HERE` marker
 */
export function normalizeScriptText(text: string): string {
  text = text.replace(/\ufeff/g, '');

  const endToken = 'STOP-HERE';
  const endTokenIndex = text.indexOf(endToken);
  if (endTokenIndex === NO_INDEX) {
    throw new Error('missing STOP-HERE while importing script');
  }
  // keep only what precedes the marker; the marker itself is dropped too
  text = text.slice(0, endTokenIndex);

  text = normalizeSpecialChars(text);

  // trim leading spaces from directive lines for matching
  text = text.replace(/^[ \t]{1,4}(?=[@])/gm, '');
  text = text.replace(/^\s*\/\/!\s*\S*/gm, '');
  // normalize empty lines
  text = text.replace(/[ \t]+$/gm, '');

  // connect standalone -- to previous word via char that won't split
  text = text.replace(/\s+--\s*?$/gm, '\u200b--\r');
  text = text.replace(/\s+--\s+/g, '\u200b-- ');

  // TODO currently stripping those google comment markers because they can land places that make it
  // impossible for parser design to work, consider if there is other approach
  // The `g` flag is required: without it only the first marker is removed.
  text = text.replace(/\[[a-zA-Z\s]+?\]/g, '');
  return text;
}

/**
 * Walks normalized script text line by line, collecting the words of all
 * sentence lines and a MatchElement for each structurally relevant line.
 *
 * The constructor only normalizes the text; call doScan() to populate
 * `lines`, `matchElements` and the word list.
 */
export class ScriptScanner {
  text: string = null;
  lines: string[];
  _words: string[] = [];
  matchElements: MatchElement[] = [];

  constructor(text: string) {
    this.text = normalizeScriptText(text);
  }

  /**
   * Splits the text into lines and appends to `_words` / `matchElements`:
   * - sentence lines add their words and one element tagged 'SENTENCE';
   * - paragraph lines add one element tagged 'PARAGRAPH';
   * - an empty line adds an element tagged 'EMPTY_LINE' and 'PARAGRAPH' only
   *   when the line right after it is a sentence.
   * Each element's wordPos is the word count before its line was processed.
   */
  doScan(): void {
    this.lines = splitLines(this.text);
    const lastIndex = this.lines.length - 1;

    for (let index = 0; index <= lastIndex; index++) {
      const line = this.lines[index];
      const lineKind = getLineKind(line);
      const wordPos = this._words.length;

      switch (lineKind) {
        case 'SENTENCE': {
          const sentenceWords = splitWords(line);
          this._words.push(...sentenceWords);
          this.matchElements.push({ tags: [lineKind], line, wordPos });
          break;
        }
        case 'PARAGRAPH':
          this.matchElements.push({ tags: [lineKind], line, wordPos });
          break;
        case 'EMPTY_LINE': {
          // a blank line counts as a paragraph break only directly before a sentence
          const precedesSentence =
            index < lastIndex &&
            getLineKind(this.lines[index + 1]) === 'SENTENCE';
          if (precedesSentence) {
            this.matchElements.push({
              tags: [lineKind, 'PARAGRAPH'],
              line,
              wordPos,
            });
          }
          break;
        }
      }
    }
  }

  /** The collected words with every zero-width space (U+200B) turned into a regular space. */
  get words(): string[] {
    return this._words.map(word => word.replace(/\u200b/g, ' '));
  }
}
