import { isEmpty, kebabCase } from 'lodash';
import ASCIIFolder from 'fold-to-ascii';
import { notEmpty } from '@utils/conditionals';
import { disdElipsis, elipsis, emdash } from '@utils/content-string-utils';
import {
  allPunctuationRegex,
  noQuotesPunctuation,
  punctuation,
  punctuationRegex,
  trimPunctuation,
  whitespaceRegex,
} from '@tikka/misc/string-utils';

// TODO remove disallowed chars from all the punctuation regexes once we feel the data is clean, so it won't affect chaat matching

/**
 * Turns arbitrary text into a slug by delegating to lodash `kebabCase`.
 *
 * Alternative left disabled in the original implementation:
 *   kebabCase(text.replace(/&/g, '-and-'))
 *
 * @param text - the text to slugify
 * @returns the `kebabCase` result for `text`
 */
export const slugify = (text: string): string => kebabCase(text);

//
// beware, largely duplicated in jwscript-scanner.fs (and falcon ?)
//

// content-string-utils
// const emdash = '\u2014';
// const elipsis = '\u2026';
// const disdElipsis = '\u22ef';

// Global matchers built from the typographic constants imported from
// content-string-utils, so the patterns follow whatever those constants hold.
const emdashRegex = new RegExp(emdash, 'g'); // every occurrence of `emdash`
const elipsisRegex = new RegExp(`[${elipsis}${disdElipsis}]`, 'g'); // either ellipsis variant (character class)

// Symbol sets spliced into the disallowed-character patterns further down.
// Note: the runtime value of `disdAlways` holds a single backslash character
// (written `\\` in the string literal).
const disdAlways = '{}<>\\|`^~*'; // disallowed for both structural and verbatim text
const disdOnlyVerbatim = '[]=+'; // only disallowed for verbatim (aside from smart quotes)

// https://unicode-table.com/en/sets/quotation-marks/
// Typographic quote variants; normalizeSpecialChars rewrites the double ones
// to a plain `"` and the single ones to a plain `'`.
const disdDoubleQuotes = '\u201c\u201d\u201f\u275d\u275e\u301d\u301e';
const disdSingleQuotes = '\u2018\u2019\u201b\u275b\u275c\u275f';
// lower quotes - allowed for now - needed by german content
// lower double quotes '\u201e\u2e42\u301f';
// lower single quotes '\u201a';

// Em-dash, both ellipsis variants and the typographic quotes (all rewritten by
// normalizeSpecialChars), followed by linefeed and carriage return. Spliced into
// both disallowed-character patterns below.
const autoTransformChars = `${emdash}${elipsis}${disdElipsis}${disdDoubleQuotes}${disdSingleQuotes}\n\r`;

// Global character-class matchers for the two typographic quote sets.
const disdDoubleQuotesRegex = new RegExp(`[${disdDoubleQuotes}]`, 'g');
const disdSingleQuoteRegex = new RegExp(`[${disdSingleQuotes}]`, 'g');

// export const otherDisallowedChars =
//   /[{}<>\\|`^~*\u201c\u201d\u2018\u2019\u2026\u22ef\u201e\u201f\u275d\u275e\u2e42\u301d\u301e\u301f\u201a\u201b\u275b\u275c\u275f\n]/;
/**
 * Matches any single character that is disallowed in structural content:
 * every character of `disdAlways` and of `autoTransformChars`.
 *
 * The characters are wrapped in a character class (as in the commented-out
 * literal above). Without the enclosing `[...]` the pattern was interpreted as
 * one literal sequence containing a mid-pattern `^` assertion and could never
 * match. Characters that are special inside a class (`\`, `]`, `^`, `-`) are
 * escaped so each one is matched literally.
 */
export const otherDisallowedChars = new RegExp(
  '[' +
    (disdAlways + autoTransformChars).replace(/[\\\]^-]/g, '\\$&') +
    ']'
);

// export const verbatimDisallowedChars =
//   /[[\]{}=<>\\|`^~+*\u201c\u201d\u2018\u2019\u2026\u22ef\u201e\u201f\u275d\u275e\u2e42\u301d\u301e\u301f\u201a\u201b\u275b\u275c\u275f\n]/;
/**
 * Matches any single character that is disallowed in verbatim text: every
 * character of `disdOnlyVerbatim`, `disdAlways` and `autoTransformChars`.
 *
 * The characters are wrapped in a character class (as in the commented-out
 * literal above). Previously the raw concatenation started with `[]`, which
 * JavaScript parses as an empty class that matches nothing, so the pattern
 * could never match. Characters that are special inside a class (`\`, `]`,
 * `^`, `-`) are escaped so each one is matched literally.
 */
export const verbatimDisallowedChars = new RegExp(
  '[' +
    (disdOnlyVerbatim + disdAlways + autoTransformChars).replace(
      /[\\\]^-]/g,
      '\\$&'
    ) +
    ']'
);

// `--` with at least one whitespace character on each side.
const standaloneEmDashRegex = /\s+--\s+/g;
// The `---` marker that joinStandaloneEmDashes writes and restoreEmDashes undoes.
const joinedEmDashRegex = /---/g;
// TODO make complete equivalence of punctuation here with punctuation in jw_script_processor.py
// const standalonePunctuationRegexOld =
//   /\s+([!"'()+,\-./:;<=>?[\]^_`{|}~¡¿—–]+)\s+/g;
// A run of characters from the imported `punctuation` set with whitespace on
// both sides; group 1 captures the run without the surrounding whitespace.
// assumes `punctuation` is already escaped for use inside a character class — TODO confirm
const standalonePunctuationRegex = new RegExp(
  `\\s+([${punctuation}]+)\\s+`,
  'g'
);

// const trailingStandalonePunctuationRegexOld =
//   /\s+([!"'()+,\-./:;<=>?[\]^_`{|}~¡¿—–]+)\s+$/g;
// Same shape as the standalone pattern but anchored to the end of the input;
// group 1 captures the punctuation run.
// NOTE(review): this needs at least one whitespace character AFTER the run, so
// punctuation that is the very last character does not match — confirm intended.
const trailingStandalonePunctuationRegex = new RegExp(
  `\\s+([${punctuation}]+)\\s+$`,
  'g'
);

const startingWhitespaceRegex = /^\s+/g; // whitespace run at the start of the input
const trailingWhitespaceRegex = /\s+$/g; // whitespace run at the end of the input
// Optional leading whitespace, then a `[...]` annotation at the start of the input.
// Group 1 captures the bracket content (whitespace right after `[` is skipped);
// whitespace following the `]` is part of the match.
const chapterNoteOrderRegex = /^\s*\[\s*([^\]]*)\]\s*/;
const quotesRegex = /['"]/g; // straight single and double quote characters

// const noQuotesTrailingPunctuationRegexOld =
//   /([!()+,\-./:;<=>?[\]^_`{|}~¡¿—–]+)\s*$/g;
// A run of characters from the imported `noQuotesPunctuation` set, followed by
// optional whitespace, at the end of the input; group 1 captures the run.
// presumably `noQuotesPunctuation` is the punctuation set minus quote characters — verify in @tikka/misc/string-utils
const noQuotesTrailingPunctuationRegex = new RegExp(
  `([${noQuotesPunctuation}]+)\\s*$`,
  'g'
);

// const noQuotesLeadingPunctuationRegexOld =
//   /^([!()+,\-./:;<=>?[\]^_`{|}~¡¿—–]+)\s*/g;
// A run of characters from the imported `noQuotesPunctuation` set at the start
// of the input, followed by optional whitespace; group 1 captures the run.
const noQuotesLeadingPunctuationRegex = new RegExp(
  `^([${noQuotesPunctuation}]+)\\s*`,
  'g'
);

/**
 * Reports whether `verbatimDisallowedChars` matches anywhere in `str`.
 *
 * @param str - the text to check
 * @returns true when the pattern matches, false otherwise
 */
export function hasVerbatimDisallowedChars(str: string) {
  const found = str.match(verbatimDisallowedChars);
  return found !== null;
}

/**
 * Reports whether `otherDisallowedChars` matches anywhere in `str`.
 *
 * @param str - the text to check
 * @returns true when the pattern matches, false otherwise
 */
export function hasOtherDisallowedChars(str: string) {
  const found = str.match(otherDisallowedChars);
  return found !== null;
}

/**
 * Reports whether `str` starts or ends with whitespace.
 *
 * @param str - the text to check; falsy input yields false
 * @returns true when there is a leading or a trailing whitespace run
 */
export function hasLeadingTrailingWhitespace(str: string) {
  if (!str) {
    return false;
  }
  const startsWithSpace = str.match(startingWhitespaceRegex) !== null;
  return startsWithSpace || str.match(trailingWhitespaceRegex) !== null;
}

/**
 * Rewrites typographic characters to their plain equivalents.
 *
 * Steps, in order: trim, em-dash to `--`, ellipsis variants to `...`,
 * typographic double quotes to `"`, typographic single quotes to `'`, and
 * finally runs of spaces / non-breaking spaces to one space.
 *
 * @param str - the text to normalize; falsy input is returned unchanged
 * @returns the normalized text
 */
export function normalizeSpecialChars(str: string) {
  if (!str) {
    return str;
  }
  let text = str.trim();
  text = text.replace(emdashRegex, '--');
  text = text.replace(elipsisRegex, '...');
  text = text.replace(disdDoubleQuotesRegex, '"');
  text = text.replace(disdSingleQuoteRegex, "'");
  return text.replace(/[ \u00a0]+/g, ' ');
}

// content-string-utils
// export function toUnicodePunctuation<T extends string | StringToString>(
//   obj: T
// ): T {
//   if (typeof obj === 'string') {
//     return toUnicodePunctuationStr(obj) as T; // assume we're not dealing with subclassed arg
//   } else {
//     return toUnicodePunctuationObj(obj as StringToString) as T;
//   }
// }

// export function toUnicodePunctuationObj(obj: StringToString): StringToString {
//   const result: StringToString = {};
//   for (const key in obj) {
//     result[key] = toUnicodePunctuationStr(obj[key]);
//   }
//   return result;
// }

// export function toUnicodePunctuationStr(str: string): string {
//   // avoid barfage from word group content map which might include boolean props
//   if (!str || !((str as any) instanceof String)) {
//     return str;
//   }
//   return normalizeSpecialChars(str) // not really expecting any special chars at this stage any more, but being paranoid for old, dirty data
//     .replace(/--/g, emdash)
//     .replace(/\.\.\./g, elipsis);
// }

/**
 * Removes every match of the imported `allPunctuationRegex` from `str`.
 *
 * @param str - the text to strip; falsy input is returned unchanged
 * @returns `str` with all matches deleted
 */
export function stripAllPunctuation(str: string): string {
  return str ? str.replace(allPunctuationRegex, '') : str;
}

// content-string-utils
// export function stripUnderscores(str: string): string {
//   if (!str) {
//     return str;
//   }
//   return str.replace(/_/g, '');
// }

// content-string-utils
// export function stripLinefeeds(str: string) {
//   if (!str) {
//     return str;
//   }
//   return str.replace(/\n/g, '').replace(/\r/g, '');
// }

// content-string-utils
// // used by the player data generation to transform em-dash and ellipsis markdown chars and strip
// // the underscores (italics assumed to be handled separately)
// export function transformTranscriptText(str: string): string {
//   return stripUnderscores(toUnicodePunctuation(str));
// }

// content-string-utils
// used for structural content and translation player data generation
// export function transformPlayerText(str: string): string {
//   if (isEmpty(str)) {
//     return '';
//   }
//   return stripLinefeeds(toUnicodePunctuation(str));
// }

/**
 * Attaches each whitespace-delimited `--` to the preceding text by replacing
 * the whole match (including surrounding whitespace) with `--- `.
 *
 * @param str - the text to process; falsy input is returned unchanged
 * @returns the text with standalone `--` occurrences joined
 */
export function joinStandaloneEmDashes(str: string) {
  // TODO with replacing with space hyphenated words may count as two words, what is correct?
  return str ? str.replace(standaloneEmDashRegex, '--- ') : str;
}

/**
 * Replaces every `---` marker in `str` with ` --` (space followed by two hyphens).
 *
 * @param str - the text to process; falsy input is returned unchanged
 * @returns the text with the markers replaced
 */
export function restoreEmDashes(str: string) {
  return str ? str.replace(joinedEmDashRegex, ' --') : str;
}

/**
 * Joins whitespace-delimited punctuation runs onto the preceding text.
 *
 * First pass: each `<ws><punct><ws>` match becomes `<punct> `. Second pass:
 * a match of the end-anchored variant becomes just `<punct>`.
 *
 * @param str - the text to process; falsy input is returned unchanged
 * @returns the text with standalone punctuation joined
 */
export function joinStandalonePunctuation(str: string) {
  if (!str) {
    return str;
  }
  // TODO need to handle standalone quotes " ' differently open should join with following and closing with preceding
  // TODO or should just have alarm and message when trying save edit with standalone quotes to simplify implementation?
  const innerJoined = str.replace(standalonePunctuationRegex, '$1 ');
  // TODO not sure will handle linefeed correctly, look JS regex doc and test
  return innerJoined.replace(trailingStandalonePunctuationRegex, '$1');
}

/**
 * Replaces every match of the imported `whitespaceRegex` with a single space
 * and removes leading and trailing whitespace.
 *
 * @param str - the text to process; falsy input is returned unchanged
 * @returns the whitespace-normalized text (empty string when only a space remains)
 */
export function normalizeWhiteSpace(str: string) {
  if (!str) {
    return str;
  }
  const collapsed = str.replace(whitespaceRegex, ' ');
  if (collapsed === ' ') {
    return '';
  }
  return collapsed
    .replace(startingWhitespaceRegex, '')
    .replace(trailingWhitespaceRegex, '');
}

/**
 * Applies joinStandaloneEmDashes and then joinStandalonePunctuation.
 *
 * @param str - the text to process
 * @returns the text after both joining passes
 */
export function normalizePunctuation(str: string) {
  return joinStandalonePunctuation(joinStandaloneEmDashes(str));
}

/**
 * Applies normalizePunctuation and then normalizeWhiteSpace.
 *
 * @param str - the transcript text to process
 * @returns the normalized text
 */
export function normalizeTranscriptText(str: string) {
  return normalizeWhiteSpace(normalizePunctuation(str));
}

/**
 * Placeholder for transcript validation: the body is currently empty, so the
 * function performs no checks and returns nothing.
 *
 * @param str - the transcript text (currently unused)
 */
export function validateTranscriptText(str: string) {
  // TODO check for standalone quotes or other things not dealt with by automatic normalization
}

/**
 * Splits `str` on single space characters. Consecutive spaces therefore
 * produce empty entries, and an empty string yields `['']`.
 */
function getWords(str: string) {
  const separator = ' ';
  return str.split(separator);
}

/**
 * Splits transcript text into words.
 *
 * The text is normalized with normalizeTranscriptText, split on spaces, and
 * each resulting word has its `---` markers turned back by restoreEmDashes.
 *
 * @param str - the transcript text
 * @returns the list of words; empty when the input or its normalized form is falsy
 */
export function getTranscriptWordsFromString(str: string) {
  const normalized = str ? normalizeTranscriptText(str) : '';
  if (!normalized) {
    return [];
  }
  const restored: string[] = [];
  for (const word of getWords(normalized)) {
    restored.push(restoreEmDashes(word));
  }
  return restored;
}

/**
 * Trims punctuation from `str` with the imported `trimPunctuation`, falling
 * back to a quote-preserving trim when the plain trim would flip the parity
 * (odd/even) of the straight-quote count.
 *
 * @param str - the usage text; falsy input is returned unchanged
 * @returns the original string when trimming did not change its length, the
 *   `trimPunctuation` result when quote parity is not an issue, otherwise
 *   `str` with only non-quote leading/trailing punctuation removed
 */
export function trimWordGroupUsagePunctuation(str: string) {
  if (!str) {
    return str;
  }
  const trimmed = trimPunctuation(str);
  if (trimmed.length === str.length) {
    // same length: nothing was removed, hand back the original
    return str;
  }

  const countQuotes = (text: string) => (text.match(quotesRegex) || []).length;

  const quotesBefore = countQuotes(str);
  if (quotesBefore === 0) {
    // no quotes in the input, so the plain trim cannot unbalance anything
    return trimmed;
  }
  const quotesAfter = countQuotes(trimmed);
  const sameParity = quotesBefore % 2 === quotesAfter % 2;
  if (quotesAfter === 0 || sameParity) {
    // either all quotes are gone or the odd/even balance is unchanged
    return trimmed;
  }

  // the plain trim changed the quote parity: trim again but leave quotes in place
  return str
    .replace(noQuotesLeadingPunctuationRegex, '')
    .replace(noQuotesTrailingPunctuationRegex, '');
}

/**
 * Reduces a word to a normalized form: punctuation removed, folded with
 * `ASCIIFolder.foldReplacing`, punctuation removed again, lower-cased, trimmed.
 *
 * @param word - the word to normalize; empty/null/undefined yields ''
 * @returns the normalized word
 */
export function strongNormalizeWord(word: string): string {
  // don't barf if null or undefined
  if (isEmpty(word)) {
    return '';
  }
  const prefiltered = word.replace(punctuationRegex, '');
  const folded = ASCIIFolder.foldReplacing(prefiltered);
  // looks like have issue with german punctuation persisting through first punctuation removal
  // now is ascii so can remove, maybe don't need first removal?
  const stripped = folded.replace(punctuationRegex, '');
  // note, the 'trim' is currently needed to handle some edgecase related to the em-dash hack
  return stripped.toLowerCase().trim();
}

/**
 * Applies strongNormalizeWord to every entry of `words`.
 *
 * @param words - the words to normalize
 * @returns a new array with the normalized words, in the same order
 */
export function strongNormalizeWordArray(words: string[]) {
  const normalizeOne = (entry: string) => strongNormalizeWord(entry);
  return words.map(normalizeOne);
}

/**
 * True when, after trimming punctuation, no character other than (possibly)
 * the first one changes under lower-casing, i.e. the first character is the
 * only capitalized one, or nothing is capitalized at all.
 * Used to determine if we should automatically downcase the vocab usage text.
 *
 * @param text - the text to inspect
 * @returns false for empty input and for input that is empty once
 *   `trimPunctuation` has been applied; otherwise the check described above
 */
export const onlyFirstCapitalized = (text: string): boolean => {
  if (isEmpty(text)) {
    return false;
  }
  const trimmed = trimPunctuation(text);
  // Input made up solely of punctuation presumably trims down to ''.
  // Without this guard `trimmed[0]` is undefined and the call below throws.
  if (!trimmed) {
    return false;
  }
  const firstLowered = trimmed[0].toLocaleLowerCase() + trimmed.slice(1);
  return firstLowered === trimmed.toLocaleLowerCase();
};

// is this really the best solution??
// https://stackoverflow.com/questions/31712808/how-to-force-javascript-to-deep-copy-a-string
/**
 * Returns a string equal to `original`, produced by prepending a space and
 * slicing it off again (the technique from the Stack Overflow question above).
 *
 * @param original - the string to copy
 * @returns a string with the same content as `original`
 */
export const forceCopyString = (original: string): string => {
  const padded = ' ' + original;
  return padded.slice(1);
};

/**
 * Compares two string arrays element by element using strict equality.
 *
 * @param a - first array
 * @param b - second array
 * @returns true when both arrays have the same length and equal entries at
 *   every index
 */
export function stringArraysAreEqual(a: string[], b: string[]): boolean {
  const length = a.length;
  if (length !== b.length) {
    return false;
  }

  for (let i = 0; i < length; i += 1) {
    if (a[i] !== b[i]) {
      return false;
    }
  }
  return true;
}

/**
 * True when `text` passes the imported `notEmpty` check and is not `N/A`
 * (compared case-insensitively via upper-casing).
 */
export const notEmptyOrNA = (text: string): boolean => {
  if (!notEmpty(text)) {
    return false;
  }
  return text.toUpperCase() !== 'N/A';
};

/**
 * True when `text` passes the imported `notEmpty` check and equals `N/A`
 * (compared case-insensitively via upper-casing).
 */
export const isNA = (text: string): boolean => {
  if (!notEmpty(text)) {
    return false;
  }
  return text.toUpperCase() === 'N/A';
};

/**
 * Fetches `url` with the global `fetch` and parses the response body as JSON.
 *
 * @param url - the address to fetch
 * @returns the parsed JSON value
 * @throws Error when `url` is empty; also propagates rejections from `fetch`
 *   and the SyntaxError thrown by `JSON.parse` on an invalid body
 */
export const fetchJson = async (url: string): Promise<any> => {
  if (isEmpty(url)) {
    throw new Error('fetchJson: missing source url');
  }
  // NOTE(review): the HTTP status is not inspected, so an error response with a
  // non-JSON body surfaces as a JSON parse error — confirm this is acceptable.
  const response = await global.fetch(url);
  const text = await response.text();
  // JSON.parse is synchronous, so there is nothing to await here
  return JSON.parse(text);
};

/**
 * Removes a leading `[...]` annotation (with the whitespace around it) from
 * `str`, as matched by `chapterNoteOrderRegex`.
 *
 * @param str - the note text; falsy input is returned unchanged
 * @returns the text without the leading annotation
 */
export function stripChapterNoteOrderAnnotation(str: string): string {
  return str ? str.replace(chapterNoteOrderRegex, '') : str;
}

/**
 * Extracts the content of a leading `[...]` annotation from `str`, as matched
 * by `chapterNoteOrderRegex`.
 *
 * The return type includes `null` because that is what the function returns
 * when no annotation is present; runtime behavior is unchanged.
 *
 * @param str - the note text; falsy input is returned unchanged
 * @returns the trimmed bracket content, or null when `str` does not start
 *   with a bracketed annotation
 */
export function getChapterNoteOrderAnnotation(str: string): string | null {
  if (!str) {
    return str;
  }
  const match = str.match(chapterNoteOrderRegex);
  return match ? match[1].trim() : null;
}
