import {uniq} from "lodash";

// Lowercase connector words. While capitalized phrases are being collected,
// a token whose cleaned, lowercased form is in this list is skipped entirely:
// it is neither appended to the phrase in progress nor does it terminate it.
const excludedWords = ["and", "or", "on", "in"];

// Lowercase English determiners, single words only. When a sentence opens
// with one of these, the opener is never treated as part of a capitalized
// phrase, even if the following word is capitalized ("The White House" yields
// "White House"). Lookups compare against exactly one whitespace-delimited
// token, so multi-word determiners ("a few", "many a", ...) could never match
// and are intentionally not listed.
const excludedSentenceStartWords = [
  "a",
  "all",
  "an",
  "another",
  "any",
  "both",
  "certain",
  "each",
  "either",
  "enough",
  "every",
  "few",
  "fewer",
  "fewest",
  "last",
  "least",
  "less",
  "little",
  "many",
  "more",
  "most",
  "much",
  "neither",
  "next",
  "no",
  "none",
  "once",
  "one",
  "said",
  "several",
  "some",
  "sufficient",
  "that",
  "the",
  "these",
  "this",
  "those",
  "three",
  "thrice",
  "twice",
  "two",
  "us",
  "various",
  "we",
  "what",
  "whatever",
  "which",
  "whichever",
  "you",
  "zero",
];

// Matches a string whose first character is an ASCII uppercase letter (A-Z).
// Non-ASCII capitals such as "É" do not match.
// NOTE: the identifier is misspelled ("uppecase"); it is kept as-is because
// other code in this file refers to it by this name.
const uppecaseFirstLetterRegex = /^[A-Z]/;

/**
 * Collects the runs of consecutive capitalized words found in `words`,
 * starting at `startIndex`.
 *
 * Each word is cleaned with `clearWord` first. Words listed in
 * `excludedWords` are skipped without ending the run in progress, so
 * "Alice and Bob" produces the single phrase "Alice Bob". Any other word that
 * does not start with an uppercase ASCII letter ends the current run.
 *
 * @param {string[]} words - Whitespace-delimited tokens of one text segment.
 * @param {number} startIndex - Index of the first token to inspect.
 * @returns {string[]} Capitalized phrases in order of appearance.
 */
function collectCapitalizedRuns(words, startIndex) {
  const runs = [];
  let accumulator = "";
  for (let i = startIndex; i < words.length; i++) {
    const currentWord = clearWord(words[i]);
    if (!excludedWords.includes(currentWord.toLowerCase())) {
      if (uppecaseFirstLetterRegex.test(currentWord)) {
        accumulator = accumulator
          ? `${accumulator} ${currentWord}`
          : currentWord;
      } else if (accumulator) {
        runs.push(accumulator);
        accumulator = "";
      }
    }
  }
  if (accumulator) {
    runs.push(accumulator);
  }
  return runs;
}

/**
 * Extracts capitalized phrases (runs of consecutive words that start with an
 * uppercase ASCII letter) from free text.
 *
 * The text is cut into sentence-like segments on ".", ";" and line breaks,
 * then on commas, then on punctuation that touches a space. Because the first
 * word of a sentence is capitalized regardless of what it is, it is ignored
 * unless the word after it is capitalized as well and the first word is not
 * listed in `excludedSentenceStartWords`.
 *
 * @param {string} baseText - Text to scan; any falsy value yields `[]`.
 * @returns {string[]} Capitalized phrases with duplicates removed.
 */
function getCapitalizedWordsFromText(baseText) {
  const result = [];
  if (!baseText) {
    return result;
  }

  // ";" and line breaks end a sentence just like "."
  const dotSeparatedText = baseText.split(/[.;\n]/).filter(item => item);
  dotSeparatedText.forEach(dotSegment => {
    // Drop a leading marker made of an optional uppercase letter, 1-7
    // digits/dots and a space (e.g. "12 " or "A1 ")
    const clearDotSegment = dotSegment.replace(/^[A-Z]?[\d.]{1,7} /, "");
    const commaSeparatedText = clearDotSegment.split(",").filter(item => item);
    commaSeparatedText.forEach((commaSegment, commaSegmentIndex) => {
      // Cut at any non-word, non-space character that touches a space
      const punctuationSplittedText = commaSegment
        .split(/ +[^\w\s]|[^\w\s] +/)
        .filter(item => item);

      punctuationSplittedText.forEach(
        (punctuationSegment, punctuationSegmentIndex) => {
          const splittedText = punctuationSegment
            .split(/\s/)
            .filter(item => item);
          const isSentenceStart =
            commaSegmentIndex === 0 && punctuationSegmentIndex === 0;
          const firstWordLowercase = (splittedText[0] || "").toLowerCase();

          // Use the same capitalization test that is applied to every other
          // word, so that a digit or symbol after the first word is not
          // mistaken for a capitalized word.
          const isSecondWordCapitalized = uppecaseFirstLetterRegex.test(
            splittedText[1] || "",
          );
          const keepFirstWord =
            !isSentenceStart ||
            (isSecondWordCapitalized &&
              !excludedSentenceStartWords.includes(firstWordLowercase));

          result.push(
            ...collectCapitalizedRuns(splittedText, keepFirstWord ? 0 : 1),
          );
        },
      );
    });
  });
  return uniq(result);
}

/**
 * Strips decoration from a single token so that only the word itself remains.
 *
 * Two things are removed:
 *  - every "(s)" or "(s" (case-insensitive) anywhere in the token; the
 *    closing parenthesis is optional so that "Item(s" is handled as well;
 *  - the whole run of trailing ? ! " “ ” ( ) [ ] : characters, so `Bob?"`
 *    becomes `Bob` rather than `Bob?`.
 *
 * Leading punctuation is left untouched.
 * NOTE(review): because the "(s" pattern is not anchored, a token that starts
 * with "(s" (e.g. "(see") also loses those two characters — confirm whether
 * that is intended.
 *
 * @param {string} text - A single whitespace-delimited token.
 * @returns {string} The token without plural markers and trailing punctuation.
 */
function clearWord(text) {
  return text.replace(/\(s\)?|[?!"“”()[\]:]+$/gi, "");
}

export default getCapitalizedWordsFromText;
