import syllables from 'constants/syllables';
import {AUTO_IGNORE, POLYSYLLABLE_THRESHOLD} from 'constants/vars';

// Module-level patterns shared by the text-analysis helpers in this file.

// A "word": starts and ends with a letter or digit (case-insensitive). Any non-whitespace
// characters may sit in between, so "don't" and "well-known" each match as a single word,
// while punctuation at either edge is left out of the match.
const wordRegex = new RegExp('[A-Z0-9](?:\\S*[A-Z0-9])*', 'gi');
// Conjunctions, both single words and multi-word phrases (e.g. "by the time"),
// matched on word boundaries and case-insensitively.
const conjRegex = new RegExp('\\b(?:for|and|nor|but|or|yet|so|after|although|as|because|before|by the time|even if|even though|if|in order that|in case|lest|once|since|than|that|though|till|unless|until|when|whenever|where|wherever|while)\\b', 'gi');
// Two or more upper-case letters, each optionally followed by a period (e.g. "NASA", "U.S.A").
// Case-sensitive: there is no "i" flag.
const acronymRegex = new RegExp('\\b(([A-Z]\\.?){2,})\\b', 'g');
// Every period; syllableCountInWord strips these from an acronym before measuring its length.
const acronymSeparatorRegex = new RegExp('\\.', 'g');
// Sentence break. Either:
//   - one of . ? ! : (optionally followed by a single or double quote) that is followed by
//     whitespace or the end of the string, and is NOT immediately preceded by
//     "ie" / "i.e" or "eg" / "e.g" (the negative lookbehind), or
//   - a newline (the lookbehind does not apply to this alternative).
const sentenceEndRegex = new RegExp('(?<!\\b(?:(?:i\\.?e)|(?:e\\.?g)))(?:[.?!:][\'"]?(?:\\s|$))|\\n', 'g');
// A question mark, optionally followed by a quote, then whitespace or the end of the string.
// Not global and not anchored, so it reports the first such occurrence anywhere in the input.
const questionEndRegex = new RegExp('\\?[\'"]?(\\s|$|\\n)');
// Heuristic syllable splitter: each match is a run of vowels (a, e, i, o, u, y) together with
// its leading consonants and, where the pattern allows, trailing consonant(s).
// syllableCountInWord uses the number of matches as its initial estimate.
const syllableRegex = new RegExp('[^aeiouy]*[aeiouy]+(?:[^aeiouy]*$|[^aeiouy](?=[^aeiouy]))?', 'gi');
// Letter patterns for which syllableCountInWord adds one to the estimate.
const syllableAddRegex = new RegExp('i[ao](?!n)|ie[nt]|ios|ism', 'i');
// Letter patterns for which syllableCountInWord subtracts one from the estimate:
// a word-final "es", "ed" or "e" (each guarded by a negative lookbehind listing exceptions),
// or "dge" anywhere in the word.
const syllableSubtractRegex = new RegExp(
    '(?<![^aeiouy]t|[aeiouys]s|ir|i|sh|dg)(es)$|(?<!t|ir|d|[^aeiouy]i)(ed)$|(?<!ir|[^aeiouy]l)e$|dge',
    'i');
// Used by syllableCountInWord to give URL-like tokens a syllable count of 0.
// NOTE(review): the leading [...] is a character class, not a group, so "http(s)://" and "www."
// are not matched as sequences. In effect the pattern accepts 2-256 characters from that class,
// then a period and 2-6 letters ending on a word boundary, then optional path/query characters.
// Any token shaped like "text.word" therefore matches - confirm this looseness is intended.
const urlRegex = new RegExp('[(http(s)?):\\/\\/(www\\.)?a-zA-Z0-9@:%._\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%_\\+.~#?&//=]*)', 'i');


/**
 * Counts the comma characters (,) contained in a string.
 * @param str value to inspect
 * @returns {number} number of commas; 0 when str is not a string or is empty/whitespace-only
 */
export const commaCountInString = (str) => {
    const isBlank = typeof str !== 'string' || str.trim() === '';
    if (isBlank) {
        return 0;
    }

    // Splitting on "," always yields exactly one more piece than there are commas
    return str.split(',').length - 1;
};

/**
 * Find all sentences in a string.
 *
 * The string is split at every match of sentenceEndRegex. The terminating punctuation
 * (and closing quote, if any) stays attached to the sentence; the whitespace/newline that
 * follows it is dropped. Whatever remains after the last break is returned as a final
 * sentence too, so a sentence that is still being typed is included.
 *
 * @param str text to split; a non-string or blank value yields an empty array
 * @param withIndices: if true, return array of objects including the index in str at which
 *                     each sentence's text starts
 *                     if false, just return array of sentence strings
 * @returns [{index: Number, text: String}] array of sentences (or String[] when withIndices is false)
 */
export const sentencesInString = (str, withIndices = false) => {
    // Same guard as commaCountInString: non-string input has no sentences instead of throwing
    if (typeof str !== 'string' || !str.trim()) {
        return [];
    }

    const sentences = [];
    let start = 0;

    for (const match of str.matchAll(sentenceEndRegex)) {
        const terminator = match[0];
        // Keep the punctuation/quote, drop the trailing whitespace (a bare newline trims to nothing)
        const end = match.index + terminator.trim().length;
        const text = str.slice(start, end);
        if (text.trim()) {
            sentences.push({text, index: start});
        }
        // The next sentence begins right after the full terminator, whitespace included
        start = match.index + terminator.length;
    }

    // Trailing text with no terminator yet (e.g. typing mid-sentence) still counts as a sentence
    const tail = str.slice(start);
    if (tail.trim()) {
        sentences.push({text: tail, index: start});
    }

    return withIndices ? sentences : sentences.map(sentence => sentence.text);
};

/**
 * Tells whether a sentence (usually one produced by sentencesInString) is a question,
 * i.e. whether it contains a match for questionEndRegex.
 * @param sentence string sentence to check
 * @returns {boolean} true if is a question, else false
 */
export const sentenceIsQuestion = sentence => {
    // String.prototype.match yields null when nothing matched
    const found = sentence.match(questionEndRegex);
    return found !== null;
};

/**
 * Find all words in a string (every match of wordRegex).
 * @param str text to search
 * @param withIndices: if true, return the raw match objects, whose `index` is the position
 *                     of the word's first char in str and whose [0] is the word itself
 *                     if false, just return array of word strings
 * @returns array of words
 */
export const wordsInString = (str, withIndices = false) => {
    const found = Array.from(str.matchAll(wordRegex));
    return withIndices ? found : found.map(([word]) => word);
};


/**
 * Find all conjunctions in a string (every match of conjRegex).
 * @param str text to search
 * @param withIndices: if true, return the raw match objects, whose `index` is the position
 *                     of the conjunction's first char in str and whose [0] is the conjunction
 *                     if false, just return array of conjunction strings
 * @returns array of conjunctions
 */
export const conjInString = (str, withIndices = false) => {
    const found = [];
    for (const match of str.matchAll(conjRegex)) {
        found.push(withIndices ? match : match[0]);
    }
    return found;
};


/**
 * Count the syllables in a word.
 *
 * Resolution order:
 *   1. URL-like tokens (urlRegex) count as 0 syllables.
 *   2. Acronyms (acronymRegex) count one syllable per character left after removing periods.
 *   3. Words present in the syllable dictionary use the stored value (lookup is lower-cased).
 *   4. Anything else is estimated with syllableRegex, adjusted by at most -1
 *      (syllableSubtractRegex) and at most +1 (syllableAddRegex).
 *
 * @param word single word to measure
 * @returns {number|*} syllable count (the dictionary's stored value when the word is found there)
 */
export const syllableCountInWord = word => {
    // Check if its a url
    if (word.match(urlRegex)) {
        return 0;
    }

    // Check if its an acronym
    if (word.match(acronymRegex)) {
        return word.replace(acronymSeparatorRegex, '').length;
    }

    // Dictionary lookup. Only the dictionary's OWN entries are consulted: a plain bracket lookup
    // would also find inherited members, e.g. "constructor" resolves to a function instead of a
    // count on a plain object, which then silently fails every numeric comparison.
    const key = word.toLowerCase();
    const known = Object.prototype.hasOwnProperty.call(syllables, key) ? syllables[key] : undefined;
    if (typeof known !== 'undefined') {
        return known;
    }

    // Otherwise use regex to guess syllables
    const syllableMatches = word.match(syllableRegex);
    if (!syllableMatches) {
        return 0;
    }

    let syllableCount = syllableMatches.length;
    if (word.match(syllableSubtractRegex)) syllableCount--;
    if (word.match(syllableAddRegex)) syllableCount++;
    return syllableCount;
};

/**
 * Gather character / word / sentence / paragraph counts for the editor content.
 *
 * All text-based counts are taken from the lower-cased plain text, so the unique-word
 * count is case-insensitive. Newlines are not counted as characters. A paragraph is any
 * block whose text is not blank.
 *
 * @param contentState editor content state; must provide getPlainText() and getBlocksAsArray()
 * @returns {{characters: number, sentences: number, uniqueWords: number, words: number, paragraphs: number}}
 */
export const stringCounts = contentState => {
    const text = contentState.getPlainText().toLowerCase();
    const words = wordsInString(text);
    const distinctWords = new Set(words);

    // Paragraphs are the non-empty blocks
    const paragraphs = contentState
        .getBlocksAsArray()
        .filter(block => block.text.trim())
        .length;

    const characters = text.replace(/\n/g, '').length;
    const sentences = sentencesInString(text, true).length;

    return {
        characters,
        uniqueWords: distinctWords.size,
        words: words?.length || 0,
        sentences,
        paragraphs
    };
};

/**
 * Calculate a bunch of things related to readability score (see SMOG: https://en.wikipedia.org/wiki/SMOG)
 *
 * Every block is split into sentences and every sentence into words. A word is skipped when:
 *   - it is in the exclusion list (case-insensitive) - this also bumps excludeCount, or
 *   - the entity at the word's first character has type 'IGNORED', or
 *   - its sentence has AUTO_IGNORE words or fewer and the word's entity type is not 'INCLUDED'.
 * Every remaining word adds to wordCount, and to polysyllableCount when it has at least
 * POLYSYLLABLE_THRESHOLD syllables. A sentence adds to sentenceCount only if at least one of
 * its words was not skipped.
 *
 * @param contentState = editor content state
 * @param excluded = string[] list of excluded words (optional)
 * @returns {object} {score, scoreNoRounded, polysyllableCount, sentenceCount, wordCount, excludeCount}
 *                   where score is scoreNoRounded rounded to one decimal, and both are 0 when
 *                   no sentence was counted
 */
export const calculateReadability = (contentState, excluded) => {
    // Lower-cased lookup set, built once: membership checks no longer rescan the whole list
    // for every word, and the caller's argument is left untouched
    const excludedWords = new Set((excluded || []).map(word => word.toLowerCase()));

    let polysyllableCount = 0;
    let sentenceCount = 0;
    let wordCount = 0; // Kinda unrelated but its more efficient to just count it here instead of its own function
    let excludeCount = 0; // Number of times an excluded word is seen

    for (const block of contentState.getBlocksAsArray()) {
        for (const sentence of sentencesInString(block.getText(), true)) {
            const words = wordsInString(sentence.text, true);

            // Short sentences are ignored as a whole unless a word is explicitly INCLUDED
            const isShortSentence = words.length <= AUTO_IGNORE;
            let sentenceIncluded = false;

            for (const word of words) {
                const wordText = word[0];

                // Exclude words that are in the exclusion list
                if (excludedWords.has(wordText.toLowerCase())) {
                    excludeCount++;
                    continue;
                }

                // Then check if the word is part of an entity
                // TODO: only checking first char of the word, find a better way?
                //  works for now, if you can only ignore entire words and not parts of them
                // sentence.index is relative to the block text, word.index to the sentence text
                const entityKey = block.getEntityAt(sentence.index + word.index);
                const entityType = entityKey ? contentState.getEntity(entityKey).getType() : null;

                if (entityType === 'IGNORED' || (entityType !== 'INCLUDED' && isShortSentence)) {
                    continue;
                }

                // Reaching this point means the word is included. Sentences consisting only of
                // excluded/ignored words never get here and so are not counted.
                sentenceIncluded = true;

                // Add to the word count, doesnt matter if it passes the polysyllable check
                wordCount++;

                // Add a polysyllable if word meets threshold requirement
                if (syllableCountInWord(wordText) >= POLYSYLLABLE_THRESHOLD) {
                    polysyllableCount++;
                }
            }

            if (sentenceIncluded) {
                sentenceCount++;
            }
        }
    }

    // SMOG grade; left at 0 when there is nothing to score (avoids dividing by zero)
    const score = sentenceCount > 0
        ? 1.043 * Math.sqrt(polysyllableCount * (30 / sentenceCount)) + 3.1291
        : 0;

    return {
        score: Math.round(score * 10) / 10,
        scoreNoRounded: score,
        polysyllableCount,
        sentenceCount,
        wordCount,
        excludeCount
    };
};
