/**
 * Word-count helpers for the Editor.js data object.
 *
 * `getWordCount(data)` takes the Editor.js data object and returns the
 * number of words in it.
 */

// Well-formed HTML character references only: named (&amp;), decimal (&#39;) or hex (&#x27;).
// A looser pattern such as /&[^;]+;/ would swallow everything between a bare "&" and the next ";".
const REGEX_AMPERSAND_ENTITIES = /&(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);/gi;
// Any HTML tag, opening or closing.
const REGEX_HTML_TAGS = /<[^>]+>/g;
// <br>, <br/> and <br /> sit between words, so they are turned into a space instead of being dropped.
const REGEX_LINE_BREAK_TAGS = /<br\s*\/?>/gi;
// Stripped before counting, so a token made only of these characters
// (e.g. a free-standing dash) is not counted as a word.
const REGEX_MID_WORD_PUNCTUATION = /['\-–,.]+/g;
// Runs of characters outside the ASCII range.
const REGEX_NONLATIN_CHARACTERS = /[^\u0000-\u007F]+/g; // eslint-disable-line no-control-regex
// Any single character that is NOT on the allow-list inside the class.
// NOTE(review): countWords strips non-ASCII characters before applying this pattern, so the
// non-ASCII entries of the allow-list can never matter there — confirm the intended order.
const REGEX_PICTOGRAPH_CHARACTERS = /[^0-9a-zA-Zㄱ-힣+×÷=%♤♡☆♧)(*&^/~#@!-:;,?`_|<>{}¥£€$◇■□●○•°※¤《》¡¿₩[\]"' \\]/g;
// The non-breaking-space entity, which separates words and therefore becomes a plain space.
const REGEX_SPACES = /&nbsp;/g;
// Any run of whitespace (spaces, tabs, newlines, non-breaking spaces, ...).
const REGEX_WHITESPACE = /\s+/g;

/**
 * Count the words in a string that may contain HTML markup.
 *
 * The string is reduced to plain ASCII text (tags, character references and
 * punctuation such as apostrophes, dashes, commas and periods are removed)
 * and the remaining whitespace-separated tokens are counted.
 *
 * NOTE(review): every non-ASCII character is removed before counting, so text
 * written entirely in a non-Latin script yields 0 — confirm this is intended.
 *
 * @param {string} string - text to count; any non-string value counts as 0
 * @return {number} Number of words
 */
export function countWords(string) {
  if (typeof string !== 'string') return 0;
  const text = string
    // Normalise whitespace first: the two stripping steps below would otherwise delete
    // tabs, newlines and non-breaking spaces and glue the neighbouring words together.
    .replace(REGEX_WHITESPACE, ' ')
    .replace(REGEX_NONLATIN_CHARACTERS, '')
    .replace(REGEX_PICTOGRAPH_CHARACTERS, '')
    // Line breaks must become a separator before the generic tag removal erases them.
    .replace(REGEX_LINE_BREAK_TAGS, ' ')
    .replace(REGEX_HTML_TAGS, '')
    .replace(REGEX_SPACES, ' ')
    .replace(REGEX_AMPERSAND_ENTITIES, '')
    .replace(REGEX_MID_WORD_PUNCTUATION, '')
    .trim();
  // After trim() there is no leading/trailing whitespace, so every split piece is a word.
  return text === '' ? 0 : text.split(/\s+/).length;
}

/**
 * Count the number of words in a single content block.
 *
 * Only text-based block types (paragraph, list, blockquote, etc...) are counted;
 * any other type (youtube, tweet, iframe, etc...) contributes 0, as does a
 * missing block or a block without a `data` payload.
 *
 * @param {object} block - article block object
 * @return {number} Number of words for this block
 */
export function numWordsPerBlock(block) {
  const blockTypes = ['blockquote', 'header', 'ol', 'paragraph', 'quote_indent', 'quote_pull', 'subheader', 'ul'];

  // Nothing to count for a missing block, a non-text type, or a text block with no data.
  if (!block || !blockTypes.includes(block.type) || !block.data) return 0;

  const { items, text } = block.data;
  // Blocks that provide an `items` array are counted item by item;
  // everything else is counted from `text`.
  if (Array.isArray(items)) return items.reduce((total, item) => total + countWords(item), 0);
  return countWords(text);
}

/**
 * Return the total number of words in an Editor.js data object.
 *
 * @param {object} data - the Editor.js data object; the word counts of the
 *   entries in its `blocks` array are summed
 * @return {number} Number of words across all blocks, or 0 when `data` is not
 *   an object with a `blocks` array
 */
export const getWordCount = (data) => {
  // `typeof null` is 'object', so null has to be rejected explicitly.
  if (data === null || typeof data !== 'object' || !Array.isArray(data.blocks)) return 0;

  return data.blocks.reduce((total, block) => total + numWordsPerBlock(block), 0);
};
