import TurndownService from "turndown";

import getRules from "./turndownRules";
import { HARD_BREAK, INLINE_NODES, SUBJECT_MARKDOWN, replaceHardbreaksPlaceHolder } from "./util";

/**
 * Converts an HTML string into Markdown.
 *
 * A `<subject>` element, when present, is taken out of the document and
 * emitted first as a single line prefixed with `SUBJECT_MARKDOWN`. The
 * remaining body markup is pre-processed by `parseHTML`, converted by the
 * shared turndown instance, stripped of trailing hard-break markers and
 * finally handed to `replaceHardbreaksPlaceHolder` (imported from `./util`).
 *
 * @param input HTML markup to convert
 * @returns the subject line (if any) followed by the converted body
 */
export default function htmlToMarkdown(input: string) {
  const doc = new DOMParser().parseFromString(input, "text/html");
  const subjectEl = doc.querySelector("subject");

  let subjectLine = "";

  if (subjectEl) {
    // Read the text before detaching the element from the document.
    subjectLine = `${SUBJECT_MARKDOWN} ${subjectEl.textContent || ""}\n`;
    subjectEl.remove();
  }

  const preparedHtml = parseHTML(doc.body.innerHTML);
  const markdown = turndownService.turndown(preparedHtml);
  const body = replaceHardbreaksPlaceHolder(removeTrailingHardBreak(markdown));

  return subjectLine + body;
}

// Custom conversion rules provided by ./turndownRules.
const { blockquote, fencedCodeBlock, mentions, paragraphsInsideListItems, strikethrough } =
  getRules();

// Module-wide converter: `<br>` is emitted as the HARD_BREAK marker, code
// blocks are fenced and headings use the ATX ("#") style.
const turndownService = new TurndownService({
  headingStyle: "atx",
  codeBlockStyle: "fenced",
  br: HARD_BREAK,
});

// Registration order is kept as-is, since it can affect which rule wins.
turndownService.addRule("Blockquote", blockquote);
turndownService.addRule("ParagraphsInsideListItems", paragraphsInsideListItems);
turndownService.addRule("Mentions", mentions);
turndownService.addRule("FencedCodeBlock", fencedCodeBlock);
turndownService.addRule("Strikethrough", strikethrough);

// Pre-processing before turndown runs: hard breaks are rewritten first,
// then empty paragraphs are padded and other empty elements removed.
const parseHTML = (html: string) => {
  const withHardBreaks = preserveHardBreaks(html);

  return parseEmpty(withHardBreaks);
};

/** True when `el` is a `P` element whose serialized content is the empty string. */
const isEmptyParagraph = (el: Element) => {
  if (el.tagName !== "P") return false;

  return el.innerHTML === "";
};

/**
 * Restructures the markup around `<br>` elements before it is handed to turndown.
 *
 * For every container that holds a `<br>` (looking through inline wrappers),
 * the container's children are redistributed into fresh `<p>` elements: each
 * `<br>` that is a direct child starts a new paragraph, unless the container
 * itself ends with a `<br>`. The paragraphs are collected in a new `<div>` that
 * replaces the original container. Containers that are direct children of a
 * `BLOCKQUOTE` or `LI` are left untouched.
 *
 * Afterwards every `blockquote > p` that is empty or ends with a `<br>` gets
 * one additional `<br>` appended.
 *
 * Note: new elements are created with the global `document`, not with the
 * document produced by `DOMParser`.
 *
 * @param htmlString HTML markup to process
 * @returns the serialized body of the rewritten document
 */
function preserveHardBreaks(htmlString: string) {
  const html = new DOMParser().parseFromString(htmlString, "text/html");

  // TODO figure out why this query is not working "br:first-of-type:not(blockquote *, li *)" - need to check installed packages
  // The selector yields, per parent element, only the first <br> among its siblings.
  Array.from(html.body.querySelectorAll("br:first-of-type"))
    // Collect { newParent, parent } pairs; the actual replacement happens in the
    // trailing forEach. Child nodes are nevertheless moved out of `parent`
    // while the pairs are being built.
    .reduce<{ newParent: HTMLElement; parent: HTMLElement }[]>((acc, br) => {
      // If the <br> sits inside inline markup, work on the parent of the
      // outermost inline wrapper; otherwise on the <br>'s own parent.
      const inlineAncestor = findInlineAncestor(br);
      const parent = inlineAncestor ? inlineAncestor.parentElement : br.parentElement;
      const isLastBreak = isLastChildHardBreak(parent);
      const newParent = document.createElement("div");

      // Todo remove once this query "br:first-of-type:not(blockquote *)" works in tests
      // Skip containers whose own parent is a blockquote or a list item.
      if (parent?.parentElement?.tagName === "BLOCKQUOTE") return acc;
      if (parent?.parentElement?.tagName === "LI") return acc;

      // When the container ends with a <br>, an extra <br> is placed at the
      // very start of the replacement <div>, ahead of the paragraphs.
      if (isLastBreak) appendBreak(newParent);

      /* istanbul ignore next */
      if (!parent) return acc; // just a type guard

      // Paragraphs that will receive the container's children; always at least one.
      const elements: ArrayMinLength<HTMLElement, 1> = [document.createElement("P")];

      // Paragraph currently being filled.
      let element = elements[0];

      // Iterate over a snapshot, because appendChild moves nodes out of `parent`.
      Array.from(parent.childNodes).forEach(child => {
        if (child instanceof Element && isInlineNode(child.nodeName)) {
          // Parent of the first <br> found inside this inline element
          // (undefined when the inline element contains no <br>).
          const closestParent = child.querySelector("br")?.parentElement;
          const leadingHardbreaks: ChildNode[] = [];
          const trailingHardbreaks: ChildNode[] = [];
          let hasText = false;

          // Split the <br> nodes at that level into those seen before the first
          // non-<br> node ("leading") and those seen after it ("trailing").
          for (const childNode of [...(closestParent?.childNodes || [])]) {
            if (childNode.nodeName === "BR") {
              !hasText && leadingHardbreaks.push(childNode);
              hasText && trailingHardbreaks.push(childNode);

              continue;
            }

            hasText = true;
          }

          // More than two leading breaks: add one extra <br> to the paragraph.
          leadingHardbreaks.length > 2 && appendBreak(element);

          // Move the leading breaks out of the inline element, in front of it.
          leadingHardbreaks.forEach(br => element.appendChild(br));

          element.appendChild(child);

          // Same treatment for the trailing breaks, placed after the inline element.
          trailingHardbreaks.length > 2 && appendBreak(element);

          trailingHardbreaks.forEach(br => element.appendChild(br));

          return;
        }

        // A direct <br> child starts a new paragraph; the <br> itself is not
        // carried over. This is skipped when the container ends with a <br>.
        if (child.nodeName === "BR" && !isLastBreak) {
          element = document.createElement("P");

          elements.push(element);

          return;
        }

        // Any other node is moved into the current paragraph.
        element.appendChild(child);
      });

      elements.forEach(el => newParent.appendChild(el));

      acc.push({ newParent, parent });

      return acc;
    }, [])
    // Swap each processed container for its replacement <div>; a container that
    // no longer has a parent element is silently skipped.
    .forEach(obj => obj.parent.parentElement?.replaceChild(obj.newParent, obj.parent));

  // Insert hack nodes in paragraphs that are nested in block quotes
  // where ProseMirror would insert a "ProseMirror-trailingBreak"
  Array.from(html.body.querySelectorAll("blockquote > p")).forEach(
    p => (isLastChildHardBreak(p) || isEmptyParagraph(p)) && appendBreak(p)
  );

  return html.body.innerHTML;
}

/**
 * Keeps empty paragraphs and drops every other empty element.
 *
 * An empty `<p>` receives a `<br>` child so that turndown does not discard
 * the empty line. `<br>` and `<hr>` elements are left alone. Any other element
 * whose serialized content is empty is removed from its parent.
 *
 * @param htmlString
 * @returns string
 */
function parseEmpty(htmlString: string) {
  const doc = new DOMParser().parseFromString(htmlString, "text/html");
  const keptWhenEmpty = ["BR", "HR"];

  for (const node of Array.from(doc.body.querySelectorAll("*"))) {
    const hasContent = node.innerHTML !== "";

    if (hasContent || keptWhenEmpty.includes(node.nodeName)) continue;

    if (isEmptyParagraph(node)) {
      // Pad the paragraph instead of removing it.
      appendBreak(node);
    } else {
      node.parentElement?.removeChild(node);
    }
  }

  return doc.body.innerHTML;
}

/** Reports whether the last child node of `node` is a `BR`; false for `null` or childless nodes. */
function isLastChildHardBreak(node: Element | null) {
  const tail = node?.lastChild;

  return tail != null && tail.nodeName === "BR";
}

/** Checks whether `nodeName` is listed in `INLINE_NODES`; a missing or empty name is looked up as "". */
function isInlineNode(nodeName = "") {
  const candidate = nodeName ? nodeName : "";

  return INLINE_NODES.includes(candidate);
}

/**
 * Walks up from `child` and returns the nearest ancestor that is an inline
 * node while its own parent is not, i.e. the outermost element of the
 * closest run of nested inline wrappers. Returns `null` when there is none.
 */
function findInlineAncestor(child: Element): HTMLElement | null {
  for (let current = child.parentElement; current !== null; current = current.parentElement) {
    if (!isInlineNode(current.nodeName)) continue;

    // Still inside inline markup: keep climbing.
    if (isInlineNode(current.parentElement?.nodeName)) continue;

    return current;
  }

  return null;
}

/** Appends a newly created `<br>` element (from the global `document`) as the last child of `el`. */
function appendBreak(el: Element) {
  el.appendChild(document.createElement("br"));
}

/**
 * Strips every trailing occurrence of the hard-break marker, i.e. two
 * newlines followed by `HARD_BREAK`, from the end of `input`. Per the
 * original note, this trailing break comes from Remirror's
 * TrailingNodeExtension.
 *
 * @param input
 * @returns string
 */
function removeTrailingHardBreak(input: string): string {
  const marker = `\n\n${HARD_BREAK}`;

  if (marker.length > input.length) return input;

  // `cut` is the exclusive end of the part that is kept.
  let cut = input.length;

  // endsWith(marker, cut) tests the characters directly before `cut` and is
  // false once fewer than marker.length characters remain.
  while (cut > 0 && input.endsWith(marker, cut)) {
    cut -= marker.length;
  }

  if (cut === input.length) return input;

  return input.substring(0, cut);
}
