import { PDFDocumentProxy, TextContentItem } from "pdfjs-dist";
import { range } from "lodash";

/**
 * A pdf.js text item together with the position data `analyze` derives for it.
 */
interface IEnhancedTextContentItem {
  /** The text item exactly as returned by `getTextContent()`. */
  item: TextContentItem;
  /** Page index counted from 1 within the analyzed range (`pageNumber - from + 1`). */
  page: number;
  /** `view[3]` of the page the item was read from. */
  pageHeight: number;
  /** Horizontal position, copied from `item.transform[4]`. */
  x: number;
  /**
   * Vertical position derived from `item.transform[5]`: the value is
   * subtracted from the page extent and shifted by one page extent per
   * preceding page, so it grows as `transform[5]` shrinks and from page to
   * page.
   */
  y: number;
}

/**
 * One outcome (success or failure) parsed from a table row.
 */
interface IDataRowTestResult {
  /** Trimmed text of the "Code" column found within 4 units of the outcome marker; "" if none. */
  code: string;
  /** Trimmed text of the "Hinweis" column belonging to this outcome; "" if none. */
  note: string;
  /**
   * Number built from the digits found within 3 units of the outcome marker
   * in the "Prüf ergebnis" column. Omitted when no digits are present.
   */
  nextStep?: number;
}

/**
 * One parsed table row as returned by `analyze`.
 */
export interface IDataRow {
  /** Zero-based index of the row within the result, as a string. */
  key: string;
  /** Concatenation of the purely numeric text items in the "Nr." column. */
  stepNumber: string;
  /** Concatenated text of the "Prüf schritt" column. */
  checkDescription: string;
  testResults: {
    /**
     * Present when the result column contains an item whose text includes
     * "j" (case-insensitive) — presumably "ja"; confirm against the documents.
     */
    success?: IDataRowTestResult;
    /**
     * Present when the result column contains an item whose text includes
     * "n" (case-insensitive) — presumably "nein"; confirm against the documents.
     */
    failure?: IDataRowTestResult;
  };
}

/**
 * Half-open coordinate interval; items match when `start <= value < end`.
 */
interface ColumnBoundary {
  /** Inclusive lower bound. */
  start: number;
  /** Exclusive upper bound; -1 until it has been computed. */
  end: number;
}

/**
 * Vertical interval of a table row; `start`/`end` are compared against item `y`.
 */
interface RowBoundary extends ColumnBoundary {
  /** `<page>-<transform[5]>` of the step-number item; used to avoid duplicate rows. */
  key: string;
}
/**
 * Chooses the text to insert between two text items based on their vertical
 * distance, measured in units of the first item's height.
 *
 * - gap >= 1.5 heights: one newline per 1.5 heights of gap (rounded), so larger
 *   gaps produce more line breaks;
 * - gap >= 1 height: a single space (treated as a wrapped line);
 * - otherwise: the empty string (same line).
 *
 * The previous implementation divided the height by the gap, which capped the
 * result at one newline and produced no separator at all for gaps larger than
 * three heights.
 *
 * @param item1 Item whose `item.height` serves as the reference height.
 * @param item2 Item whose `y` is compared against `item1.y`.
 * @returns The separator string; "" when the height is not a positive number.
 */
const calculateItemSeparator = function(
  item1: IEnhancedTextContentItem,
  item2: IEnhancedTextContentItem
): string {
  const lineHeight = item1.item.height;
  // Without a positive height the gap cannot be classified (and a ratio would
  // divide by zero or go negative, making String.repeat throw).
  if (!(lineHeight > 0)) return "";

  const gap = Math.abs(item1.y - item2.y);
  const paragraphGap = 1.5 * lineHeight;

  if (gap >= paragraphGap) {
    // gap / paragraphGap is >= 1 here, so at least one newline is emitted.
    return "\n".repeat(Math.round(gap / paragraphGap));
  }
  if (gap >= lineHeight) {
    return " ";
  }
  return "";
};

/**
 * Extracts the rows of a test-step table from a range of PDF pages.
 *
 * The table is located through its column headlines ("Nr.", "Prüf schritt",
 * "Prüf ergebnis", "Code", "Hinweis"). Column boundaries are taken from the
 * x positions of the headlines; row boundaries from the purely numeric items
 * in the first column. Each row is then split into a step number, a
 * description and up to two outcomes (success / failure).
 *
 * @param document Loaded pdf.js document.
 * @param from First page number to read (passed to `document.getPage`).
 * @param to Last page number to read (inclusive).
 * @returns One entry per detected table row, in top-to-bottom order.
 * @throws Error when one of the column headlines cannot be found.
 */
export default async function analyze(
  document: PDFDocumentProxy,
  from: number,
  to: number
): Promise<Array<IDataRow>> {
  const columns: Array<ColumnBoundary> = [];
  const rows: Array<RowBoundary> = [];
  const headlines = ["Nr.", "Prüf schritt", "Prüf ergebnis", "Code", "Hinweis"];
  const digitsOnly = /^[0-9]+$/;
  const successPattern = /^.*j.*$/i;
  const failurePattern = /^.*n.*$/i;
  let pageWidth = 0;
  let pageHeight = 0;
  let tableStart = 0;
  let tableEnd = 0;

  // Flips the coordinate and stacks pages one page extent apart, so y grows
  // monotonically through the whole page range.
  const fixYCoordinates = (y: number, page: number) =>
    pageHeight - y + page * pageHeight;

  // Resolve every page first so the dimensions below are fixed before any
  // text item is filtered. Setting them from inside the concurrent per-page
  // callbacks made the result depend on promise resolution order.
  const pages = await Promise.all(
    range(from, to + 1).map(async pageNumber => ({
      pageNumber,
      page: await document.getPage(pageNumber)
    }))
  );

  const firstPage = pages.length > 0 ? pages[0].page : undefined;
  if (firstPage) {
    // NOTE(review): pdf.js documents `view` as [x1, y1, x2, y2], so index 2 is
    // the horizontal and index 3 the vertical extent; these two assignments
    // look swapped. Left unchanged because the 300/35 margins below and the
    // page stacking were presumably tuned against these values — confirm
    // before correcting.
    pageWidth = firstPage.view[3];
    pageHeight = firstPage.view[2];
  }

  const textItems: Array<IEnhancedTextContentItem> = (
    await Promise.all(
      pages.map(async ({ pageNumber, page }) => {
        if (!page) return [];
        const content = await page.getTextContent();
        return content.items
          .filter(
            item =>
              // Only unrotated left-to-right text inside the vertical band
              // between the two hard-coded margins.
              item.dir === "ltr" &&
              item.transform[1] === 0 &&
              item.transform[2] === 0 &&
              item.transform[5] < pageHeight - 300 &&
              item.transform[5] > 35
          )
          .map(item => ({
            item,
            page: pageNumber - from + 1,
            pageHeight: page.view[3],
            x: item.transform[4],
            y: fixYCoordinates(item.transform[5], pageNumber - from)
          }));
      })
    )
  ).reduce((acc, val) => acc.concat(val), []);

  // Each headline's x position opens a column; the y of the last headline
  // found marks where the table body starts.
  headlines.forEach(headline => {
    const textItem = textItems.find(
      candidate => candidate.item.str === headline
    );
    if (!textItem) throw new Error(`Failed to find column ${headline}`);
    tableStart = textItem.y;
    columns.push({ start: textItem.x, end: -1 });
  });
  columns.forEach((column, index) => {
    // A column ends where the next one starts; the last one is extended past
    // the page so it catches everything to its right.
    column.end =
      index === columns.length - 1 ? pageWidth + 100 : columns[index + 1].start;
  });

  // Top-to-bottom, then left-to-right.
  textItems.sort((i1, i2) => {
    if (i1.y !== i2.y) {
      return i1.y < i2.y ? -1 : 1;
    }
    if (i1.x !== i2.x) {
      return i1.x < i2.x ? -1 : 1;
    }
    return 0;
  });

  // Non-blank items of the first column below the headlines.
  const numbers = textItems.filter(
    textItem =>
      textItem.x >= columns[0].start &&
      textItem.x < columns[0].end &&
      textItem.y > tableStart &&
      textItem.item.str.trim() !== ""
  );

  for (let index = 0; index < numbers.length; index++) {
    const number = numbers[index];
    // Trim before testing so the check agrees with getNumberCell; padded step
    // numbers would otherwise be mistaken for the end of the table.
    if (digitsOnly.test(number.item.str.trim())) {
      const key = `${number.page}-${number.item.transform[5]}`;
      if (!rows.some(row => row.key === key)) {
        // Start slightly above the number so items on the same line with a
        // marginally smaller y still fall into the row.
        rows.push({ start: number.y - 2, end: -1, key });
      }
      continue;
    }
    // A non-numeric item in first position is skipped without recording a
    // table end: it lies above every row, and using it would leave the last
    // row with an end before its start.
    if (index === 0) continue;
    // The first non-numeric item after that terminates the table.
    tableEnd = number.y;
    break;
  }

  // No terminator found: the table runs to the end of the stacked pages.
  if (!tableEnd) tableEnd = pageHeight * (to - from + 1);

  rows.forEach((row, index) => {
    row.end = index < rows.length - 1 ? rows[index + 1].start : tableEnd;
  });

  // All text items inside the given row/column cell (bounds are half-open).
  const getCellItems = (rowIndex: number, columnIndex: number) => {
    const column = columns[columnIndex];
    const row = rows[rowIndex];
    return textItems.filter(
      textItem =>
        textItem.x >= column.start &&
        textItem.x < column.end &&
        textItem.y >= row.start &&
        textItem.y < row.end
    );
  };

  // Concatenation of the purely numeric items of a cell.
  const getNumberCell = (rowIndex: number, columnIndex: number): string =>
    getCellItems(rowIndex, columnIndex)
      .map(textItem => textItem.item.str.trim())
      .filter(str => digitsOnly.test(str))
      .join("");

  // Concatenation of all items of a cell, without separators.
  const getTextCell = (rowIndex: number, columnIndex: number): string =>
    getCellItems(rowIndex, columnIndex)
      .map(textItem => textItem.item.str)
      .join("");

  // Builds one outcome of a row. `marker` is the result-column item that
  // announces this outcome, `otherMarker` the one announcing the opposite
  // outcome (if any); it bounds the notes belonging to this outcome.
  const readTestResult = (
    rowIndex: number,
    resultItems: Array<IEnhancedTextContentItem>,
    marker: IEnhancedTextContentItem,
    otherMarker: IEnhancedTextContentItem | undefined
  ): IDataRowTestResult => {
    const result: IDataRowTestResult = { code: "", note: "" };

    // Digits on the marker's line name the step to continue with.
    const nextStep = resultItems
      .filter(item => item.y <= marker.y + 3 && item.y >= marker.y - 3)
      .map(item => item.item.str.match(/\d+/))
      .reduce((acc, match) => (match ? `${acc}${match[0]}` : acc), "");
    if (nextStep) result.nextStep = Number(nextStep);

    result.code = getCellItems(rowIndex, 3)
      .filter(item => item.y <= marker.y + 4 && item.y >= marker.y - 4)
      .map(item => item.item.str)
      .join("")
      .trim();

    // Notes run from just above the marker to the other marker when that one
    // is not above this one, otherwise to the end of the row.
    const rowEnd = rows[rowIndex].end;
    const noteStart = marker.y - 4;
    const noteEnd = otherMarker
      ? otherMarker.y < marker.y
        ? rowEnd
        : otherMarker.y
      : rowEnd;

    result.note = getCellItems(rowIndex, 4)
      .filter(noteItem => noteItem.y >= noteStart && noteItem.y < noteEnd)
      .map(
        (item, index, selected) =>
          // Compare with the previous item of the *filtered* list; indexing
          // the unfiltered cell items here picked an unrelated neighbour
          // whenever leading notes had been filtered out.
          (index === 0
            ? ""
            : calculateItemSeparator(item, selected[index - 1])) +
          item.item.str
      )
      .join("")
      .trim();

    return result;
  };

  return rows.map((_row, rowIndex) => {
    const dataRow: IDataRow = {
      key: String(rowIndex),
      stepNumber: getNumberCell(rowIndex, 0),
      checkDescription: getTextCell(rowIndex, 1),
      testResults: {}
    };

    const resultItems = getCellItems(rowIndex, 2);
    const successRow = resultItems.find(item =>
      successPattern.test(item.item.str)
    );
    const failureRow = resultItems.find(item =>
      failurePattern.test(item.item.str)
    );

    if (successRow) {
      dataRow.testResults.success = readTestResult(
        rowIndex,
        resultItems,
        successRow,
        failureRow
      );
    } else {
      console.warn("failed to parse test result cell");
    }

    if (failureRow) {
      dataRow.testResults.failure = readTestResult(
        rowIndex,
        resultItems,
        failureRow,
        successRow
      );
    } else {
      console.warn("failed to parse test result cell");
    }

    return dataRow;
  });
}
