/**
 * A single recognized entity.
 */
interface Entity {
  /** Text of the entity; `mergeDuplicateEntities` treats entities with equal `content` as duplicates. */
  content: string;
  /**
   * Numeric markers of where the entity was seen; merged as a duplicate-free union
   * when duplicates are combined.
   * NOTE(review): presumably positions/indices within the source text — confirm with the producer.
   */
  occurrences: number[];
}

/**
 * The entities reported by one recognizer.
 */
interface RecognizedData {
  /** Entities attributed to `recognizer`. */
  entities: Entity[];
  /** Recognizer name; used as the grouping key by `groupByRecognizer`. */
  recognizer: string;
}

/**
 * Recognition results bucketed under arbitrary string keys.
 * NOTE(review): the keys are presumably page or document identifiers — confirm with the caller;
 * `groupByRecognizer` only iterates them and does not keep them in its output.
 */
interface Data {
  [key: string]: RecognizedData[];
}

/**
 * Regroups recognition results by recognizer name.
 *
 * Every bucket of `data` is scanned in key order. The first item seen for a
 * recognizer is shallow-copied (with its own `entities` array) and becomes that
 * recognizer's group; the entities of later items with the same recognizer are
 * appended to it. The input buckets and their `entities` arrays are never
 * mutated, but the individual entity objects are shared by reference with the
 * input.
 *
 * @param data - Recognition results keyed by an arbitrary string; the keys are
 *   not carried over to the result.
 * @returns An object keyed by recognizer name whose values are single-element
 *   arrays holding the combined group for that recognizer.
 */
export function groupByRecognizer(data: Data): {
  [key: string]: RecognizedData[];
} {
  // Accumulate in a Map rather than a plain object: a plain `{}` inherits
  // members such as "constructor" or "toString", so a recognizer with one of
  // those names would look "already present" and crash the append branch.
  const groups = new Map<string, RecognizedData>();

  for (const key of Object.keys(data)) {
    for (const item of data[key]) {
      const group = groups.get(item.recognizer);
      if (group === undefined) {
        groups.set(item.recognizer, { ...item, entities: [...item.entities] });
      } else {
        // Element-wise push avoids the argument-count limit that
        // `push(...largeArray)` can hit.
        for (const entity of item.entities) {
          group.entities.push(entity);
        }
      }
    }
  }

  const groupedData: { [key: string]: RecognizedData[] } = {};
  groups.forEach((group, recognizer) => {
    // defineProperty instead of assignment so that a recognizer literally named
    // "__proto__" becomes an ordinary own key rather than replacing the
    // result's prototype.
    Object.defineProperty(groupedData, recognizer, {
      value: [group],
      enumerable: true,
      writable: true,
      configurable: true,
    });
  });

  return groupedData;
}

/**
 * Collapses entities with identical `content` within each recognizer group.
 *
 * For every key of `groupedData`, all entities of all items under that key are
 * merged by `content`: the first entity seen for a given content is copied, and
 * the `occurrences` of later duplicates are unioned into it (duplicate numbers
 * removed, first-seen order kept). Entities that have no duplicate keep their
 * occurrences as-is. The input is not mutated and no arrays are shared between
 * input and output.
 *
 * @param groupedData - Recognizer groups, e.g. as produced by `groupByRecognizer`.
 * @returns An object with the same keys, each mapped to a single-element array
 *   holding the merged group. A key whose item list is empty yields a group
 *   with no entities whose `recognizer` is the key itself.
 */
export function mergeDuplicateEntities(groupedData: {
  [key: string]: RecognizedData[];
}): { [key: string]: RecognizedData[] } {
  const mergedData: { [key: string]: RecognizedData[] } = {};

  for (const recognizer of Object.keys(groupedData)) {
    const items = groupedData[recognizer];

    // Null-prototype dictionary: entity content is free text, so values such
    // as "constructor" or "toString" must not resolve to inherited
    // Object.prototype members. A plain-object shape (rather than a Map) keeps
    // the enumeration order of the result identical to previous behaviour.
    const mergedEntities: { [content: string]: Entity } = Object.create(null);

    for (const item of items) {
      for (const entity of item.entities) {
        const existing = mergedEntities[entity.content];
        if (existing) {
          existing.occurrences = Array.from(
            new Set([...existing.occurrences, ...entity.occurrences])
          );
        } else {
          // Copy the occurrences array so the result never aliases the input.
          mergedEntities[entity.content] = {
            ...entity,
            occurrences: [...entity.occurrences],
          };
        }
      }
    }

    const mergedGroup: RecognizedData = {
      // An empty item list has no first element to read the name from; fall
      // back to the map key instead of throwing.
      recognizer: items.length > 0 ? items[0].recognizer : recognizer,
      entities: Object.values(mergedEntities),
    };

    // defineProperty instead of assignment so that an own "__proto__" key in
    // the input is reproduced as an own key instead of replacing the
    // result's prototype.
    Object.defineProperty(mergedData, recognizer, {
      value: [mergedGroup],
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }

  return mergedData;
}

/**
 * Result of `findWordOccurrences`: where a word was found, per page.
 */
interface SearchResult {
  /** The word that was searched for, exactly as passed in. */
  word: string;
  /** One entry per page that contains at least one match; pages without matches are omitted. */
  position: Array<{
    /** Key of the page in the searched text map. */
    pageIndex: string;
    /** Zero-based indexes of the matching tokens after splitting the page text on whitespace. */
    occurrences: number[];
  }>;
}

/**
 * Finds every occurrence of `searchWord` in a set of page texts.
 *
 * Each page text is split on runs of whitespace and the resulting tokens are
 * compared case-sensitively against `searchWord`. A token matches when it
 * equals `searchWord` either verbatim or after all periods and commas have
 * been removed from the token (so "word," and "1,000" match "word" and
 * "1000"). The verbatim comparison lets search words that themselves contain
 * a period or comma (e.g. "3.14") be found; stripping alone could never match
 * them.
 *
 * Token indexes are positions in the raw `split(/\s+/)` result, so text that
 * starts with whitespace has an empty token at index 0.
 *
 * @param data - Page texts keyed by page index.
 * @param searchWord - The word to look for.
 * @returns The search word plus, for every page with at least one match, the
 *   page key and the zero-based token indexes of the matches.
 */
export function findWordOccurrences(
  data: { [key: string]: string },
  searchWord: string
): SearchResult {
  const strippedPunctuation = /[.,]/g;
  const result: SearchResult = { word: searchWord, position: [] };

  for (const pageIndex of Object.keys(data)) {
    const occurrences: number[] = [];

    data[pageIndex].split(/\s+/).forEach((token, index) => {
      if (
        token === searchWord ||
        token.replace(strippedPunctuation, "") === searchWord
      ) {
        occurrences.push(index);
      }
    });

    if (occurrences.length > 0) {
      result.position.push({ pageIndex, occurrences });
    }
  }

  return result;
}
