/*
 * Decompiled with CFR 0.152.
 */
package ro.sync.textsearch.webhelp;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ro.sync.textsearch.FieldConstants;
import ro.sync.textsearch.html.BufferedEmbedder;
import ro.sync.textsearch.webhelp.embeddings.IEmbedding;
import ro.sync.textsearch.webhelp.embeddings.IEmbeddingProvider;

/**
 * Computes embeddings for Lucene documents and attaches them as KNN float vector fields.
 *
 * <p>Documents are grouped into batches by their estimated token count. The embeddable
 * field values of each batch are passed to the configured {@link IEmbeddingProvider} in
 * one call, and the returned vectors are added to the documents in the same order in
 * which the values were submitted.
 */
public class DocumentEmbeddingProcessor {
    private static final Logger logger = LoggerFactory.getLogger(DocumentEmbeddingProcessor.class);

    /** Estimated-token budget used to decide when the pending batch must be flushed. */
    private static final int MAX_ESTIMATED_TOKENS_PER_BATCH = 7500;

    /** Name of the field holding the document type. */
    private static final String DOCUMENT_TYPE_FIELD = "__document_type__";

    /** Name of the field logged to identify a document when its embeddings are missing. */
    private static final String URI_FIELD = "__uri__";

    /** Name of the text field, which gets special treatment depending on the document type. */
    private static final String TEXT_FIELD = "text";

    /** Document type whose {@code text} field is never submitted for embedding. */
    private static final String KEYWORDS_DOCUMENT_TYPE = "keywords";

    /** Document type whose {@code text} field is removed once the embeddings were assigned. */
    private static final String SEMANTIC_DOCUMENT_TYPE = "semantic";

    private final IEmbeddingProvider embeddingProvider;

    /**
     * Creates a processor that obtains embeddings from the given provider.
     *
     * @param embeddingProvider the provider used to compute the embeddings
     */
    public DocumentEmbeddingProcessor(IEmbeddingProvider embeddingProvider) {
        this.embeddingProvider = embeddingProvider;
    }

    /**
     * Computes the embeddings for the given documents and adds them to the documents
     * as vector fields. The documents are modified in place.
     *
     * @param documents the documents to process
     */
    public void computeEmbeddings(Document[] documents) {
        List<Document> batchDocuments = new ArrayList<>();
        int estimatedTokenCountInCurrentBuffer = 0;
        for (Document document : documents) {
            int estimation = BufferedEmbedder.getTokenNumberEstimations(document);
            // Flush the pending batch BEFORE adding a document that would push it over the
            // budget, so that a batch only exceeds the budget when a single document does.
            if (!batchDocuments.isEmpty() && this.shouldFlushBuffer(estimatedTokenCountInCurrentBuffer, estimation)) {
                this.processBatch(batchDocuments);
                batchDocuments.clear();
                estimatedTokenCountInCurrentBuffer = 0;
            }
            batchDocuments.add(document);
            estimatedTokenCountInCurrentBuffer += estimation;
        }
        // Process whatever is left after the last flush (no-op for an empty batch).
        this.processBatch(batchDocuments);
    }

    /**
     * Tells whether the pending batch has to be processed before another document is added.
     *
     * @param currentBufferSize estimated token count already accumulated in the batch
     * @param newDocumentTokens estimated token count of the document about to be added
     * @return {@code true} if the batch is already full or the new document would overflow it
     */
    private boolean shouldFlushBuffer(int currentBufferSize, int newDocumentTokens) {
        return currentBufferSize >= MAX_ESTIMATED_TOKENS_PER_BATCH
                || currentBufferSize + newDocumentTokens > MAX_ESTIMATED_TOKENS_PER_BATCH;
    }

    /**
     * Computes and assigns the embeddings for one batch of documents.
     *
     * @param batchDocuments the documents of the batch; nothing happens if it is empty
     */
    private void processBatch(List<Document> batchDocuments) {
        if (batchDocuments.isEmpty()) {
            return;
        }
        EmbeddableContent embeddableContent = this.extractEmbeddableContent(batchDocuments);
        if (embeddableContent.contentList.isEmpty()) {
            // Nothing to embed: do not bother the provider with an empty request.
            logger.debug("No embeddable content found in the current batch of {} documents.", batchDocuments.size());
            return;
        }
        List<? extends IEmbedding> embeddings = this.embeddingProvider.createEmbeddings(embeddableContent.contentList);
        if (embeddings != null && !embeddings.isEmpty() && embeddings.size() != embeddableContent.contentList.size()) {
            // The assignment below is positional, so a size mismatch means some fields
            // may be left without a vector. Make that visible instead of failing silently.
            logger.warn("Expected {} embeddings but the provider returned {}.",
                    embeddableContent.contentList.size(), embeddings.size());
        }
        this.assignEmbeddingsToDocuments(batchDocuments, embeddings, embeddableContent.emptyContentIndices);
    }

    /**
     * Collects the field values to embed from the given documents.
     *
     * <p>A (document, field) slot is skipped, and its flat index
     * {@code docIndex * fieldCount + fieldIndex} recorded, when the value is missing or
     * blank, or when the field is {@code text} and the document type is {@code keywords}.
     *
     * @param documents the documents of the current batch
     * @return the values to embed, in document/field order, plus the indices of the skipped slots
     */
    private EmbeddableContent extractEmbeddableContent(List<Document> documents) {
        List<String> contentList = new ArrayList<>();
        Set<Integer> emptyContentIndices = new HashSet<>();
        int fieldsPerDocument = FieldConstants.EMBEDDABLE_FIELDS.size();
        for (int docIndex = 0; docIndex < documents.size(); ++docIndex) {
            Document document = documents.get(docIndex);
            // Document.get() yields null when the field is absent, so a document without a
            // type field is simply treated as "not a keywords document" instead of throwing.
            boolean keywordsDocument = KEYWORDS_DOCUMENT_TYPE.equals(document.get(DOCUMENT_TYPE_FIELD));
            for (int fieldIndex = 0; fieldIndex < fieldsPerDocument; ++fieldIndex) {
                String fieldName = FieldConstants.EMBEDDABLE_FIELDS.get(fieldIndex);
                String fieldValue = document.get(fieldName);
                boolean skipped = TEXT_FIELD.equals(fieldName) && keywordsDocument
                        || fieldValue == null
                        || fieldValue.trim().isEmpty();
                if (skipped) {
                    emptyContentIndices.add(docIndex * fieldsPerDocument + fieldIndex);
                } else {
                    contentList.add(fieldValue);
                }
            }
        }
        return new EmbeddableContent(contentList, emptyContentIndices);
    }

    /**
     * Adds the computed vectors to the documents as {@link KnnFloatVectorField}s using the
     * dot product similarity, then removes the {@code text} field from {@code semantic}
     * documents.
     *
     * <p>If no embeddings are available the documents are left untouched and their URI
     * fields are logged at debug level.
     *
     * @param documents the documents of the current batch
     * @param embeddings the embeddings returned by the provider; may be {@code null} or empty
     * @param emptyContentIndices flat indices of the (document, field) slots that were not submitted
     */
    private void assignEmbeddingsToDocuments(List<Document> documents, List<? extends IEmbedding> embeddings, Set<Integer> emptyContentIndices) {
        if (embeddings == null || embeddings.isEmpty()) {
            logger.debug("The embeddings could not be computed for the following documents:");
            documents.forEach(doc -> logger.debug("DOC: {}", doc.getField(URI_FIELD)));
            return;
        }
        // NOTE(review): the slot indices were computed with EMBEDDABLE_FIELDS.size(); this
        // assumes EMBEDDING_FIELDS is a parallel list of the same size - TODO confirm.
        int fieldsPerDocument = FieldConstants.EMBEDDING_FIELDS.size();
        int embeddingIndex = 0;
        for (int docIndex = 0; docIndex < documents.size(); ++docIndex) {
            Document document = documents.get(docIndex);
            for (int fieldIndex = 0; fieldIndex < fieldsPerDocument; ++fieldIndex) {
                boolean slotWasSkipped = emptyContentIndices.contains(docIndex * fieldsPerDocument + fieldIndex);
                if (slotWasSkipped || embeddingIndex >= embeddings.size()) {
                    continue;
                }
                IEmbedding embedding = embeddings.get(embeddingIndex);
                float[] vector = embedding == null ? null : embedding.getVector();
                if (vector != null) {
                    String fieldName = FieldConstants.EMBEDDING_FIELDS.get(fieldIndex);
                    document.add(new KnnFloatVectorField(fieldName, vector, VectorSimilarityFunction.DOT_PRODUCT));
                } else {
                    logger.debug("Null embedding for content:\n{}", embedding);
                }
                // The embedding is consumed even when it carries no vector, to keep the
                // remaining embeddings aligned with the remaining slots.
                ++embeddingIndex;
            }
            // Null-safe type check: a document without a type field keeps its text.
            if (SEMANTIC_DOCUMENT_TYPE.equals(document.get(DOCUMENT_TYPE_FIELD))) {
                document.removeField(TEXT_FIELD);
            }
        }
    }

    /**
     * The values extracted from a batch for embedding, together with the flat indices of
     * the (document, field) slots that were skipped.
     */
    private static final class EmbeddableContent {
        private final List<String> contentList;
        private final Set<Integer> emptyContentIndices;

        EmbeddableContent(List<String> contentList, Set<Integer> emptyContentIndices) {
            this.contentList = contentList;
            this.emptyContentIndices = emptyContentIndices;
        }
    }
}

