/*
 * Decompiled with CFR 0.152.
 */
package ro.sync.textsearch.html;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import org.apache.lucene.document.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ro.sync.textsearch.FieldConstants;
import ro.sync.textsearch.html.LuceneDocumentCloner;

/**
 * Buffers Lucene {@link Document}s until an estimated token budget is reached and then hands
 * them, one by one and in insertion order, to a {@link Consumer}.
 *
 * <p>Token counts are rough estimates (one token per four characters, plus one, for every field
 * listed in {@code FieldConstants.EMBEDDABLE_FIELDS}). A document whose estimate exceeds the
 * budget is split on its {@code "text"} field into chunks of at most roughly
 * {@code maxTokens * 3} characters, preferably cut right after a line break.
 *
 * <p>The class keeps mutable state without any synchronization.
 */
public class BufferedEmbedder {
    private static final Logger logger = LoggerFactory.getLogger(BufferedEmbedder.class);

    /** Token budget used when the caller does not supply one. */
    public static final int MAX_TOKENS = 7500;

    /** Characters allowed in a chunk for every token of the budget. */
    private static final int CHUNK_CHARS_PER_TOKEN = 3;

    /** Name of the document field that is split when a document is too large. */
    private static final String TEXT_FIELD = "text";

    private final List<Document> toEmbed = new ArrayList<>();
    private final Consumer<Document> consumer;
    private final int maxTokens;
    private final int maxChunkChars;
    private int indexed = 0;
    private int estimatedTokenCountInCurrentBuffer = 0;

    /**
     * Creates an embedder with the default budget of {@link #MAX_TOKENS} tokens.
     *
     * @param documentConsummer receives every buffered document when the buffer is flushed
     * @param uuid currently unused by this class; kept so existing callers keep compiling
     */
    public BufferedEmbedder(Consumer<Document> documentConsummer, String uuid) {
        this(documentConsummer, MAX_TOKENS, uuid);
    }

    /**
     * Creates an embedder with an explicit token budget.
     *
     * @param documentConsummer receives every buffered document when the buffer is flushed
     * @param maxTokens estimated number of tokens the buffer may hold before it is flushed
     * @param uuid currently unused by this class; kept so existing callers keep compiling
     */
    public BufferedEmbedder(Consumer<Document> documentConsummer, int maxTokens, String uuid) {
        this.consumer = documentConsummer;
        this.maxTokens = maxTokens;
        // Computed in long so a huge budget cannot overflow into a negative size, and clamped to
        // at least one character so the chunking loop always advances.
        long chunkChars = (long) maxTokens * CHUNK_CHARS_PER_TOKEN;
        this.maxChunkChars = (int) Math.max(1L, Math.min(chunkChars, (long) Integer.MAX_VALUE));
    }

    /**
     * Adds a document to the buffer, flushing first when it would not fit and splitting it into
     * chunks when its own estimate exceeds the budget.
     *
     * <p>An oversized document that has no (or an empty) {@code "text"} field cannot be split and
     * is buffered whole.
     *
     * @param doc the document to buffer
     * @throws IOException declared for callers; not thrown by the code in this class
     * @throws EmbeddingsTokensMaxLimitExceededException declared for callers; not thrown by the
     *         code in this class
     */
    void addDocument(Document doc) throws IOException, EmbeddingsTokensMaxLimitExceededException {
        int estimation = BufferedEmbedder.getTokenNumberEstimations(doc);
        boolean bufferFull = this.estimatedTokenCountInCurrentBuffer >= this.maxTokens;
        boolean wouldOverflow = this.estimatedTokenCountInCurrentBuffer + estimation > this.maxTokens;
        if (bufferFull || wouldOverflow) {
            this.flush();
        }
        if (estimation > this.maxTokens) {
            String content = doc.get(TEXT_FIELD);
            if (content != null && !content.isEmpty()) {
                this.addInChunks(doc, content);
                return;
            }
            // Nothing to split on: fall through and keep the document instead of failing on a
            // missing text field or silently dropping a document whose text is empty.
        }
        this.toEmbed.add(doc);
        this.estimatedTokenCountInCurrentBuffer += estimation;
        logger.debug("Adding normally, total so far {}", this.estimatedTokenCountInCurrentBuffer);
    }

    /** Splits {@code content} into chunks and buffers one cloned document per chunk. */
    private void addInChunks(Document doc, String content)
            throws IOException, EmbeddingsTokensMaxLimitExceededException {
        int pos = 0;
        while (pos < content.length()) {
            String chunk = BufferedEmbedder.getChunk(content, pos, this.maxChunkChars);
            Document chunkDoc = this.createChunkDocument(doc, chunk);
            int chunkEstimate = BufferedEmbedder.getTokenNumberEstimations(chunkDoc);
            // Make room before adding, so a short chunk followed by a long one cannot push the
            // buffer past the budget.
            if (!this.toEmbed.isEmpty()
                    && this.estimatedTokenCountInCurrentBuffer + chunkEstimate > this.maxTokens) {
                this.flush();
            }
            this.toEmbed.add(chunkDoc);
            this.estimatedTokenCountInCurrentBuffer += chunkEstimate;
            // Flush eagerly when another chunk of the same size would no longer fit.
            if (this.estimatedTokenCountInCurrentBuffer + chunkEstimate > this.maxTokens) {
                logger.debug("Flushing in chunk {} docs.", this.toEmbed.size());
                this.flush();
            }
            pos += chunk.length();
        }
    }

    /**
     * Builds the document that carries a single chunk by delegating to
     * {@code LuceneDocumentCloner.cloneDocumentWithContent}.
     */
    private Document createChunkDocument(Document doc, String chunk) {
        return LuceneDocumentCloner.cloneDocumentWithContent(doc, chunk);
    }

    /**
     * Estimates the number of tokens in a document by summing the estimates of every field named
     * in {@code FieldConstants.EMBEDDABLE_FIELDS}; fields the document does not have count as 0.
     *
     * @param doc the document to measure
     * @return the estimated token count
     */
    public static int getTokenNumberEstimations(Document doc) {
        int estimate = 0;
        for (String fieldName : FieldConstants.EMBEDDABLE_FIELDS) {
            estimate += BufferedEmbedder.getTokenNumberEstimations(doc.get(fieldName));
        }
        return estimate;
    }

    /**
     * Returns the next chunk of {@code content} starting at {@code pos}.
     *
     * <p>When fewer than {@code sizeLimit} characters remain, the whole remainder is returned.
     * Otherwise the chunk ends right after the last line break found at or before index
     * {@code pos + sizeLimit} (so it may be one character longer than {@code sizeLimit} when the
     * line break sits exactly at that index). When there is no such line break after {@code pos},
     * the chunk is cut at {@code pos + sizeLimit}, moved by one character if that cut would
     * separate the two halves of a surrogate pair.
     *
     * @param content the text being split
     * @param pos index of the first character of the chunk; expected to be within the content
     * @param sizeLimit target chunk length in characters; expected to be positive
     * @return the chunk starting at {@code pos}
     */
    static String getChunk(String content, int pos, int sizeLimit) {
        int len = content.length();
        int nextPos;
        // Same test as pos + sizeLimit > len, written so the addition cannot overflow.
        if (sizeLimit > len - pos) {
            nextPos = len;
        } else {
            nextPos = pos + sizeLimit;
            int nlPos = content.lastIndexOf('\n', nextPos);
            if (nlPos > pos) {
                nextPos = nlPos + 1;
            } else if (nextPos > pos && nextPos < len
                    && Character.isHighSurrogate(content.charAt(nextPos - 1))
                    && Character.isLowSurrogate(content.charAt(nextPos))) {
                // Keep the pair together: prefer the shorter chunk, but never return an empty one.
                nextPos = nextPos - 1 > pos ? nextPos - 1 : nextPos + 1;
            }
        }
        return content.substring(pos, nextPos);
    }

    /** Estimates one token per four characters, plus one; {@code null} content counts as 0. */
    private static int getTokenNumberEstimations(String content) {
        if (content == null) {
            return 0;
        }
        return content.length() / 4 + 1;
    }

    /**
     * Passes every buffered document to the consumer and empties the buffer.
     *
     * <p>The buffer is emptied even when the consumer throws, so documents that were not yet
     * consumed at that point are discarded rather than retried.
     *
     * @throws IOException declared for callers; not thrown by the code in this class
     * @throws EmbeddingsTokensMaxLimitExceededException declared for callers; not thrown by the
     *         code in this class
     */
    public void flush() throws IOException, EmbeddingsTokensMaxLimitExceededException {
        try {
            for (Document doc : this.toEmbed) {
                this.consumer.accept(doc);
                ++this.indexed;
            }
        } finally {
            this.cleanup();
        }
    }

    /** Empties the buffer and resets its token estimate. */
    private void cleanup() {
        this.toEmbed.clear();
        this.estimatedTokenCountInCurrentBuffer = 0;
    }

    /**
     * Returns how many documents have been handed to the consumer so far.
     *
     * @return the number of successfully consumed documents
     */
    public int getIndexedCount() {
        return this.indexed;
    }

    /**
     * Adds every document of the list, in order, as {@link #addDocument(Document)} would.
     *
     * @param docs the documents to buffer
     * @throws IOException declared for callers; not thrown by the code in this class
     * @throws EmbeddingsTokensMaxLimitExceededException declared for callers; not thrown by the
     *         code in this class
     */
    public void addDocuments(List<Document> docs) throws IOException, EmbeddingsTokensMaxLimitExceededException {
        for (Document document : docs) {
            this.addDocument(document);
        }
    }

    /** Checked exception named for the case where an embeddings token limit is exceeded. */
    public static class EmbeddingsTokensMaxLimitExceededException
    extends Exception {
        /**
         * @param message the detail message
         */
        public EmbeddingsTokensMaxLimitExceededException(String message) {
            super(message);
        }

        /**
         * @param message the detail message
         * @param ex the exception that caused this one
         */
        public EmbeddingsTokensMaxLimitExceededException(String message, EmbeddingsTokensMaxLimitExceededException ex) {
            super(message, ex);
        }
    }
}

