/*
 * Decompiled with CFR 0.152.
 */
package com.datmt.pdftools.service;

import java.io.File;
import java.io.IOException;
import java.text.Normalizer;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.text.PDFTextStripper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Derives a title for a PDF file and turns titles into file names.
 *
 * <p>Title lookup reads the document information title first and, when that is missing
 * or blank, falls back to a line-based heuristic over the text of the first page. The
 * remaining public methods clean a title, optionally normalize it, append {@code .pdf}
 * and pick a name that does not yet exist in a target directory.
 *
 * <p>All case-insensitive comparisons use {@link Locale#ROOT} so results do not depend on
 * the JVM's default locale.
 */
public class PdfTitleExtractor {
    private static final Logger logger = LoggerFactory.getLogger(PdfTitleExtractor.class);

    /** Characters replaced by a space when cleaning a title: backslash, slash, colon, etc. */
    private static final Pattern INVALID_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");
    private static final Pattern MULTIPLE_SPACES = Pattern.compile("\\s+");
    private static final Pattern CONTROL_CHARS = Pattern.compile("[\\x00-\\x1f\\x7f]");
    private static final Pattern NON_ASCII = Pattern.compile("[^\\x00-\\x7F]");
    private static final Pattern MULTIPLE_HYPHENS = Pattern.compile("-+");
    private static final Pattern EDGE_HYPHENS = Pattern.compile("^-+|-+$");
    private static final Pattern COMBINING_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n");

    /** A line that mentions a year between 1900 and 2099 (used with a length limit). */
    private static final Pattern YEAR_LINE = Pattern.compile(".*\\b(19|20)\\d{2}\\b.*");
    /** A line that is only a page range such as "12-34" (hyphen or en dash). */
    private static final Pattern PAGE_RANGE = Pattern.compile("^\\d+[-\u2013]\\d+$");
    /** A line that starts with a page reference such as "p. 12". */
    private static final Pattern PAGE_REFERENCE = Pattern.compile("^p\\.?\\s*\\d+.*");
    /** One capitalized word followed by up to three more capitalized words or initials. */
    private static final Pattern PERSON_NAME = Pattern.compile("^[A-Z][a-z]+( [A-Z][a-z.]+){0,3}$");

    /** Substrings (lower case) that mark a line as journal/publisher boilerplate. */
    private static final String[] HEADER_KEYWORDS = {
        "doi:", "doi.org",
        "journal", "proceedings", "conference", "volume", "issue", "published",
        "received", "accepted", "arxiv", "preprint",
        "copyright", "\u00a9", "all rights reserved"
    };

    /** Substrings (lower case) that mark a line as an affiliation. */
    private static final String[] AFFILIATION_KEYWORDS = {
        "university", "institute", "department", "laboratory", "school of"
    };

    private static final String PDF_EXTENSION = ".pdf";
    private static final int MAX_FILENAME_LENGTH = 200;
    /** A truncated title is cut back to a word boundary only if more than this many chars remain. */
    private static final int MIN_TRUNCATED_LENGTH = 150;
    private static final int MAX_FIRST_PAGE_CHARS = 500;
    /** Title accumulation stops once the collected text is longer than this. */
    private static final int MAX_TITLE_CHARS = 150;
    /** Candidate titles shorter than this are rejected. */
    private static final int MIN_TITLE_CHARS = 5;
    private static final int MAX_YEAR_LINE_LENGTH = 30;
    private static final int MAX_AUTHOR_LINE_LENGTH = 100;
    private static final int MAX_WORDS_PER_NAME = 4;
    private static final int MAX_UNIQUE_SUFFIX = 999;

    /**
     * Extracts a title for the given PDF.
     *
     * <p>The document information title is preferred; if it is absent or blank after
     * cleaning, the first page's text is analysed instead.
     *
     * @param pdfFile the PDF to read; must not be {@code null}
     * @return the cleaned title, or {@code null} when no title could be determined or the
     *         file could not be read
     */
    public String extractTitle(File pdfFile) {
        logger.debug("Extracting title from: {}", pdfFile.getName());
        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(pdfFile))) {
            String title = extractFromMetadata(document);
            if (title != null && !title.isBlank()) {
                logger.debug("Found title in metadata: {}", title);
                return title.trim();
            }
            title = extractFromFirstPage(document);
            if (title != null && !title.isBlank()) {
                logger.debug("Extracted title from first page: {}", title);
                return title.trim();
            }
            logger.debug("No title found for: {}", pdfFile.getName());
            return null;
        } catch (IOException e) {
            // Best effort: an unreadable PDF simply yields no title.
            logger.warn("Failed to extract title from {}: {}", pdfFile.getName(), e.getMessage());
            // Keep the exception type and stack trace available without making warn output noisy.
            logger.debug("Title extraction failure details", e);
            return null;
        }
    }

    /**
     * Reads the title from the document information dictionary.
     *
     * @return the cleaned title, or {@code null} if there is no non-blank title entry
     */
    private String extractFromMetadata(PDDocument document) {
        PDDocumentInformation info = document.getDocumentInformation();
        if (info == null) {
            return null;
        }
        String title = info.getTitle();
        if (title == null || title.isBlank()) {
            return null;
        }
        return cleanTitle(title);
    }

    /**
     * Extracts the text of page 1 and runs the title heuristic over its first
     * {@value #MAX_FIRST_PAGE_CHARS} characters.
     *
     * @return the guessed title, or {@code null} if the document has no pages, the page
     *         has no text, the heuristic finds nothing, or text extraction fails
     */
    private String extractFromFirstPage(PDDocument document) {
        try {
            if (document.getNumberOfPages() == 0) {
                return null;
            }
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1);
            stripper.setEndPage(1);
            String text = stripper.getText(document);
            if (text == null || text.isBlank()) {
                return null;
            }
            if (text.length() > MAX_FIRST_PAGE_CHARS) {
                text = text.substring(0, MAX_FIRST_PAGE_CHARS);
            }
            return extractTitleFromText(text);
        } catch (IOException e) {
            logger.debug("Failed to extract text from first page: {}", e.getMessage());
            return null;
        }
    }

    /**
     * Guesses a title from raw page text.
     *
     * <p>Lines are scanned top to bottom. Header lines are always skipped. Blank lines and
     * author lines are skipped until the first title line has been collected and end the
     * title afterwards. A line starting with "abstract" always ends the scan. Collected
     * lines are joined with single spaces.
     *
     * @return the cleaned title, or {@code null} if fewer than {@value #MIN_TITLE_CHARS}
     *         characters were collected
     */
    private String extractTitleFromText(String text) {
        StringBuilder titleBuilder = new StringBuilder();
        for (String line : LINE_BREAK.split(text)) {
            String trimmed = line.trim();
            boolean titleStarted = titleBuilder.length() > 0;
            if (trimmed.isEmpty()) {
                if (titleStarted) {
                    break; // the first blank line after the title terminates it
                }
                continue;
            }
            if (isHeaderLine(trimmed)) {
                continue;
            }
            if (isAuthorLine(trimmed)) {
                if (titleStarted) {
                    break; // authors directly below the title terminate it
                }
                continue;
            }
            if (trimmed.toLowerCase(Locale.ROOT).startsWith("abstract")) {
                break;
            }
            if (titleStarted) {
                titleBuilder.append(' ');
            }
            titleBuilder.append(trimmed);
            if (titleBuilder.length() > MAX_TITLE_CHARS) {
                break;
            }
        }
        String title = titleBuilder.toString().trim();
        if (title.length() < MIN_TITLE_CHARS) {
            return null;
        }
        return cleanTitle(title);
    }

    /**
     * Tells whether a line looks like journal/publisher boilerplate rather than a title.
     *
     * <p>This is a plain substring heuristic: any line containing one of the header
     * keywords is treated as a header, which also catches titles that happen to contain
     * such a word (for example "tissue" contains "issue").
     */
    private boolean isHeaderLine(String line) {
        String lower = line.toLowerCase(Locale.ROOT);
        for (String keyword : HEADER_KEYWORDS) {
            if (lower.contains(keyword)) {
                return true;
            }
        }
        // A short line mentioning a year is taken to be a date or citation line.
        if (line.length() < MAX_YEAR_LINE_LENGTH && YEAR_LINE.matcher(line).matches()) {
            return true;
        }
        return PAGE_RANGE.matcher(line).matches() || PAGE_REFERENCE.matcher(line).matches();
    }

    /**
     * Tells whether a line looks like an author or affiliation line.
     *
     * <p>A line qualifies if at least two comma-separated parts look like personal names,
     * if it is a short line whose " and "-separated parts each have at most
     * {@value #MAX_WORDS_PER_NAME} words, if it contains both "@" and ".", or if it
     * contains an affiliation keyword. The " and " rule only counts words, so short
     * titles of the form "X and Y" are also classified as author lines.
     */
    private boolean isAuthorLine(String line) {
        // Splitting a comma-free line yields a single part, which can never reach two names.
        int nameCount = 0;
        for (String part : line.split(",")) {
            if (PERSON_NAME.matcher(part.trim()).matches()) {
                nameCount++;
            }
        }
        if (nameCount >= 2) {
            return true;
        }

        String lower = line.toLowerCase(Locale.ROOT);
        if (line.length() < MAX_AUTHOR_LINE_LENGTH && lower.contains(" and ")) {
            String[] parts = lower.split(" and ");
            if (parts.length >= 2 && allPartsAreShort(parts)) {
                return true;
            }
        }

        if (line.contains("@") && line.contains(".")) {
            return true;
        }
        for (String keyword : AFFILIATION_KEYWORDS) {
            if (lower.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /** Returns true when every part has at most {@value #MAX_WORDS_PER_NAME} whitespace-separated words. */
    private boolean allPartsAreShort(String[] parts) {
        for (String part : parts) {
            if (MULTIPLE_SPACES.split(part.trim()).length > MAX_WORDS_PER_NAME) {
                return false;
            }
        }
        return true;
    }

    /**
     * Makes a title safe to use as a file name stem.
     *
     * <p>Whitespace runs become single spaces, remaining control characters are removed,
     * characters matched by {@code INVALID_CHARS} become spaces, trailing dots are dropped
     * and the result is limited to {@value #MAX_FILENAME_LENGTH} characters, preferring a
     * cut at a word boundary.
     *
     * @return the cleaned title (possibly empty), or {@code null} if {@code title} is {@code null}
     */
    private String cleanTitle(String title) {
        if (title == null) {
            return null;
        }
        // Collapse whitespace BEFORE dropping control characters: tabs and line breaks are
        // control characters too, and deleting them outright would glue adjacent words together.
        String cleaned = MULTIPLE_SPACES.matcher(title).replaceAll(" ");
        cleaned = CONTROL_CHARS.matcher(cleaned).replaceAll("");
        cleaned = INVALID_CHARS.matcher(cleaned).replaceAll(" ");
        cleaned = MULTIPLE_SPACES.matcher(cleaned).replaceAll(" ").trim();
        cleaned = stripTrailingDots(cleaned);

        if (cleaned.length() > MAX_FILENAME_LENGTH) {
            int cut = MAX_FILENAME_LENGTH;
            if (Character.isHighSurrogate(cleaned.charAt(cut - 1))) {
                cut--; // do not leave half of a surrogate pair at the end
            }
            cleaned = cleaned.substring(0, cut);
            int lastSpace = cleaned.lastIndexOf(' ');
            if (lastSpace > MIN_TRUNCATED_LENGTH) {
                cleaned = cleaned.substring(0, lastSpace);
            }
            // The cut may have exposed a new trailing dot or space.
            cleaned = stripTrailingDots(cleaned);
        }
        return cleaned.trim();
    }

    /** Removes any trailing run of dots and spaces. */
    private String stripTrailingDots(String value) {
        int end = value.length();
        while (end > 0 && (value.charAt(end - 1) == '.' || value.charAt(end - 1) == ' ')) {
            end--;
        }
        return value.substring(0, end);
    }

    /**
     * Applies the requested normalizations to a file name, in this order: ASCII
     * transliteration, spaces to hyphens, lower-casing.
     *
     * @param filename the name to normalize
     * @param options  which normalizations to apply
     * @return the normalized name; {@code filename} unchanged if either argument is {@code null}
     */
    public String normalizeFilename(String filename, NormalizationOptions options) {
        if (filename == null || options == null) {
            return filename;
        }
        String result = filename;
        if (options.isNormalizeToAscii()) {
            result = normalizeToAscii(result);
            // Dropped non-ASCII characters can leave leading, trailing or doubled spaces behind.
            result = MULTIPLE_SPACES.matcher(result).replaceAll(" ").trim();
        }
        if (options.isReplaceSpacesWithHyphens()) {
            result = result.replace(' ', '-');
            result = MULTIPLE_HYPHENS.matcher(result).replaceAll("-");
            result = EDGE_HYPHENS.matcher(result).replaceAll("");
        }
        if (options.isLowercase()) {
            // Locale.ROOT keeps ASCII input ASCII regardless of the default locale.
            result = result.toLowerCase(Locale.ROOT);
        }
        return result;
    }

    /**
     * Transliterates text to ASCII: accents are stripped via NFD decomposition, a few
     * letters without a decomposition are mapped explicitly, and every remaining
     * non-ASCII character is removed.
     */
    private String normalizeToAscii(String input) {
        if (input == null) {
            return null;
        }
        // NFD splits accented letters into base letter + combining mark; dropping the marks
        // leaves the base letter.
        String normalized = Normalizer.normalize(input, Normalizer.Form.NFD);
        normalized = COMBINING_MARKS.matcher(normalized).replaceAll("");
        // Letters that have no canonical decomposition need an explicit replacement.
        normalized = normalized
                .replace("\u00df", "ss")
                .replace("\u00e6", "ae")
                .replace("\u00c6", "AE")
                .replace("\u0153", "oe")
                .replace("\u0152", "OE")
                .replace("\u00f8", "o")
                .replace("\u00d8", "O")
                .replace("\u0111", "d")
                .replace("\u0110", "D")
                .replace("\u0142", "l")
                .replace("\u0141", "L");
        return NON_ASCII.matcher(normalized).replaceAll("");
    }

    /**
     * Builds a {@code .pdf} file name from a title without extra normalization.
     *
     * @return the file name, or {@code null} if the title is {@code null}, blank, or empty
     *         after cleaning
     */
    public String generateFilename(String title) {
        return generateFilename(title, null);
    }

    /**
     * Builds a {@code .pdf} file name from a title.
     *
     * @param title   the title to convert
     * @param options optional normalizations; {@code null} means none
     * @return the file name, or {@code null} if the title is {@code null} or blank, or
     *         nothing is left after cleaning and normalization
     */
    public String generateFilename(String title, NormalizationOptions options) {
        if (title == null || title.isBlank()) {
            return null;
        }
        String filename = cleanTitle(title);
        if (filename == null || filename.isBlank()) {
            return null;
        }
        if (options != null) {
            filename = normalizeFilename(filename, options);
        }
        if (filename == null || filename.isBlank()) {
            return null;
        }
        return filename + PDF_EXTENSION;
    }

    /**
     * Finds a name that does not exist in {@code directory}.
     *
     * <p>If {@code baseFilename} is free it is returned unchanged. Otherwise " (1)",
     * " (2)", ... up to {@value #MAX_UNIQUE_SUFFIX} is inserted before the {@code .pdf}
     * extension (matched case-insensitively and re-emitted as lower case). A name without
     * a {@code .pdf} extension gets the counter appended to the whole name.
     *
     * @param directory    the directory to check against
     * @param baseFilename the preferred file name
     * @return a name not present in the directory, or {@code null} if {@code baseFilename}
     *         is {@code null} or all numbered candidates are taken
     */
    public String generateUniqueFilename(File directory, String baseFilename) {
        if (baseFilename == null) {
            return null;
        }
        if (!new File(directory, baseFilename).exists()) {
            return baseFilename;
        }

        String stem = baseFilename;
        String extension = "";
        int stemLength = baseFilename.length() - PDF_EXTENSION.length();
        // Only strip the extension when it is really there; blindly chopping four characters
        // would corrupt other names and fail on names shorter than the extension.
        if (stemLength >= 0
                && baseFilename.regionMatches(true, stemLength, PDF_EXTENSION, 0, PDF_EXTENSION.length())) {
            stem = baseFilename.substring(0, stemLength);
            extension = PDF_EXTENSION;
        }

        for (int i = 1; i <= MAX_UNIQUE_SUFFIX; i++) {
            String numberedName = stem + " (" + i + ")" + extension;
            if (!new File(directory, numberedName).exists()) {
                return numberedName;
            }
        }
        logger.warn("Could not find unique filename for: {}", baseFilename);
        return null;
    }

    /**
     * Switches for {@link PdfTitleExtractor#normalizeFilename(String, NormalizationOptions)}.
     * All options are off by default; setters return {@code this} so calls can be chained.
     */
    public static class NormalizationOptions {
        private boolean replaceSpacesWithHyphens = false;
        private boolean normalizeToAscii = false;
        private boolean lowercase = false;

        /** Returns whether spaces are replaced with hyphens. */
        public boolean isReplaceSpacesWithHyphens() {
            return this.replaceSpacesWithHyphens;
        }

        /** Sets whether spaces are replaced with hyphens. */
        public NormalizationOptions setReplaceSpacesWithHyphens(boolean replaceSpacesWithHyphens) {
            this.replaceSpacesWithHyphens = replaceSpacesWithHyphens;
            return this;
        }

        /** Returns whether the name is transliterated to ASCII. */
        public boolean isNormalizeToAscii() {
            return this.normalizeToAscii;
        }

        /** Sets whether the name is transliterated to ASCII. */
        public NormalizationOptions setNormalizeToAscii(boolean normalizeToAscii) {
            this.normalizeToAscii = normalizeToAscii;
            return this;
        }

        /** Returns whether the name is converted to lower case. */
        public boolean isLowercase() {
            return this.lowercase;
        }

        /** Sets whether the name is converted to lower case. */
        public NormalizationOptions setLowercase(boolean lowercase) {
            this.lowercase = lowercase;
            return this;
        }
    }
}

