/*
 * Decompiled with CFR 0.152.
 */
package de.l3s.boilerpipe.filters.heuristics;

import de.l3s.boilerpipe.BoilerpipeFilter;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Labels the first text block of a document whose text equals the document
 * title or one of several fragments derived from that title.
 *
 * <p>The title handed to the constructor is normalised (non-breaking spaces
 * become plain spaces, apostrophes are dropped, the result is trimmed and
 * lower-cased) and then split on a series of separator patterns to build a
 * set of candidate strings. {@link #process(TextDocument)} normalises each
 * block's text the same way and looks it up in that set.
 */
public final class DocumentTitleMatchClassifier
implements BoilerpipeFilter {
    /** Label attached to the block that matched a candidate title. */
    private static final String TITLE_LABEL = "de.l3s.boilerpipe/TITLE";

    /** Punctuation runs stripped from block text before the second lookup. */
    private static final Pattern PAT_REMOVE_CHARACTERS = Pattern.compile("[\\?\\!\\.\\-\\:]+");

    /**
     * Separator used to count the words of a title fragment. Note that
     * {@code \b} here is the Java string escape for the backspace character,
     * so the class matches runs of backspace or space characters.
     */
    private static final Pattern PAT_WORD_SEPARATOR = Pattern.compile("[\b ]+");

    /**
     * Separator patterns tried in turn; for each one the longest resulting
     * fragment of the title becomes a candidate.
     */
    private static final String[] LONGEST_PART_SEPARATORS = {
        "[ ]*[\\|\u00bb|-][ ]*",
        "[ ]*[\\|\u00bb|:][ ]*",
        "[ ]*[\\|\u00bb|:\\(\\)][ ]*",
        "[ ]*[\\|\u00bb|:\\(\\)\\-][ ]*",
        "[ ]*[\\|\u00bb|,|:\\(\\)\\-][ ]*",
        "[ ]*[\\|\u00bb|,|:\\(\\)\\-\u00a0][ ]*",
    };

    /** Minimum word count for a fragment to be added by {@link #addPotentialTitles}. */
    private static final int MIN_FRAGMENT_WORDS = 4;

    /** Candidate titles, or {@code null} when no usable title was supplied. */
    private final Set<String> potentialTitles;

    /**
     * Builds the set of candidate titles.
     *
     * @param title the document title; {@code null} or a title that is empty
     *              after normalisation leaves the classifier without
     *              candidates, in which case {@link #process(TextDocument)}
     *              always returns {@code false}
     */
    public DocumentTitleMatchClassifier(String title) {
        String normalized = (title == null) ? "" : normalize(title);
        if (normalized.length() == 0) {
            this.potentialTitles = null;
        } else {
            this.potentialTitles = buildPotentialTitles(normalized);
        }
    }

    /**
     * Returns the candidate titles computed by the constructor.
     *
     * @return the internal (mutable) candidate set, or {@code null} when the
     *         constructor received a {@code null} or blank title
     */
    public Set<String> getPotentialTitles() {
        return this.potentialTitles;
    }

    /**
     * Applies the normalisation shared by the title and by block text:
     * U+00A0 becomes a space, apostrophes are removed, the string is trimmed
     * and lower-cased. Lower-casing uses {@link Locale#ROOT} so the result
     * does not depend on the JVM's default locale.
     */
    private static String normalize(String raw) {
        String cleaned = raw.replace('\u00a0', ' ');
        cleaned = cleaned.replace("'", "");
        return cleaned.trim().toLowerCase(Locale.ROOT);
    }

    /** Derives every candidate string from an already normalised, non-empty title. */
    private static Set<String> buildPotentialTitles(String title) {
        Set<String> candidates = new HashSet<String>();
        candidates.add(title);

        for (String separator : LONGEST_PART_SEPARATORS) {
            String longest = getLongestPart(title, separator);
            if (longest != null) {
                candidates.add(longest);
            }
        }

        addPotentialTitles(candidates, title, "[ ]+[\\|][ ]+", MIN_FRAGMENT_WORDS);
        addPotentialTitles(candidates, title, "[ ]+[\\-][ ]+", MIN_FRAGMENT_WORDS);

        // Title with its last / first " - "-delimited segment removed. When
        // the pattern does not match, the unchanged title is re-added, which
        // the set absorbs.
        candidates.add(title.replaceFirst(" - [^\\-]+$", ""));
        candidates.add(title.replaceFirst("^[^\\-]+ - ", ""));
        return candidates;
    }

    /**
     * Splits {@code title} on {@code pattern} and adds every fragment that has
     * at least {@code minWords} words and does not contain {@code ".com"}.
     * Nothing is added when the split yields a single fragment.
     */
    private static void addPotentialTitles(Set<String> potentialTitles, String title, String pattern, int minWords) {
        String[] parts = title.split(pattern);
        if (parts.length == 1) {
            return;
        }
        for (String part : parts) {
            if (part.contains(".com")) {
                continue;
            }
            if (PAT_WORD_SEPARATOR.split(part).length >= minWords) {
                potentialTitles.add(part);
            }
        }
    }

    /**
     * Splits {@code title} on {@code pattern} and picks a "longest" fragment.
     * Fragments containing {@code ".com"} are ignored. A fragment replaces the
     * current pick when it has more words than the current pick or is longer
     * in characters, so later fragments can win on either measure.
     *
     * @return the trimmed fragment, or {@code null} when the split yields a
     *         single fragment or no non-empty fragment qualifies
     */
    private static String getLongestPart(String title, String pattern) {
        String[] parts = title.split(pattern);
        if (parts.length == 1) {
            return null;
        }
        int longestNumWords = 0;
        String longestPart = "";
        for (String part : parts) {
            if (part.contains(".com")) {
                continue;
            }
            int numWords = PAT_WORD_SEPARATOR.split(part).length;
            if (numWords > longestNumWords || part.length() > longestPart.length()) {
                longestNumWords = numWords;
                longestPart = part;
            }
        }
        if (longestPart.length() == 0) {
            return null;
        }
        return longestPart.trim();
    }

    /**
     * Labels the first block whose normalised text is a candidate title,
     * either as-is or after removing punctuation runs matched by
     * {@link #PAT_REMOVE_CHARACTERS}. Scanning stops at the first match.
     *
     * @param doc the document whose text blocks are inspected
     * @return {@code true} if a block was labelled, {@code false} otherwise
     *         (including when there are no candidate titles)
     */
    @Override
    public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
        if (this.potentialTitles == null) {
            return false;
        }
        for (TextBlock tb : doc.getTextBlocks()) {
            String text = normalize(tb.getText());
            if (this.potentialTitles.contains(text) || this.potentialTitles.contains(stripPunctuation(text))) {
                tb.addLabel(TITLE_LABEL);
                return true;
            }
        }
        return false;
    }

    /** Removes punctuation runs from already normalised text and trims the result. */
    private static String stripPunctuation(String text) {
        return PAT_REMOVE_CHARACTERS.matcher(text).replaceAll("").trim();
    }
}

