/*
 * Decompiled with CFR 0.152.
 */
package documents;

import documents.NRC_Document;
import documents.NRC_HTMLDocument;
import documents.WebPageReader;
import documents.pdfboxAddition.NRC_PDFFonttedTextStripper;
import documents.pdfboxAddition.NRC_PDFHighlighter;
import ecriture.Syllabics;
import ecriture.TransCoder;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.StringWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.pdfbox.cos.COSDocument;
import org.pdfbox.cos.COSName;
import org.pdfbox.cos.COSObject;
import org.pdfbox.cos.COSString;
import org.pdfbox.pdfparser.PDFParser;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.font.PDFont;
import polices.Police;
import utilites1.Util;

public class NRC_PDFDocument
implements NRC_Document {
    /** Class-wide log4j logger; assigned in the static initializer. */
    private static Logger LOG;
    /** Parsed low-level PDF document; assigned by make_object and closed by close(). */
    public COSDocument document = null;
    /** URL string given to the URL-based constructor; stays null for the file-name constructor. */
    private String urlName = null;
    /** Content type copied from the WebPageReader; only set by the URL-based constructor. */
    private String contentType = null;
    /** Last-modified timestamp read from the URL connection; only set by the URL-based constructor. */
    private Date date = null;
    /** Title taken from a "Title" item of the PDF, or built lazily by getTitle(). */
    private String title = null;
    /** Font name with the largest character count; computed lazily by getPreferredFont(). */
    private String preferredFont = null;
    /** Font name -> Integer character count; built by getPageContent(). */
    private Hashtable fonts = null;
    /** Cached cleaned-up page text; built by getPageContent(). */
    private String pageContent = null;
    /** Cached concatenation of the Inuktitut text elements; built by getPageContent(). */
    private String inuktitutContent = null;
    /** Cached concatenation of all non-blank text elements; built by getPageContent(). */
    private String totalContent = null;
    /** Legacy font identifier -> Integer count of non-whitespace converted characters; built by getPageContent(). */
    private Hashtable inuktitutLegacy = null;
    /** Text stripper used to extract the text elements; created in make_object. */
    private NRC_PDFFonttedTextStripper stripper;
    /** Reader that opened the URL; only set by the URL-based constructor. */
    private WebPageReader wpr = null;
    /** Temporary local copy of a downloaded PDF; written by copyPDF, null when the copy failed. */
    private File copyFile = null;
    /** Set to true by getPageContent() when at least one element is recognised as Inuktitut. */
    private boolean containsInuktitut = false;
    /** Length of inuktitutContent; written by getPageContent() and not read elsewhere in this class. */
    private long lengthOfInuktitutContent = 0L;
    /** Length of the total text; written by getPageContent() and not read elsewhere in this class. */
    private long lengthOfTotalContent = 0L;
    /** Extracted text elements; each row holds the text at index 0 and its font at index 1. */
    private Object[][] textElements = null;
    /** Pattern whose group 2 is used throughout this class as the font name; compiled in the static initializer. */
    private static Pattern patternFontName;
    /** UTF-8 print stream over System.out; not referenced by any method visible in this class. */
    PrintStream out = new PrintStream((OutputStream)System.out, true, "utf-8");
    /** File holding the words to highlight; assigned by highlight() when the "f" argument is given. */
    private static File wordFile;
    /** Directory for temporary files; defaults to "/tmp" and can be replaced by highlight()'s "d" argument. */
    private static File tmpDir;
    /** Compiler-generated cache for this class's Class object (decompilation artifact). */
    static /* synthetic */ Class class$0;

    // One-time class setup: resolves the cached Class object, then creates the
    // logger, the font-name pattern and the default temporary directory.
    static {
        if (class$0 == null) {
            try {
                class$0 = Class.forName("documents.NRC_PDFDocument");
            }
            catch (ClassNotFoundException missing) {
                throw new NoClassDefFoundError(missing.getMessage());
            }
        }
        LOG = Logger.getLogger((Class)class$0);
        patternFontName = Pattern.compile("(.{6}\\x2B)?([^,]+)(,.+)?$");
        tmpDir = new File(File.separator, "tmp");
    }

    /**
     * Builds a document from a PDF stored on the local file system.
     *
     * @param fileName path of the PDF file to parse
     * @param dummy    unused; only distinguishes this constructor from the URL-based one
     * @throws IOException if the file cannot be opened or parsed
     */
    public NRC_PDFDocument(String fileName, String dummy) throws IOException {
        FileInputStream fis = new FileInputStream(fileName);
        try {
            this.make_object(fis);
        }
        finally {
            // Release the file handle even when parsing fails. Closing an
            // already-closed stream is a no-op, so this is safe on success too.
            try {
                fis.close();
            }
            catch (IOException ignored) {
                // Nothing useful can be done about a failed close of an input stream.
            }
        }
    }

    /**
     * Builds a document from a URL. A {@code file} URL is read in place; any
     * other URL is first copied to a temporary file which is then parsed.
     *
     * @param urlName URL of the PDF document
     * @throws MalformedURLException if {@code urlName} is not a valid URL
     * @throws IOException if the document cannot be downloaded, opened or parsed
     */
    public NRC_PDFDocument(String urlName) throws MalformedURLException, IOException {
        this.urlName = urlName;
        URL url = new URL(urlName);
        String protocol = url.getProtocol().toLowerCase();
        this.wpr = new WebPageReader(urlName);
        this.contentType = this.wpr.contentType;
        URLConnection connection = this.wpr.connection;
        this.date = new Date(connection.getLastModified());
        FileInputStream fis;
        if (protocol.startsWith("file")) {
            fis = new FileInputStream(url.getPath());
        } else {
            this.copyPDF(connection, urlName);
            if (this.copyFile == null) {
                // copyPDF reports a failed copy by leaving copyFile null; fail
                // with a clear message instead of a NullPointerException.
                throw new IOException("Could not copy PDF document from " + urlName);
            }
            fis = new FileInputStream(this.copyFile);
        }
        try {
            this.make_object(fis);
        }
        finally {
            // Release the file handle even when parsing fails.
            try {
                fis.close();
            }
            catch (IOException ignored) {
                // Nothing useful can be done about a failed close of an input stream.
            }
        }
    }

    /**
     * Parses the PDF read from {@code fis}, extracts its text elements and
     * looks for a title among the document's objects.
     *
     * @param fis stream positioned at the start of the PDF; always closed by this method
     * @throws IOException if the PDF cannot be parsed
     */
    private void make_object(FileInputStream fis) throws IOException {
        PDFParser parser = new PDFParser((InputStream)fis);
        try {
            parser.parse();
        }
        finally {
            // Close the stream whether or not parsing succeeded.
            try {
                fis.close();
            }
            catch (IOException ignored) {
                // A failed close of the input stream does not affect the parsed result.
            }
        }
        this.document = parser.getDocument();
        this.stripper = new NRC_PDFFonttedTextStripper();
        this.setTextElements();
        List objs = this.document.getObjects();
        COSName titleKey = COSName.getPDFName("Title");
        for (int i = 0; i < objs.size(); ++i) {
            COSObject obj = (COSObject)objs.get(i);
            Object candidate = obj.getItem(titleKey);
            // Only accept string-valued "Title" items; an unconditional cast
            // would throw ClassCastException on any other value type.
            if (candidate instanceof COSString) {
                this.title = ((COSString)candidate).getString();
                break;
            }
        }
    }

    /**
     * Closes the underlying PDF document. A failure to close is logged rather
     * than propagated, so this method never throws an IOException.
     */
    public void close() {
        if (this.document == null) {
            // The field is public and may have been cleared by a caller.
            return;
        }
        try {
            this.document.close();
        }
        catch (IOException e) {
            LOG.warn((Object)("Could not close PDF document " + this.urlName), e);
        }
    }

    /**
     * Copies the content served by {@code connection} into a temporary file
     * and stores that file in {@code copyFile}. The file is registered for
     * deletion when the JVM exits.
     *
     * If the copy fails with an IOException the error is logged and
     * {@code copyFile} is set to null; callers must check for that.
     *
     * @param connection open connection to the remote document
     * @param urlName    URL of the document, used only in the log message
     * @throws IOException if closing one of the streams fails
     */
    private void copyPDF(URLConnection connection, String urlName) throws IOException {
        InputStream source = null;
        OutputStream sink = null;
        try {
            this.copyFile = File.createTempFile("pdfcopy", ".txt", tmpDir);
            // Register for deletion up front so a partial copy is cleaned up too.
            this.copyFile.deleteOnExit();
            source = new BufferedInputStream(connection.getInputStream());
            sink = new BufferedOutputStream(new FileOutputStream(this.copyFile));
            byte[] buff = new byte[2048];
            int bytesRead;
            // Stop at end of stream; read() returns -1 there and must not be
            // passed on to write().
            while ((bytesRead = source.read(buff, 0, buff.length)) != -1) {
                sink.write(buff, 0, bytesRead);
            }
            sink.flush();
        }
        catch (IOException e) {
            LOG.error((Object)("Could not copy " + urlName + " to a temporary file"), e);
            this.copyFile = null;
        }
        finally {
            // Nested so the output stream is closed even if closing the input fails.
            try {
                if (source != null) {
                    source.close();
                }
            }
            finally {
                if (sink != null) {
                    sink.close();
                }
            }
        }
    }

    /**
     * Returns the extracted text elements.
     *
     * @return the internal array (not a copy), each row holding the text at
     *         index 0 and its font at index 1; null if extraction failed
     */
    public Object[][] getTextElements() {
        return textElements;
    }

    /**
     * Extracts the text elements of the document with the stripper and stores
     * them in {@code textElements}. In elements whose font name starts with
     * "naamajut", the character U+F8FF is replaced by U+FFFD.
     *
     * If extraction fails with an IOException the error is logged and
     * {@code textElements} is left unchanged.
     */
    private void setTextElements() {
        try {
            Object[][] elements = this.stripper.getText(this.document);
            if (elements == null) {
                return;
            }
            for (int i = 0; i < elements.length; ++i) {
                String text = (String)elements[i][0];
                // Only elements containing U+F8FF can be affected; skip the rest early.
                if (text == null || text.indexOf('\uf8ff') < 0) {
                    continue;
                }
                // Accept the font either as a PDFont or as a plain name, the
                // same way getPageContent() does.
                Object font = elements[i][1];
                String fontName = null;
                if (font instanceof PDFont) {
                    fontName = ((PDFont)font).getBaseFont();
                } else if (font instanceof String) {
                    fontName = (String)font;
                }
                if (fontName == null) {
                    fontName = "";
                }
                Matcher mp = patternFontName.matcher(fontName);
                if (mp.find() && mp.group(2).toLowerCase().startsWith("naamajut")) {
                    elements[i][0] = text.replace('\uf8ff', '\ufffd');
                }
            }
            this.textElements = elements;
        }
        catch (IOException e) {
            LOG.error((Object)"Could not extract text elements from PDF document", e);
        }
    }

    /**
     * Returns the Inuktitut part of the document text, building it through
     * getPageContent() on first use.
     *
     * @return the concatenated Inuktitut text elements
     */
    public String getInuktitutContent() {
        if (inuktitutContent == null) {
            getPageContent();
        }
        return inuktitutContent;
    }

    /**
     * Builds, caches and returns the textual content of the document.
     *
     * Each text element is classified as blank, already-Unicode Inuktitut,
     * legacy-font Inuktitut (converted to Unicode) or other text. As a side
     * effect this fills {@code fonts}, {@code inuktitutLegacy},
     * {@code inuktitutContent}, {@code totalContent}, the two length fields
     * and the {@code containsInuktitut} flag.
     *
     * The cached page content is returned on every call. Previously the first
     * call returned the total text while later calls returned the cached page
     * content.
     *
     * @return the page text with end-of-line hyphens removed and whitespace
     *         runs collapsed to one space
     */
    public String getPageContent() {
        if (this.pageContent != null) {
            return this.pageContent;
        }
        this.fonts = new Hashtable();
        this.inuktitutLegacy = new Hashtable();
        StringBuffer totalText = new StringBuffer();
        StringBuffer inuktitutText = new StringBuffer();
        StringBuffer content = new StringBuffer();
        int elementCount = this.textElements == null ? 0 : this.textElements.length;
        for (int i = 0; i < elementCount; ++i) {
            String textElement = (String)this.textElements[i][0];
            if (textElement == null) {
                // NOTE(review): assumes the stripper never intends a null text
                // to carry meaning; skipped to avoid a NullPointerException.
                continue;
            }
            if (textElement.replaceAll("\\s", "").equals("")) {
                // Whitespace-only elements are kept verbatim in the page content only.
                content.append(textElement);
                continue;
            }
            if (Syllabics.containsInuktitut((String)textElement)) {
                totalText.append(" ").append(textElement);
                inuktitutText.append(" ").append(textElement);
                content.append(" ").append(textElement);
                this.containsInuktitut = true;
                continue;
            }

            // Remaining elements: resolve the font name to decide whether the
            // text is written in a legacy Inuktitut font.
            String fontName = null;
            Object font = this.textElements[i][1];
            if (font != null) {
                fontName = font instanceof PDFont ? ((PDFont)font).getBaseFont() : (String)font;
            }
            if (fontName == null) {
                fontName = "";
            }
            Matcher mp = patternFontName.matcher(fontName);
            if (mp.find()) {
                fontName = mp.group(2).toLowerCase();
            }

            String legacyFont = NRC_PDFFonttedTextStripper.isContainedFont(fontName, Police.polices);
            if (legacyFont != null) {
                String unicode = TransCoder.legacyToUnicode((String)textElement, (String)legacyFont);
                NRC_PDFDocument.addToCount(this.inuktitutLegacy, legacyFont, unicode.replaceAll("\\s", "").length());
                totalText.append(" ").append(unicode);
                inuktitutText.append(" ").append(unicode);
                content.append(" ").append(unicode);
                this.containsInuktitut = true;
            } else {
                totalText.append(" ").append(textElement);
                content.append(" ").append(textElement);
            }
            // Per-font statistics count the characters of the original element.
            NRC_PDFDocument.addToCount(this.fonts, fontName, textElement.length());
        }
        String scontent = NRC_PDFDocument.removeHyphens(content.toString());
        this.pageContent = scontent.replaceAll("\\s{2,}", " ");
        this.inuktitutContent = inuktitutText.toString();
        this.totalContent = totalText.toString();
        this.lengthOfInuktitutContent = this.inuktitutContent.length();
        this.lengthOfTotalContent = totalText.length();
        return this.pageContent;
    }

    /** Adds {@code amount} to the Integer stored under {@code key}, treating a missing entry as zero. */
    private static void addToCount(Hashtable counts, Object key, int amount) {
        Object previous = counts.get(key);
        int base = previous == null ? 0 : ((Integer)previous).intValue();
        counts.put(key, Integer.valueOf(base + amount));
    }

    /**
     * Removes end-of-line hyphenation: a run of hyphens that follows a
     * non-whitespace character and precedes a CR-LF line break is dropped
     * together with the line break, keeping only the preceding character.
     *
     * @param s text to clean
     * @return the text with hyphenated line breaks joined
     */
    static String removeHyphens(String s) {
        Matcher hyphenBreak = Pattern.compile("(\\S)[-]+\r\n").matcher(s);
        // "$1" keeps only the character captured before the hyphens.
        return hyphenBreak.replaceAll("$1");
    }

    /**
     * Returns the content type reported by the WebPageReader.
     *
     * @return the content type, or null when the document was built from a file name
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * Returns the URL this document was created from.
     *
     * @return the URL string, or null when the document was built from a file name
     */
    public String getUrlName() {
        return urlName;
    }

    /**
     * Returns the document title. When no title was found in the PDF, one is
     * built from the first five words of the page content, each followed by a
     * space, and terminated with "...". The result is cached.
     *
     * @return the title
     */
    public String getTitle() {
        if (title != null) {
            return title;
        }
        Matcher words = Pattern.compile("(\\S+)").matcher(getPageContent());
        StringBuffer built = new StringBuffer();
        for (int remaining = 5; remaining > 0 && words.find(); --remaining) {
            built.append(words.group()).append(" ");
        }
        title = built.append("...").toString();
        return title;
    }

    /**
     * Returns the last-modified date read from the URL connection.
     *
     * @return the date, or null when the document was built from a file name
     */
    public Date getDate() {
        return date;
    }

    /**
     * Returns the font name with the highest character count in the font
     * table, building that table through getPageContent() if needed.
     *
     * @return the most used font name, or null when the font table is empty
     */
    public String getPreferredFont() {
        if (preferredFont != null) {
            return preferredFont;
        }
        if (fonts == null) {
            getPageContent();
        }
        int best = 0;
        for (Enumeration names = fonts.keys(); names.hasMoreElements(); ) {
            Object name = names.nextElement();
            int count = ((Integer)fonts.get(name)).intValue();
            if (count > best) {
                preferredFont = (String)name;
                best = count;
            }
        }
        return preferredFont;
    }

    /**
     * Returns the fonts reported by the text stripper.
     *
     * @return whatever the stripper's getAllFonts() returns
     */
    public Object[] getAllFonts() {
        return stripper.getAllFonts();
    }

    /**
     * Extracts the font name from a font's base-font string using group 2 of
     * the class-wide font-name pattern.
     *
     * @param font font whose base-font string is examined
     * @return the extracted name; the empty string when the font has no
     *         base-font string; the unmodified base-font string when the
     *         pattern does not match at its start
     */
    public static String getFontName(PDFont font) {
        String fntName = font.getBaseFont();
        if (fntName == null) {
            // Other methods of this class also treat a missing base font as "".
            return "";
        }
        Matcher match = patternFontName.matcher(fntName);
        // group() may only be called after a successful match; otherwise it
        // throws IllegalStateException.
        if (!match.lookingAt()) {
            return fntName;
        }
        return match.group(2);
    }

    /**
     * Returns the distinct names of all fonts reported by the stripper.
     *
     * @return the font names, without duplicates, in no particular order
     */
    public String[] getAllFontsNames() {
        Object[] allFonts = getAllFonts();
        HashSet<String> names = new HashSet<String>();
        for (int i = 0; i < allFonts.length; ++i) {
            names.add(NRC_PDFDocument.getFontName((PDFont)allFonts[i]));
        }
        return names.toArray(new String[0]);
    }

    /**
     * Returns the entries of {@code Police.polices} that are a prefix of at
     * least one font name recorded in the font table.
     *
     * The font table is built on demand, so this method can be called before
     * getPageContent().
     *
     * @return the matching entries, each listed once
     */
    public String[] getInuktitutFonts() {
        // Cached after the first call; guarantees the font table exists.
        this.getPageContent();
        Vector<String> fs = new Vector<String>();
        Enumeration names = this.fonts.keys();
        while (names.hasMoreElements()) {
            String fnt = ((String)names.nextElement()).toLowerCase();
            for (int j = 0; j < Police.polices.length; ++j) {
                if (fnt.startsWith(Police.polices[j]) && !fs.contains(Police.polices[j])) {
                    fs.add(Police.polices[j]);
                }
            }
        }
        return fs.toArray(new String[0]);
    }

    /**
     * Returns, for each font reported by getInuktitutFonts(), the share of the
     * document's non-whitespace text that was converted from that font.
     *
     * @return one row per font: the font identifier (String) at index 0 and
     *         its share as a Float at index 1; the share is 0 when nothing was
     *         recorded for the font or when the document has no text
     */
    public Object[][] getInuktitutFontsAndPercentages() {
        // Cached after the first call; guarantees the statistics exist.
        this.getPageContent();
        String[] inuktitutFonts = this.getInuktitutFonts();
        // Loop-invariant: length of the total text without whitespace.
        int totalLength = this.totalContent.replaceAll("\\s", "").length();
        Object[][] res = new Object[inuktitutFonts.length][2];
        for (int i = 0; i < inuktitutFonts.length; ++i) {
            String p = inuktitutFonts[i];
            // A font may match by prefix without having any converted text recorded.
            Object recorded = this.inuktitutLegacy.get(p);
            int converted = recorded == null ? 0 : ((Integer)recorded).intValue();
            float pcn = totalLength == 0 ? 0.0f : (float)converted / (float)totalLength;
            res[i][0] = p;
            res[i][1] = Float.valueOf(pcn);
        }
        return res;
    }

    /**
     * Returns the text accumulated in the stripper's output writer.
     *
     * @return the string content of the stripper's StringWriter
     */
    public String getContents() {
        return ((StringWriter)stripper.getOutput()).toString();
    }

    /**
     * Writes the XML highlight description for one word to {@code filePath}.
     *
     * On success the returned writer has been flushed but is still open; the
     * caller is responsible for closing it. On failure the error is logged and
     * the writer, if it was opened, is closed before being returned.
     *
     * @param highlightWord word to highlight
     * @param filePath      file receiving the UTF-8 encoded XML
     * @return the writer used for the output, or null if it could not be opened
     */
    public OutputStreamWriter getHighlightPositions(String highlightWord, File filePath) {
        OutputStreamWriter xmlOutput = null;
        try {
            NRC_PDFHighlighter hl = new NRC_PDFHighlighter();
            PDDocument pdDocument = new PDDocument(this.document);
            xmlOutput = new OutputStreamWriter((OutputStream)new FileOutputStream(filePath), "UTF-8");
            hl.generateXMLHighlight(pdDocument, highlightWord, (Writer)xmlOutput);
            // Make sure the file is complete even if the caller drops the writer.
            xmlOutput.flush();
        }
        catch (IOException e) {
            LOG.error((Object)("Could not write highlight positions to " + filePath), e);
            if (xmlOutput != null) {
                try {
                    xmlOutput.close();
                }
                catch (IOException ignored) {
                    // Already reporting the primary failure above.
                }
            }
        }
        return xmlOutput;
    }

    /**
     * Writes the XML highlight description for several words to {@code filePath}.
     *
     * On success the returned writer has been flushed but is still open; the
     * caller is responsible for closing it. On failure the error is logged and
     * the writer, if it was opened, is closed before being returned.
     *
     * @param highlightWords words to highlight
     * @param filePath       file receiving the UTF-8 encoded XML
     * @return the writer used for the output, or null if it could not be opened
     */
    public OutputStreamWriter getHighlightPositions(String[] highlightWords, File filePath) {
        OutputStreamWriter xmlOutput = null;
        try {
            NRC_PDFHighlighter hl = new NRC_PDFHighlighter();
            PDDocument pdDocument = new PDDocument(this.document);
            xmlOutput = new OutputStreamWriter((OutputStream)new FileOutputStream(filePath), "UTF-8");
            hl.generateXMLHighlight(pdDocument, highlightWords, (Writer)xmlOutput);
            // Make sure the file is complete even if the caller drops the writer.
            xmlOutput.flush();
        }
        catch (IOException e) {
            LOG.error((Object)("Could not write highlight positions to " + filePath), e);
            if (xmlOutput != null) {
                try {
                    xmlOutput.close();
                }
                catch (IOException ignored) {
                    // Already reporting the primary failure above.
                }
            }
        }
        return xmlOutput;
    }

    /**
     * Runs the highlighter over the document with an in-memory sink and
     * returns the words it reports through getWordsToHighlight().
     *
     * @param highlightWords words passed to the highlighter
     * @return the highlighter's result, or null if an IOException occurred
     */
    public String[] getWordsToHighlight(String[] highlightWords) {
        String[] found = null;
        try {
            NRC_PDFHighlighter highlighter = new NRC_PDFHighlighter();
            PDDocument pdDocument = new PDDocument(document);
            // The generated XML itself is not needed, only the highlighter's state.
            Writer discarded = new CharArrayWriter();
            highlighter.generateXMLHighlight(pdDocument, highlightWords, discarded);
            found = highlighter.getWordsToHighlight();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return found;
    }

    /**
     * Generates an XML highlight file for this document.
     *
     * Arguments are read from {@code args} through {@code Util.getArgument}:
     * "url" (returned unchanged), "f" (name of a word file inside the
     * temporary directory), "m" (word sequence, used when "f" is absent),
     * "s" or "outputType" (passed on to the word extraction), "d" (replaces
     * the temporary directory) and "h" (directory receiving the XML file).
     * When neither "f" nor "m" is given, no words are highlighted.
     *
     * @param args argument list
     * @return a two-element array: the "url" argument and the generated XML file
     * @throws IllegalArgumentException if the "h" argument is missing
     * @throws Exception if the word list or the XML file cannot be produced
     */
    public Object[] highlight(String[] args) throws Exception {
        String nomUrl = Util.getArgument((String[])args, (String)"url");
        String wordFileName = Util.getArgument((String[])args, (String)"f");
        String latsyl = Util.getArgument((String[])args, (String)"s");
        if (latsyl == null) {
            latsyl = Util.getArgument((String[])args, (String)"outputType");
        }
        String directoryName = Util.getArgument((String[])args, (String)"d");
        String htdocsName = Util.getArgument((String[])args, (String)"h");
        String wordsSeq = Util.getArgument((String[])args, (String)"m");
        if (htdocsName == null) {
            throw new IllegalArgumentException("Missing argument 'h': directory for the highlight file");
        }
        if (directoryName != null) {
            tmpDir = new File(directoryName);
        }
        File htdocsDir = new File(htdocsName);
        Vector words = null;
        if (wordFileName != null) {
            wordFile = new File(tmpDir, wordFileName);
            words = NRC_HTMLDocument.getWordsToHighlight(wordFile, latsyl);
        } else if (wordsSeq != null) {
            words = NRC_HTMLDocument.getWordsToHighlight(wordsSeq, latsyl);
        }
        if (words == null) {
            // No word source: produce a highlight file that highlights nothing.
            words = new Vector();
        }
        // Cached after the first call; guarantees the font table used by
        // getInuktitutFonts() exists.
        this.getPageContent();
        String[] fonts = this.getInuktitutFonts();
        String[] sWords = this.lookForWordsInFile(words, fonts, latsyl);
        File xmlFile = File.createTempFile("pdfhl", ".txt", htdocsDir);
        OutputStreamWriter xmlOutput = this.getHighlightPositions(sWords, xmlFile);
        if (xmlOutput != null) {
            // The file is complete at this point; release the handle.
            xmlOutput.close();
        }
        Object[] hl = new Object[]{nomUrl, xmlFile};
        return hl;
    }

    /**
     * Expands each word into the set of patterns to search for in the
     * document: whitespace runs become the regular expression "\s+" and the
     * result is passed through makeWordsToHighlight with the given fonts.
     *
     * @param words  words (Strings) to expand; null is treated as empty
     * @param fonts  legacy font identifiers; null is treated as empty
     * @param latsyl unused
     * @return the distinct patterns, in order of first appearance
     */
    String[] lookForWordsInFile(Vector words, String[] fonts, String latsyl) {
        if (words == null) {
            return new String[0];
        }
        String[] usedFonts = fonts == null ? new String[0] : fonts;
        Vector<String> wordsToHighlight = new Vector<String>();
        // Tracks what has been emitted so de-duplication stays cheap.
        HashSet<String> seen = new HashSet<String>();
        for (int i = 0; i < words.size(); ++i) {
            String w = (String)words.elementAt(i);
            String wp = w.replaceAll("\\s+", "\\\\s+");
            Vector w2h = NRC_PDFDocument.makeWordsToHighlight(wp, usedFonts);
            for (int j = 0; j < w2h.size(); ++j) {
                String wf = (String)w2h.elementAt(j);
                if (seen.add(wf)) {
                    wordsToHighlight.add(wf);
                }
            }
        }
        return wordsToHighlight.toArray(new String[0]);
    }

    /**
     * Builds the list of search patterns for one word: the word itself, a
     * spelling variant obtained by a fixed sequence of substitutions (added
     * only when it differs from the word), and, for each of those and each
     * font, the legacy-encoded form escaped by prepareForRegexp.
     *
     * @param word  word or pattern to expand
     * @param fonts legacy font identifiers to transcode into
     * @return the patterns as a Vector of Strings
     */
    static Vector makeWordsToHighlight(String word, String[] fonts) {
        // Substitutions are applied in this exact order; later ones see the
        // output of earlier ones.
        // NOTE(review): the fourth target ends in U+1670 while the following
        // ones run U+157F..U+1584; U+166F may have been intended - confirm.
        final String[][] rewrites = {
            {"rr", "qr"},
            {"qq", "rq"},
            {"\u1550([\u1546-\u154c])", "\u1585$1"},
            {"\u1585\u146b", "\u1550\u1670"},
            {"\u1585\u146d", "\u1550\u157f"},
            {"\u1585\u146e", "\u1550\u1580"},
            {"\u1585\u146f", "\u1550\u1581"},
            {"\u1585\u1470", "\u1550\u1582"},
            {"\u1585\u1472", "\u1550\u1583"},
            {"\u1585\u1473", "\u1550\u1584"},
        };
        String variant = word;
        for (int r = 0; r < rewrites.length; ++r) {
            variant = variant.replaceAll(rewrites[r][0], rewrites[r][1]);
        }

        Vector<String> patterns = new Vector<String>();
        patterns.add(word);
        if (!variant.equals(word)) {
            patterns.add(variant);
        }

        // Only the Unicode seeds present at this point are transcoded; the
        // legacy forms appended below are not expanded again.
        int seedCount = patterns.size();
        for (int i = 0; i < seedCount; ++i) {
            String seed = patterns.elementAt(i);
            for (int j = 0; j < fonts.length; ++j) {
                String legacy = TransCoder.unicodeToLegacy((String)seed, (String)fonts[j]);
                patterns.add(NRC_PDFDocument.prepareForRegexp(legacy));
            }
        }
        return patterns;
    }

    /**
     * Turns a string into a regular expression, character by character: each
     * of the characters | } ~ + ] ` &lt; &gt; becomes a character class
     * matching any of them, every other ASCII punctuation character is
     * escaped with a backslash, and all remaining characters are copied.
     *
     * @param x text to convert
     * @return the regular expression; {@code x} itself when it is empty
     */
    static String prepareForRegexp(String x) {
        if (x.length() == 0) {
            return x;
        }
        final String dotCodes = "|}~+]`<>";
        final String dotClass = "[|}~+\\]`<>]";
        // The characters matched by the POSIX class Punct.
        final String asciiPunct = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
        StringBuffer escaped = new StringBuffer();
        for (int i = 0; i < x.length(); ++i) {
            char c = x.charAt(i);
            if (dotCodes.indexOf(c) >= 0) {
                escaped.append(dotClass);
            } else if (asciiPunct.indexOf(c) >= 0) {
                escaped.append('\\').append(c);
            } else {
                escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Prefixes every ASCII punctuation character of the string with two
     * backslash characters and copies all other characters unchanged.
     *
     * @param x text to convert
     * @return the converted text; {@code x} itself when it is empty
     */
    static String prepareForRegexpNonAlphanum(String x) {
        if (x.length() == 0) {
            return x;
        }
        // The characters matched by the POSIX class Punct.
        final String asciiPunct = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
        StringBuffer escaped = new StringBuffer();
        for (int i = 0; i < x.length(); ++i) {
            char c = x.charAt(i);
            if (asciiPunct.indexOf(c) >= 0) {
                escaped.append("\\\\");
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /**
     * Replaces every run of whitespace with the regular-expression text "\s+".
     *
     * @param x text to convert
     * @return the converted text
     */
    static String prepareForRegexpSpaces(String x) {
        return x.replaceAll("\\s+", "\\\\s+");
    }

    /**
     * Replaces every backslash followed by one of the characters
     * &lt; &gt; ` ~ \ ] + | } with a character class listing
     * &lt; &gt; ` ~ ] + | }.
     *
     * @param src text to convert
     * @return the converted text
     */
    static String withDots(String src) {
        // Used both as the class to look for and as the replacement text.
        final String dotClass = "[<>`~\\\\\\]+|}]";
        Pattern escapedDot = Pattern.compile("\\\\" + dotClass);
        return escapedDot.matcher(src).replaceAll(dotClass);
    }

    /**
     * Command-line entry point. With {@code -f <file>} it writes to standard
     * output a "Content-Type" header, an "Inuktitut-Percentage" header, an
     * empty line, and then the bytes of the PDF file. Any other first
     * argument is ignored.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("usage: NRC_PDFDocument -f <pdf-file>");
            return;
        }
        if (!args[0].equals("-f")) {
            return;
        }
        if (args.length < 2) {
            System.err.println("usage: NRC_PDFDocument -f <pdf-file>");
            return;
        }
        NRC_PDFDocument doc = null;
        InputStream in = null;
        PrintStream out = System.out;
        try {
            doc = new NRC_PDFDocument(args[1], "file");
            float percent = doc.getInuktitutPercentage();
            ((OutputStream)out).write("Content-Type: application/pdf\n".getBytes("UTF-8"));
            ((OutputStream)out).write(("Inuktitut-Percentage: " + String.valueOf(percent)).getBytes("UTF-8"));
            ((OutputStream)out).write("\n\n".getBytes("UTF-8"));
            in = new BufferedInputStream(new FileInputStream(new File(args[1])));
            // Copy in chunks rather than one byte at a time.
            byte[] buffer = new byte[4096];
            int count;
            while ((count = in.read(buffer)) != -1) {
                ((OutputStream)out).write(buffer, 0, count);
            }
            ((OutputStream)out).flush();
        }
        catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost.
            LOG.error((Object)("--NRC_PDFDocument--- Exception: " + e.getMessage()), e);
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done about a failed close of an input stream.
                }
            }
            if (doc != null) {
                doc.close();
            }
        }
    }

    /**
     * Placeholder: this implementation performs no work.
     *
     * @return always null
     */
    public Object[] transliterate() throws OutOfMemoryError, Exception {
        return null;
    }

    /**
     * Returns the reader that opened the document's URL.
     *
     * @return the reader, or null when the document was built from a file name
     */
    public WebPageReader getWpr() {
        return wpr;
    }

    /**
     * Tells whether Inuktitut text was found in the document, building the
     * page content first if that has not happened yet.
     *
     * @return true if at least one text element was recognised as Inuktitut
     */
    public boolean containsInuktitut() {
        if (pageContent == null) {
            getPageContent();
        }
        return containsInuktitut;
    }

    /**
     * Returns the ratio of Inuktitut characters to all characters in the
     * document, both counted without whitespace.
     *
     * @return the ratio, or 0 when the document contains no Inuktitut
     */
    public float getInuktitutPercentage() {
        if (!containsInuktitut()) {
            return 0.0f;
        }
        int inuktitutLength = inuktitutContent.replaceAll("\\s", "").length();
        int totalLength = totalContent.replaceAll("\\s", "").length();
        return (float)inuktitutLength / (float)totalLength;
    }
}

