/*
 * Decompiled with CFR 0.152.
 */
package documents;

import com.steadystate.css.dom.CSSStyleSheetImpl;
import documents.MySimpleHtmlRendererContext;
import documents.MySimpleUserAgentContext;
import documents.NRC_Document;
import documents.WebPageReader;
import ecriture.Syllabics;
import ecriture.TransCoder;
import html.BetweenTag;
import html.HTMLDocuElement;
import html.HtmlDocu;
import html.HtmlEntities;
import html.Tag;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.log4j.Logger;
import org.lobobrowser.html.HtmlRendererContext;
import org.lobobrowser.html.UserAgentContext;
import org.lobobrowser.html.domimpl.HTMLDocumentImpl;
import org.lobobrowser.html.domimpl.HTMLElementImpl;
import org.lobobrowser.html.domimpl.NodeImpl;
import org.lobobrowser.html.gui.HtmlPanel;
import org.lobobrowser.html.parser.DocumentBuilderImpl;
import org.lobobrowser.html.parser.InputSourceImpl;
import org.lobobrowser.html.style.CSS2PropertiesImpl;
import org.lobobrowser.html.test.SimpleBrowserFrame;
import org.w3c.css.sac.CSSException;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.css.CSSRule;
import org.w3c.dom.css.CSSRuleList;
import org.w3c.dom.html2.HTMLCollection;
import org.xml.sax.InputSource;
import polices.Police;
import polices.TextCat;

public class NRC_HTMLDocumentByCobra
implements NRC_Document {
    // Log4j logger for this class; assigned in the static initializer.
    private static Logger LOG;
    // Character encoding used to parse the page; set by the constructors / HTMLDocumentByCobra().
    public String encoding = null;
    // Returned by getDate(); not assigned anywhere in the visible part of this file.
    private Date date;
    // Cache for getBase(): the document's base URI, filled in on first request.
    private String baseName = null;
    // Returned by getContentType(); not assigned anywhere in the visible part of this file.
    private String contentType;
    // DOM produced by the Cobra parser.
    protected HTMLDocumentImpl document;
    // NOTE(review): not referenced in the visible part of this file.
    protected HTMLDocumentImpl documentUnicode;
    // URL (external form) of the page; also passed to the parser as the document URI.
    private String urlName;
    // Reader used to fetch the page; only set by the single-argument (URL name) constructor.
    private WebPageReader wpr;
    // Set to true by getNodeTextContent() when a text node holds Inuktitut text.
    private boolean containsInuktitut = false;
    // Accumulates legacy-font text after conversion to Unicode (see getNodeTextContent()).
    private String inuktitutContent = null;
    // Count of non-whitespace Inuktitut characters; -1 until getPageContent() has run.
    private long lengthOfInuktitutContent = -1L;
    // Count of non-whitespace characters of all counted text; -1 until getPageContent() has run.
    private long lengthOfTotalContent = -1L;
    // Legacy font name -> Integer count of non-whitespace characters rendered in that font.
    private Hashtable inuktitutLegacy = null;
    // Cache for getPageContent(): whitespace-normalised text of the page.
    private String pageContent = null;
    // Value of the HTTP Content-Type header (or one synthesised from a caller-supplied encoding).
    private String httpHeaderContentType;
    // Cache for getTitle().
    private String title;
    // Font name -> Integer number of text nodes seen with that font (see getNodeTextContent()).
    private Hashtable fonts = null;
    // Lower-cased name of the last non-text node visited by getNodeTextContent().
    private String previousElementName = "";
    // {encoding, manner}; manner is one of "httpheader", "meta", "bom" or "default".
    private String[] encodingAndManner;
    // Directory in which copyInCache() creates its temporary copies.
    public static File tmpDir;
    // Local file holding the page; deleted by close() when non-null.
    public File copyOfFile = null;
    // In-memory source of the page, used when there is no copyOfFile.
    public InputStream inputStream = null;
    // Number of leading bytes examined when sniffing the encoding (the decompiler inlined it as 2000).
    private static final int CHUNK_SIZE = 2000;
    // Regular expressions used for encoding detection; compiled in the static initializer.
    private static String httpContentTypePatternString;
    private static Pattern metaPattern;
    private static String charsetPatternString;
    private static Pattern charsetPattern;
    private static Pattern contentPattern;
    private static Pattern bomPattern;
    // Key built from the first (up to three) words of a text node -> legacy font name.
    public Hashtable transHash;
    // Captures the first one to three whitespace-separated words of a string.
    private static Pattern pfw2;
    // Compiler-synthesised cache for the class literal (decompiler artefact).
    static /* synthetic */ Class class$0;

    // Static initialisation: logger, temp directory and the regular expressions
    // used for encoding detection. The class$0 dance is how pre-Java-5 compilers
    // expanded the class literal NRC_HTMLDocumentByCobra.class.
    static {
        Class<?> clazz = class$0;
        if (clazz == null) {
            try {
                clazz = class$0 = Class.forName("documents.NRC_HTMLDocumentByCobra");
            }
            catch (ClassNotFoundException classNotFoundException) {
                throw new NoClassDefFoundError(classNotFoundException.getMessage());
            }
        }
        LOG = Logger.getLogger((Class)clazz);
        // "<root>/tmp" on the current platform.
        tmpDir = new File(File.separator, "tmp");
        httpContentTypePatternString = "http-equiv=\"?content-type\"?";
        // Flag 2 is Pattern.CASE_INSENSITIVE. Group 1 is the attribute text of the <meta> tag.
        metaPattern = Pattern.compile("<meta\\s+([^>]*" + httpContentTypePatternString + "[^>]*)>", 2);
        // Group 1 is the "charset=" prefix (possibly repeated); group 2 is the encoding name.
        charsetPatternString = "(charset=\\s*)+([a-z][_\\-0-9a-z]*)";
        charsetPattern = Pattern.compile(charsetPatternString, 2);
        contentPattern = Pattern.compile("content=\"?([^\"]+)\"?", 2);
        // The three UTF-8 byte-order-mark bytes as they appear when decoded one byte per char.
        bomPattern = Pattern.compile("\u00ef\u00bb\u00bf");
        // Groups 1, 3 and 5 are the first three words; groups 2 and 4 include the preceding whitespace.
        pfw2 = Pattern.compile("(\\S+)(\\s+(\\S+))?(\\s+(\\S+))?");
    }

    /**
     * Guesses the character encoding of an HTML page from its first bytes.
     *
     * <p>At most the first 2000 bytes are examined. A {@code <meta http-equiv="content-type">}
     * tag is looked for first; when such a tag exists only its {@code charset=} value is
     * considered. When there is no such tag, a UTF-8 byte order mark is looked for.
     *
     * @param content raw bytes of the page (must not be {@code null})
     * @return {@code {encoding, source}} where source is {@code "meta"} or {@code "bom"},
     *         or {@code null} when nothing could be determined
     */
    private static String[] sniffCharacterEncoding(byte[] content) {
        int length = Math.min(content.length, 2000);
        // ISO-8859-1 maps every byte to the char with the same value, which is what the
        // patterns (and the BOM pattern in particular) expect.
        String head = new String(content, 0, length, java.nio.charset.Charset.forName("ISO-8859-1"));
        Matcher metaMatcher = metaPattern.matcher(head);
        if (metaMatcher.find()) {
            Matcher charsetMatcher = charsetPattern.matcher(metaMatcher.group(1));
            if (charsetMatcher.find()) {
                // Group 1 of charsetPattern is the literal "charset=" prefix;
                // the encoding name itself is captured by group 2.
                return new String[]{charsetMatcher.group(2), "meta"};
            }
            return null;
        }
        if (bomPattern.matcher(head).find()) {
            return new String[]{"utf-8", "bom"};
        }
        return null;
    }

    /**
     * Creates an empty instance: no document is parsed and no encoding is determined.
     */
    public NRC_HTMLDocumentByCobra() {
    }

    /**
     * Parses an HTML page held in memory.
     *
     * @param content raw bytes of the page
     * @param url     URL of the page; its external form is used as the document URI
     * @param enc     encoding to use; when {@code null} or empty the encoding is sniffed
     *                from the first 2000 bytes and falls back to {@code iso-8859-1}
     * @throws Exception if the page cannot be parsed
     */
    public NRC_HTMLDocumentByCobra(byte[] content, URL url, String enc) throws Exception {
        ByteArrayInputStream byteStream = new ByteArrayInputStream(content);
        this.inputStream = byteStream;
        MySimpleUserAgentContext agent = new MySimpleUserAgentContext();
        MySimpleHtmlRendererContext renderer = new MySimpleHtmlRendererContext((HtmlPanel)new SimpleBrowserFrame(null), (UserAgentContext)agent);
        DocumentBuilderImpl builder = new DocumentBuilderImpl((UserAgentContext)agent, (HtmlRendererContext)renderer);
        boolean encodingSupplied = enc != null && !enc.equals("");
        if (encodingSupplied) {
            this.encodingAndManner = new String[]{enc, "httpheader"};
        } else {
            String[] sniffed = NRC_HTMLDocumentByCobra.sniffCharacterEncoding(Arrays.copyOf(content, 2000));
            this.encodingAndManner = sniffed;
            if (sniffed == null) {
                LOG.info((Object)"---NRC_HTMLDocumentByCobra--- sniffed encoding: null");
                this.encodingAndManner = new String[]{"iso-8859-1", "default"};
            } else {
                LOG.info((Object)("---NRC_HTMLDocumentByCobra--- sniffed encoding: '" + sniffed[0] + "' (" + sniffed[1] + ")"));
            }
        }
        this.urlName = url.toExternalForm();
        LOG.info((Object)("---NRC_HTMLDocumentByCobra--- url: '" + this.urlName + "'"));
        InputSourceImpl source = new InputSourceImpl((InputStream)byteStream, this.urlName, this.encodingAndManner[0]);
        this.document = (HTMLDocumentImpl)builder.parse((InputSource)source);
        this.checkForCSSPseudoRules();
        LOG.info((Object)("---NRC_HTMLDocumentByCobra--- base: '" + this.document.getBaseURI() + "'"));
        LOG.info((Object)("---NRC_HTMLDocumentByCobra--- number of links: '" + this.document.getLinks().getLength() + "'"));
        LOG.info((Object)("---NRC_HTMLDocumentByCobra--- text: '" + this.document.getTextContent() + "'"));
        this.encoding = this.encodingAndManner[0];
        // Rewind so the bytes can be read again through this.inputStream.
        byteStream.reset();
    }

    /**
     * Parses an HTML page given as a string.
     *
     * <p>The string is encoded to UTF-8 bytes and parsed as UTF-8; the {@code enc}
     * argument is not used.
     *
     * @param content text of the page
     * @param url     URL of the page
     * @param enc     ignored
     * @throws Exception if the page cannot be parsed
     */
    public NRC_HTMLDocumentByCobra(String content, URL url, String enc) throws Exception {
        this(content.getBytes("utf-8"), url, "utf-8");
    }

    public NRC_HTMLDocumentByCobra(String file, String urlName, String enc) throws Exception {
        this.urlName = urlName;
        if (enc != null && !enc.equals("")) {
            this.httpHeaderContentType = "text/html; charset=" + enc;
        }
        this.copyOfFile = new File(file);
        this.HTMLDocumentByCobra();
    }

    /**
     * Fetches the page at the given URL into a temporary local copy and parses it.
     *
     * @param urlName URL of the page
     * @throws Exception if the page cannot be fetched or parsed
     */
    public NRC_HTMLDocumentByCobra(String urlName) throws Exception {
        this.urlName = urlName;
        this.document = null;
        WebPageReader reader = new WebPageReader(urlName);
        this.wpr = reader;
        URL resolved = reader.url;
        this.copyOfFile = this.copyInCache(resolved);
        this.httpHeaderContentType = reader.contentType;
        this.HTMLDocumentByCobra();
    }

    /**
     * Determines the encoding of {@code copyOfFile} and parses it into {@code document}.
     *
     * <p>The encoding comes, in order of preference, from the HTTP Content-Type header,
     * from a {@code <meta http-equiv="content-type">} tag, from a UTF-8 byte order mark,
     * or defaults to {@code iso-8859-1}.
     *
     * @throws Exception if the content type is not {@code text/html} or if reading or
     *                   parsing fails; the original exception is attached as the cause
     */
    void HTMLDocumentByCobra() throws Exception {
        try {
            if (this.httpHeaderContentType != null) {
                this.httpHeaderContentType = this.httpHeaderContentType.toLowerCase();
                Pattern httpPat = Pattern.compile("text/html\\s*(;\\s*charset\\s*=\\s*(.+))?", 2);
                Matcher httpMat = httpPat.matcher(this.httpHeaderContentType);
                if (!httpMat.find()) {
                    throw new Exception("not html");
                }
                if (httpMat.group(2) != null) {
                    // (.+) also captures trailing blanks; they are not part of the name.
                    this.encoding = httpMat.group(2).trim();
                    this.encodingAndManner = new String[]{this.encoding, "httpheader"};
                }
            }
            if (this.encoding == null) {
                byte[] fisBytes = new byte[(int)this.copyOfFile.length()];
                FileInputStream fis = new FileInputStream(this.copyOfFile);
                try {
                    // A single read() may return fewer bytes than requested; loop until full.
                    int filled = 0;
                    while (filled < fisBytes.length) {
                        int n = fis.read(fisBytes, filled, fisBytes.length - filled);
                        if (n == -1) {
                            break;
                        }
                        filled += n;
                    }
                }
                finally {
                    fis.close();
                }
                String fisContents = new String(fisBytes, "iso-8859-1");
                fisContents = fisContents.replaceAll("\\s+", " ");
                Pattern metaPat = Pattern.compile("<meta http-equiv ?= ?\"content-type\" content ?= ?\".+?charset ?= ?([^=\"]+)\"", 2);
                Matcher metaMat = metaPat.matcher(fisContents);
                if (metaMat.find()) {
                    this.encodingAndManner = new String[]{metaMat.group(1), "meta"};
                } else if (fisContents.contains("\u00ef\u00bb\u00bf")) {
                    this.encodingAndManner = new String[]{"utf-8", "bom"};
                } else {
                    this.encodingAndManner = new String[]{"iso-8859-1", "default"};
                }
                this.encoding = this.encodingAndManner[0];
            }
            LOG.debug((Object)("encoding = " + this.encoding));
            FileInputStream is = new FileInputStream(this.copyOfFile);
            try {
                MySimpleUserAgentContext userAgentContext = new MySimpleUserAgentContext();
                MySimpleHtmlRendererContext htmlRendererContext = new MySimpleHtmlRendererContext((HtmlPanel)new SimpleBrowserFrame(null), (UserAgentContext)userAgentContext);
                DocumentBuilderImpl dbi = new DocumentBuilderImpl((UserAgentContext)userAgentContext, (HtmlRendererContext)htmlRendererContext);
                this.document = (HTMLDocumentImpl)dbi.parse((InputSource)new InputSourceImpl((InputStream)is, this.urlName, this.encoding));
                this.checkForCSSPseudoRules();
            }
            finally {
                // The DOM is used synchronously right after parse(), so the stream is no longer needed.
                is.close();
            }
        }
        catch (CSSException cssProblem) {
            // Style-sheet problems are tolerated on purpose, but no longer silently.
            LOG.warn((Object)("CSS problem ignored while parsing '" + this.urlName + "': " + cssProblem.getMessage()));
        }
        catch (Exception e) {
            String message = "From HTMLDocumentByCobra() --- " + e.getClass().getName() + ": " + e.getMessage();
            LOG.error((Object)message, e);
            throw new Exception(message, e);
        }
    }

    /**
     * Repairs badly nested HTML comments in a file and writes the result next to it.
     *
     * <p>The file is read with the given encoding, passed through
     * {@link #checkForBadComments(String)} and written to {@code fileName + ".out"}.
     * Line terminators are replaced by the platform line separator.
     *
     * @param fileName path of the file to read
     * @param encoding charset name used for reading and writing
     * @return the name of the file written, or {@code null} when reading or writing
     *         failed (failures are deliberately not propagated)
     */
    public static String checkForBadComments(String fileName, String encoding) {
        File f = new File(fileName);
        String newFileName = null;
        FileInputStream rawIn = null;
        FileOutputStream rawOut = null;
        try {
            rawIn = new FileInputStream(f);
            BufferedReader br = new BufferedReader(new InputStreamReader((InputStream)rawIn, encoding));
            String eol = System.getProperty("line.separator");
            StringBuilder original = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                original.append(line).append(eol);
            }
            br.close();
            String content = NRC_HTMLDocumentByCobra.checkForBadComments(original.toString());
            String outName = String.valueOf(fileName) + ".out";
            rawOut = new FileOutputStream(new File(outName));
            OutputStreamWriter osw = new OutputStreamWriter((OutputStream)rawOut, encoding);
            osw.write(content, 0, content.length());
            osw.close();
            // Only report the output file once it has been written completely.
            newFileName = outName;
        }
        catch (IOException ignored) {
            // Best effort: unsupported encoding, missing file and I/O errors all yield null.
        }
        finally {
            // Closing twice is harmless; this covers every early exit above.
            if (rawIn != null) {
                try {
                    rawIn.close();
                }
                catch (IOException ignored) {
                    // nothing useful can be done here
                }
            }
            if (rawOut != null) {
                try {
                    rawOut.close();
                }
                catch (IOException ignored) {
                    // nothing useful can be done here
                }
            }
        }
        return newFileName;
    }

    /**
     * Repairs HTML comments that are opened again before being closed.
     *
     * <p>When the number of {@code <!--} and {@code -->} delimiters is equal the content
     * is returned unchanged. Otherwise a {@code -->} is inserted in front of every
     * {@code <!--} whose preceding delimiter is also a {@code <!--}. Stray closing
     * delimiters and a final unclosed comment are left as they are.
     *
     * <p>Any number of consecutive opening delimiters is handled; the earlier
     * position-based implementation threw on three or more in a row.
     *
     * @param content HTML text (must not be {@code null})
     * @return the repaired text, or {@code content} itself when nothing had to be done
     */
    public static String checkForBadComments(String content) {
        Matcher delimiters = Pattern.compile("(<!--|-->)").matcher(content);
        StringBuilder repaired = new StringBuilder(content.length() + 16);
        int opens = 0;
        int closes = 0;
        int copiedUpTo = 0;
        boolean previousWasOpen = false;
        while (delimiters.find()) {
            boolean isOpen = delimiters.group().equals("<!--");
            if (isOpen) {
                ++opens;
                if (previousWasOpen) {
                    // Close the still-open comment right before this new one starts.
                    repaired.append(content, copiedUpTo, delimiters.start()).append("-->");
                    copiedUpTo = delimiters.start();
                }
            } else {
                ++closes;
            }
            previousWasOpen = isOpen;
        }
        if (opens == closes) {
            return content;
        }
        repaired.append(content, copiedUpTo, content.length());
        return repaired.toString();
    }

    /**
     * For CSS rules whose selector ends in {@code :link}, {@code :active},
     * {@code :visited} or {@code :hover}, inserts a copy of the rule without the
     * pseudo-class in front of it. Only the first rule found for a given base
     * selector (across all style sheets) is copied.
     */
    private void checkForCSSPseudoRules() {
        Collection sheets = this.document.getStyleSheets();
        Pattern pseudoRule = Pattern.compile("^([^:]+?):(link|active|visited|hover)\\s+(\\{.+)$");
        HashSet<String> handledSelectors = new HashSet<String>();
        for (Iterator sheetIt = sheets.iterator(); sheetIt.hasNext();) {
            CSSStyleSheetImpl sheet = (CSSStyleSheetImpl)sheetIt.next();
            CSSRuleList rules = sheet.getCssRules();
            for (int index = 0; index < rules.getLength(); ++index) {
                Matcher m = pseudoRule.matcher(rules.item(index).getCssText());
                if (!m.find()) {
                    continue;
                }
                String selector = m.group(1);
                String plainRule = String.valueOf(selector) + " " + m.group(3);
                if (handledSelectors.add(selector)) {
                    sheet.insertRule(plainRule, index);
                    // The pseudo rule has moved one slot down; step over it.
                    ++index;
                }
            }
        }
    }

    /**
     * @return {@code true} when the parsed document has a children array
     */
    public boolean hasContents() {
        NodeImpl[] children = this.document.getChildrenArray();
        return children != null;
    }

    /**
     * Prints the node tree of the document to standard output, one node per line.
     */
    public void displayNodes() {
        this.displayNodes(this.document.getChildrenArray(), "");
    }

    /**
     * Prints the given nodes and, recursively, their children.
     *
     * @param nodes nodes to print; {@code null} prints nothing
     * @param tabs  indentation prefix for this level
     */
    private void displayNodes(NodeImpl[] nodes, String tabs) {
        if (nodes == null) {
            return;
        }
        String childIndent = String.valueOf(tabs) + "    ";
        for (NodeImpl current : nodes) {
            short type = current.getNodeType();
            // 3 = text node, 8 = comment node: only these show their value.
            String text = (type == 3 || type == 8) ? current.getNodeValue() : "";
            System.out.println(String.valueOf(tabs) + current.getNodeName() + "  [" + text + "]");
            this.displayNodes(current.getChildrenArray(), childIndent);
        }
    }

    /**
     * Closes the parsed document and deletes the local copy of the page, if any.
     *
     * <p>Safe to call when no document was parsed (for example after the no-argument
     * constructor): the local copy is still removed.
     */
    public void close() {
        if (this.document != null) {
            this.document.close();
        }
        if (this.copyOfFile != null) {
            this.copyOfFile.delete();
        }
    }

    /**
     * Tells whether the page contains Inuktitut text.
     *
     * <p>The page is scanned on first use; the flag itself is set by the scan
     * (see {@code getNodeTextContent}). Once a scan has happened its result is
     * returned unchanged — the flag is not forced to {@code true}.
     *
     * @return {@code true} when Inuktitut text was found
     */
    public boolean containsInuktitut() {
        // -1 means getPageContent() has not yet scanned the document.
        if (this.lengthOfInuktitutContent == -1L) {
            this.getPageContent();
        }
        return this.containsInuktitut;
    }

    /**
     * Returns the share of Inuktitut text in the page.
     *
     * @return Inuktitut characters divided by all counted characters (whitespace
     *         excluded), a value between 0 and 1; {@code 0} when the page has no
     *         Inuktitut or no text at all
     */
    public float getInuktitutPercentage() {
        // Guard the divisor so an empty page yields 0 rather than NaN.
        if (this.containsInuktitut() && this.lengthOfTotalContent > 0L) {
            return (float)this.lengthOfInuktitutContent / (float)this.lengthOfTotalContent;
        }
        return 0.0f;
    }

    /**
     * Returns the names of all fonts met while scanning the page, scanning it
     * first if that has not happened yet.
     *
     * @return the keys of the font table
     */
    public Object[] getAllFonts() {
        if (this.fonts == null) {
            this.getPageContent();
        }
        return this.fonts.keySet().toArray();
    }

    /**
     * Same as {@link #getAllFonts()} but typed as strings.
     *
     * @return the font names
     */
    public String[] getAllFontsNames() {
        Object[] keys = this.getAllFonts();
        String[] names = new String[keys.length];
        for (int idx = 0; idx < keys.length; ++idx) {
            names[idx] = (String)keys[idx];
        }
        return names;
    }

    /**
     * @return the value of the {@code contentType} field
     */
    public String getContentType() {
        return contentType;
    }

    /**
     * Returns the base URI of the document; the value is fetched once and cached.
     *
     * @return the base URI
     */
    public String getBase() {
        String cached = this.baseName;
        if (cached == null) {
            cached = this.document.getBaseURI();
            this.baseName = cached;
        }
        return cached;
    }

    /**
     * @return the value of the {@code date} field
     */
    public Date getDate() {
        return date;
    }

    /**
     * Returns those fonts of the page that {@code Police.isLegacy} recognises.
     *
     * @return the legacy font names, in the order of {@link #getAllFontsNames()}
     */
    public String[] getInuktitutFonts() {
        Vector<String> legacy = new Vector<String>();
        for (String name : this.getAllFontsNames()) {
            if (Police.isLegacy((String)name)) {
                legacy.add(name);
            }
        }
        return legacy.toArray(new String[0]);
    }

    /**
     * Returns the known legacy fonts used in the page together with the share of
     * the page text rendered in each.
     *
     * <p>A font of the page is attributed to the entry of {@code Police.polices}
     * that its lower-cased name starts with. The share is the character count
     * recorded for that entry divided by the length of the page content without
     * whitespace. An entry with no recorded text, or an empty page, yields 0.
     *
     * @return one row per font: {@code [0]} the name (String), {@code [1]} the share (Float)
     */
    public Object[][] getInuktitutFontsAndPercentages() {
        // Called for its side effect: makes sure the page has been scanned.
        this.getAllFontsNames();
        String[] allFonts = (String[])this.fonts.keySet().toArray(new String[0]);
        Vector<String> fs = new Vector<String>();
        for (int i = 0; i < allFonts.length; ++i) {
            String lowered = allFonts[i].toLowerCase();
            for (int j = 0; j < Police.polices.length; ++j) {
                if (lowered.startsWith(Police.polices[j]) && !fs.contains(Police.polices[j])) {
                    fs.add(Police.polices[j]);
                }
            }
        }
        // Loop-invariant: computed once, and only when there is something to report.
        int totalLength = fs.isEmpty() ? 0 : this.pageContent.replaceAll("\\s", "").length();
        Object[][] res = new Object[fs.size()][2];
        for (int k = 0; k < fs.size(); ++k) {
            String p = fs.get(k);
            // A font may be listed without any counted text (e.g. whitespace-only nodes).
            Integer n = (Integer)this.inuktitutLegacy.get(p);
            int count = n == null ? 0 : n.intValue();
            float pcn = totalLength == 0 ? 0.0f : (float)count / (float)totalLength;
            res[k][0] = p;
            res[k][1] = Float.valueOf(pcn);
        }
        return res;
    }

    /**
     * Returns the text of the page with runs of whitespace collapsed to one space.
     *
     * <p>The first call walks the document, resetting and then filling the font
     * tables and the Inuktitut counters; later calls return the cached text.
     *
     * @return the page text
     */
    public String getPageContent() {
        if (this.pageContent == null) {
            NodeImpl[] topLevel = this.document.getChildrenArray();
            this.fonts = new Hashtable();
            this.inuktitutLegacy = new Hashtable();
            this.inuktitutContent = "";
            this.previousElementName = "";
            this.lengthOfTotalContent = 0L;
            this.lengthOfInuktitutContent = 0L;
            String collapsed = this.getNodeTextContent(topLevel).trim().replaceAll("\\s+", " ");
            this.pageContent = new String(collapsed);
            return collapsed;
        }
        return this.pageContent;
    }

    /**
     * Not implemented for this document type.
     *
     * @return always {@code null}
     */
    public String getPreferredFont() {
        return null;
    }

    /**
     * Returns the text of the first {@code <title>} element; the value is cached.
     *
     * @return the title, or {@code null} when the page has no title element or the
     *         element is empty
     */
    public String getTitle() {
        if (this.title == null) {
            NodeList titleNodes = this.document.getElementsByTagName("title");
            // item(0) is null when the list is empty, i.e. the page has no <title>.
            Node titleNode = titleNodes != null ? titleNodes.item(0) : null;
            Node titleText = titleNode != null ? titleNode.getFirstChild() : null;
            if (titleText != null) {
                this.title = titleText.getNodeValue();
            }
        }
        return this.title;
    }

    /**
     * @return the URL of the page as a string
     */
    public String getUrlName() {
        return urlName;
    }

    /**
     * @return the reader used to fetch the page, or {@code null} when the page was
     *         not fetched by this object
     */
    public WebPageReader getWpr() {
        return wpr;
    }

    /**
     * Not implemented for this document type.
     *
     * @param x ignored
     * @return always {@code null}
     */
    public Object[] highlight(String[] x) throws Exception {
        return null;
    }

    /**
     * Transliterates the legacy-font text of the page and writes the result to a file.
     *
     * <p>The page is re-read from {@code copyOfFile} or, when there is none, from
     * {@code inputStream} — the same fallback the {@code toUnicode} methods use.
     *
     * @return {@code {encoding, file}}: the encoding of the page (String) and the
     *         file holding the transliterated page (File)
     * @throws Exception if the page cannot be read or processed
     */
    public Object[] transliterate() throws OutOfMemoryError, Exception {
        this.transHash = new Hashtable();
        this.traiterDocPourTranslit();
        HtmlDocu doc2 = this.copyOfFile != null ? new HtmlDocu(this.copyOfFile, this.encoding) : new HtmlDocu(this.inputStream, this.encoding);
        if (this.document.getElementsByTagName("base").getLength() == 0) {
            // Keep relative links working once the page is served from elsewhere.
            doc2.insertBase(this.getBase());
        }
        File fout2 = this.traiterDocPourTranslit2(doc2, this.encoding);
        return new Object[]{this.encoding, fout2};
    }

    /**
     * Writes the page to {@code out} with its legacy-font text converted, using the
     * {@code "latin"} target of {@code traiterDocPourTranslit3}.
     *
     * <p>The page is re-read from {@code copyOfFile} or, when there is none, from
     * {@code inputStream}. The intermediate file is always closed and deleted,
     * also when writing to {@code out} fails.
     *
     * @param out destination; flushed but not closed
     * @throws Exception if the page cannot be read, processed or written
     */
    public void toRoman(OutputStream out) throws OutOfMemoryError, Exception {
        this.transHash = new Hashtable();
        this.traiterDocPourTranslit();
        HtmlDocu doc2 = this.copyOfFile != null ? new HtmlDocu(this.copyOfFile, this.encoding) : new HtmlDocu(this.inputStream, this.encoding);
        if (this.document.getElementsByTagName("base").getLength() == 0) {
            doc2.insertBase(this.getBase());
        }
        File fout2 = this.traiterDocPourTranslit3(doc2, this.encoding, "latin");
        FileInputStream fr = new FileInputStream(fout2);
        try {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = fr.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
        }
        finally {
            try {
                fr.close();
            }
            finally {
                fout2.delete();
            }
        }
        out.flush();
    }

    /**
     * Writes the page, converted to UTF-8, to {@code out}, preceded by a small
     * header block.
     *
     * <p>The output starts with a {@code Content-Type} line, an
     * {@code Inuktitut-Percentage} line, an empty line and the marker
     * {@code <!--END OF HTTP HEADERS-->}; the converted page follows. The
     * intermediate file is always closed and deleted, also when writing fails.
     *
     * @param out destination; flushed but not closed
     * @throws Exception if the page cannot be read, processed or written
     */
    public void toUnicode(OutputStream out) throws OutOfMemoryError, Exception {
        this.transHash = new Hashtable();
        this.traiterDocPourTranslit();
        HtmlDocu doc2 = this.copyOfFile != null ? new HtmlDocu(this.copyOfFile, this.encoding) : new HtmlDocu(this.inputStream, this.encoding);
        if (this.document.getElementsByTagName("base").getLength() == 0) {
            doc2.insertBase(this.getBase());
        }
        File fout2 = this.traiterDocPourTranslit3(doc2, this.encoding, "utf-8");
        try {
            float percent = this.getInuktitutPercentage();
            // The header announces utf-8, so encode it explicitly rather than with the platform default.
            out.write("Content-Type: text/html; charset=utf-8\n".getBytes("utf-8"));
            out.write(("Inuktitut-Percentage: " + String.valueOf(percent)).getBytes("utf-8"));
            out.write("\n\n".getBytes("utf-8"));
            out.write("<!--END OF HTTP HEADERS-->".getBytes("utf-8"));
            FileInputStream fr = new FileInputStream(fout2);
            try {
                byte[] buffer = new byte[8192];
                int count;
                while ((count = fr.read(buffer)) != -1) {
                    out.write(buffer, 0, count);
                }
            }
            finally {
                fr.close();
            }
        }
        finally {
            fout2.delete();
        }
        out.flush();
    }

    /**
     * Writes the page, converted to UTF-8 by {@code traiterDocPourTranslit3_2},
     * to {@code out}.
     *
     * <p>The intermediate file is always closed and deleted, also when writing fails.
     *
     * @param out      destination; flushed but not closed
     * @param aipaitai passed through to {@code traiterDocPourTranslit3_2}
     * @param fontName passed through to {@code traiterDocPourTranslit3_2}
     * @throws Exception if the page cannot be read, processed or written
     */
    public void toUnicode2(OutputStream out, boolean aipaitai, String fontName) throws OutOfMemoryError, Exception {
        this.transHash = new Hashtable();
        this.traiterDocPourTranslit();
        HtmlDocu doc2 = this.copyOfFile != null ? new HtmlDocu(this.copyOfFile, this.encoding) : new HtmlDocu(this.inputStream, this.encoding);
        if (this.document.getElementsByTagName("base").getLength() == 0) {
            doc2.insertBase(this.getBase());
        }
        File fout2 = this.traiterDocPourTranslit3_2(doc2, this.encoding, "utf-8", aipaitai, fontName);
        FileInputStream fr = new FileInputStream(fout2);
        try {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = fr.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
        }
        finally {
            try {
                fr.close();
            }
            finally {
                fout2.delete();
            }
        }
        out.flush();
    }

    /**
     * Writes the page, converted to UTF-8 by {@code traiterDocPourTranslit3_3},
     * to {@code out}; a link to the style sheet {@code /tcihtml.css} is inserted first.
     *
     * <p>The intermediate file is always closed and deleted, also when writing fails.
     *
     * @param out      destination; flushed but not closed
     * @param aipaitai passed through to {@code traiterDocPourTranslit3_3}
     * @throws Exception if the page cannot be read, processed or written
     */
    public void toUnicode3(OutputStream out, boolean aipaitai) throws OutOfMemoryError, Exception {
        this.transHash = new Hashtable();
        this.traiterDocPourTranslit();
        HtmlDocu doc2 = this.copyOfFile != null ? new HtmlDocu(this.copyOfFile, this.encoding) : new HtmlDocu(this.inputStream, this.encoding);
        if (this.document.getElementsByTagName("base").getLength() == 0) {
            doc2.insertBase(this.getBase());
        }
        doc2.insertLinkCSS("/tcihtml.css");
        File fout2 = this.traiterDocPourTranslit3_3(doc2, this.encoding, "utf-8", aipaitai);
        FileInputStream fr = new FileInputStream(fout2);
        try {
            byte[] buffer = new byte[8192];
            int count;
            while ((count = fr.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
        }
        finally {
            try {
                fr.close();
            }
            finally {
                fout2.delete();
            }
        }
        out.flush();
    }

    /**
     * Copies the contents of a file to an output stream.
     *
     * <p>Errors are not propagated: a missing file or an I/O failure prints a stack
     * trace and ends the copy. The file is closed in every case.
     *
     * @param file file to read
     * @param out  destination; flushed after a successful copy, never closed
     */
    public static void dumpFile(File file, OutputStream out) {
        FileInputStream fr = null;
        try {
            fr = new FileInputStream(file);
            byte[] buffer = new byte[8192];
            int count;
            while ((count = fr.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            out.flush();
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        finally {
            if (fr != null) {
                try {
                    fr.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * @return the encoding determined for the page (first element of {@code encodingAndManner})
     */
    public String getEncoding() {
        String[] pair = this.encodingAndManner;
        return pair[0];
    }

    /**
     * @return how the encoding was determined (second element of {@code encodingAndManner}):
     *         {@code "httpheader"}, {@code "meta"}, {@code "bom"} or {@code "default"}
     */
    public String getEncodingManner() {
        String[] pair = this.encodingAndManner;
        return pair[1];
    }

    /**
     * @return the parsed DOM of the page
     */
    public HTMLDocumentImpl getDocument() {
        return document;
    }

    /**
     * Determines the encoding of the page from, in this order, the HTTP
     * Content-Type header, a {@code <meta http-equiv="content-type">} element of
     * the parsed document, or a UTF-8 byte order mark in its first text node.
     *
     * <p>A missing header, a {@code <meta>} element without a {@code content}
     * attribute and a document without children are all tolerated.
     *
     * @return {@code {encoding, manner}} with manner {@code "httpheader"},
     *         {@code "meta"} or {@code "bom"}, or {@code null} when nothing was found
     */
    String[] determineDocEncoding() {
        Pattern p = Pattern.compile("charset=(.+)");
        if (this.httpHeaderContentType != null) {
            Matcher headerMatcher = p.matcher(this.httpHeaderContentType);
            if (headerMatcher.find()) {
                return new String[]{headerMatcher.group(1), "httpheader"};
            }
        }
        if (this.document != null) {
            NodeList metaElements = this.document.getElementsByTagName("meta");
            for (int i = 0; i < metaElements.getLength(); ++i) {
                NamedNodeMap attributes = metaElements.item(i).getAttributes();
                Node httpEquiv = attributes.getNamedItem("http-equiv");
                if (httpEquiv == null || !httpEquiv.getNodeValue().toLowerCase().equals("content-type")) {
                    continue;
                }
                Node contentAttr = attributes.getNamedItem("content");
                if (contentAttr == null) {
                    continue;
                }
                Matcher metaMatcher = p.matcher(contentAttr.getNodeValue().toLowerCase());
                if (metaMatcher.find()) {
                    return new String[]{metaMatcher.group(1), "meta"};
                }
            }
            NodeImpl[] children = this.document.getChildrenArray();
            if (children != null && children.length > 0) {
                NodeImpl firstNode = children[0];
                // 3 = text node; the BOM shows up as these three chars when decoded byte-per-char.
                if (firstNode.getNodeType() == 3 && firstNode.getTextContent().equals("\u00ef\u00bb\u00bf")) {
                    return new String[]{"utf-8", "bom"};
                }
            }
        }
        return null;
    }

    /**
     * Concatenates the text content of the given nodes, in order.
     *
     * @param nodes nodes to read; {@code null} yields the empty string
     * @return the concatenated text
     */
    String getNodeTextContent(NodeImpl[] nodes) {
        if (nodes == null) {
            return "";
        }
        // A builder keeps this linear; repeated String.concat copies the text on every node.
        StringBuilder wholeText = new StringBuilder();
        for (int i = 0; i < nodes.length; ++i) {
            wholeText.append(this.getNodeTextContent(nodes[i]));
        }
        return wholeText.toString();
    }

    /**
     * Returns the text of a node and updates the page statistics on the way.
     *
     * <p>For a text node the fonts in effect are counted in {@code fonts}, the
     * character counters are updated, and text in a legacy Inuktitut font is
     * converted to Unicode. Text directly under {@code script}, {@code style},
     * {@code title} or the document itself yields the empty string. For any other
     * node the name is remembered in {@code previousElementName} and the text of
     * its children is returned.
     *
     * @param node node to read
     * @return the (possibly converted) text, prefixed with a space when the last
     *         element visited is one that {@code Tag.isTagThatAddsSpace} accepts
     */
    String getNodeTextContent(NodeImpl node) {
        String text = null;
        // 3 = text node
        if (node.getNodeType() == 3) {
            text = node.getTextContent();
            NodeImpl parentNode = (NodeImpl)node.getParentNode();
            String parentName = parentNode.getNodeName().toLowerCase();
            if (parentName.equals("script") || parentName.equals("style") || parentName.equals("#document") || parentName.equals("title")) {
                return "";
            }
            // Count every font in effect for this text node (whitespace-only nodes included).
            String[] elementFonts = NRC_HTMLDocumentByCobra.getFonts(node);
            if (elementFonts != null) {
                int ifs = 0;
                while (ifs < elementFonts.length) {
                    if (!this.fonts.containsKey(elementFonts[ifs])) {
                        this.fonts.put(elementFonts[ifs], new Integer(1));
                    } else {
                        this.fonts.put(elementFonts[ifs], new Integer((Integer)this.fonts.get(elementFonts[ifs]) + 1));
                    }
                    ++ifs;
                }
            }
            String font = null;
            // Only text with at least one non-whitespace character is counted.
            if (!text.replaceAll("\\s", "").equals("")) {
                if (Syllabics.containsInuktitut((String)text)) {
                    // Text recognised as Inuktitut as it stands: counts fully as Inuktitut.
                    this.containsInuktitut = true;
                    this.lengthOfInuktitutContent += (long)text.replaceAll("\\s", "").length();
                    this.lengthOfTotalContent += (long)text.replaceAll("\\s", "").length();
                } else {
                    font = Police.containsLegacyFont((String[])elementFonts);
                    if (font != null) {
                        // Legacy-font text: convert first, then count the converted length.
                        this.containsInuktitut = true;
                        text = TransCoder.legacyToUnicode((String)text, (String)font);
                        this.lengthOfInuktitutContent += (long)text.replaceAll("\\s", "").length();
                        this.lengthOfTotalContent += (long)text.replaceAll("\\s", "").length();
                        // Per-font running total of converted characters.
                        Object n = this.inuktitutLegacy.get(font);
                        int ni = 0;
                        if (n != null) {
                            ni = (Integer)n;
                        }
                        this.inuktitutLegacy.put(font, new Integer(ni += text.replaceAll("\\s", "").length()));
                        this.inuktitutContent = String.valueOf(this.inuktitutContent) + text;
                    } else {
                        // Any other text only adds to the total.
                        this.lengthOfTotalContent += (long)text.replaceAll("\\s", "").length();
                    }
                }
            }
            if (Tag.isTagThatAddsSpace((String)this.previousElementName)) {
                text = " ".concat(text);
            }
            return text;
        }
        // Not a text node: remember its name and descend into its children.
        this.previousElementName = node.getNodeName().toLowerCase();
        return this.getNodeTextContent(node.getChildrenArray());
    }

    /**
     * Downloads the resource at {@code url} into a new temporary file in {@code tmpDir}.
     *
     * <p>Both streams are closed in every case, and the temporary file is removed
     * again when the download fails.
     *
     * @param url resource to copy
     * @return the temporary file holding the copy
     * @throws IOException if the file cannot be created or the resource cannot be
     *                     read; the original exception is attached as the cause
     */
    private File copyInCache(URL url) throws IOException {
        String prefix = "copy_" + String.valueOf(Math.random());
        File of;
        try {
            of = File.createTempFile(prefix, ".html", tmpDir);
        }
        catch (IOException ioe1) {
            IOException wrapped = new IOException("From copyInCache, createTempFile: prefix='" + prefix + "'; tmpDir='" + tmpDir.getAbsolutePath() + "'");
            wrapped.initCause(ioe1);
            throw wrapped;
        }
        InputStream is = null;
        FileOutputStream os = null;
        boolean copied = false;
        try {
            is = url.openStream();
            os = new FileOutputStream(of);
            byte[] buffer = new byte[8192];
            int count;
            while ((count = is.read(buffer)) != -1) {
                os.write(buffer, 0, count);
            }
            os.close();
            copied = true;
            return of;
        }
        catch (IOException ioe1) {
            IOException wrapped = new IOException("From copyInCache, openStream: url='" + url.toExternalForm() + "'");
            wrapped.initCause(ioe1);
            throw wrapped;
        }
        finally {
            if (is != null) {
                try {
                    is.close();
                }
                catch (IOException ignored) {
                    // the copy already succeeded or the failure is already being reported
                }
            }
            if (os != null) {
                try {
                    os.close();
                }
                catch (IOException ignored) {
                    // closing twice is harmless; a first-close failure was reported above
                }
            }
            if (!copied) {
                // Do not leave a partial copy behind.
                of.delete();
            }
        }
    }

    /**
     * Walks the document and records, for every text node in a legacy font, the
     * key/font pair used later by the transliteration pass (see {@code transHash}).
     * A document without children is left alone.
     */
    public void traiterDocPourTranslit() {
        NodeImpl[] nodes = this.document.getChildrenArray();
        // getChildrenArray() may return null (see hasContents()).
        if (nodes != null) {
            this.traiterDocPourTranslitNodes(nodes);
        }
    }

    /**
     * Recursive part of {@link #traiterDocPourTranslit()}: text nodes are handed to
     * {@code traiterDocPourTranslitTextNode}, other nodes are descended into.
     *
     * @param nodes nodes to process (must not be {@code null})
     */
    private void traiterDocPourTranslitNodes(NodeImpl[] nodes) {
        for (int index = 0; index < nodes.length; ++index) {
            NodeImpl current = nodes[index];
            // 3 = text node
            if (current.getNodeType() == 3) {
                this.traiterDocPourTranslitTextNode(current);
                continue;
            }
            NodeImpl[] children = current.getChildrenArray();
            if (children != null) {
                this.traiterDocPourTranslitNodes(children);
            }
        }
    }

    /**
     * Records a text node in {@code transHash} when it is set in a legacy font.
     *
     * <p>The key is built from the first one to three words of the text (entities
     * decoded, whitespace runs collapsed); the value is the legacy font name.
     * Text under {@code style}, {@code script} or {@code #comment} is skipped.
     *
     * @param node text node to examine
     */
    private void traiterDocPourTranslitTextNode(NodeImpl node) {
        String parentName = node.getParentNode().getNodeName().toLowerCase();
        if (parentName.equals("style") || parentName.equals("script") || parentName.equals("#comment")) {
            return;
        }
        String text = node.getNodeValue();
        String decoded = HtmlEntities.entityToChar((String)text);
        String[] families = NRC_HTMLDocumentByCobra.getFonts(node);
        String legacyFont = Police.containsLegacyFont((String[])families);
        LOG.debug((Object)("font = " + legacyFont));
        LOG.debug((Object)("text = " + text));
        LOG.debug((Object)("txttmp = " + decoded));
        if (legacyFont == null) {
            return;
        }
        Matcher words = pfw2.matcher(decoded);
        if (!words.find()) {
            return;
        }
        // Groups 2 and 4 carry the second and third word together with their leading whitespace.
        String second = words.group(2) != null ? words.group(2) : "";
        String third = words.group(4) != null ? words.group(4) : "";
        String key = words.group(1).concat(second).concat(third).replaceAll("\\s+", " ");
        this.transHash.put(key, legacyFont);
        LOG.debug((Object)("first3Words = " + key));
        for (int i = 0; i < key.length(); ++i) {
            LOG.debug((Object)("transHash: key(" + i + ")=" + key.codePointAt(i)));
        }
    }

    /**
     * Transliterates every text element (type code 2) of {@code doc2} to roman
     * characters and writes the resulting document to a new temporary file.
     *
     * <p>For each text element a lookup key is built from the leading groups
     * matched by {@code pfw2}. If the key is present in {@code transHash}, the
     * text is converted with {@code TransCoder.legacyToRoman} using the font
     * stored there (HTML entities are copied through untouched) and wrapped in
     * a {@code <FONT face=arial>} element; otherwise it is converted with
     * {@code TransCoder.unicodeToRoman}.
     *
     * @param doc2 the document whose elements are rewritten in place
     * @param enc  name of the charset used to decode and re-encode element bytes
     * @return the temporary file the document was written to
     * @throws IllegalStateException if the temporary file cannot be created
     */
    File traiterDocPourTranslit2(HtmlDocu doc2, String enc) {
        int i = 0;
        while (i < doc2.elements.size()) {
            HTMLDocuElement element = (HTMLDocuElement)doc2.elements.elementAt(i);
            if (element.getType() == 2) {
                try {
                    String txt = new String(element.getBytes(), enc);
                    String txttmp = HtmlEntities.entityToChar(txt.replaceAll("\\s+", " "));
                    Matcher mfw = pfw2.matcher(txttmp);
                    String key = null;
                    if (mfw.find()) {
                        key = String.valueOf(mfw.group(1)) + (mfw.group(2) != null ? mfw.group(2) : "") + (mfw.group(4) != null ? mfw.group(4) : "");
                        key = key.replaceAll("\\s+", " ");
                    }
                    // Only prefix a real key: prefixing null would build the bogus key ">null".
                    if (key != null && i != 0) {
                        HTMLDocuElement precElement = (HTMLDocuElement)doc2.elements.elementAt(i - 1);
                        if (precElement.getType() == 1 && ((Tag)precElement).text.endsWith("/>")) {
                            key = ">" + key;
                        }
                    }
                    String output;
                    if (key != null && this.transHash.containsKey(key)) {
                        String font = (String)this.transHash.get(key);
                        StringBuilder transText = new StringBuilder();
                        int pos = 0;
                        Matcher mh = HtmlEntities.pHtmlEntity.matcher(txt);
                        while (pos < txt.length() && mh.find(pos)) {
                            // Transliterate the text before the entity, keep the entity verbatim.
                            transText.append(TransCoder.legacyToRoman(txt.substring(pos, mh.start()), font));
                            transText.append(mh.group());
                            pos = mh.end();
                            // Bounds check: an entity that ends the text has no following character.
                            if (pos < txt.length() && txt.charAt(pos) == ';') {
                                transText.append(';');
                                ++pos;
                            }
                        }
                        transText.append(TransCoder.legacyToRoman(txt.substring(pos), font));
                        output = "<FONT face=arial>" + transText + "</FONT>";
                    } else {
                        String txtentity = HtmlEntities.toStringInuktitut(txt);
                        output = TransCoder.unicodeToRoman(txtentity);
                    }
                    element.setBytes(output.getBytes(enc));
                    doc2.elements.setElementAt(element, i);
                }
                catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
            }
            ++i;
        }

        File fout;
        try {
            fout = File.createTempFile("translitOutput", ".htm", tmpDir);
        }
        catch (IOException e) {
            // Previously swallowed, which led to a NullPointerException when opening the stream.
            throw new IllegalStateException("Cannot create temporary file for transliterated output", e);
        }
        OutputStream os = null;
        try {
            os = new FileOutputStream(fout);
            doc2.write(os);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        finally {
            if (os != null) {
                try {
                    os.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return fout;
    }

    /**
     * Transliterates the text elements (type code 2) of {@code doc2}, updates
     * the content-length counters and writes the document to a new temporary
     * file.
     *
     * <p>When {@code translitMode} is {@code "utf-8"}, legacy-font text is
     * converted with {@code TransCoder.legacyToUnicode}, output bytes are
     * encoded as utf-8 and the {@code meta} content-type declaration is
     * rewritten (or inserted) to announce utf-8. For any other mode, text is
     * converted to roman characters and re-encoded with {@code enc}. The first
     * text element following a {@code title} tag is left untouched.
     *
     * <p>Side effects: resets and then accumulates {@code lengthOfTotalContent}
     * and {@code lengthOfInuktitutContent} (character counts with whitespace
     * removed).
     *
     * @param doc2         the document whose elements are rewritten in place
     * @param enc          name of the charset used to decode element bytes
     * @param translitMode {@code "utf-8"} for Unicode output, anything else for roman output
     * @return the temporary file the document was written to
     * @throws IllegalStateException if the temporary file cannot be created
     */
    File traiterDocPourTranslit3(HtmlDocu doc2, String enc, String translitMode) {
        boolean toUtf8 = translitMode.equals("utf-8");
        String outEnc = toUtf8 ? translitMode : enc;
        String translitFont = toUtf8 ? "pigiarniq" : "arial";
        boolean existsMetaHttpEquiv = false;
        boolean inTitle = false;
        this.lengthOfTotalContent = 0L;
        this.lengthOfInuktitutContent = 0L;
        int i = 0;
        while (i < doc2.elements.size()) {
            HTMLDocuElement element = (HTMLDocuElement)doc2.elements.elementAt(i);
            if (element.getType() == 2) {
                if (!inTitle) {
                    try {
                        String txt = new String(element.getBytes(), enc);
                        String txttmp = HtmlEntities.entityToChar(txt);
                        Matcher mfw = pfw2.matcher(txttmp);
                        String key = null;
                        if (mfw.find()) {
                            key = String.valueOf(mfw.group(1)) + (mfw.group(2) != null ? mfw.group(2) : "") + (mfw.group(4) != null ? mfw.group(4) : "");
                            key = key.replaceAll("\\s+", " ");
                        }
                        if (key != null && this.transHash.containsKey(key)) {
                            // Text recorded as legacy-font text: convert with the stored font.
                            String font = (String)this.transHash.get(key);
                            String transPart = toUtf8 ? TransCoder.legacyToUnicode(txttmp, font) : TransCoder.legacyToRoman(txttmp, font);
                            long visibleLength = transPart.replaceAll("\\s+", "").length();
                            this.lengthOfInuktitutContent += visibleLength;
                            this.lengthOfTotalContent += visibleLength;
                            String fontAndTxt = "<span style=\"font-family:" + translitFont + "\">" + transPart + "</span>";
                            element.setBytes(fontAndTxt.getBytes(outEnc));
                        } else {
                            String txtentity = HtmlEntities.toStringInuktitut(txt);
                            String txtWithConvertedEntities = HtmlEntities.fromHTMLEntity(txttmp);
                            boolean inuktitut = Syllabics.containsInuktitut(txtWithConvertedEntities);
                            long visibleLength = txtWithConvertedEntities.replaceAll("\\s+", "").length();
                            this.lengthOfTotalContent += visibleLength;
                            if (inuktitut) {
                                this.lengthOfInuktitutContent += visibleLength;
                                if (!toUtf8) {
                                    txtentity = TransCoder.unicodeToRoman(txtWithConvertedEntities);
                                }
                            }
                            element.setBytes(txtentity.getBytes(outEnc));
                        }
                        doc2.elements.setElementAt(element, i);
                    }
                    catch (UnsupportedEncodingException e) {
                        // Report instead of silently leaving the element unconverted.
                        e.printStackTrace(System.err);
                    }
                    catch (PatternSyntaxException e) {
                        e.printStackTrace(System.err);
                    }
                }
                inTitle = false;
            } else if (toUtf8 && ((Tag)element).tag.toLowerCase().equals("meta")) {
                try {
                    String t = element.getText();
                    Pattern pct = Pattern.compile(httpContentTypePatternString, Pattern.CASE_INSENSITIVE);
                    if (pct.matcher(t).find()) {
                        String new_t = t;
                        Matcher m = charsetPattern.matcher(t);
                        if (m.find()) {
                            new_t = t.replace(m.group(), "charset=utf-8");
                        } else {
                            Matcher mc = contentPattern.matcher(t);
                            if (mc.find()) {
                                String group1 = mc.group(1);
                                new_t = t.replace(group1, String.valueOf(group1) + "; charset=utf-8");
                            }
                        }
                        // NOTE(review): bytes use the platform charset while setText is given
                        // "iso-8859-1" - confirm both are meant to agree.
                        element.setBytes(new_t.getBytes());
                        try {
                            element.setText("iso-8859-1");
                        }
                        catch (UnsupportedEncodingException ignored) {
                            // iso-8859-1 is a charset every Java platform must support.
                        }
                        existsMetaHttpEquiv = true;
                    }
                }
                catch (PatternSyntaxException e) {
                    e.printStackTrace(System.err);
                }
            } else if (((Tag)element).tag.toLowerCase().equals("title")) {
                inTitle = true;
            }
            ++i;
        }
        if (toUtf8 && !existsMetaHttpEquiv) {
            doc2.insertMetaContentType("utf-8");
        }

        File fout;
        try {
            fout = File.createTempFile("translitOutput", ".htm", tmpDir);
        }
        catch (IOException e) {
            // Previously swallowed, which led to a NullPointerException when opening the stream.
            throw new IllegalStateException("Cannot create temporary file for transliterated output", e);
        }
        OutputStream os = null;
        try {
            os = new FileOutputStream(fout);
            doc2.write(os);
        }
        catch (IOException e) {
            e.printStackTrace(System.err);
        }
        finally {
            if (os != null) {
                try {
                    os.close();
                }
                catch (IOException e) {
                    e.printStackTrace(System.err);
                }
            }
        }
        return fout;
    }

    /**
     * Variant of the transliteration pass that lets the caller choose the
     * font used for Unicode output and forwards an extra flag to the legacy
     * converter. Rewrites the text elements (type code 2) of {@code doc2},
     * updates the content-length counters and writes the document to a new
     * temporary file.
     *
     * <p>When {@code translitMode} is {@code "utf-8"}, legacy-font text is
     * converted with {@code TransCoder.legacyToUnicode(text, font, aipaitai)},
     * wrapped in a span using {@code fontName}, encoded as utf-8, and the
     * {@code meta} content-type declaration is rewritten (or inserted) to
     * announce utf-8. For any other mode, text is converted to roman
     * characters, wrapped in an {@code arial} span and re-encoded with
     * {@code enc}. The first text element following a {@code title} tag is
     * left untouched.
     *
     * <p>Side effects: resets and then accumulates {@code lengthOfTotalContent}
     * and {@code lengthOfInuktitutContent} (character counts with whitespace
     * removed).
     *
     * @param doc2         the document whose elements are rewritten in place
     * @param enc          name of the charset used to decode element bytes
     * @param translitMode {@code "utf-8"} for Unicode output, anything else for roman output
     * @param aipaitai     flag passed through to {@code TransCoder.legacyToUnicode}
     * @param fontName     CSS font-family written around converted text in utf-8 mode
     * @return the temporary file the document was written to
     * @throws IllegalStateException if the temporary file cannot be created
     */
    File traiterDocPourTranslit3_2(HtmlDocu doc2, String enc, String translitMode, boolean aipaitai, String fontName) {
        boolean toUtf8 = translitMode.equals("utf-8");
        String outEnc = toUtf8 ? translitMode : enc;
        String translitFont = toUtf8 ? fontName : "arial";
        boolean existsMetaHttpEquiv = false;
        boolean inTitle = false;
        this.lengthOfTotalContent = 0L;
        this.lengthOfInuktitutContent = 0L;
        int i = 0;
        while (i < doc2.elements.size()) {
            HTMLDocuElement element = (HTMLDocuElement)doc2.elements.elementAt(i);
            if (element.getType() == 2) {
                if (!inTitle) {
                    try {
                        String txt = new String(element.getBytes(), enc);
                        String txttmp = HtmlEntities.entityToChar(txt);
                        Matcher mfw = pfw2.matcher(txttmp);
                        String key = null;
                        if (mfw.find()) {
                            key = String.valueOf(mfw.group(1)) + (mfw.group(2) != null ? mfw.group(2) : "") + (mfw.group(4) != null ? mfw.group(4) : "");
                            key = key.replaceAll("\\s+", " ");
                        }
                        if (key != null && this.transHash.containsKey(key)) {
                            // Text recorded as legacy-font text: convert with the stored font.
                            String font = (String)this.transHash.get(key);
                            String transPart = toUtf8 ? TransCoder.legacyToUnicode(txttmp, font, aipaitai) : TransCoder.legacyToRoman(txttmp, font);
                            long visibleLength = transPart.replaceAll("\\s+", "").length();
                            this.lengthOfInuktitutContent += visibleLength;
                            this.lengthOfTotalContent += visibleLength;
                            String fontAndTxt = "<span style=\"font-family:" + translitFont + "\">" + transPart + "</span>";
                            element.setBytes(fontAndTxt.getBytes(outEnc));
                        } else {
                            String txtentity = HtmlEntities.toStringInuktitut(txt);
                            String txtWithConvertedEntities = HtmlEntities.fromHTMLEntity(txttmp);
                            boolean inuktitut = Syllabics.containsInuktitut(txtWithConvertedEntities);
                            long visibleLength = txtWithConvertedEntities.replaceAll("\\s+", "").length();
                            this.lengthOfTotalContent += visibleLength;
                            if (inuktitut) {
                                this.lengthOfInuktitutContent += visibleLength;
                                if (!toUtf8) {
                                    txtentity = TransCoder.unicodeToRoman(txtWithConvertedEntities);
                                }
                            }
                            element.setBytes(txtentity.getBytes(outEnc));
                        }
                        doc2.elements.setElementAt(element, i);
                    }
                    catch (UnsupportedEncodingException e) {
                        // Report instead of silently leaving the element unconverted.
                        e.printStackTrace(System.err);
                    }
                    catch (PatternSyntaxException e) {
                        e.printStackTrace(System.err);
                    }
                }
                inTitle = false;
            } else if (toUtf8 && ((Tag)element).tag.toLowerCase().equals("meta")) {
                try {
                    String t = element.getText();
                    Pattern pct = Pattern.compile(httpContentTypePatternString, Pattern.CASE_INSENSITIVE);
                    if (pct.matcher(t).find()) {
                        String new_t = t;
                        Matcher m = charsetPattern.matcher(t);
                        if (m.find()) {
                            new_t = t.replace(m.group(), "charset=utf-8");
                        } else {
                            Matcher mc = contentPattern.matcher(t);
                            if (mc.find()) {
                                String group1 = mc.group(1);
                                new_t = t.replace(group1, String.valueOf(group1) + "; charset=utf-8");
                            }
                        }
                        // NOTE(review): bytes use the platform charset while setText is given
                        // "iso-8859-1" - confirm both are meant to agree.
                        element.setBytes(new_t.getBytes());
                        try {
                            element.setText("iso-8859-1");
                        }
                        catch (UnsupportedEncodingException ignored) {
                            // iso-8859-1 is a charset every Java platform must support.
                        }
                        existsMetaHttpEquiv = true;
                    }
                }
                catch (PatternSyntaxException e) {
                    e.printStackTrace(System.err);
                }
            } else if (((Tag)element).tag.toLowerCase().equals("title")) {
                inTitle = true;
            }
            ++i;
        }
        if (toUtf8 && !existsMetaHttpEquiv) {
            doc2.insertMetaContentType("utf-8");
        }

        File fout;
        try {
            fout = File.createTempFile("translitOutput", ".htm", tmpDir);
        }
        catch (IOException e) {
            // Previously swallowed, which led to a NullPointerException when opening the stream.
            throw new IllegalStateException("Cannot create temporary file for transliterated output", e);
        }
        OutputStream os = null;
        try {
            os = new FileOutputStream(fout);
            doc2.write(os);
        }
        catch (IOException e) {
            e.printStackTrace(System.err);
        }
        finally {
            if (os != null) {
                try {
                    os.close();
                }
                catch (IOException e) {
                    e.printStackTrace(System.err);
                }
            }
        }
        return fout;
    }

    /**
     * Variant of the transliteration pass that marks converted text with a
     * {@code <span class="tcihtml">} wrapper instead of an inline font style.
     * Rewrites the text elements (type code 2) of {@code doc2}, updates the
     * content-length counters and writes the document to a new temporary file.
     *
     * <p>When {@code translitMode} is {@code "utf-8"}, legacy-font text is
     * converted with {@code TransCoder.legacyToUnicode(text, font, aipaitai)},
     * output bytes are encoded as utf-8 and the {@code meta} content-type
     * declaration is rewritten (or inserted) to announce utf-8. For any other
     * mode, text is converted to roman characters and re-encoded with
     * {@code enc}. The first text element following a {@code title} tag is
     * left untouched.
     *
     * <p>Side effects: resets and then accumulates {@code lengthOfTotalContent}
     * and {@code lengthOfInuktitutContent} (character counts with whitespace
     * removed); emits debug logging.
     *
     * @param doc2         the document whose elements are rewritten in place
     * @param enc          name of the charset used to decode element bytes
     * @param translitMode {@code "utf-8"} for Unicode output, anything else for roman output
     * @param aipaitai     flag passed through to {@code TransCoder.legacyToUnicode}
     * @return the temporary file the document was written to
     * @throws IllegalStateException if the temporary file cannot be created
     */
    File traiterDocPourTranslit3_3(HtmlDocu doc2, String enc, String translitMode, boolean aipaitai) {
        boolean toUtf8 = translitMode.equals("utf-8");
        String outEnc = toUtf8 ? translitMode : enc;
        boolean existsMetaHttpEquiv = false;
        boolean inTitle = false;
        this.lengthOfTotalContent = 0L;
        this.lengthOfInuktitutContent = 0L;
        int i = 0;
        while (i < doc2.elements.size()) {
            HTMLDocuElement element = (HTMLDocuElement)doc2.elements.elementAt(i);
            if (element.getType() == 2) {
                if (!inTitle) {
                    try {
                        String txt = new String(element.getBytes(), enc);
                        LOG.debug("traiterDocPourTranslit3_3: txt=" + txt);
                        String txttmp = HtmlEntities.entityToChar(txt);
                        LOG.debug("traiterDocPourTranslit3_3: txttmp=" + txttmp);
                        Matcher mfw = pfw2.matcher(txttmp);
                        String key = null;
                        if (mfw.find()) {
                            key = String.valueOf(mfw.group(1)) + (mfw.group(2) != null ? mfw.group(2) : "") + (mfw.group(4) != null ? mfw.group(4) : "");
                            key = key.replaceAll("\\s+", " ");
                        }
                        LOG.debug("traiterDocPourTranslit3_3: key=" + key);
                        // The code-point dump walks every stored key for every text element,
                        // so only do it when debug output is actually enabled.
                        if (key != null && LOG.isDebugEnabled()) {
                            for (int ik = 0; ik < key.length(); ++ik) {
                                LOG.debug("key(" + ik + ")=" + key.codePointAt(ik));
                            }
                            Enumeration eth = this.transHash.keys();
                            while (eth.hasMoreElements()) {
                                String th = (String)eth.nextElement();
                                for (int ith = 0; ith < th.length(); ++ith) {
                                    LOG.debug("th(" + ith + ")=" + th.codePointAt(ith));
                                }
                            }
                        }
                        if (key != null && this.transHash.containsKey(key)) {
                            // Text recorded as legacy-font text: convert with the stored font.
                            String font = (String)this.transHash.get(key);
                            String transPart;
                            if (toUtf8) {
                                LOG.debug("traiterDocPourTranslit3_3: txttmp=" + txttmp + " ; font=" + font);
                                transPart = TransCoder.legacyToUnicode(txttmp, font, aipaitai);
                            } else {
                                transPart = TransCoder.legacyToRoman(txttmp, font);
                            }
                            LOG.debug("traiterDocPourTranslit3_3: transPart=" + transPart);
                            long visibleLength = transPart.replaceAll("\\s+", "").length();
                            this.lengthOfInuktitutContent += visibleLength;
                            this.lengthOfTotalContent += visibleLength;
                            String fontAndTxt = "<span class=\"tcihtml\">" + transPart + "</span>";
                            element.setBytes(fontAndTxt.getBytes(outEnc));
                        } else {
                            String txtentity = HtmlEntities.toStringInuktitut(txt);
                            String txtWithConvertedEntities = HtmlEntities.fromHTMLEntity(txttmp);
                            boolean inuktitut = Syllabics.containsInuktitut(txtWithConvertedEntities);
                            long visibleLength = txtWithConvertedEntities.replaceAll("\\s+", "").length();
                            this.lengthOfTotalContent += visibleLength;
                            if (inuktitut) {
                                this.lengthOfInuktitutContent += visibleLength;
                                if (!toUtf8) {
                                    txtentity = TransCoder.unicodeToRoman(txtWithConvertedEntities);
                                }
                            }
                            element.setBytes(txtentity.getBytes(outEnc));
                        }
                        doc2.elements.setElementAt(element, i);
                    }
                    catch (UnsupportedEncodingException e) {
                        // Report instead of silently leaving the element unconverted.
                        e.printStackTrace(System.err);
                    }
                    catch (PatternSyntaxException e) {
                        e.printStackTrace(System.err);
                    }
                }
                inTitle = false;
            } else if (toUtf8 && ((Tag)element).tag.toLowerCase().equals("meta")) {
                try {
                    String t = element.getText();
                    Pattern pct = Pattern.compile(httpContentTypePatternString, Pattern.CASE_INSENSITIVE);
                    if (pct.matcher(t).find()) {
                        String new_t = t;
                        Matcher m = charsetPattern.matcher(t);
                        if (m.find()) {
                            new_t = t.replace(m.group(), "charset=utf-8");
                        } else {
                            Matcher mc = contentPattern.matcher(t);
                            if (mc.find()) {
                                String group1 = mc.group(1);
                                new_t = t.replace(group1, String.valueOf(group1) + "; charset=utf-8");
                            }
                        }
                        // NOTE(review): bytes use the platform charset while setText is given
                        // "iso-8859-1" - confirm both are meant to agree.
                        element.setBytes(new_t.getBytes());
                        try {
                            element.setText("iso-8859-1");
                        }
                        catch (UnsupportedEncodingException ignored) {
                            // iso-8859-1 is a charset every Java platform must support.
                        }
                        existsMetaHttpEquiv = true;
                    }
                }
                catch (PatternSyntaxException e) {
                    e.printStackTrace(System.err);
                }
            } else if (((Tag)element).tag.toLowerCase().equals("title")) {
                inTitle = true;
            }
            ++i;
        }
        if (toUtf8 && !existsMetaHttpEquiv) {
            doc2.insertMetaContentType("utf-8");
        }

        File fout;
        try {
            fout = File.createTempFile("translitOutput", ".htm", tmpDir);
        }
        catch (IOException e) {
            // Previously swallowed, which led to a NullPointerException when opening the stream.
            throw new IllegalStateException("Cannot create temporary file for transliterated output", e);
        }
        OutputStream os = null;
        try {
            os = new FileOutputStream(fout);
            doc2.write(os);
        }
        catch (IOException e) {
            e.printStackTrace(System.err);
        }
        finally {
            if (os != null) {
                try {
                    os.close();
                }
                catch (IOException e) {
                    e.printStackTrace(System.err);
                }
            }
        }
        return fout;
    }

    /**
     * Converts the legacy-font text of {@code doc2} to Unicode and returns the
     * document as a string.
     *
     * <p>For each text element (type code 2) a lookup key is built from the
     * leading groups matched by {@code pfw2}; when the key is present in
     * {@code transHash} the text is converted with
     * {@code TransCoder.legacyToUnicode} using the stored font (HTML entities
     * are copied through untouched) and wrapped in a {@code pigiarniq} span.
     * Text without a matching key is left unchanged. A {@code meta}
     * content-type declaration is rewritten to announce utf-8; if none has
     * been seen by the time {@code /head} is reached, one is inserted.
     *
     * @param doc2 the document whose elements are rewritten in place
     * @param enc  name of the charset used to decode element bytes
     * @return the result of {@code doc2.print()} after the rewrite
     */
    String traiterDocPourToUnicode(HtmlDocu doc2, String enc) {
        // Constant patterns, compiled once per call instead of once per meta tag.
        Pattern httpEquivPattern = Pattern.compile("http-equiv\\s*=\\s*[\"']?content-type[\"']?");
        Pattern contentCharsetPattern = Pattern.compile("content\\s*=\\s*[\"']?text/html\\s*;\\s*charset\\s*=\\s*[\"']?([^\"'>]+)[\"']?");
        boolean metaCharsetFoundAndSet = false;
        int i = 0;
        while (i < doc2.elements.size()) {
            HTMLDocuElement element = (HTMLDocuElement)doc2.elements.elementAt(i);
            if (element.getType() == 2) {
                try {
                    String txt = new String(element.getBytes(), enc);
                    String txttmp = HtmlEntities.entityToChar(txt.replaceAll("\\s+", " "));
                    Matcher mfw = pfw2.matcher(txttmp);
                    String key = null;
                    if (mfw.find()) {
                        key = String.valueOf(mfw.group(1)) + (mfw.group(2) != null ? mfw.group(2) : "") + (mfw.group(4) != null ? mfw.group(4) : "");
                        key = key.replaceAll("\\s+", " ");
                    }
                    // Only prefix a real key: prefixing null would build the bogus key ">null".
                    if (key != null && i != 0) {
                        HTMLDocuElement precElement = (HTMLDocuElement)doc2.elements.elementAt(i - 1);
                        if (precElement.getType() == 1 && ((Tag)precElement).text.endsWith("/>")) {
                            key = ">" + key;
                        }
                    }
                    if (key != null && this.transHash.containsKey(key)) {
                        String font = (String)this.transHash.get(key);
                        StringBuilder transText = new StringBuilder();
                        int pos = 0;
                        Matcher mh = HtmlEntities.pHtmlEntity.matcher(txt);
                        while (pos < txt.length() && mh.find(pos)) {
                            // Convert the text before the entity, keep the entity verbatim.
                            transText.append(TransCoder.legacyToUnicode(txt.substring(pos, mh.start()), font));
                            transText.append(mh.group());
                            pos = mh.end();
                            // Bounds check: an entity that ends the text has no following character.
                            if (pos < txt.length() && txt.charAt(pos) == ';') {
                                transText.append(';');
                                ++pos;
                            }
                        }
                        transText.append(TransCoder.legacyToUnicode(txt.substring(pos), font));
                        String fonttxt = "<span style=\"font-family:pigiarniq\">" + transText + "</span>";
                        element.setText(fonttxt, "utf-8");
                        doc2.elements.setElementAt(element, i);
                    }
                }
                catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
                catch (PatternSyntaxException e) {
                    e.printStackTrace(System.err);
                }
            } else if (((Tag)element).tag.equalsIgnoreCase("meta")) {
                Tag metaTag = (Tag)element;
                String text = metaTag.text;
                String lowerText = text.toLowerCase();
                if (httpEquivPattern.matcher(lowerText).find()) {
                    metaCharsetFoundAndSet = true;
                    if (contentCharsetPattern.matcher(lowerText).find()) {
                        // Replace only the charset value and keep whatever quoting was there.
                        // Injecting new quotes would yield content="text/html; charset="utf-8"".
                        // Case-insensitive because detection above ran on lower-cased text.
                        metaTag.text = text.replaceFirst("(?i)(charset\\s*=\\s*[\"']?)[^\"'>]+", "$1utf-8");
                    } else {
                        // NOTE(review): this literal carries charset outside a content attribute;
                        // kept as-is - confirm whether a content="text/html; charset=utf-8" form is wanted.
                        metaTag.text = "<meta http-equiv=\"content-type\"; charset=\"utf-8\">";
                    }
                }
            } else if (((Tag)element).tag.equalsIgnoreCase("/head") && !metaCharsetFoundAndSet) {
                try {
                    // Two elements are inserted ahead of the closing head tag, which moves it to
                    // index i + 2; the index is advanced so the tag is not processed again.
                    doc2.elements.add(i - 1, new BetweenTag("\n", null));
                    doc2.elements.add(i, new Tag("<meta http-equiv=\"content-type\"; charset=\"utf-8\">", null));
                    i += 2;
                }
                catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
            }
            ++i;
        }
        return doc2.print();
    }

    /**
     * Returns the font families that apply to the given node, as determined
     * by {@link #getFontsA(NodeImpl)}.
     *
     * @param node the node to inspect
     * @return the font family names, or {@code null} when none can be determined
     */
    public static String[] getFonts(NodeImpl node) {
        return NRC_HTMLDocumentByCobra.getFontsA(node);
    }

    /**
     * Finds the font families in effect for a node by walking up its ancestor
     * chain. At each non-text node the {@code face} attribute of a
     * {@code font} element is tried first, then the font family of the
     * element's current style; the first value found is split on commas.
     *
     * @param node the node to start from; may be {@code null}
     * @return the font family names, or {@code null} when the chain is
     *         exhausted without finding any
     */
    static String[] getFontsA(NodeImpl node) {
        NodeImpl current = node;
        while (current != null) {
            // 3 == org.w3c.dom.Node.TEXT_NODE: text nodes carry no font information themselves.
            if (current.getNodeType() != 3) {
                String families = null;
                if (current.getNodeName().toLowerCase().equals("font")) {
                    Node faceNode = current.getAttributes().getNamedItem("face");
                    if (faceNode != null) {
                        families = faceNode.getNodeValue();
                    }
                }
                if (families == null && current instanceof HTMLElementImpl) {
                    CSS2PropertiesImpl properties = ((HTMLElementImpl)current).getCurrentStyle();
                    if (properties != null) {
                        families = properties.getFontFamily();
                    }
                }
                if (families != null) {
                    return families.split(",\\s*");
                }
            }
            current = (NodeImpl)current.getParentNode();
        }
        return null;
    }

    /**
     * Determines a font for the node by classifying its text content with
     * {@code TextCat.classify}.
     *
     * @param node the node whose text content is classified
     * @return a one-element array holding the classification with any
     *         {@code "inuktitut_"} occurrence removed, or {@code null} when the
     *         classifier returns {@code null} or an empty string
     */
    static String[] getFontsB(NodeImpl node) {
        String font = TextCat.classify(node.getTextContent());
        boolean nothingFound = font == null || font.length() == 0;
        if (nothingFound) {
            return null;
        }
        String[] result = new String[1];
        result[0] = font.replace("inuktitut_", "");
        return result;
    }

    /**
     * Command-line entry point. The first argument selects the action:
     * <ul>
     *   <li>{@code -f a b c} - build the document with the three-argument
     *       constructor and write its Unicode conversion to standard output;</li>
     *   <li>{@code -u a} - build the document with the one-argument
     *       constructor and write its Unicode conversion to standard output;</li>
     *   <li>{@code -content a} - print the text content of each top-level
     *       node, encoded as utf-8;</li>
     *   <li>{@code -nodes a b c} - call {@code displayNodes()} on the document;</li>
     *   <li>{@code -a a} - print the {@code href} of every anchor.</li>
     * </ul>
     * Missing or unknown arguments produce a usage message on standard error
     * instead of an {@code ArrayIndexOutOfBoundsException}.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        String usage = "Usage: NRC_HTMLDocumentByCobra (-f|-nodes) <arg1> <arg2> <arg3> | (-u|-content|-a) <arg1>";
        if (args == null || args.length == 0) {
            System.err.println(usage);
            return;
        }
        String option = args[0];
        boolean threeOperands = option.equals("-f") || option.equals("-nodes");
        boolean oneOperand = option.equals("-u") || option.equals("-content") || option.equals("-a");
        if (!threeOperands && !oneOperand) {
            System.err.println("Unknown option '" + option + "'");
            System.err.println(usage);
            return;
        }
        // Validate up front: the error handlers below also read args[1].
        if (args.length < (threeOperands ? 4 : 2)) {
            System.err.println("Missing argument(s) for option '" + option + "'");
            System.err.println(usage);
            return;
        }

        NRC_HTMLDocumentByCobra doc = null;
        if (option.equals("-f")) {
            try {
                doc = new NRC_HTMLDocumentByCobra(args[1], args[2], args[3]);
                doc.toUnicode(System.out);
            }
            catch (Exception e) {
                System.err.println("Problem with URL '" + args[1] + "' (" + e.getClass().getName() + ")");
                System.out.print("--- ERROR from NRC_HTMLDocumentByCobra: " + e.getMessage());
            }
        } else if (option.equals("-u")) {
            try {
                doc = new NRC_HTMLDocumentByCobra(args[1]);
                try {
                    doc.toUnicode(System.out);
                }
                catch (Exception e) {
                    System.err.println("Problem in toUnicode with URL '" + args[1] + "' (" + e.getClass().getName() + ")");
                    System.out.print("--- ERROR from toUnicode in NRC_HTMLDocumentByCobra: " + e.getMessage());
                }
            }
            catch (Exception e) {
                System.err.println("Problem with creating HTML document with URL '" + args[1] + "' (" + e.getClass().getName() + ": " + e.getMessage() + ")");
                System.out.print("--- ERROR from NRC_HTMLDocumentByCobra: " + e.getMessage());
            }
        } else if (option.equals("-content")) {
            try {
                doc = new NRC_HTMLDocumentByCobra(args[1]);
                PrintStream out = new PrintStream((OutputStream)System.out, true, "utf-8");
                int i = 0;
                while (i < doc.document.getChildNodes().getLength()) {
                    Node node = doc.document.getChildNodes().item(i);
                    out.println(node.getTextContent());
                    ++i;
                }
            }
            catch (Exception e) {
                e.printStackTrace();
                System.out.print("");
            }
        } else if (option.equals("-nodes")) {
            try {
                doc = new NRC_HTMLDocumentByCobra(args[1], args[2], args[3]);
                doc.displayNodes();
            }
            catch (Exception e) {
                LOG.info((Object)("--NRC_HTMLDocumentByCobra--- Exception: " + e.getMessage()));
            }
        } else if (option.equals("-a")) {
            try {
                doc = new NRC_HTMLDocumentByCobra(args[1]);
                HTMLCollection anchors = doc.document.getAnchors();
                int i = 0;
                while (i < anchors.getLength()) {
                    Node anchor = anchors.item(i);
                    NamedNodeMap attrs = anchor.getAttributes();
                    Node href = attrs.getNamedItem("href");
                    if (href != null) {
                        System.out.println(href.getNodeValue());
                    }
                    ++i;
                }
            }
            catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

