package org.apache.nutch.parse;

import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/parse/ParserChecker.class */
/**
 * Command-line tool that fetches a single URL, runs the fetched content through
 * {@link ParseUtil} and prints every resulting parse to standard output.
 *
 * <p>Usage: {@code ParserChecker [-dumpText] [-forceAs mimeType] url}
 * <ul>
 *   <li>{@code -dumpText} additionally prints the parse text of each parse;</li>
 *   <li>{@code -forceAs mimeType} overrides the content type of the fetched content
 *       before parsing;</li>
 *   <li>{@code url} must be the last argument.</li>
 * </ul>
 */
public class ParserChecker implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(ParserChecker.class);

    /** Usage line printed to standard error whenever the arguments are invalid. */
    private static final String USAGE = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";

    Configuration conf = null;

    /**
     * Fetches and parses the URL named on the command line and prints the result.
     *
     * @param strArr command-line arguments, see the class description
     * @return {@code 0} on success; {@code -1} if the arguments are invalid, the URL
     *         yields no content, or no content type is available
     * @throws Exception if fetching, parsing or signature calculation fails
     */
    @Override // org.apache.hadoop.util.Tool
    public int run(String[] strArr) throws Exception {
        boolean dumpText = false;
        String forcedContentType = null;
        String url = null;

        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if (arg.equals("-forceAs")) {
                // The option needs a value; without this check a trailing
                // "-forceAs" would read past the end of the array.
                if (i + 1 >= strArr.length) {
                    System.err.println(USAGE);
                    return -1;
                }
                forcedContentType = strArr[++i];
            } else if (arg.equals("-dumpText")) {
                dumpText = true;
            } else if (i != strArr.length - 1) {
                // Anything that is not a known option is only accepted in last position (the URL).
                System.err.println(USAGE);
                return -1;
            } else {
                url = arg;
            }
        }

        // Covers both an empty argument list and option-only invocations.
        if (url == null) {
            System.err.println(USAGE);
            return -1;
        }

        LOG.info("fetching: {}", url);
        Content content = new ProtocolFactory(this.conf)
                .getProtocol(url)
                .getProtocolOutput(new Text(url), new CrawlDatum())
                .getContent();
        if (content == null) {
            System.err.println("Can't fetch URL successfully");
            return -1;
        }

        String contentType;
        if (forcedContentType != null) {
            content.setContentType(forcedContentType);
            contentType = forcedContentType;
        } else {
            contentType = content.getContentType();
        }
        if (contentType == null) {
            System.err.println("Failed to determine content type for " + url);
            return -1;
        }

        ParseResult parseResult = new ParseUtil(this.conf).parse(content);
        byte[] signature = SignatureFactory.getSignature(getConf())
                .calculate(content, parseResult.get(new Text(url)));
        // Guard kept so the hex conversion is skipped when INFO logging is disabled.
        if (LOG.isInfoEnabled()) {
            LOG.info("parsing: {}", url);
            LOG.info("contentType: {}", contentType);
            LOG.info("signature: {}", StringUtil.toHexString(signature));
        }

        for (Map.Entry<Text, Parse> entry : parseResult) {
            Parse parse = entry.getValue();
            System.out.print("---------\nUrl\n---------------\n");
            System.out.print(entry.getKey());
            System.out.print("\n---------\nParseData\n---------\n");
            System.out.print(parse.getData().toString());
            if (dumpText) {
                System.out.print("---------\nParseText\n---------\n");
                System.out.print(parse.getText());
            }
        }
        return 0;
    }

    /** Returns the configuration previously supplied through {@link #setConf(Configuration)}. */
    @Override // org.apache.hadoop.conf.Configurable
    public Configuration getConf() {
        return this.conf;
    }

    /** Stores the configuration used when creating the protocol, parser and signature objects. */
    @Override // org.apache.hadoop.conf.Configurable
    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    /**
     * Command-line entry point: runs the tool through {@link ToolRunner} with a Nutch
     * configuration and exits the JVM with the tool's return code.
     */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(NutchConfiguration.create(), new ParserChecker(), strArr));
    }
}
