package org.apache.nutch.util.domain;

import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.indexer.solr.SolrConstants;
import org.apache.nutch.util.domain.DomainSuffix;
import org.apache.nutch.util.domain.TopLevelDomain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/util/domain/DomainSuffixesReader.class */
/**
 * Reads an XML description of domain suffixes and registers every entry it
 * finds with a {@link DomainSuffixes} instance.
 *
 * <p>The expected layout, as far as this reader is concerned, is a
 * {@code <domains>} root containing a {@code <tlds>} element (with
 * {@code <itlds>}, {@code <gtlds>} and {@code <cctlds>} groups of
 * {@code <tld>} entries) and a {@code <suffixes>} element (with
 * {@code <suffix>} entries). Each entry may carry optional {@code <status>}
 * and boost children; country-code entries must carry a {@code <country>}
 * child.
 */
class DomainSuffixesReader {
    private static final Logger LOG = LoggerFactory.getLogger(DomainSuffixesReader.class);

    /**
     * Parses the given stream and adds every top-level domain and suffix it
     * describes to {@code domainSuffixes}. The stream is not closed here.
     *
     * @param domainSuffixes the registry that receives the parsed entries
     * @param inputStream    the XML document to parse
     * @throws IOException if the document cannot be read or parsed, if the root
     *                     element is not {@code <domains>}, if one of the
     *                     required section elements is missing, or if a
     *                     country-code entry has no {@code <country>} child
     */
    public void read(DomainSuffixes domainSuffixes, InputStream inputStream) throws IOException {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setIgnoringComments(true);
            Element root = factory.newDocumentBuilder().parse(new InputSource(inputStream)).getDocumentElement();
            if (root == null || !root.getTagName().equals("domains")) {
                throw new IOException("xml file is not valid");
            }
            // Resolve both top-level sections up front so that a structurally
            // broken file is rejected before anything is registered.
            Element tlds = requireChild(root, "tlds");
            Element suffixes = requireChild(root, "suffixes");
            readITLDs(domainSuffixes, requireChild(tlds, "itlds"));
            readGTLDs(domainSuffixes, requireChild(tlds, "gtlds"));
            readCCTLDs(domainSuffixes, requireChild(tlds, "cctlds"));
            readSuffixes(domainSuffixes, suffixes);
        } catch (ParserConfigurationException e) {
            LOG.warn(StringUtils.stringifyException(e));
            // Keep the original exception as the cause so the stack trace survives.
            throw new IOException(e.getMessage(), e);
        } catch (SAXException e) {
            LOG.warn(StringUtils.stringifyException(e));
            throw new IOException(e.getMessage(), e);
        }
    }

    /**
     * Returns the first descendant of {@code parent} with the given tag name.
     *
     * @throws IOException if no such element exists; without this check a
     *                     missing section would only show up later as a
     *                     {@code NullPointerException}
     */
    private static Element requireChild(Element parent, String tagName) throws IOException {
        NodeList matches = parent.getElementsByTagName(tagName);
        if (matches.getLength() == 0) {
            throw new IOException("xml file is not valid: missing <" + tagName + "> element");
        }
        return (Element) matches.item(0);
    }

    /**
     * Registers every {@code <tld>} under {@code element} as a top-level domain
     * of type {@link TopLevelDomain.Type#INFRASTRUCTURE}.
     */
    void readITLDs(DomainSuffixes domainSuffixes, Element element) {
        addTypedTlds(domainSuffixes, element, TopLevelDomain.Type.INFRASTRUCTURE);
    }

    /**
     * Registers every {@code <tld>} under {@code element} as a top-level domain
     * of type {@link TopLevelDomain.Type#GENERIC}.
     */
    void readGTLDs(DomainSuffixes domainSuffixes, Element element) {
        addTypedTlds(domainSuffixes, element, TopLevelDomain.Type.GENERIC);
    }

    /** Shared loop for the infrastructure and generic groups, which differ only in the assigned type. */
    private void addTypedTlds(DomainSuffixes domainSuffixes, Element element, TopLevelDomain.Type type) {
        NodeList tldNodes = element.getElementsByTagName("tld");
        for (int i = 0; i < tldNodes.getLength(); i++) {
            domainSuffixes.addDomainSuffix(readGTLD((Element) tldNodes.item(i), type));
        }
    }

    /**
     * Registers every {@code <tld>} under {@code element} as a country-code
     * top-level domain.
     *
     * @throws IOException if an entry has no {@code <country>} child
     */
    void readCCTLDs(DomainSuffixes domainSuffixes, Element element) throws IOException {
        NodeList tldNodes = element.getElementsByTagName("tld");
        for (int i = 0; i < tldNodes.getLength(); i++) {
            domainSuffixes.addDomainSuffix(readCCTLD((Element) tldNodes.item(i)));
        }
    }

    /**
     * Builds a {@link TopLevelDomain} of the given type from a {@code <tld>}
     * element, using its status and boost children where present.
     */
    TopLevelDomain readGTLD(Element element, TopLevelDomain.Type type) {
        // NOTE(review): the attribute name is taken from a Generator constant,
        // presumably "domain" - confirm; it looks like a decompiler substitution.
        String domain = element.getAttribute(Generator.GENERATOR_COUNT_VALUE_DOMAIN);
        return new TopLevelDomain(domain, type, readStatus(element), readBoost(element));
    }

    /**
     * Builds a country-code {@link TopLevelDomain} from a {@code <tld>} element.
     *
     * @throws IOException if the element has no {@code <country>} child
     */
    TopLevelDomain readCCTLD(Element element) throws IOException {
        String domain = element.getAttribute(Generator.GENERATOR_COUNT_VALUE_DOMAIN);
        return new TopLevelDomain(domain, readStatus(element), readBoost(element), readCountryName(element));
    }

    /**
     * Reads the text of the first {@code <status>} descendant and converts it
     * with {@link DomainSuffix.Status#valueOf(String)}.
     *
     * @return the parsed status, or {@link DomainSuffix#DEFAULT_STATUS} when no
     *         {@code <status>} element is present
     */
    DomainSuffix.Status readStatus(Element element) {
        NodeList statusNodes = element.getElementsByTagName("status");
        if (statusNodes == null || statusNodes.getLength() == 0) {
            return DomainSuffix.DEFAULT_STATUS;
        }
        return DomainSuffix.Status.valueOf(statusNodes.item(0).getFirstChild().getNodeValue());
    }

    /**
     * Reads the text of the first boost descendant and parses it as a float.
     *
     * @return the parsed boost, or {@code 1.0f} when no boost element is present
     */
    float readBoost(Element element) {
        // NOTE(review): the tag name is taken from SolrConstants.BOOST_FIELD,
        // presumably "boost" - confirm; it looks like a decompiler substitution.
        NodeList boostNodes = element.getElementsByTagName(SolrConstants.BOOST_FIELD);
        if (boostNodes == null || boostNodes.getLength() == 0) {
            return 1.0f;
        }
        return Float.parseFloat(boostNodes.item(0).getFirstChild().getNodeValue());
    }

    /**
     * Returns the text of the first {@code <country>} descendant.
     *
     * @throws IOException if no {@code <country>} element is present
     */
    String readCountryName(Element element) throws IOException {
        NodeList countryNodes = element.getElementsByTagName("country");
        if (countryNodes == null || countryNodes.getLength() == 0) {
            throw new IOException("Country name should be given");
        }
        // getNodeValue() is always null for an Element node; the name lives in
        // the element's text, so read the text content instead.
        return countryNodes.item(0).getTextContent();
    }

    /** Registers every {@code <suffix>} under {@code element} as a domain suffix. */
    void readSuffixes(DomainSuffixes domainSuffixes, Element element) {
        NodeList suffixNodes = element.getElementsByTagName("suffix");
        for (int i = 0; i < suffixNodes.getLength(); i++) {
            domainSuffixes.addDomainSuffix(readSuffix((Element) suffixNodes.item(i)));
        }
    }

    /**
     * Builds a {@link DomainSuffix} from a {@code <suffix>} element, using its
     * status and boost children where present.
     */
    DomainSuffix readSuffix(Element element) {
        String domain = element.getAttribute(Generator.GENERATOR_COUNT_VALUE_DOMAIN);
        return new DomainSuffix(domain, readStatus(element), readBoost(element));
    }
}
