package org.apache.nutch.scoring.webgraph;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.ListIterator;
import java.util.Random;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.util.FSUtils;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops.class */
public class Loops extends Configured implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(Loops.class);
    public static final String LOOPS_DIR = "loops";
    public static final String ROUTES_DIR = "routes";

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops$Finalizer.class */
    /**
     * Last job of the loop detection. The mapper keeps only routes flagged as
     * found and keys them by the url they were looking for; the reducer
     * gathers the outlink urls of those routes into one {@link LoopSet} per
     * key.
     */
    public static class Finalizer extends Configured implements Mapper<Text, Route, Text, Route>, Reducer<Text, Route, Text, LoopSet> {
        private JobConf conf;

        /** Creates an unconfigured instance. */
        public Finalizer() {
        }

        /**
         * Creates an instance and stores the given configuration.
         *
         * @param configuration configuration passed to {@code setConf}
         */
        public Finalizer(Configuration configuration) {
            setConf(configuration);
        }

        /**
         * Remembers the job configuration.
         *
         * @param jobConf the job configuration
         */
        @Override
        public void configure(JobConf jobConf) {
            this.conf = jobConf;
        }

        /**
         * Emits a route under the url it was looking for; routes that are not
         * marked as found produce no output.
         *
         * @param text input key (not used)
         * @param route the route to inspect
         * @param outputCollector receives the re-keyed route
         * @param reporter progress reporter (not used)
         * @throws IOException if collecting the output fails
         */
        @Override
        public void map(Text text, Route route, OutputCollector<Text, Route> outputCollector, Reporter reporter) throws IOException {
            if (!route.isFound()) {
                return;
            }
            Text lookingFor = new Text(route.getLookingFor());
            outputCollector.collect(lookingFor, route);
        }

        /**
         * Collects the outlink url of every route for the key into a single
         * {@link LoopSet} and emits it under that key.
         *
         * @param text the key shared by all routes
         * @param it the routes for the key
         * @param outputCollector receives the resulting loop set
         * @param reporter progress reporter (not used)
         * @throws IOException if collecting the output fails
         */
        @Override
        public void reduce(Text text, Iterator<Route> it, OutputCollector<Text, LoopSet> outputCollector, Reporter reporter) throws IOException {
            LoopSet loops = new LoopSet();
            Set<String> outlinkUrls = loops.getLoopSet();
            while (it.hasNext()) {
                Route route = it.next();
                outlinkUrls.add(route.getOutlinkUrl());
            }
            outputCollector.collect(text, loops);
        }

        /** Nothing to release. */
        @Override
        public void close() {
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops$Initializer.class */
    /**
     * First job of the loop detection. The mapper wraps every input value in
     * an {@link ObjectWritable}; the reducer turns the {@link LinkDatum}
     * values of a key into initial, not-yet-found {@link Route}s, provided the
     * key's {@link Node} reports at least one inlink.
     */
    public static class Initializer extends Configured implements Mapper<Text, Writable, Text, ObjectWritable>, Reducer<Text, ObjectWritable, Text, Route> {
        private JobConf conf;

        /** Creates an unconfigured instance. */
        public Initializer() {
        }

        /**
         * Creates an instance and stores the given configuration.
         *
         * @param configuration configuration passed to {@code setConf}
         */
        public Initializer(Configuration configuration) {
            setConf(configuration);
        }

        /**
         * Remembers the job configuration.
         *
         * @param jobConf the job configuration
         */
        @Override
        public void configure(JobConf jobConf) {
            this.conf = jobConf;
        }

        /**
         * Wraps the value in an {@link ObjectWritable} and emits it under the
         * unchanged key.
         *
         * @param text the input key
         * @param writable the input value
         * @param outputCollector receives the wrapped value
         * @param reporter progress reporter (not used)
         * @throws IOException if collecting the output fails
         */
        @Override
        public void map(Text text, Writable writable, OutputCollector<Text, ObjectWritable> outputCollector, Reporter reporter) throws IOException {
            ObjectWritable wrapped = new ObjectWritable();
            wrapped.set(writable);
            outputCollector.collect(text, wrapped);
        }

        /**
         * Emits one unfound {@link Route} per {@link LinkDatum} of the key.
         * Each route is keyed by the link's url and looks for the key url.
         * Nothing is emitted when no {@link Node} was seen for the key or the
         * node has no inlinks.
         *
         * @param text the url all values belong to
         * @param it the wrapped values for the url
         * @param outputCollector receives the initial routes
         * @param reporter progress reporter (not used)
         * @throws IOException if collecting the output fails
         */
        @Override
        public void reduce(Text text, Iterator<ObjectWritable> it, OutputCollector<Text, Route> outputCollector, Reporter reporter) throws IOException {
            String sourceUrl = text.toString();
            ArrayList<LinkDatum> links = new ArrayList<LinkDatum>();
            Node node = null;

            // Separate the link values from the node value.
            while (it.hasNext()) {
                Object value = it.next().get();
                if (value instanceof LinkDatum) {
                    links.add((LinkDatum) value);
                } else if (value instanceof Node) {
                    node = (Node) value;
                }
            }

            boolean hasInlinks = node != null && node.getNumInlinks() > 0;
            if (!hasInlinks) {
                return;
            }

            for (LinkDatum link : links) {
                String linkUrl = link.getUrl();
                Route route = new Route();
                route.setFound(false);
                route.setLookingFor(sourceUrl);
                route.setOutlinkUrl(linkUrl);
                outputCollector.collect(new Text(linkUrl), route);
            }
        }

        /** Nothing to release. */
        @Override
        public void close() {
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops$LoopSet.class */
    /**
     * Writable holding a set of urls. It is serialized as an element count
     * followed by each url written with {@link Text#writeString}.
     */
    public static class LoopSet implements Writable {
        private Set<String> loopSet = new HashSet<String>();

        /**
         * @return the backing set of urls; {@code null} if it was set to
         *         {@code null} via {@link #setLoopSet(Set)}
         */
        public Set<String> getLoopSet() {
            return this.loopSet;
        }

        /**
         * Replaces the backing set.
         *
         * @param set the new set of urls
         */
        public void setLoopSet(Set<String> set) {
            this.loopSet = set;
        }

        /**
         * Replaces the current content with the urls read from the input.
         *
         * @param dataInput source positioned at a serialized loop set
         * @throws IOException if reading fails
         */
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            int count = dataInput.readInt();
            this.loopSet = new HashSet<String>();
            for (int i = 0; i < count; i++) {
                this.loopSet.add(Text.readString(dataInput));
            }
        }

        /**
         * Writes the element count followed by every url. A {@code null}
         * backing set is written as an empty set.
         *
         * @param dataOutput destination of the serialized form
         * @throws IOException if writing fails
         */
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            if (this.loopSet == null) {
                // Previously the count was guarded but the iteration was not,
                // so a null set caused a NullPointerException here.
                dataOutput.writeInt(0);
                return;
            }
            dataOutput.writeInt(this.loopSet.size());
            for (String url : this.loopSet) {
                Text.writeString(dataOutput, url);
            }
        }

        /**
         * @return the urls joined by {@code StringUtils.COMMA_STR}, or an
         *         empty string when the set is empty or {@code null}
         */
        @Override
        public String toString() {
            if (this.loopSet == null || this.loopSet.isEmpty()) {
                // An empty set used to fail in substring(0, -1).
                return "";
            }
            StringBuilder sb = new StringBuilder();
            Iterator<String> it = this.loopSet.iterator();
            sb.append(it.next());
            while (it.hasNext()) {
                sb.append(StringUtils.COMMA_STR).append(it.next());
            }
            return sb.toString();
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops$Looper.class */
    /**
     * One iteration of the loop search. For every url the reducer sees the
     * routes that have reached it and the urls of its link values. A route
     * whose target is among those urls is marked as found; any other unfound
     * route is forwarded to each of those urls, unless this is the last
     * iteration.
     */
    public static class Looper extends Configured implements Mapper<Text, Writable, Text, ObjectWritable>, Reducer<Text, ObjectWritable, Text, Route> {
        private JobConf conf;
        private boolean last = false;

        /** Creates an unconfigured instance. */
        public Looper() {
        }

        /**
         * Creates an instance and stores the given configuration.
         *
         * @param configuration configuration passed to {@code setConf}
         */
        public Looper(Configuration configuration) {
            setConf(configuration);
        }

        /**
         * Remembers the job configuration and reads the boolean {@code last}
         * property (default {@code false}).
         *
         * @param jobConf the job configuration
         */
        @Override
        public void configure(JobConf jobConf) {
            this.conf = jobConf;
            this.last = jobConf.getBoolean("last", false);
        }

        /**
         * Wraps the value in an {@link ObjectWritable}. A {@link LinkDatum} is
         * reduced to a {@link Text} holding its url; any other value is
         * cloned.
         *
         * @param text the input key, emitted unchanged
         * @param writable the input value
         * @param outputCollector receives the wrapped value
         * @param reporter progress reporter (not used)
         * @throws IOException if collecting the output fails
         */
        @Override
        public void map(Text text, Writable writable, OutputCollector<Text, ObjectWritable> outputCollector, Reporter reporter) throws IOException {
            Writable payload;
            if (writable instanceof LinkDatum) {
                payload = new Text(((LinkDatum) writable).getUrl());
            } else {
                payload = WritableUtils.clone(writable, this.conf);
            }
            ObjectWritable wrapped = new ObjectWritable();
            wrapped.set(payload);
            outputCollector.collect(text, wrapped);
        }

        /**
         * Processes all routes that reached the key url.
         * <ul>
         * <li>A route already marked as found is emitted unchanged.</li>
         * <li>A route whose target is one of the collected link urls is
         * marked as found and emitted.</li>
         * <li>Any other route is emitted once per collected link url, keyed by
         * that url. On the last iteration it is dropped instead.</li>
         * </ul>
         *
         * @param text the url all values belong to
         * @param it the wrapped routes and link urls
         * @param outputCollector receives the resulting routes
         * @param reporter pinged every 100 input values
         * @throws IOException if collecting the output fails
         */
        @Override
        public void reduce(Text text, Iterator<ObjectWritable> it, OutputCollector<Text, Route> outputCollector, Reporter reporter) throws IOException {
            ArrayList<Route> routes = new ArrayList<Route>();
            LinkedHashSet<String> linkUrls = new LinkedHashSet<String>();

            int numValues = 0;
            while (it.hasNext()) {
                Object value = it.next().get();
                if (value instanceof Route) {
                    routes.add(WritableUtils.clone((Route) value, this.conf));
                } else if (value instanceof Text) {
                    // Set.add already ignores duplicates.
                    linkUrls.add(((Text) value).toString());
                }
                numValues++;
                if (numValues % 100 == 0) {
                    reporter.progress();
                }
            }

            // Walk the routes in their original order, dropping each reference
            // as soon as it is taken so processed routes can be reclaimed.
            // Nulling the slot is O(1); removing the head of an ArrayList per
            // element shifted the whole tail each time (quadratic overall).
            int numRoutes = routes.size();
            for (int idx = 0; idx < numRoutes; idx++) {
                Route route = routes.set(idx, null);
                if (route.isFound()) {
                    outputCollector.collect(text, route);
                } else if (linkUrls.contains(route.getLookingFor())) {
                    route.setFound(true);
                    outputCollector.collect(text, route);
                } else if (!this.last) {
                    for (String linkUrl : linkUrls) {
                        outputCollector.collect(new Text(linkUrl), route);
                    }
                }
            }
        }

        /** Nothing to release. */
        @Override
        public void close() {
        }
    }

    /* loaded from: input_file:nutch-1.5.1.jar:org/apache/nutch/scoring/webgraph/Loops$Route.class */
    /**
     * Writable describing a route: an outlink url, the url being looked for
     * and a flag telling whether that url has been found. The serialized form
     * is the two strings followed by the boolean.
     */
    public static class Route implements Writable {
        private String outlinkUrl;
        private String lookingFor;
        private boolean found;

        /** @return the outlink url of this route */
        public String getOutlinkUrl() {
            return outlinkUrl;
        }

        /** @param str the outlink url of this route */
        public void setOutlinkUrl(String str) {
            outlinkUrl = str;
        }

        /** @return the url this route is looking for */
        public String getLookingFor() {
            return lookingFor;
        }

        /** @param str the url this route is looking for */
        public void setLookingFor(String str) {
            lookingFor = str;
        }

        /** @return whether the route has been marked as found */
        public boolean isFound() {
            return found;
        }

        /** @param z the new value of the found flag */
        public void setFound(boolean z) {
            found = z;
        }

        /**
         * Reads the outlink url, the looked-for url and the found flag, in
         * that order.
         *
         * @param dataInput source positioned at a serialized route
         * @throws IOException if reading fails
         */
        @Override
        public void readFields(DataInput dataInput) throws IOException {
            outlinkUrl = Text.readString(dataInput);
            lookingFor = Text.readString(dataInput);
            found = dataInput.readBoolean();
        }

        /**
         * Writes the outlink url, the looked-for url and the found flag, in
         * that order.
         *
         * @param dataOutput destination of the serialized form
         * @throws IOException if writing fails
         */
        @Override
        public void write(DataOutput dataOutput) throws IOException {
            Text.writeString(dataOutput, outlinkUrl);
            Text.writeString(dataOutput, lookingFor);
            dataOutput.writeBoolean(found);
        }
    }

    /**
     * Runs the loop detection over a web graph database: one
     * {@link Initializer} job, then {@code link.loops.depth} (default 2)
     * {@link Looper} jobs, then one {@link Finalizer} job that writes the
     * {@code loops} directory below the given path.
     *
     * <p>A failure of any job is logged once here and rethrown.
     *
     * @param path root directory of the web graph database
     * @throws IOException if a job or a file system operation fails
     */
    public void findLoops(Path path) throws IOException {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("Loops: starting at " + dateFormat.format(Long.valueOf(start)));
            LOG.info("Loops: webgraphdb: " + path);
        }

        Configuration conf = getConf();
        FileSystem fs = FileSystem.get(conf);
        Path outlinkDb = new Path(path, WebGraph.OUTLINK_DIR);
        Path nodeDb = new Path(path, WebGraph.NODE_DIR);
        Path routes = new Path(path, ROUTES_DIR);
        // Jobs write to a randomly named directory that is then swapped in
        // as the routes directory.
        Path tempRoute = new Path(path, "routes-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

        JobConf initializer = createInitializerJob(conf, path, outlinkDb, nodeDb, tempRoute);
        try {
            runRouteJob(initializer, "initializer", fs, routes, tempRoute);

            int depth = conf.getInt("link.loops.depth", 2);
            for (int i = 0; i < depth; i++) {
                JobConf looper = createLooperJob(conf, i, depth, outlinkDb, routes, tempRoute);
                runRouteJob(looper, "looper", fs, routes, tempRoute);
            }

            JobConf finalizer = createFinalizerJob(conf, path, routes);
            LOG.info("Loops: starting finalizer");
            JobClient.runJob(finalizer);
            LOG.info("Loops: finished finalizer");
        } catch (IOException e) {
            // Single catch: the former nested handlers logged the same
            // exception twice before it left this method.
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }

        long end = System.currentTimeMillis();
        LOG.info("Loops: finished at " + dateFormat.format(Long.valueOf(end)) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    }

    /** Runs a route-producing job and installs its temporary output as the routes directory. */
    private static void runRouteJob(JobConf job, String stage, FileSystem fs, Path routes, Path tempRoute) throws IOException {
        LOG.info("Loops: starting " + stage);
        JobClient.runJob(job);
        LOG.info("Loops: installing " + stage + " " + routes);
        FSUtils.replace(fs, routes, tempRoute, true);
        LOG.info("Loops: finished " + stage);
    }

    /** Builds the Initializer job reading the outlink and node databases. */
    private static JobConf createInitializerJob(Configuration conf, Path webGraphDb, Path outlinkDb, Path nodeDb, Path output) {
        JobConf job = new NutchJob(conf);
        job.setJobName("Initializer: " + webGraphDb);
        FileInputFormat.addInputPath(job, outlinkDb);
        FileInputFormat.addInputPath(job, nodeDb);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(Initializer.class);
        job.setReducerClass(Initializer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Route.class);
        FileOutputFormat.setOutputPath(job, output);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        return job;
    }

    /** Builds the Looper job for the zero-based iteration out of depth iterations. */
    private static JobConf createLooperJob(Configuration conf, int iteration, int depth, Path outlinkDb, Path routes, Path output) {
        JobConf job = new NutchJob(conf);
        job.setJobName("Looper: " + (iteration + 1) + " of " + depth);
        FileInputFormat.addInputPath(job, outlinkDb);
        FileInputFormat.addInputPath(job, routes);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(Looper.class);
        job.setReducerClass(Looper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Route.class);
        FileOutputFormat.setOutputPath(job, output);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // Tells the Looper whether this is the final iteration.
        job.setBoolean("last", iteration == depth - 1);
        return job;
    }

    /** Builds the Finalizer job turning the routes into the loops directory. */
    private static JobConf createFinalizerJob(Configuration conf, Path webGraphDb, Path routes) {
        JobConf job = new NutchJob(conf);
        job.setJobName("Finalizer: " + webGraphDb);
        FileInputFormat.addInputPath(job, routes);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(Finalizer.class);
        job.setReducerClass(Finalizer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Route.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LoopSet.class);
        FileOutputFormat.setOutputPath(job, new Path(webGraphDb, LOOPS_DIR));
        job.setOutputFormat(MapFileOutputFormat.class);
        return job;
    }

    /**
     * Command line entry point: runs the tool through {@link ToolRunner} with
     * a configuration from {@link NutchConfiguration#create()} and exits the
     * JVM with the tool's return code.
     *
     * @param strArr command line arguments
     * @throws Exception if the tool run throws
     */
    public static void main(String[] strArr) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        int exitCode = ToolRunner.run(nutchConf, new Loops(), strArr);
        System.exit(exitCode);
    }

    /**
     * Parses the command line and runs {@link #findLoops(Path)} on the
     * directory given with {@code -webgraphdb}.
     *
     * @param strArr command line arguments
     * @return 0 on success, -1 when usage help was printed ({@code -help}
     *         given or {@code -webgraphdb} missing), -2 when parsing or the
     *         run threw an exception
     * @throws Exception declared by the {@code Tool} interface; exceptions
     *         raised while parsing or running are caught and logged here
     */
    @Override
    public int run(String[] strArr) throws Exception {
        OptionBuilder.withArgName("help");
        OptionBuilder.withDescription("show this help message");
        Option helpOption = OptionBuilder.create("help");

        OptionBuilder.withArgName("webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("the web graph database to use");
        Option webGraphDbOption = OptionBuilder.create("webgraphdb");

        Options cliOptions = new Options();
        cliOptions.addOption(helpOption);
        cliOptions.addOption(webGraphDbOption);

        try {
            CommandLine line = new GnuParser().parse(cliOptions, strArr);
            if (!line.hasOption("help") && line.hasOption("webgraphdb")) {
                String webGraphDb = line.getOptionValue("webgraphdb");
                findLoops(new Path(webGraphDb));
                return 0;
            }
            // Help requested or the mandatory database argument is missing.
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("Loops", cliOptions);
            return -1;
        } catch (Exception e) {
            LOG.error("Loops: " + StringUtils.stringifyException(e));
            return -2;
        }
    }
}
