/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.EnglishPTBTreebankCorrector;
import edu.stanford.nlp.trees.FilteringTreebank;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.function.Predicate;

/**
 * Command-line utility for loading a treebank from disk and printing or
 * summarising it in various ways.
 *
 * <p>The class is not instantiable; everything is driven from
 * {@link #main(String[])}. Run it without arguments to see the usage text.
 */
public class Treebanks {
    /**
     * Longest sentence length that gets its own histogram bucket in
     * {@link #sentenceLengths}; all longer sentences share one overflow bucket.
     */
    private static final int MAX_TRACKED_LENGTH = 150;

    /** Not instantiable: this class only hosts static entry points. */
    private Treebanks() {
    }

    /** Prints a short usage message to standard error. */
    private static void printUsage() {
        System.err.println("This main method will let you variously manipulate and view a treebank.");
        System.err.println("Usage: java Treebanks [-flags]* treebankPath [fileRanges]");
        System.err.println("Useful flags include:");
        System.err.println("\t-maxLength n\t-suffix ext\t-treeReaderFactory class");
        System.err.println("\t-pennPrint\t-encoding enc\t-tlp class\t-sentenceLengths");
        System.err.println("\t-summary\t-decimate\t-yield\t-correct\t-punct");
        System.err.println("\t-oneLine\t-words\t-taggedWords\t-annotate options");
    }

    /**
     * Tells whether a command-line flag consumes the argument that follows it.
     *
     * @param flag the flag as typed on the command line, including the leading dash
     * @return {@code true} if {@code flag} must be followed by a value
     */
    private static boolean takesValue(String flag) {
        return flag.equals("-maxLength")
                || flag.equals("-minLength")
                || flag.equalsIgnoreCase("-tlp")
                || flag.equals("-treeReaderFactory")
                || flag.equals("-trf")
                || flag.equals("-suffix")
                || flag.equals("-decimate")
                || flag.equals("-encoding")
                || flag.equals("-annotate")
                || flag.equals("-filter");
    }

    /**
     * Tells whether a sentence length lies inside the inclusive
     * {@code [min, max]} range selected with -minLength / -maxLength.
     */
    private static boolean isLengthInRange(int length, int min, int max) {
        return length >= min && length <= max;
    }

    /** Opens a buffered writer on {@code path} using the given character encoding. */
    private static BufferedWriter openWriter(String path, String encoding) throws IOException {
        return new BufferedWriter(new OutputStreamWriter((OutputStream) new FileOutputStream(path), encoding));
    }

    /**
     * Parses the command line, loads the treebank found at the given path and
     * runs every action selected by the flags, in a fixed order.
     *
     * <p>Arguments have the form {@code [-flags]* treebankPath [fileRanges]}.
     * Flags that need a value and are not followed by one cause an error
     * message plus the usage text, and the method returns without doing work.
     *
     * @param args command-line arguments
     * @throws IOException if reading the treebank or writing an output file fails
     * @throws NumberFormatException if the value of -maxLength or -minLength is not an integer
     * @throws RuntimeException if -removeCodeTrees is used on a directory
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            printUsage();
            return;
        }
        int i = 0;
        int maxL = Integer.MAX_VALUE;
        int minL = -1;
        boolean normalized = false;
        boolean decimate = false;
        boolean pennPrintTrees = false;
        boolean oneLinePrint = false;
        boolean printTaggedWords = false;
        boolean printWords = false;
        boolean correct = false;
        String annotationOptions = null;
        boolean summary = false;
        boolean timing = false;
        boolean yield = false;
        boolean punct = false;
        boolean sentenceLengths = false;
        boolean countTaggings = false;
        boolean removeCodeTrees = false;
        String decimatePrefix = null;
        String encoding = "UTF-8";
        String suffix = "mrg";
        TreeReaderFactory trf = null;
        TreebankLanguagePack tlp = null;
        ArrayList<Predicate<Tree>> filters = new ArrayList<>();

        // Consume leading "-flag [value]" arguments; i ends up on the treebank path.
        while (i < args.length && args[i].startsWith("-")) {
            String flag = args[i];
            if (takesValue(flag) && i + 1 >= args.length) {
                // Without this check the value lookup below would run off the end of args.
                System.err.println("Missing argument for option: " + flag);
                printUsage();
                return;
            }
            if (flag.equals("-maxLength")) {
                maxL = Integer.parseInt(args[i + 1]);
                i += 2;
            } else if (flag.equals("-minLength")) {
                minL = Integer.parseInt(args[i + 1]);
                i += 2;
            } else if (flag.equals("-h") || flag.equals("-help")) {
                printUsage();
                ++i;
            } else if (flag.equals("-normalized")) {
                normalized = true;
                ++i;
            } else if (flag.equalsIgnoreCase("-tlp")) {
                try {
                    tlp = (TreebankLanguagePack) Class.forName(args[i + 1]).getDeclaredConstructor().newInstance();
                    // A language pack also supplies the tree reader factory.
                    trf = tlp.treeReaderFactory();
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate as TreebankLanguagePack: " + args[i + 1]);
                    return;
                }
                i += 2;
            } else if (flag.equals("-treeReaderFactory") || flag.equals("-trf")) {
                try {
                    trf = (TreeReaderFactory) Class.forName(args[i + 1]).getDeclaredConstructor().newInstance();
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate as TreeReaderFactory: " + args[i + 1]);
                    return;
                }
                i += 2;
            } else if (flag.equals("-suffix")) {
                suffix = args[i + 1];
                i += 2;
            } else if (flag.equals("-decimate")) {
                decimate = true;
                decimatePrefix = args[i + 1];
                i += 2;
            } else if (flag.equals("-encoding")) {
                encoding = args[i + 1];
                i += 2;
            } else if (flag.equals("-correct")) {
                correct = true;
                ++i;
            } else if (flag.equals("-summary")) {
                summary = true;
                ++i;
            } else if (flag.equals("-yield")) {
                yield = true;
                ++i;
            } else if (flag.equals("-punct")) {
                punct = true;
                ++i;
            } else if (flag.equals("-pennPrint")) {
                pennPrintTrees = true;
                ++i;
            } else if (flag.equals("-oneLine")) {
                oneLinePrint = true;
                ++i;
            } else if (flag.equals("-taggedWords")) {
                printTaggedWords = true;
                ++i;
            } else if (flag.equals("-words")) {
                printWords = true;
                ++i;
            } else if (flag.equals("-annotate")) {
                annotationOptions = args[i + 1];
                i += 2;
            } else if (flag.equals("-timing")) {
                timing = true;
                ++i;
            } else if (flag.equals("-countTaggings")) {
                countTaggings = true;
                ++i;
            } else if (flag.equals("-sentenceLengths")) {
                sentenceLengths = true;
                ++i;
            } else if (flag.equals("-removeCodeTrees")) {
                removeCodeTrees = true;
                ++i;
            } else if (flag.equals("-filter")) {
                Predicate<Tree> filter = ReflectionLoading.loadByReflection(args[i + 1]);
                filters.add(filter);
                i += 2;
            } else {
                System.err.println("Unknown option: " + flag);
                ++i;
            }
        }

        // Final copies so the lambdas below may capture them.
        final int maxLength = maxL;
        final int minLength = minL;
        if (trf == null) {
            trf = in -> new PennTreeReader(in, new LabeledScoredTreeFactory());
        }
        Treebank treebank = normalized ? new DiskTreebank() : new DiskTreebank(trf, encoding);
        for (Predicate<Tree> filter : filters) {
            treebank = new FilteringTreebank(treebank, filter);
        }
        final PrintWriter pw = new PrintWriter((Writer) new OutputStreamWriter((OutputStream) System.out, encoding), true);

        if (i + 1 < args.length) {
            // A second positional argument is a file number range specification.
            treebank.loadPath(args[i], (FileFilter) new NumberRangesFileFilter(args[i + 1], true));
        } else if (i < args.length) {
            treebank.loadPath(args[i], suffix, true);
        } else {
            printUsage();
            return;
        }

        if (annotationOptions != null) {
            System.err.println("annotationOptions not yet implemented");
        }
        if (summary) {
            System.out.println(treebank.textualSummary());
        }
        if (sentenceLengths) {
            sentenceLengths(treebank, args[i], i + 1 < args.length ? args[i + 1] : null, pw);
        }
        if (punct) {
            printPunct(treebank, tlp, pw);
        }
        if (correct) {
            treebank = new EnglishPTBTreebankCorrector().transformTrees(treebank);
        }
        if (pennPrintTrees) {
            treebank.apply(tree -> {
                if (isLengthInRange(tree.yield().size(), minLength, maxLength)) {
                    tree.pennPrint(pw);
                    pw.println();
                }
            });
        }
        if (oneLinePrint) {
            treebank.apply(tree -> {
                if (isLengthInRange(tree.yield().size(), minLength, maxLength)) {
                    pw.println(tree);
                }
            });
        }
        if (printWords) {
            // Each branch gets its own final normalizer so the lambda can capture it.
            final BobChrisTreeNormalizer wordNormalizer = new BobChrisTreeNormalizer();
            treebank.apply(tree -> {
                Tree tPrime = wordNormalizer.normalizeWholeTree(tree, tree.treeFactory());
                if (isLengthInRange(tPrime.yield().size(), minLength, maxLength)) {
                    pw.println(Sentence.listToString(tPrime.taggedYield()));
                }
            });
        }
        if (printTaggedWords) {
            final BobChrisTreeNormalizer taggedNormalizer = new BobChrisTreeNormalizer();
            // NOTE(review): unlike the other printers this one ignores -minLength/-maxLength.
            treebank.apply(tree -> {
                Tree tPrime = taggedNormalizer.normalizeWholeTree(tree, tree.treeFactory());
                pw.println(Sentence.listToString(tPrime.taggedYield(), false, "_"));
            });
        }
        if (countTaggings) {
            countTaggings(treebank, pw);
        }
        if (yield) {
            treebank.apply(tree -> {
                if (isLengthInRange(tree.yield().size(), minLength, maxLength)) {
                    pw.println(Sentence.listToString(tree.yield()));
                }
            });
        }
        if (decimate) {
            // try-with-resources guarantees the three output files are flushed and closed.
            try (BufferedWriter trainWriter = openWriter(decimatePrefix + "-train.txt", encoding);
                 BufferedWriter devWriter = openWriter(decimatePrefix + "-dev.txt", encoding);
                 BufferedWriter testWriter = openWriter(decimatePrefix + "-test.txt", encoding)) {
                treebank.decimate(trainWriter, devWriter, testWriter);
            }
        }
        if (timing) {
            runTiming(treebank);
        }
        if (removeCodeTrees) {
            if (new File(args[i]).isDirectory()) {
                throw new RuntimeException("-removeCodeTrees only works on a single file");
            }
            // Read the whole file, strip "( (CODE <...>))" trees, then overwrite it in place.
            String treebankStr = IOUtils.slurpFile(args[i]);
            treebankStr = treebankStr.replaceAll("\\( \\(CODE <[^>]+>\\)\\)", "");
            try (Writer w = new OutputStreamWriter((OutputStream) new FileOutputStream(args[i]), encoding)) {
                w.write(treebankStr);
            }
        }
    }

    /**
     * Prints every tagged word whose tag is accepted by the language pack's
     * punctuation tag filter, one per line.
     *
     * @param treebank the trees to scan
     * @param tlp language pack supplying the punctuation tag filter; when
     *     {@code null} an error is reported on standard error and nothing is printed
     * @param pw destination for the punctuation tokens
     */
    private static void printPunct(Treebank treebank, TreebankLanguagePack tlp, PrintWriter pw) {
        if (tlp == null) {
            System.err.println("The -punct option requires you to specify -tlp");
            return;
        }
        Predicate<String> punctTagFilter = tlp.punctuationTagAcceptFilter();
        for (Tree t : treebank) {
            for (TaggedWord tw : t.taggedYield()) {
                if (punctTagFilter.test(tw.tag())) {
                    pw.println(tw);
                }
            }
        }
    }

    /**
     * Counts how often each word occurs with each tag and prints one line per
     * word: the word, then tab-separated tag/count pairs.
     *
     * @param tb the trees to count over
     * @param pw destination for the table
     */
    private static void countTaggings(Treebank tb, PrintWriter pw) {
        TwoDimensionalCounter<String, String> wordTagCounts = new TwoDimensionalCounter<>();
        tb.apply(tree -> {
            for (TaggedWord tw : tree.taggedYield()) {
                wordTagCounts.incrementCount(tw.word(), tw.tag());
            }
        });
        for (String word : wordTagCounts.firstKeySet()) {
            pw.print(word);
            pw.print('\t');
            Counter<String> tagCounts = wordTagCounts.getCounter(word);
            for (String tag : tagCounts.keySet()) {
                pw.print(tag + '\t' + tagCounts.getCount(tag) + '\t');
            }
            pw.println();
        }
    }

    /**
     * Times three operations on the treebank and reports the results: counting
     * words through the iterator, counting words through a {@link TreeVisitor},
     * and asking for the number of trees.
     *
     * @param treebank the treebank to traverse
     */
    private static void runTiming(Treebank treebank) {
        System.out.println();
        Timing.startTime();
        int iteratorCount = 0;
        for (Tree t : treebank) {
            iteratorCount += t.yield().size();
        }
        Timing.endTime("traversing corpus, counting words with iterator");
        System.err.println("There were " + iteratorCount + " words in the treebank.");

        // Restart the timer for the second traversal, as is done before the size() measurement.
        Timing.startTime();
        // One-element array so the visitor can hand its total back to this method.
        final int[] visitorCount = new int[1];
        treebank.apply(new TreeVisitor() {
            @Override
            public void visitTree(Tree t) {
                visitorCount[0] += t.yield().size();
            }
        });
        System.err.println();
        Timing.endTime("traversing corpus, counting words with TreeVisitor");
        System.err.println("There were " + visitorCount[0] + " words in the treebank.");

        System.err.println();
        Timing.startTime();
        System.err.println("This treebank contains " + treebank.size() + " trees.");
        Timing.endTime("size of corpus");
    }

    /**
     * Prints a histogram of sentence lengths plus the average, median and
     * maximum length to standard output, then writes the longest tree to
     * {@code pw}.
     *
     * <p>Lengths up to {@link #MAX_TRACKED_LENGTH} are counted individually;
     * longer sentences share a single overflow bucket. The median is only
     * searched for among the individually counted lengths, so it is reported
     * as 0.0 when it falls into the overflow bucket.
     *
     * @param treebank the trees to measure
     * @param name treebank path, echoed in the header line
     * @param range file range specification echoed in the header line, or {@code null}
     * @param pw destination for the longest sentence
     */
    private static void sentenceLengths(Treebank treebank, String name, String range, PrintWriter pw) {
        final int overflowBucket = MAX_TRACKED_LENGTH + 1;
        int[] lengthCounts = new int[MAX_TRACKED_LENGTH + 2];
        int numSents = 0;
        int longestSeen = 0;
        int totalWords = 0;
        String longSent = "";
        DecimalFormat nf = new DecimalFormat("0.0");

        for (Tree t : treebank) {
            ++numSents;
            int len = t.yield().size();
            lengthCounts[len <= MAX_TRACKED_LENGTH ? len : overflowBucket]++;
            totalWords += len;
            if (len > longestSeen) {
                longestSeen = len;
                longSent = t.toString();
            }
        }

        System.out.print("Files " + name + ' ');
        if (range != null) {
            System.out.print(range + ' ');
        }
        System.out.println("consists of " + numSents + " sentences");

        double median = 0.0;
        boolean foundMedian = false;
        int runningTotal = 0;
        for (int len = 0; len <= MAX_TRACKED_LENGTH; ++len) {
            int totalBefore = runningTotal;
            runningTotal += lengthCounts[len];
            System.out.println("  " + lengthCounts[len] + " of length " + len + " (running total: " + runningTotal + ')');
            if (foundMedian || runningTotal <= numSents / 2) {
                continue;
            }
            // This bucket holds the upper-middle sentence.
            if (numSents % 2 == 0 && totalBefore == numSents / 2) {
                // Even count and the lower-middle sentence sits in an earlier bucket:
                // average this length with the nearest shorter length that occurs.
                int lower = len - 1;
                while (lower > 0 && lengthCounts[lower] == 0) {
                    --lower;
                }
                median = ((double) len + (double) lower) / 2.0;
            } else {
                median = len;
            }
            foundMedian = true;
        }
        if (lengthCounts[overflowBucket] > 0) {
            runningTotal += lengthCounts[overflowBucket];
            System.out.println("  " + lengthCounts[overflowBucket] + " of length " + overflowBucket + " to " + longestSeen + " (running total: " + runningTotal + ')');
        }

        // Avoid 0/0 (NaN) when the treebank is empty.
        double average = numSents == 0 ? 0.0 : (double) totalWords / (double) numSents;
        System.out.println("Average length: " + nf.format(average) + "; median length: " + nf.format(median));
        System.out.println("Longest sentence is of length: " + longestSeen);
        pw.println(longSent);
    }
}

