/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.supervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.ContingencyTables;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SpecialFunctions;
import weka.core.Statistics;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.WeightedAttributesHandler;
import weka.core.WeightedInstancesHandler;
import weka.filters.SimpleBatchFilter;
import weka.filters.SupervisedFilter;

/**
 * Supervised batch filter that merges values of nominal attributes based on
 * chi-squared tests against the class attribute (see {@link #globalInfo()} for
 * the user-facing description and {@link #getTechnicalInformation()} for the
 * reference).
 *
 * <p>The merge plan is computed from the first batch in
 * {@link #determineOutputFormat(Instances)} and stored in
 * {@link #m_Indicators}; {@link #process(Instances)} then maps the values of
 * every affected attribute through that plan.
 */
public class MergeNominalValues
        extends SimpleBatchFilter
        implements SupervisedFilter,
        WeightedInstancesHandler,
        WeightedAttributesHandler,
        TechnicalInformationHandler {

    /** For serialization. */
    static final long serialVersionUID = 7447337831221353842L;

    /** Significance level compared against the chi-squared p-values. */
    protected double m_SigLevel = 0.05;

    /** Range of attribute indices the filter is allowed to act on. */
    protected Range m_SelectCols = new Range("first-last");

    /** Attribute indices selected by {@link #m_SelectCols}, resolved against the input format. */
    protected int[] m_SelectedAttributes;

    /** Per attribute index: {@code true} if at least one of its values was merged. */
    protected boolean[] m_AttToBeModified;

    /**
     * Per attribute index: maps each original value index to the index of the
     * merged value; {@code null} for attributes that were not considered.
     */
    protected int[][] m_Indicators;

    /** Whether merged values are labelled with short numeric identifiers. */
    protected boolean m_UseShortIdentifiers = false;

    /**
     * Returns a description of this filter.
     *
     * @return the description, including the technical reference
     */
    @Override
    public String globalInfo() {
        return "Merges values of all nominal attributes among the specified attributes, excluding the class attribute, using the CHAID method, but without considering re-splitting of merged subsets. It implements Steps 1 and 2 described by Kass (1980), see\n\n"
                + getTechnicalInformation().toString()
                + "\n\nOnce attribute values have been merged, a chi-squared test using the Bonferroni correction is applied to check if the resulting attribute is a valid predictor, based on the Bonferroni multiplier in Equation 3.2 in Kass (1980). If an attribute does not pass this test, all remaining values (if any) are merged. Nevertheless, useless predictors can slip through without being fully merged, e.g. identifier attributes.\n\nThe code applies the Yates correction when the chi-squared statistic is computed.\n\nNote that the algorithm is quadratic in the number of attribute values for an attribute.";
    }

    /**
     * Returns the bibliographic reference for the implemented method.
     *
     * @return the technical information describing Kass (1980)
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.ARTICLE);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Gordon V. Kass");
        result.setValue(TechnicalInformation.Field.TITLE, "An Exploratory Technique for Investigating Large Quantities of Categorical Data");
        result.setValue(TechnicalInformation.Field.JOURNAL, "Applied Statistics");
        result.setValue(TechnicalInformation.Field.YEAR, "1980");
        result.setValue(TechnicalInformation.Field.VOLUME, "29");
        result.setValue(TechnicalInformation.Field.NUMBER, "2");
        result.setValue(TechnicalInformation.Field.PAGES, "119-127");
        return result;
    }

    /**
     * Lists the command-line options of this filter followed by those of the
     * superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();
        // Option names are given without the leading dash, like the other entries below.
        result.addElement(new Option("\tThe significance level (default: 0.05).\n", "L", 1, "-L <double>"));
        result.addElement(new Option("\tSets list of attributes to act on (or its inverse). 'first' and 'last' are accepted as well.\n\tE.g.: first-5,7,9,20-last\n\t(default: first-last)", "R", 1, "-R <range>"));
        result.addElement(new Option("\tInvert matching sense (i.e. act on all attributes not specified in list)", "V", 0, "-V"));
        result.addElement(new Option("\tUse short identifiers for merged subsets.", "O", 0, "-O"));
        result.addAll(Collections.list(super.listOptions()));
        return result.elements();
    }

    /**
     * Returns the current option settings: {@code -L}, {@code -R}, optionally
     * {@code -V} and {@code -O}, followed by the superclass options.
     *
     * @return the options as an array of strings
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        result.add("-L");
        result.add("" + getSignificanceLevel());
        result.add("-R");
        result.add(getAttributeIndices());
        if (getInvertSelection()) {
            result.add("-V");
        }
        if (getUseShortIdentifiers()) {
            result.add("-O");
        }
        Collections.addAll(result, super.getOptions());
        return result.toArray(new String[result.size()]);
    }

    /**
     * Parses the given options. A missing {@code -L} resets the significance
     * level to 0.05 and a missing {@code -R} resets the range to
     * {@code first-last}.
     *
     * @param options the options to parse
     * @throws Exception if an option is invalid or unparsed options remain
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String sigLevel = Utils.getOption('L', options);
        setSignificanceLevel(sigLevel.length() != 0 ? Double.parseDouble(sigLevel) : 0.05);

        String range = Utils.getOption('R', options);
        setAttributeIndices(range.length() != 0 ? range : "first-last");

        setInvertSelection(Utils.getFlag('V', options));
        setUseShortIdentifiers(Utils.getFlag('O', options));
        super.setOptions(options);
        Utils.checkForRemainingOptions(options);
    }

    /**
     * Returns the tip text for the significance level property.
     *
     * @return the tip text
     */
    public String significanceLevelTipText() {
        return "The significance level for the chi-squared test used to decide when to stop merging.";
    }

    /**
     * Gets the significance level.
     *
     * @return the significance level
     */
    public double getSignificanceLevel() {
        return m_SigLevel;
    }

    /**
     * Sets the significance level.
     *
     * @param sF the new significance level
     */
    public void setSignificanceLevel(double sF) {
        m_SigLevel = sF;
    }

    /**
     * Returns the tip text for the attribute indices property.
     *
     * @return the tip text
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on (or its inverse). This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    /**
     * Gets the range of attributes to act on.
     *
     * @return the range list as a string
     */
    public String getAttributeIndices() {
        return m_SelectCols.getRanges();
    }

    /**
     * Sets the range of attributes to act on.
     *
     * @param rangeList the range list as a string
     */
    public void setAttributeIndices(String rangeList) {
        m_SelectCols.setRanges(rangeList);
    }

    /**
     * Sets the attributes to act on from an array of indices, which is
     * converted to a range list via {@code Range.indicesToRangeList}.
     *
     * @param attributes the attribute indices
     */
    public void setAttributeIndicesArray(int[] attributes) {
        setAttributeIndices(Range.indicesToRangeList(attributes));
    }

    /**
     * Returns the tip text for the invert selection property.
     *
     * @return the tip text
     */
    public String invertSelectionTipText() {
        return "Determines whether selected attributes are to be acted on or all other attributes are used instead.";
    }

    /**
     * Gets whether the attribute selection is inverted.
     *
     * @return {@code true} if the selection is inverted
     */
    public boolean getInvertSelection() {
        return m_SelectCols.getInvert();
    }

    /**
     * Sets whether the attribute selection is inverted.
     *
     * @param invert {@code true} to invert the selection
     */
    public void setInvertSelection(boolean invert) {
        m_SelectCols.setInvert(invert);
    }

    /**
     * Returns the tip text for the short identifiers property.
     *
     * @return the tip text
     */
    public String useShortIdentifiersTipText() {
        return "Whether to use short identifiers for the merged values.";
    }

    /**
     * Gets whether short identifiers are used for merged values.
     *
     * @return {@code true} if short identifiers are used
     */
    public boolean getUseShortIdentifiers() {
        return m_UseShortIdentifiers;
    }

    /**
     * Sets whether short identifiers are used for merged values.
     *
     * @param b {@code true} to use short identifiers
     */
    public void setUseShortIdentifiers(boolean b) {
        m_UseShortIdentifiers = b;
    }

    /**
     * Requests the full input data in {@link #determineOutputFormat(Instances)};
     * the merge plan is derived from the instances, not just the header.
     *
     * @return always {@code true}
     */
    @Override
    public boolean allowAccessToFullInputFormat() {
        return true;
    }

    /**
     * Computes the merge plan from the given data and builds the output header.
     *
     * <p>Side effects: resolves {@link #m_SelectCols} against the input and
     * (re)initialises {@link #m_SelectedAttributes}, {@link #m_AttToBeModified}
     * and {@link #m_Indicators}.
     *
     * @param inputFormat the input data, including instances
     * @return an empty dataset with the same relation name and class index, in
     *         which every attribute with merged values is replaced by a new
     *         nominal attribute named {@code <name>_merged_values}
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) {
        int numAtts = inputFormat.numAttributes();
        m_SelectCols.setUpper(numAtts - 1);
        m_SelectedAttributes = m_SelectCols.getSelection();

        double[][][] freqs = countFrequencies(inputFormat);

        // Work out, per candidate attribute, which values end up in which subset.
        m_AttToBeModified = new boolean[numAtts];
        m_Indicators = new int[numAtts][];
        for (int current : m_SelectedAttributes) {
            if (!isMergeCandidate(inputFormat, current)) {
                continue;
            }
            if (m_Debug) {
                System.err.println(inputFormat.attribute(current));
            }
            int[] indicators = mergeValues(freqs[current]);
            m_Indicators[current] = indicators;
            if (m_Debug) {
                for (int j = 0; j < indicators.length; ++j) {
                    System.err.print(" - " + indicators[j] + " - ");
                }
                System.err.println();
            }
            // The attribute only changes if some value no longer maps to itself.
            for (int k = 0; k < indicators.length; ++k) {
                if (indicators[k] != k) {
                    m_AttToBeModified[current] = true;
                    break;
                }
            }
        }

        // Assemble the new header.
        ArrayList<Attribute> atts = new ArrayList<Attribute>();
        for (int i = 0; i < numAtts; ++i) {
            Attribute att = inputFormat.attribute(i);
            if (m_AttToBeModified[i]) {
                atts.add(createMergedAttribute(att, m_Indicators[i]));
            } else {
                atts.add((Attribute) att.copy());
            }
        }
        Instances data = new Instances(inputFormat.relationName(), atts, 0);
        data.setClassIndex(inputFormat.classIndex());
        return data;
    }

    /**
     * Tells whether the attribute at the given index is eligible for merging:
     * it must not be the class attribute and must be nominal.
     */
    private boolean isMergeCandidate(Instances data, int index) {
        return index != data.classIndex() && data.attribute(index).isNominal();
    }

    /**
     * Builds, for every selected candidate attribute, a table of summed
     * instance weights indexed by [attribute value][class value]. Entries for
     * other attributes stay {@code null}. Instances with a missing attribute
     * value or a missing class value do not contribute.
     */
    private double[][][] countFrequencies(Instances data) {
        double[][][] freqs = new double[data.numAttributes()][][];
        for (int current : m_SelectedAttributes) {
            if (isMergeCandidate(data, current)) {
                freqs[current] = new double[data.attribute(current).numValues()][data.numClasses()];
            }
        }
        for (Instance inst : data) {
            for (int current : m_SelectedAttributes) {
                if (!isMergeCandidate(data, current) || inst.isMissing(current) || inst.classIsMissing()) {
                    continue;
                }
                freqs[current][(int) inst.value(current)][(int) inst.classValue()] += inst.weight();
            }
        }
        return freqs;
    }

    /**
     * Creates the replacement attribute for an attribute whose values were
     * merged. Without short identifiers, the label of a merged value is the
     * original labels joined by {@code "_or_"} in value order; with short
     * identifiers it is the 1-based index of the merged value. The attribute
     * weight is carried over.
     */
    private Attribute createMergedAttribute(Attribute att, int[] indicators) {
        // The number of merged values is the largest subset index plus one.
        int numValues = 0;
        for (int subset : indicators) {
            if (subset + 1 > numValues) {
                numValues = subset + 1;
            }
        }

        ArrayList<StringBuilder> labels = new ArrayList<StringBuilder>(numValues);
        for (int j = 0; j < numValues; ++j) {
            labels.add(null);
        }
        for (int j = 0; j < indicators.length; ++j) {
            int subset = indicators[j];
            StringBuilder label = labels.get(subset);
            if (label == null) {
                // First original value seen for this subset starts the label.
                String start = m_UseShortIdentifiers ? "" + (subset + 1) : att.value(j);
                labels.set(subset, new StringBuilder(start));
            } else if (!m_UseShortIdentifiers) {
                label.append("_or_").append(att.value(j));
            }
        }

        ArrayList<String> labelStrings = new ArrayList<String>(labels.size());
        for (StringBuilder label : labels) {
            labelStrings.add(label.toString());
        }
        Attribute merged = new Attribute(att.name() + "_merged_values", labelStrings);
        merged.setWeight(att.weight());
        return merged;
    }

    /**
     * Computes the multiplier applied to the overall p-value in
     * {@link #mergeValues(double[][])} (the Bonferroni multiplier mentioned in
     * {@link #globalInfo()}). The value is the alternating sum over
     * {@code i = 0 .. r-1} of {@code exp(c * ln(r - i) - lnFactorial(i) - lnFactorial(r - i))}.
     *
     * @param c the original number of attribute values
     * @param r the number of value subsets remaining after merging
     * @return the multiplier
     */
    protected double BFfactor(int c, int r) {
        double sum = 0.0;
        double sign = 1.0;
        for (int i = 0; i < r; ++i) {
            double logTerm = (double) c * Math.log(r - i)
                    - (SpecialFunctions.lnFactorial(i) + SpecialFunctions.lnFactorial(r - i));
            sum += sign * Math.exp(logTerm);
            sign *= -1.0;
        }
        return sum;
    }

    /**
     * Greedily merges the rows of a contingency table.
     *
     * <p>In each round the pair of rows with the smallest (Yates-corrected)
     * chi-squared value is determined. If even that pair differs significantly
     * at {@link #m_SigLevel}, merging stops and the remaining table is tested as
     * a whole: when its p-value, multiplied by {@link #BFfactor(int, int)}, is
     * not at or below the significance level, all values are merged into one
     * subset. Otherwise the pair is merged and the search repeats.
     *
     * <p>Note: merged counts are accumulated into the row arrays of the table
     * that was passed in, so the caller's rows are modified.
     *
     * @param counts the contingency table, one row per attribute value
     * @return for every original row index, the index of the subset it ends up in
     */
    protected int[] mergeValues(double[][] counts) {
        int[] indicators = new int[counts.length];
        for (int i = 0; i < indicators.length; ++i) {
            indicators[i] = i;
        }

        // Nothing left to merge once a single subset remains.
        while (counts.length > 1) {
            // Find the two rows that differ the least.
            double[][] pair = new double[2][];
            double minVal = Double.MAX_VALUE;
            int toMergeOne = -1;
            int toMergeTwo = -1;
            for (int i = 0; i < counts.length; ++i) {
                pair[0] = counts[i];
                for (int j = i + 1; j < counts.length; ++j) {
                    pair[1] = counts[j];
                    double val = ContingencyTables.chiVal(pair, true);
                    if (val < minVal) {
                        minVal = val;
                        toMergeOne = i;
                        toMergeTwo = j;
                    }
                }
            }

            // Is the least significant difference still significant?
            if (Statistics.chiSquaredProbability(minVal, pair[0].length - 1) <= m_SigLevel) {
                double val = ContingencyTables.chiVal(counts, true);
                int df = (counts[0].length - 1) * (counts.length - 1);
                double originalSig = Statistics.chiSquaredProbability(val, df);
                double adjustedSig = originalSig * BFfactor(indicators.length, counts.length);
                if (m_Debug) {
                    System.err.println("Original p-value: " + originalSig + "\tAdjusted p-value: " + adjustedSig);
                }
                if (!(adjustedSig <= m_SigLevel)) {
                    // Overall split is not significant: collapse everything into one subset.
                    for (int i = 0; i < indicators.length; ++i) {
                        indicators[i] = 0;
                    }
                }
                break;
            }

            // No comparable pair was found (e.g. every statistic was NaN); stop
            // instead of indexing the table with -1 below.
            if (toMergeOne < 0) {
                break;
            }

            // Shrink the table by folding row toMergeTwo into row toMergeOne.
            double[][] newCounts = new double[counts.length - 1][];
            for (int i = 0; i < counts.length; ++i) {
                if (i < toMergeTwo) {
                    newCounts[i] = counts[i];
                } else if (i == toMergeTwo) {
                    // toMergeOne < toMergeTwo, so its row is already in place.
                    double[] target = newCounts[toMergeOne];
                    for (int k = 0; k < counts[i].length; ++k) {
                        target[k] += counts[i][k];
                    }
                } else {
                    newCounts[i - 1] = counts[i];
                }
            }

            // Re-point every value at its subset's new row index.
            for (int i = 0; i < indicators.length; ++i) {
                if (indicators[i] == toMergeTwo) {
                    indicators[i] = toMergeOne;
                } else if (indicators[i] > toMergeTwo) {
                    indicators[i]--;
                }
            }
            counts = newCounts;
        }
        return indicators;
    }

    /**
     * Returns the capabilities of this filter: all attribute types (with
     * missing values) and a nominal class (with missing class values).
     *
     * <p>The class is restricted to nominal because the frequency tables built
     * in {@link #determineOutputFormat(Instances)} are indexed by the class
     * value.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        return result;
    }

    /**
     * Applies the merge plan to a batch of instances. Values of modified
     * attributes are replaced by their subset index; missing values and all
     * other attributes are copied unchanged. Each output instance keeps the
     * weight of its input instance.
     *
     * @param instances the data to convert
     * @return the converted data
     * @throws Exception if the conversion fails
     */
    @Override
    protected Instances process(Instances instances) throws Exception {
        int numAtts = instances.numAttributes();
        Instances result = new Instances(getOutputFormat(), instances.numInstances());
        for (int i = 0; i < instances.numInstances(); ++i) {
            Instance inst = instances.instance(i);
            double[] newData = new double[numAtts];
            for (int j = 0; j < numAtts; ++j) {
                if (m_AttToBeModified[j] && !inst.isMissing(j)) {
                    newData[j] = m_Indicators[j][(int) inst.value(j)];
                } else {
                    newData[j] = inst.value(j);
                }
            }
            // Carry the instance weight over instead of resetting it to 1.0.
            DenseInstance instNew = new DenseInstance(inst.weight(), newData);
            instNew.setDataset(result);
            copyValues(instNew, false, inst.dataset(), outputFormatPeek());
            result.add(instNew);
        }
        return result;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 14508 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        runFilter(new MergeNominalValues(), args);
    }
}

