/*
 * Decompiled with CFR 0.152.
 */
package de.lmu.ifi.dbs.elki.algorithm.clustering.em;

import de.lmu.ifi.dbs.elki.algorithm.AbstractAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.ClusteringAlgorithm;
import de.lmu.ifi.dbs.elki.algorithm.clustering.em.EMClusterModel;
import de.lmu.ifi.dbs.elki.algorithm.clustering.em.EMClusterModelFactory;
import de.lmu.ifi.dbs.elki.algorithm.clustering.em.MultivariateGaussianModelFactory;
import de.lmu.ifi.dbs.elki.algorithm.clustering.kmeans.KMeans;
import de.lmu.ifi.dbs.elki.data.Cluster;
import de.lmu.ifi.dbs.elki.data.Clustering;
import de.lmu.ifi.dbs.elki.data.NumberVector;
import de.lmu.ifi.dbs.elki.data.model.MeanModel;
import de.lmu.ifi.dbs.elki.data.type.SimpleTypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeInformation;
import de.lmu.ifi.dbs.elki.data.type.TypeUtil;
import de.lmu.ifi.dbs.elki.database.Database;
import de.lmu.ifi.dbs.elki.database.datastore.DataStoreUtil;
import de.lmu.ifi.dbs.elki.database.datastore.WritableDataStore;
import de.lmu.ifi.dbs.elki.database.ids.DBIDIter;
import de.lmu.ifi.dbs.elki.database.ids.DBIDRef;
import de.lmu.ifi.dbs.elki.database.ids.DBIDUtil;
import de.lmu.ifi.dbs.elki.database.ids.DBIDs;
import de.lmu.ifi.dbs.elki.database.ids.HashSetModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.ids.ModifiableDBIDs;
import de.lmu.ifi.dbs.elki.database.relation.MaterializedRelation;
import de.lmu.ifi.dbs.elki.database.relation.Relation;
import de.lmu.ifi.dbs.elki.distance.distancefunction.minkowski.SquaredEuclideanDistanceFunction;
import de.lmu.ifi.dbs.elki.logging.Logging;
import de.lmu.ifi.dbs.elki.result.AbstractHierarchicalResult;
import de.lmu.ifi.dbs.elki.utilities.Alias;
import de.lmu.ifi.dbs.elki.utilities.documentation.Description;
import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
import de.lmu.ifi.dbs.elki.utilities.documentation.Title;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.AbstractParameterizer;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.ObjectParameter;
import java.util.ArrayList;
import java.util.List;

@Title(value="EM-Clustering: Clustering by Expectation Maximization")
@Description(value="Cluster data via Gaussian mixture modeling and the EM algorithm")
@Reference(authors="A. P. Dempster, N. M. Laird, D. B. Rubin", title="Maximum Likelihood from Incomplete Data via the EM algorithm", booktitle="Journal of the Royal Statistical Society, Series B, 39(1), 1977, pp. 1-31", url="http://www.jstor.org/stable/2984875")
@Alias(value={"de.lmu.ifi.dbs.elki.algorithm.clustering.EM"})
public class EM<V extends NumberVector, M extends MeanModel>
extends AbstractAlgorithm<Clustering<M>>
implements ClusteringAlgorithm<Clustering<M>> {
    private static final Logging LOG = Logging.getLogger(EM.class);
    private int k;
    private double delta;
    private EMClusterModelFactory<V, M> mfactory;
    private int maxiter;
    private boolean soft;
    private static final double MIN_LOGLIKELIHOOD = -100000.0;
    public static final SimpleTypeInformation<double[]> SOFT_TYPE = new SimpleTypeInformation<double[]>(double[].class);

    public EM(int n, double d, EMClusterModelFactory<V, M> eMClusterModelFactory, int n2, boolean bl) {
        this.k = n;
        this.delta = d;
        this.mfactory = eMClusterModelFactory;
        this.maxiter = n2;
        this.setSoft(bl);
    }

    public Clustering<M> run(Database database, Relation<V> relation) {
        if (relation.size() == 0) {
            throw new IllegalArgumentException("database empty: must contain elements");
        }
        if (LOG.isVerbose()) {
            LOG.verbose("initializing " + this.k + " models");
        }
        List<EMClusterModel<M>> list = this.mfactory.buildInitialModels(database, relation, this.k, SquaredEuclideanDistanceFunction.STATIC);
        WritableDataStore<double[]> writableDataStore = DataStoreUtil.makeStorage(relation.getDBIDs(), 10, double[].class);
        double d = EM.assignProbabilitiesToInstances(relation, list, writableDataStore);
        if (LOG.isVerbose()) {
            LOG.verbose("iterating EM");
        }
        if (LOG.isVerbose()) {
            LOG.verbose("iteration 0 - expectation value: " + d);
        }
        for (int i = 1; i <= this.maxiter || this.maxiter < 0; ++i) {
            double d2 = d;
            EM.recomputeCovarianceMatrices(relation, writableDataStore, list);
            d = EM.assignProbabilitiesToInstances(relation, list, writableDataStore);
            if (LOG.isVerbose()) {
                LOG.verbose("iteration " + i + " - expectation value: " + d);
            }
            if (Math.abs(d2 - d) <= this.delta || d2 > d) break;
        }
        if (LOG.isVerbose()) {
            LOG.verbose("assigning clusters");
        }
        ArrayList<HashSetModifiableDBIDs> arrayList = new ArrayList<HashSetModifiableDBIDs>(this.k);
        for (int i = 0; i < this.k; ++i) {
            arrayList.add(DBIDUtil.newHashSet());
        }
        Object object = relation.iterDBIDs();
        while (object.valid()) {
            double[] dArray = (double[])writableDataStore.get((DBIDRef)object);
            int n = 0;
            double d3 = 0.0;
            for (int i = 0; i < this.k; ++i) {
                if (!(dArray[i] > d3)) continue;
                n = i;
                d3 = dArray[i];
            }
            ((ModifiableDBIDs)arrayList.get(n)).add((DBIDRef)object);
            object.advance();
        }
        object = new Clustering("EM Clustering", "em-clustering");
        for (int i = 0; i < this.k; ++i) {
            Cluster<M> cluster = new Cluster<M>((DBIDs)arrayList.get(i), list.get(i).finalizeCluster());
            ((Clustering)object).addToplevelCluster(cluster);
        }
        if (this.isSoft()) {
            ((AbstractHierarchicalResult)object).addChildResult(new MaterializedRelation<double[]>("cluster assignments", "em-soft-score", SOFT_TYPE, writableDataStore, relation.getDBIDs()));
        } else {
            writableDataStore.destroy();
        }
        return object;
    }

    /*
     * WARNING - void declaration
     */
    public static void recomputeCovarianceMatrices(Relation<? extends NumberVector> relation, WritableDataStore<double[]> writableDataStore, List<? extends EMClusterModel<?>> list) {
        Object object;
        Object object2;
        for (EMClusterModel<?> object32 : list) {
            object32.beginEStep();
        }
        Object object4 = new double[list.size()];
        DBIDIter n = relation.iterDBIDs();
        while (n.valid()) {
            object2 = (double[])writableDataStore.get(n);
            object = relation.get(n);
            int n2 = 0;
            for (EMClusterModel<?> eMClusterModel : list) {
                Object object3 = object2[n2];
                if (object3 > 0.0) {
                    eMClusterModel.updateE((NumberVector)object, (double)object3);
                }
                Object object5 = object4;
                int n3 = n2++;
                object5[n3] = object5[n3] + object3;
            }
            n.advance();
        }
        boolean bl = false;
        object2 = list.iterator();
        while (object2.hasNext()) {
            void var4_7;
            object = (EMClusterModel)object2.next();
            object.finalizeEStep();
            object.setWeight((double)(object4[var4_7] / (double)relation.size()));
            ++var4_7;
        }
    }

    public static double assignProbabilitiesToInstances(Relation<? extends NumberVector> relation, List<? extends EMClusterModel<?>> list, WritableDataStore<double[]> writableDataStore) {
        int n = list.size();
        double d = 0.0;
        DBIDIter dBIDIter = relation.iterDBIDs();
        while (dBIDIter.valid()) {
            NumberVector numberVector = relation.get(dBIDIter);
            double[] dArray = new double[n];
            int n2 = 0;
            for (EMClusterModel<?> eMClusterModel : list) {
                dArray[n2] = eMClusterModel.estimateDensity(numberVector);
                ++n2;
            }
            double d2 = 0.0;
            for (int i = 0; i < n; ++i) {
                d2 += dArray[i];
            }
            double d3 = Math.max(Math.log(d2), -100000.0);
            d += d3 == d3 ? d3 : 0.0;
            double[] dArray2 = new double[n];
            if (d2 > 0.0) {
                for (int i = 0; i < n; ++i) {
                    dArray2[i] = dArray[i] / d2;
                }
            }
            writableDataStore.put(dBIDIter, dArray2);
            dBIDIter.advance();
        }
        return d / (double)relation.size();
    }

    @Override
    public TypeInformation[] getInputTypeRestriction() {
        return TypeUtil.array(TypeUtil.NUMBER_VECTOR_FIELD);
    }

    @Override
    protected Logging getLogger() {
        return LOG;
    }

    public boolean isSoft() {
        return this.soft;
    }

    public void setSoft(boolean bl) {
        this.soft = bl;
    }

    public static class Parameterizer<V extends NumberVector, M extends MeanModel>
    extends AbstractParameterizer {
        public static final OptionID K_ID = new OptionID("em.k", "The number of clusters to find.");
        public static final OptionID DELTA_ID = new OptionID("em.delta", "The termination criterion for maximization of E(M): E(M) - E(M') < em.delta");
        public static final OptionID INIT_ID = new OptionID("em.model", "Model factory.");
        protected int k;
        protected double delta;
        protected EMClusterModelFactory<V, M> initializer;
        protected int maxiter = -1;

        @Override
        protected void makeOptions(Parameterization parameterization) {
            IntParameter intParameter;
            DoubleParameter doubleParameter;
            ObjectParameter objectParameter;
            super.makeOptions(parameterization);
            IntParameter intParameter2 = new IntParameter(K_ID);
            intParameter2.addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
            if (parameterization.grab(intParameter2)) {
                this.k = (Integer)intParameter2.getValue();
            }
            if (parameterization.grab(objectParameter = new ObjectParameter(INIT_ID, (Class<?>)EMClusterModelFactory.class, MultivariateGaussianModelFactory.class))) {
                this.initializer = (EMClusterModelFactory)objectParameter.instantiateClass(parameterization);
            }
            if (parameterization.grab(doubleParameter = (DoubleParameter)new DoubleParameter(DELTA_ID, 1.0E-7).addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE))) {
                this.delta = (Double)doubleParameter.getValue();
            }
            if (parameterization.grab(intParameter = (IntParameter)((IntParameter)new IntParameter(KMeans.MAXITER_ID).addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_INT)).setOptional(true))) {
                this.maxiter = (Integer)intParameter.getValue();
            }
        }

        @Override
        protected EM<V, M> makeInstance() {
            return new EM<V, M>(this.k, this.delta, this.initializer, this.maxiter, false);
        }
    }
}

