org.apache.commons.math3.distribution.EnumeratedIntegerDistribution Java Examples

The following examples show how to use org.apache.commons.math3.distribution.EnumeratedIntegerDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SampleUtil.java    From JavaBase with MIT License 6 votes vote down vote up
/**
 * Draws {@code count} elements from {@code list} using the supplied sampling strategy.
 *
 * <p>Each element is keyed by its position in {@code list}; a distribution over those positions
 * is obtained from {@code generateEnumerated}, and {@code function} decides which positions are
 * returned.
 *
 * @param list candidates to sample from; may be {@code null} or empty
 * @param count number of elements requested
 * @param function strategy that turns a distribution and a count into sampled positions
 * @return the elements at the sampled positions, or an empty list when {@code list} is
 *     {@code null}, empty, or holds fewer than {@code count} elements
 */
private static <T extends SampleAble> List<T> sampleResult(List<T> list, int count,
    BiFunction<EnumeratedIntegerDistribution, Integer, List<Integer>> function) {
  if (list == null || list.isEmpty()) {
    return new ArrayList<>();
  }

  int available = list.size();
  if (count > available) {
    log.warn("data less than count: data size={} count={}", available, count);
    return new ArrayList<>();
  }

  // position -> element lookup, shared with the distribution builder
  Map<Integer, T> byPosition = IntStream.range(0, available).boxed()
      .collect(Collectors.toMap(position -> position, list::get));

  EnumeratedIntegerDistribution positions = generateEnumerated(list, byPosition);

  List<T> picked = new ArrayList<>();
  for (Integer position : function.apply(positions, count)) {
    picked.add(byPosition.get(position));
  }
  return picked;
}
 
Example #2
Source File: SampleUtil.java    From JavaBase with MIT License 6 votes vote down vote up
/**
 * Samples from {@code distribution} until {@code size} distinct values have been collected.
 *
 * <p>NOTE(review): the loop only ends once {@code size} distinct values were seen, so it does not
 * terminate if the distribution cannot produce that many distinct values; confirm that callers
 * guarantee this.
 *
 * @param distribution distribution to draw from; may be {@code null}
 * @param size number of distinct values wanted
 * @return the distinct sampled values (in no particular order), or an empty list when
 *     {@code distribution} is {@code null} or {@code size} is not positive
 */
private static List<Integer> sampleWithNoRepeated(EnumeratedIntegerDistribution distribution,
    int size) {
  if (distribution == null || size <= 0) {
    return new ArrayList<>();
  }

  Set<Integer> distinct = new HashSet<>(size);
  int draws = 0;
  // size is positive here, so at least one draw is always needed
  do {
    distinct.add(distribution.sample());
    draws++;
  } while (distinct.size() < size);

  log.debug("loop: count={}", draws);
  return new ArrayList<>(distinct);
}
 
Example #3
Source File: MarkovChainEvaluator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Markov chain whose row {@code i} of {@code matrix} holds the transition
 * probabilities out of state {@code i}.
 *
 * <p>NOTE(review): {@code state} is not checked against the number of states here.
 *
 * @param matrix transition matrix; must be non-empty and square
 * @param state initial state; any value below zero selects a random initial state
 * @throws IOException if the matrix is empty or its first row length differs from the row count
 */
public MarkovChain(Matrix matrix, int state) throws IOException {
  double[][] data = matrix.getData();

  // An empty matrix has no first row to compare against, so reject it up front
  // instead of failing with an ArrayIndexOutOfBoundsException on data[0].
  if(data.length == 0 || data.length != data[0].length) {
    throw new IOException("markovChain must be initialized with a square matrix.");
  }

  this.distributions = new EnumeratedIntegerDistribution[data.length];

  if(state > -1) {
    this.state = state;
  } else {
    this.state = new Random().nextInt(data.length);
  }

  //Create the states array needed by the enumerated distribution.
  //It is the same for every row, so build it once.
  int[] states = MathArrays.sequence(data.length, 0, 1);

  for(int i=0; i<data.length; i++) {
    distributions[i] = new EnumeratedIntegerDistribution(states, data[i]);
  }
}
 
Example #4
Source File: SampleUtil.java    From JavaBase with MIT License 5 votes vote down vote up
/**
 * Samples one element of {@code list}, removes it from {@code list}, and returns it.
 *
 * @param list candidates; the sampled element is removed from this list
 * @return the element that was removed
 */
private static <T extends SampleAble> T sampleOneWithNoReturn(List<T> list) {
  // position -> element lookup used to build the distribution
  Map<Integer, T> byPosition = IntStream.range(0, list.size()).boxed()
      .collect(Collectors.toMap(position -> position, list::get));

  int picked = generateEnumerated(list, byPosition).sample();

  // Drop the pick from the lookup and, by position, from the caller's list.
  T chosen = byPosition.remove(picked);
  list.remove(picked);
  return chosen;
}
 
Example #5
Source File: SampleUtil.java    From JavaBase with MIT License 5 votes vote down vote up
/**
 * Draws {@code size} independent samples; the same value may appear more than once.
 *
 * @param distribution distribution to draw from
 * @param size number of draws; a non-positive value yields an empty list
 * @return the sampled values in draw order
 */
private static List<Integer> sampleWithRepeated(EnumeratedIntegerDistribution distribution,
    int size) {
  List<Integer> drawn = new ArrayList<>();
  int remaining = size;
  while (remaining > 0) {
    drawn.add(distribution.sample());
    remaining--;
  }
  return drawn;
}
 
Example #6
Source File: SampleUtil.java    From JavaBase with MIT License 5 votes vote down vote up
/**
 * Builds a distribution over the keys of {@code tempMap} in which each key is weighted by the
 * relative weight of the list element at the same position.
 *
 * <p>Presumably the keys of {@code tempMap} are the positions {@code 0..list.size()-1}; the
 * callers visible in this file build the map that way.
 *
 * @param list elements providing the weights; element {@code i} yields probability
 *     {@code weight(i) / sum of all weights}
 * @param tempMap map whose keys become the values of the distribution
 * @return distribution pairing the ascending keys of {@code tempMap} with the probabilities
 *     computed from {@code list}
 */
private static <T extends SampleAble> EnumeratedIntegerDistribution generateEnumerated(
    List<T> list, Map<Integer, T> tempMap) {

  double sum = list.stream().mapToInt(SampleAble::getWeight).sum();

  // probabilities[i] is the normalised weight of list.get(i)
  double[] probabilities = list.stream()
      .mapToDouble(item -> item.getWeight() / sum)
      .toArray();

  // A Map's key set has no guaranteed iteration order, while the probabilities follow list
  // order. Sorting the keys keeps singleton i paired with probability i.
  int[] singletons = tempMap.keySet().stream()
      .mapToInt(Integer::intValue)
      .sorted()
      .toArray();

  return new EnumeratedIntegerDistribution(singletons, probabilities);
}
 
Example #7
Source File: PartitionTest.java    From sequence-mining with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Samples 700000 transactions from four equally weighted sequences that each occur exactly
 * once, and checks that the number of distinct transactions matches {@code modP}.
 */
@Test
public void testInterleavingGenerator() {

	// Fixed seeds keep the sampled transactions reproducible
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	final Multiset<Sequence> seqsI = HashMultiset.create();
	seqsI.add(new Sequence(1, 2, 3));
	seqsI.add(new Sequence(4, 5));
	seqsI.add(new Sequence(6));
	seqsI.add(new Sequence(7));

	final HashMap<Sequence, Double> seqsG = new HashMap<>();
	for (final Sequence seq : seqsI.elementSet()) {
		seqsG.put(seq, 1.0);
	}

	// Every sequence is repeated exactly once: the distribution always returns 1
	final Map<Sequence, EnumeratedIntegerDistribution> countDists = new HashMap<>();
	final EnumeratedIntegerDistribution oneRepeat = new EnumeratedIntegerDistribution(randomC, new int[] { 1 },
			new double[] { 1.0 });
	countDists.put(new Sequence(1, 2, 3), oneRepeat);
	countDists.put(new Sequence(4, 5), oneRepeat);
	countDists.put(new Sequence(6), oneRepeat);
	countDists.put(new Sequence(7), oneRepeat);

	final HashSet<Transaction> transG = new HashSet<>();
	for (int i = 0; i < 700000; i++) {
		transG.add(
				TransactionGenerator.sampleFromDistribution(random, seqsG, countDists, new HashMap<>(), randomI));
	}
	// Note that upper bound is exact when there are no repetitions.
	// Expected value goes first so a failure message reports the two values the right way round.
	assertEquals(modP(seqsI.iterator()), transG.size(), EPS);
}
 
Example #8
Source File: BM.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Draws one vector from the mixture distribution: a cluster index is sampled according to
 * {@code mixtureCoefficients}, then every dimension is sampled from that cluster's
 * distribution for the dimension.
 * @return a new dense vector with {@code dimension} sampled entries
 */
public Vector sample(){
    Vector sampled = new DenseVector(dimension);
    // choose the cluster first
    EnumeratedIntegerDistribution clusterPicker = new EnumeratedIntegerDistribution(
            IntStream.range(0,numClusters).toArray(), mixtureCoefficients);
    int chosen = clusterPicker.sample();
    // then fill each dimension from the chosen cluster
    int d = 0;
    while (d<dimension){
        sampled.set(d,distributions[chosen][d].sample());
        d++;
    }
    return sampled;
}
 
Example #9
Source File: KMeansPlusPlus.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the initial cluster centres until {@code numComponents} centres exist.
 * The first centre is the row chosen by {@code Sampling.intUniform}; every further centre is
 * sampled with probability proportional to the entries of {@code distances} after
 * {@code updateDistance()} has run.
 * @param print when true, progress messages are written to standard output
 */
public void initialize(boolean print){
    if (print){
        System.out.println("initialize");
    }

    // first centre
    int firstPick = Sampling.intUniform(0,dataSet.getNumDataPoints()-1);
    centers.add(dataSet.getRow(firstPick));
    pickedIds.add(firstPick);
    if (print){
        System.out.println("randomly pick instance "+(firstPick+1)+" as the initial centroid for cluster "+centers.size());
    }

    // remaining centres
    while(numComponents>centers.size()){
        updateDistance();
        // normalise in place so the entries can be used as probabilities
        double total = MathUtil.arraySum(distances);
        for (int pos=0;pos<distances.length;pos++){
            distances[pos] /= total;
        }
        EnumeratedIntegerDistribution picker = new EnumeratedIntegerDistribution(
                IntStream.range(0, dataSet.getNumDataPoints()).toArray(), distances);
        int nextPick = picker.sample();
        centers.add(dataSet.getRow(nextPick));
        pickedIds.add(nextPick);
        if (print){
            System.out.println("randomly pick instance "+(nextPick+1)+" as the initial centroid for cluster "+centers.size());
        }

    }
}
 
Example #10
Source File: ConsumerVerifier.java    From hermes with Apache License 2.0 4 votes vote down vote up
/**
 * Sets up {@code nackRnd} so that it yields 0 with probability 0.05 and 1 with probability 0.95.
 */
@Before
public void before() {
	nackRnd = new EnumeratedIntegerDistribution(
			new int[] { 0, 1 },
			new double[] { 0.05, 0.95 });
}
 
Example #11
Source File: ClassifierWeightedSampling.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Assigns an acceptance threshold of 1 to a weighted random subset of the dataset and 0 to all
 * other instances.
 *
 * <p>Each instance gets a weight derived from the pilot's prediction for it; indices are then
 * drawn without repetition from the distribution defined by those weights until the sample size
 * is reached.
 *
 * @param dataset instances to compute thresholds for
 * @param pilot trained classifier used to weight the instances
 * @return one (instance, threshold) pair per dataset entry, in dataset order; the threshold is 1
 *         for drawn indices and 0 otherwise
 */
@Override
public List<Pair<ILabeledInstance, Double>> calculateAcceptanceThresholdsWithTrainedPilot(final D dataset, final IClassifier pilot) {

	/* compute mean value and base values the instances must have */
	double mid = this.getMean(dataset);
	double baseValue = 10 * mid + 1; // arbitrary value, there is most likely a better one
	double addForRightClassification = baseValue + 2 * mid; // like baseValue

	/* determine the weight with which each index is chosen */
	double[] weights = new double[dataset.size()];
	int selectable = 0; // number of indices that can actually be drawn
	for (int i = 0; i < weights.length; i++) {
		try {
			IPrediction prediction = pilot.predict(dataset.get(i));
			Object predicted = prediction.getLabelWithHighestProbability();
			Object actual = dataset.get(i).getLabel();
			/* compare labels by value; == alone would only detect identical references */
			if (predicted == actual || (predicted != null && predicted.equals(actual))) {
				weights[i] = addForRightClassification - prediction.getProbabilityOfLabel(dataset.get(i).getLabel());
			} else {
				weights[i] = baseValue + prediction.getProbabilityOfLabel(prediction.getLabelWithHighestProbability());
			}
		} catch (Exception e) {
			/* best effort: an instance the pilot cannot handle gets weight 0 */
			weights[i] = 0;
		}
		if (weights[i] > 0) {
			selectable++;
		}
	}
	/* the index array must have exactly as many entries as the weight array */
	int[] indices = IntStream.range(0, weights.length).toArray();
	EnumeratedIntegerDistribution finalDistribution = new EnumeratedIntegerDistribution(indices, weights);
	finalDistribution.reseedRandomGenerator(this.rand.nextLong());

	/* now draw <number of samples> many distinct indices whose threshold will be set to 1;
	 * the target is capped at the number of positive-weight indices, because asking for more
	 * distinct indices than can be drawn would never terminate */
	int n = Math.min(this.getSampleSize(), selectable);
	Set<Integer> consideredIndices = new HashSet<>();
	while (consideredIndices.size() < n) {
		consideredIndices.add(finalDistribution.sample());
	}

	/* now create the list of pairs */
	List<Pair<ILabeledInstance, Double>> thresholds = new ArrayList<>();
	int m = dataset.size();
	for (int i = 0; i < m; i++) {
		ILabeledInstance inst = dataset.get(i);
		double threshold = consideredIndices.contains(i) ? 1 : 0;
		thresholds.add(new Pair<>(inst, threshold));
	}
	return thresholds;
}
 
Example #12
Source File: TransactionGenerator.java    From sequence-mining with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Generate transactions from set of interesting sequences and write them to {@code outFile}.
 *
 * <p>Each non-empty transaction is written on its own line: every item is followed by
 * {@code " -1 "} and the line ends with {@code "-2"}. Empty transactions are not written and
 * do not count towards {@code noTransactions}.
 *
 * @param sequences
 *            sequences handed to {@code sampleFromDistribution}
 * @param probabilities
 *            for each sequence, the values it may take (column key) and their probabilities
 * @param noTransactions
 *            number of non-empty transactions to write
 * @param outFile
 *            output file, written as UTF-8
 * @return the map passed to {@code sampleFromDistribution} to record the sequences added
 * @throws IOException
 *             if the output file cannot be written or read back
 */
public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences,
		final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile)
				throws IOException {

	// Set random number seeds
	final Random random = new Random(1);
	final Random randomI = new Random(10);
	final RandomGenerator randomC = new JDKRandomGenerator();
	randomC.setSeed(100);

	// Storage for sequences actually added
	final HashMap<Sequence, Double> addedSequences = new HashMap<>();

	// Set output file; try-with-resources closes it even if sampling fails part-way
	try (PrintWriter out = new PrintWriter(outFile, "UTF-8")) {

		// Add to distribution class for easy sampling
		final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>();
		for (final Sequence seq : sequences.keySet()) {
			final List<Integer> singletons = new ArrayList<>();
			final List<Double> probs = new ArrayList<>();
			for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) {
				singletons.add(entry.getKey());
				probs.add(entry.getValue());
			}
			final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC,
					Ints.toArray(singletons), Doubles.toArray(probs));
			dists.put(seq, dist);
		}

		// Generate transaction database
		int count = 0;
		while (count < noTransactions) {

			// Generate transaction from distribution
			final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI);
			for (final int item : transaction) {
				out.print(item + " -1 ");
			}
			// Only non-empty transactions terminate a line and are counted
			if (!transaction.isEmpty()) {
				out.print("-2");
				out.println();
				count++;
			}

		}
	}

	// Print file to screen
	if (VERBOSE) {
		final LineIterator it = new LineIterator(new FileReader(outFile));
		try {
			while (it.hasNext()) {
				System.out.println(it.nextLine());
			}
		} finally {
			// Release the reader even if printing fails
			LineIterator.closeQuietly(it);
		}
	}

	return addedSequences;
}
 
Example #13
Source File: MultiLabelSynthesizer.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a synthetic 4-class, 2-feature data set and then flips one label per data point.
 * Class weight vectors:
 * y0: w=(0,1)
 * y1: w=(1,1)
 * y2: w=(1,0)
 * y3: w=(1,-1)
 * Features are drawn with Sampling.doubleUniform(-1, 1); class k is assigned when the dot
 * product of its weight vector with the feature row is non-negative. The class to flip is
 * drawn with probabilities (0.4, 0.2, 0.2, 0.2).
 * @param numData number of data points to generate
 * @return the generated data set
 */
public static MultiLabelClfDataSet flipOneNonUniform(int numData){
    int numClass = 4;
    int numFeature = 2;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // generate weights: row k of the table is the weight vector of class k
    double[][] coefficients = {
            {0, 1},
            {1, 1},
            {1, 0},
            {1, -1}
    };
    Vector[] weights = new Vector[numClass];
    for (int k=0;k<numClass;k++){
        weights[k] = new DenseVector(numFeature);
    }
    for (int k=0;k<numClass;k++){
        for (int j=0;j<numFeature;j++){
            weights[k].set(j, coefficients[k][j]);
        }
    }

    // generate features
    for (int row=0;row<numData;row++){
        for (int col=0;col<numFeature;col++){
            dataSet.setFeatureValue(row,col,Sampling.doubleUniform(-1, 1));
        }
    }

    // assign labels
    for (int row=0;row<numData;row++){
        for (int k=0;k<numClass;k++){
            double score = weights[k].dot(dataSet.getRow(row));
            if (score>=0){
                dataSet.addLabel(row,k);
            }
        }
    }

    IntegerDistribution flipPicker = new EnumeratedIntegerDistribution(
            new int[]{0,1,2,3}, new double[]{0.4,0.2,0.2,0.2});

    // flip: toggle the sampled class on every data point
    for (int row=0;row<numData;row++){
        int flipped = flipPicker.sample();
        MultiLabel label = dataSet.getMultiLabels()[row];
        if (!label.matchClass(flipped)){
            label.addLabel(flipped);
        } else {
            label.removeLabel(flipped);
        }
    }

    return dataSet;
}
 
Example #14
Source File: MultiLabelSynthesizer.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a synthetic 2-class, 2-feature data set of 10000 points from a two-cluster mixture.
 * Weight vectors per cluster and class:
 * C0, y0: w=(0,1)
 * C0, y1: w=(1,1)
 * C1, y0: w=(1,0)
 * C1, y1: w=(1,-1)
 * For every data point a cluster is drawn with proportions (0.4, 0.6); class l is assigned when
 * the dot product of that cluster's class-l weight vector with the feature row is non-negative.
 * The cluster, row, weight and dot product are printed to standard output along the way.
 * @return the generated data set
 */
public static MultiLabelClfDataSet sampleFromMix(){
    int numData = 10000;
    int numClass = 2;
    int numFeature = 2;
    int numClusters = 2;
    double[] proportions = {0.4,0.6};
    int[] indices = {0,1};

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // generate weights: coefficients[c][l] is the weight vector of class l in cluster c
    double[][][] coefficients = {
            {{0, 1}, {1, 1}},
            {{1, 0}, {1, -1}}
    };
    Vector[][] weights = new Vector[numClusters][numClass];
    for (int c=0;c<numClusters;c++){
        for (int l=0;l<numClass;l++){
            weights[c][l] = new DenseVector(numFeature);
        }
    }
    for (int c=0;c<numClusters;c++){
        for (int l=0;l<numClass;l++){
            for (int j=0;j<numFeature;j++){
                weights[c][l].set(j, coefficients[c][l][j]);
            }
        }
    }

    // generate features
    for (int row=0;row<numData;row++){
        for (int col=0;col<numFeature;col++){
            dataSet.setFeatureValue(row,col,Sampling.doubleUniform(-1, 1));
        }
    }
    IntegerDistribution clusterPicker = new EnumeratedIntegerDistribution(indices,proportions);
    // assign labels
    for (int row=0;row<numData;row++){
        int cluster = clusterPicker.sample();
        System.out.println("cluster "+cluster);
        for (int l=0;l<numClass;l++){
            System.out.println("row = "+dataSet.getRow(row));
            System.out.println("weight = "+ weights[cluster][l]);
            double score = weights[cluster][l].dot(dataSet.getRow(row));
            System.out.println("dot = "+score);
            if (score>=0){
                dataSet.addLabel(row,l);
            }
        }
    }

    return dataSet;
}
 
Example #15
Source File: SamplingPrediction.java    From pyramid with Apache License 2.0 4 votes vote down vote up
/**
 * Samples one candidate, choosing position {@code i} with probability {@code probabilities[i]}.
 * @param probabilities probability of each candidate position
 * @param candidates candidates to choose from, indexed like {@code probabilities}
 * @return the candidate at the sampled position
 */
public static MultiLabel predict(double[] probabilities, List<MultiLabel> candidates){
    // positions 0..probabilities.length-1 are the values of the distribution
    int[] positions = new int[probabilities.length];
    for (int p=0;p<positions.length;p++){
        positions[p] = p;
    }
    int picked = new EnumeratedIntegerDistribution(positions, probabilities).sample();
    return candidates.get(picked);
}