org.dmg.pmml.clustering.ClusteringField Java Examples

The following examples show how to use org.dmg.pmml.clustering.ClusteringField. You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.

Example #1

Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0

6 votes

/**
 * Collects the clustering fields of the model whose center field attribute is <code>TRUE</code>.
 *
 * @return A new list holding the matching fields, in the order in which the model lists them.
 *
 * @throws UnsupportedAttributeException If a field carries a center field value other than <code>TRUE</code> or <code>FALSE</code>.
 */
private List<ClusteringField> getCenterClusteringFields(){
	ClusteringModel clusteringModel = getModel();

	List<ClusteringField> candidates = clusteringModel.getClusteringFields();

	// Sized for the case where every field is a center field
	List<ClusteringField> centerFields = new ArrayList<>(candidates.size());

	for(ClusteringField candidate : candidates){
		ClusteringField.CenterField flag = candidate.getCenterField();

		switch(flag){
			case FALSE:
				// Not a center field; leave it out
				continue;
			case TRUE:
				centerFields.add(candidate);
				continue;
			default:
				throw new UnsupportedAttributeException(candidate, flag);
		}
	}

	return centerFields;
}

Example #2

Source File: KMeansUpdate.java From oryx with Apache License 2.0

5 votes

/**
 * Builds a center-based PMML clustering model from a trained k-means model.
 *
 * One clustering field (marked as a center field) is emitted per active feature of
 * the input schema, and one cluster per center of {@code model}. The cluster ID is
 * the center's index rendered as a string. The comparison measure is a distance
 * measure using squared Euclidean distance.
 *
 * @param model trained k-means model supplying the cluster centers
 * @param clusterSizesMap cluster index to number of assigned points; an index with
 *  no entry is reported with size 0
 * @return PMML clustering model describing the centers
 */
private ClusteringModel pmmlClusteringModel(KMeansModel model,
                                            Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();

  List<String> featureNames = inputSchema.getFeatureNames();
  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int i = 0; i < inputSchema.getNumFeatures(); i++) {
    // Only active features take part in clustering
    if (inputSchema.isActive(i)) {
      FieldName fieldName = FieldName.create(featureNames.get(i));
      ClusteringField clusteringField =
          new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE);
      clusteringFields.add(clusteringField);
    }
  }

  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  for (int i = 0; i < clusterCenters.length; i++) {
    // A cluster without an entry in the map has no counted points; report 0
    // instead of failing with a NullPointerException on the missing value.
    Long clusterSize = clusterSizesMap.get(i);
    int size = clusterSize == null ? 0 : clusterSize.intValue();
    clusters.add(new Cluster().setId(Integer.toString(i))
                     .setSize(size)
                     .setArray(AppPMMLUtils.toArray(clusterCenters[i].toArray())));
  }

  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean()),
      clusteringFields,
      clusters);
}

Example #3

Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Computes the distance between the given field values and every cluster of the model.
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param comparisonMeasure The comparison measure to evaluate.
 * @param clusteringFields The clustering fields that correspond to <code>values</code>.
 * @param values The evaluated field values.
 *
 * @return A distance-typed distribution holding one entry per cluster.
 *
 * @throws InvalidElementException If the missing value weights, or the values of a cluster, differ in count from <code>values</code>.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateDistance(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	ClusteringModel clusteringModel = getModel();

	List<Cluster> clusters = clusteringModel.getClusters();

	MissingValueWeights missingValueWeights = clusteringModel.getMissingValueWeights();

	Value<V> adjustment;

	if(missingValueWeights == null){
		// No explicit weights declared by the model
		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values);
	} else {
		List<? extends Number> weights = ArrayUtil.asNumberList(missingValueWeights.getArray());

		// There must be exactly one weight per value
		if(weights.size() != values.size()){
			throw new InvalidElementException(missingValueWeights);
		}

		adjustment = MeasureUtil.calculateAdjustment(valueFactory, values, weights);
	}

	ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.DISTANCE, clusters);

	for(Cluster candidate : clusters){
		List<FieldValue> centerValues = CacheUtil.getValue(candidate, ClusteringModelEvaluator.clusterValueCache);

		// There must be exactly one cluster value per input value
		if(centerValues.size() != values.size()){
			throw new InvalidElementException(candidate);
		}

		distribution.put(candidate, MeasureUtil.evaluateDistance(valueFactory, comparisonMeasure, clusteringFields, values, centerValues, adjustment));
	}

	return distribution;
}

Example #4

Source File: MeasureUtilTest.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Creates one clustering field per name, preserving the argument order.
 *
 * @param names The field names.
 *
 * @return A new list holding a clustering field for each name.
 */
static
private List<ClusteringField> createClusteringFields(String... names){
	int count = names.length;

	List<ClusteringField> fields = new ArrayList<>(count);

	for(int i = 0; i < count; i++){
		FieldName fieldName = FieldName.create(names[i]);

		fields.add(new ClusteringField(fieldName));
	}

	return fields;
}

Example #5

Source File: KMeansPMMLUtilsTest.java From oryx with Apache License 2.0

4 votes

/**
 * Builds a small PMML document for tests: two continuous double fields, "x" and "y",
 * and a center-based clustering model over them with three clusters and a
 * squared Euclidean distance measure.
 *
 * @return skeleton PMML with the data dictionary set and the clustering model added
 */
public static PMML buildDummyClusteringModel() {
  String[] featureNames = {"x", "y"};
  // Cluster i gets ID Integer.toString(i), size i + 1 and center CENTERS[i]
  double[][] centers = {{1.0, 0.0}, {2.0, -1.0}, {-1.0, 0.0}};

  PMML pmml = PMMLUtils.buildSkeletonPMML();

  List<DataField> dataFields = new ArrayList<>();
  for (String featureName : featureNames) {
    dataFields.add(
        new DataField(FieldName.create(featureName), OpType.CONTINUOUS, DataType.DOUBLE));
  }
  DataDictionary dataDictionary =
      new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
  pmml.setDataDictionary(dataDictionary);

  List<MiningField> miningFields = new ArrayList<>();
  for (String featureName : featureNames) {
    miningFields.add(new MiningField(FieldName.create(featureName))
        .setOpType(OpType.CONTINUOUS)
        .setUsageType(MiningField.UsageType.ACTIVE));
  }
  MiningSchema miningSchema = new MiningSchema(miningFields);

  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (String featureName : featureNames) {
    clusteringFields.add(new ClusteringField(FieldName.create(featureName))
        .setCenterField(ClusteringField.CenterField.TRUE));
  }

  List<Cluster> clusters = new ArrayList<>();
  for (int i = 0; i < centers.length; i++) {
    clusters.add(new Cluster()
        .setId(Integer.toString(i))
        .setSize(i + 1)
        .setArray(AppPMMLUtils.toArray(centers[i])));
  }

  ComparisonMeasure comparisonMeasure =
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());

  pmml.addModels(new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      miningSchema,
      comparisonMeasure,
      clusteringFields,
      clusters));

  return pmml;
}

Example #6

Source File: FieldReferenceFinder.java From jpmml-model with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Passes the field referenced by the clustering field to <code>process</code>,
 * then delegates to the superclass visit.
 *
 * @param clusteringField The clustering field being visited.
 *
 * @return The action returned by the superclass visit.
 */
@Override
public VisitorAction visit(ClusteringField clusteringField){
	// Record the reference before the superclass visit runs
	process(clusteringField.getField());

	VisitorAction action = super.visit(clusteringField);

	return action;
}

Example #7

Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0

4 votes

/**
 * Evaluates the clustering model: resolves the value of every center clustering field
 * from the context, then scores all clusters using either the similarity or the
 * distance branch, depending on the kind of measure.
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param context The evaluation context used to resolve field values.
 *
 * @return A singleton map from the target name to the cluster affinity distribution.
 *
 * @throws MissingAttributeException If a clustering field does not declare a field name.
 * @throws UnsupportedElementException If the measure is neither a similarity nor a distance measure.
 */
@Override
protected <V extends Number> Map<FieldName, ClusterAffinityDistribution<V>> evaluateClustering(ValueFactory<V> valueFactory, EvaluationContext context){
	ClusteringModel clusteringModel = getModel();

	ComparisonMeasure comparisonMeasure = clusteringModel.getComparisonMeasure();

	List<ClusteringField> centerFields = getCenterClusteringFields();

	List<FieldValue> fieldValues = new ArrayList<>(centerFields.size());

	for(ClusteringField centerField : centerFields){
		FieldName fieldName = centerField.getField();

		if(fieldName == null){
			throw new MissingAttributeException(centerField, PMMLAttributes.CLUSTERINGFIELD_FIELD);
		}

		fieldValues.add(context.evaluate(fieldName));
	}

	Measure measure = MeasureUtil.ensureMeasure(comparisonMeasure);

	ClusterAffinityDistribution<V> distribution;

	if(measure instanceof Similarity){
		distribution = evaluateSimilarity(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else if(measure instanceof Distance){
		distribution = evaluateDistance(valueFactory, comparisonMeasure, centerFields, fieldValues);
	} else {
		throw new UnsupportedElementException(measure);
	}

	// "For clustering models, the identifier of the winning cluster is returned as the predictedValue"
	distribution.computeResult(DataType.STRING);

	return Collections.singletonMap(getTargetName(), distribution);
}

Example #8

Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0

4 votes

/**
 * Computes the similarity between the given field values and every cluster of the model.
 *
 * @param valueFactory Factory passed through to the measure calculations.
 * @param comparisonMeasure The comparison measure to evaluate.
 * @param clusteringFields The clustering fields that correspond to <code>values</code>.
 * @param values The evaluated field values; converted to a bit set before comparison.
 *
 * @return A similarity-typed distribution holding one entry per cluster.
 *
 * @throws InvalidElementException If the bit set of a cluster differs in size from the bit set of <code>values</code>.
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateSimilarity(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
	ClusteringModel clusteringModel = getModel();

	List<Cluster> clusters = clusteringModel.getClusters();

	ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.SIMILARITY, clusters);

	BitSet inputFlags = MeasureUtil.toBitSet(values);

	for(Cluster candidate : clusters){
		BitSet candidateFlags = CacheUtil.getValue(candidate, ClusteringModelEvaluator.clusterFlagCache);

		// NOTE(review): BitSet#size() reports the number of bits of space in use, not the number of flags stored - confirm that this comparison is the intended consistency check
		if(candidateFlags.size() != inputFlags.size()){
			throw new InvalidElementException(candidate);
		}

		distribution.put(candidate, MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, inputFlags, candidateFlags));
	}

	return distribution;
}

Example #9

Source File: MeasureUtilTest.java From jpmml-evaluator with GNU Affero General Public License v3.0

4 votes

/**
 * Checks the similarity computed between two fixed four-flag bit sets for the
 * simple matching, Jaccard, Tanimoto and binary similarity measures, in that order.
 */
@Test
public void evaluateSimilarity(){
	// Expected results, one per measure
	double simpleMatchingExpected = 2d / 4d;
	double jaccardExpected = 1d / 3d;
	double tanimotoExpected = 2d / (1d + 2 * 2d + 1d);
	double binarySimilarityExpected = 2d / 4d;

	BitSet flags = createFlags(Arrays.asList(0, 0, 1, 1));
	BitSet referenceFlags = createFlags(Arrays.asList(0, 1, 0, 1));

	ValueFactory<?> valueFactory = MeasureUtilTest.valueFactoryFactory.newValueFactory(MathContext.DOUBLE);

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.SIMILARITY, new SimpleMatching());

	List<ClusteringField> clusteringFields = createClusteringFields("one", "two", "three", "four");

	assertEquals(valueFactory.newValue(simpleMatchingExpected), MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, flags, referenceFlags));

	// The same comparison measure object is reused; only its measure is swapped
	comparisonMeasure.setMeasure(new Jaccard());

	assertEquals(valueFactory.newValue(jaccardExpected), MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, flags, referenceFlags));

	comparisonMeasure.setMeasure(new Tanimoto());

	assertEquals(valueFactory.newValue(tanimotoExpected), MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, flags, referenceFlags));

	comparisonMeasure.setMeasure(new BinarySimilarity(0.5d, 0.5d, 0.5d, 0.5d, 1d, 1d, 1d, 1d));

	assertEquals(valueFactory.newValue(binarySimilarityExpected), MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, flags, referenceFlags));
}