Java Code Examples for org.apache.flink.ml.api.misc.param.Params

The following examples show how to use org.apache.flink.ml.api.misc.param.Params. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: ParamsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a parameter is resolved by its primary name, and that supplying both the
 * primary name and one of its aliases in the same JSON is rejected as a duplicate.
 */
@Test
public void testGetAliasParam() {
	ParamInfo <String> aliasedInfo = ParamInfoFactory
		.createParamInfo("predResultColName", String.class)
		.setDescription("Column name of predicted result.")
		.setRequired()
		.setAlias(new String[] {"predColName", "outputColName"})
		.build();

	// Only the primary name is present: the value is read back directly.
	Params primaryOnly = Params.fromJson("{\"predResultColName\":\"\\\"f0\\\"\"}");
	Assert.assertEquals("f0", primaryOnly.get(aliasedInfo));

	// Primary name and an alias are both present: the lookup is expected to throw.
	Params primaryAndAlias = Params.fromJson(
		"{\"predResultColName\":\"\\\"f0\\\"\", \"predColName\":\"\\\"f0\\\"\"}");

	try {
		primaryAndAlias.get(aliasedInfo);
		Assert.fail("failure");
	} catch (IllegalArgumentException duplicate) {
		String message = duplicate.getMessage();
		Assert.assertTrue(message.startsWith("Duplicate parameters of predResultColName and predColName"));
	}
}
 
Example 2
Source Project: Alink   Source File: VectorToColumnsMapperTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that mapping a row whose single field is null yields null in both output fields,
 * and that the output schema consists of the two configured output columns typed as DOUBLE.
 */
@Test
public void testNull() throws Exception {
	TableSchema schema = new TableSchema(new String[] {"vec"}, new TypeInformation <?>[] {Types.STRING});

	Params params = new Params()
		.set(VectorToColumnsParams.SELECTED_COL, "vec")
		.set(VectorToColumnsParams.RESERVED_COLS, new String[] {})
		.set(VectorToColumnsParams.OUTPUT_COLS, new String[] {"f0", "f1"});

	VectorToColumnsMapper mapper = new VectorToColumnsMapper(schema, params);

	Row row = mapper.map(Row.of((Object) null));
	// assertEquals takes (expected, actual); this order keeps failure messages accurate.
	assertEquals(null, row.getField(0));
	assertEquals(null, row.getField(1));
	assertEquals(
		new TableSchema(new String[] {"f0", "f1"}, new TypeInformation <?>[] {Types.DOUBLE, Types.DOUBLE}),
		mapper.getOutputSchema());
}
 
Example 3
Source Project: Alink   Source File: BinarizerMapper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the mapper: reads the threshold and the selected column from the parameters,
 * resolves the selected column's type from the data schema and, when that type is numeric,
 * pre-creates the "0" and "1" values of that type.
 *
 * @param dataSchema schema of the input data.
 * @param params     mapper parameters; THRESHOLD and SELECTED_COL are read here.
 * @throws RuntimeException wrapping any exception raised while looking up or invoking the
 *                          String-argument constructor of the column's type class.
 */
public BinarizerMapper(TableSchema dataSchema, Params params) {
	super(dataSchema, params);
	this.threshold = this.params.get(BinarizerParams.THRESHOLD);

	selectedColType = TableUtil.findColTypeWithAssertAndHint(
		dataSchema,
		this.params.get(BinarizerParams.SELECTED_COL)
	);

	if (TableUtil.isNumber(selectedColType)) {
		try {
			// Wildcard instead of the raw Constructor type; the constants are created through the
			// type's (String) constructor so they carry the same class as the column.
			Constructor<?> constructor = selectedColType.getTypeClass().getConstructor(String.class);
			objectValue0 = constructor.newInstance("0");
			objectValue1 = constructor.newInstance("1");
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}
}
 
Example 4
Source Project: Alink   Source File: ClassificationEvaluationUtilTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Walks through the combinations of prediction / prediction-detail columns and checks the
 * evaluation type that is judged for each, ending with the case where neither column is set,
 * which is expected to raise a RuntimeException.
 */
@Test
public void judgeEvaluationTypeTest(){
    Params params = new Params()
        .set(HasPredictionDetailCol.PREDICTION_DETAIL_COL, "detail");

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    ClassificationEvaluationUtil.Type type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    // With both columns set the detail column still decides the type.
    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    params.remove(HasPredictionDetailCol.PREDICTION_DETAIL_COL);
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_RESULT, type);

    // Neither column left: the call below must throw with the expected message.
    params.remove(HasPredictionCol.PREDICTION_COL);
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Error Input, must give either predictionCol or predictionDetailCol!");
    ClassificationEvaluationUtil.judgeEvaluationType(params);
}
 
Example 5
Source Project: Alink   Source File: SelectMapperTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a SELECT clause with aliases, arithmetic, a CASE expression and UPPER() through
 * SelectMapper and compares the mapped row with the expected one.
 */
@Test
public void testGeneral() throws Exception {
    TableSchema dataSchema = TableSchema.builder().fields(
        new String[] {"id", "name"},
        new DataType[] {DataTypes.INT(), DataTypes.STRING()}).build();
    Params params = new Params();
    params.set(HasClause.CLAUSE,
        "id, name as eman, id + 1 as id2, CASE WHEN id=1 THEN 'q' ELSE 'p' END as col3, UPPER(name) as col4");
    SelectMapper selectMapper = new SelectMapper(dataSchema, params);
    selectMapper.open();
    Row expected = Row.of(1, "'abc'", 2, "q", "'ABC'");
    // map() is inside the try so the opened mapper is closed even when mapping itself fails.
    try {
        Row output = selectMapper.map(Row.of(1, "'abc'"));
        assertEquals(expected, output);
    } finally {
        selectMapper.close();
    }
}
 
Example 6
Source Project: Alink   Source File: BucketizerMapperTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exercises BucketizerMapper with two selected columns and checks the value of the second
 * output field for several inputs (including a null input), plus the unchanged output schema.
 */
@Test
public void testMultiFeatures() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"featureA", "featureB"},
        new TypeInformation<?>[] {Types.LONG, Types.LONG});

    Params params = new Params()
        .set(BucketizerParams.SELECTED_COLS, new String[] {"featureA", "featureB"})
        .set(BucketizerParams.CUTS_ARRAY, cutsArray);

    BucketizerMapper mapper = new BucketizerMapper(schema, params);
    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(0L, mapper.map(Row.of(-999.9, -999.9)).getField(1));
    assertEquals(1L, mapper.map(Row.of(-0.5, -0.2)).getField(1));
    assertEquals(0L, mapper.map(Row.of(-0.3, -0.6)).getField(1));
    assertEquals(1L, mapper.map(Row.of(0.0, 0.0)).getField(1));
    assertEquals(3L, mapper.map(Row.of(0.5, 0.4)).getField(1));
    assertEquals(5L, mapper.map(Row.of(0.5, null)).getField(1));
    assertEquals(schema, mapper.getOutputSchema());
}
 
Example 7
Source Project: Alink   Source File: FeatureHasherMapperTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Hashes four input columns into a 10-dimensional output vector and checks the sparse vector
 * appended at field index 4 for two sample rows, as well as the resulting output schema.
 */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});

    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10);

    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(
        new SparseVector(10, new int[]{5, 8, 9}, new double[]{2.0, 1.1, 1.0}),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(4));
    assertEquals(
        new SparseVector(10, new int[]{1, 5, 6, 8}, new double[]{1.0, 1.0, 1.0, 2.1}),
        mapper.map(Row.of(2.1, true, "1", "B")).getField(4));
    assertEquals(
        new TableSchema(new String[] {"double", "bool", "number", "str", "output"},
            new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING(),
                VectorTypes.VECTOR}),
        mapper.getOutputSchema());
}
 
Example 8
Source Project: Alink   Source File: BaseLinearModelTrainBatchOp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Collects the label information used for training: the label values and the label type.
 *
 * <p>For a regression process the type is fixed to DOUBLE and the values are a data set built
 * from a single placeholder object. Otherwise the type is taken from the label column of the
 * input and the values are the distinct entries of that column.
 *
 * @param in        input train data in BatchOperator format.
 * @param params    train parameters; the label column name is read from them.
 * @param isRegProc whether this is a regression process.
 * @return a tuple of (label values, label type).
 */
private Tuple2<DataSet<Object>, TypeInformation> getLabelInfo(BatchOperator in,
                                                              Params params,
                                                              boolean isRegProc) {
    final String labelCol = params.get(LinearTrainParams.LABEL_COL);
    final DataSet<Object> distinctLabels;
    final TypeInformation<?> typeOfLabel;

    if (!isRegProc) {
        // Classification-style case: inspect the actual label column.
        typeOfLabel = in.getColTypes()[TableUtil.findColIndexWithAssertAndHint(in.getColNames(), labelCol)];
        distinctLabels = in.select(new String[] {labelCol}).distinct().getDataSet().map(
            new MapFunction<Row, Object>() {
                @Override
                public Object map(Row row) {
                    // The selection has exactly one column, so the label is field 0.
                    return row.getField(0);
                }
            });
    } else {
        // Regression: no label values to enumerate, only a placeholder element.
        typeOfLabel = Types.DOUBLE;
        distinctLabels = MLEnvironmentFactory.get(in.getMLEnvironmentId())
            .getExecutionEnvironment().fromElements(new Object());
    }

    return Tuple2.of(distinctLabels, typeOfLabel);
}
 
Example 9
Source Project: Alink   Source File: LabeledModelDataConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds a model from its row representation: splits the rows into meta and data, pulls the
 * auxiliary (label) data out of the same rows, and hands all three to deserializeModel.
 *
 * @param rows the serialized model rows.
 * @return the deserialized model.
 */
@Override
public M2 load(List<Row> rows) {
    Tuple2<Params, Iterable<String>> parsed = extractModelMetaAndData(rows);
    Iterable<Object> distinctLabels = extractAuxiliaryData(rows, true);

    Params meta = parsed.f0;
    Iterable<String> data = parsed.f1;
    return deserializeModel(meta, data, distinctLabels);
}
 
Example 10
Source Project: Alink   Source File: MySqlDB.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a MySqlDB from a parameter set by reading the DB_NAME, IP, PORT, USERNAME and
 * PASSWORD entries of MySqlDBParams and delegating to the five-argument constructor.
 *
 * @param params parameters holding the connection settings.
 */
public MySqlDB(Params params) {
	this(params.get(MySqlDBParams.DB_NAME),
		params.get(MySqlDBParams.IP),
		params.get(MySqlDBParams.PORT),
		params.get(MySqlDBParams.USERNAME),
		params.get(MySqlDBParams.PASSWORD));
}
 
Example 11
Source Project: Alink   Source File: ParamsTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets the enum parameter through the string key "enumType2" with a lower-case value and
 * checks that it is read back as Color.GREEN, both directly from the Params and through a
 * TestBatchOpColor built from them.
 */
@Test
public void testColorAlias() {
    final Color expected = Color.GREEN;

    Params aliasedParams = new Params()
        .set("enumType2", "green")
        .set("appendType", "DENSE");

    Assert.assertEquals(expected, aliasedParams.get(HasEnumTypeColor.ENUM_TYPE_COLOR));

    TestBatchOpColor op = new TestBatchOpColor(aliasedParams);
    Assert.assertEquals(expected, op.getEnumTypeColor());
    Assert.assertEquals(expected, op.get(HasEnumTypeColor.ENUM_TYPE_COLOR));
}
 
Example 12
Source Project: Alink   Source File: NaiveBayesTextModelDataConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deserialize the model data.
 *
 * <p>Only the first element of {@code data} is read; it is parsed as JSON into a
 * NaiveBayesTextProbInfo. For the Bernoulli model type the additional {@code phi} and
 * {@code minMat} structures are filled in; for other types they stay at their initial values.
 *
 * @param meta           The model meta data.
 * @param data           The model data.
 * @param distinctLabels The labels.
 * @return The model data used by mapper.
 */
@Override
public NaiveBayesTextPredictModelData deserializeModel(Params meta, Iterable<String> data, Iterable<Object> distinctLabels) {
    NaiveBayesTextPredictModelData result = new NaiveBayesTextPredictModelData();
    result.meta = meta;

    NaiveBayesTextProbInfo probInfo =
        JsonConverter.fromJson(data.iterator().next(), NaiveBayesTextProbInfo.class);
    result.pi = probInfo.piArray;
    result.theta = probInfo.theta;
    result.label = Iterables.toArray(distinctLabels, Object.class);
    result.vectorColName = result.meta.get(NaiveBayesTextTrainParams.VECTOR_COL);
    result.modelType = result.meta.get(NaiveBayesTextTrainParams.MODEL_TYPE);
    result.featLen = result.theta.numCols();

    final int numRows = result.theta.numRows();
    result.phi = new double[numRows];
    result.minMat = new DenseMatrix(numRows, result.featLen);

    // Only the bernoulli model needs the extra per-row / per-cell terms.
    if (!ModelType.Bernoulli.equals(result.modelType)) {
        return result;
    }

    for (int row = 0; row < numRows; row++) {
        for (int col = 0; col < result.featLen; col++) {
            double thetaValue = result.theta.get(row, col);
            double logOneMinusExp = Math.log(1 - Math.exp(thetaValue));
            result.phi[row] += logOneMinusExp;
            result.minMat.set(row, col, thetaValue - logOneMinusExp);
        }
    }
    return result;
}
 
Example 13
Source Project: Alink   Source File: JdbcDB.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a batch table into the given database table: the target table is dropped and
 * re-created first, then an {@code INSERT INTO ... VALUES (?,...)} statement with one
 * placeholder per column is built and used by a JDBC append sink to emit the data set.
 *
 * @param tableName name of the target table.
 * @param in        table to write.
 * @param parameter parameters forwarded to dropAndCreateTable.
 * @param sessionId ML environment id used when converting the table to a batch operator.
 */
@Override
public void sinkBatch(String tableName, Table in, Params parameter, Long sessionId) {
    dropAndCreateTable(this, tableName, in, parameter);

    TableSchema tableSchema = in.getSchema();
    String[] columns = tableSchema.getFieldNames();

    // Build the column list and the matching "?" list side by side.
    StringBuilder columnList = new StringBuilder().append(columns[0]);
    StringBuilder placeholders = new StringBuilder().append("?");
    for (int idx = 1; idx < columns.length; idx++) {
        columnList.append(",").append(columns[idx]);
        placeholders.append(",").append("?");
    }
    String insertSql = "INSERT INTO " + tableName + " (" + columnList + ") VALUES (" + placeholders + ")";

    JDBCAppendTableSink sink = JDBCAppendTableSink.builder()
            .setUsername(getUserName())
            .setPassword(getPassword())
            .setDrivername(getDriverName())
            .setDBUrl(getDbUrl())
            .setQuery(insertSql)
            .setParameterTypes(tableSchema.getFieldTypes())
            .build();

    sink.emitDataSet(BatchOperator.fromTable(in).setMLEnvironmentId(sessionId).getDataSet());
}
 
Example 14
Source Project: Alink   Source File: IsotonicRegressionModelMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads an isotonic regression model from three hand-written model rows (meta JSON plus two
 * numeric arrays), maps one input row and checks the predicted value and the output schema.
 */
@Test
public void testRowData() throws Exception {
	Row[] rows = new Row[] {
		Row.of(0L, "{\"vectorColName\":\"\\\"vector\\\"\",\"modelName\":\"\\\"IsotonicRegressionModel\\\"\","
			+ "\"featureColName\":null,\"featureIndex\":\"0\",\"modelSchema\":\"\\\"model_id bigint,model_info "
			+ "string\\\"\",\"isNewFormat\":\"true\"}\n"),
		Row.of(1048576L, "[0.02,0.1,0.2,0.27,0.3,0.35,0.45,0.5,0.7,0.8,0.9]"),
		Row.of(2097152L,
			"[0.0,0.3333333333333333,0.3333333333333333,0.5,0.5,0.6666666666666666,0.6666666666666666,0.75,0.75,"
				+ "1.0,1.0]")
	};
	List <Row> model = Arrays.asList(rows);
	// Wildcard array instead of the raw TypeInformation[] to match the other schemas in this test.
	TableSchema modelSchema = new TableSchema(new String[] {"model_id", "model_info"},
		new TypeInformation <?>[] {Types.LONG, Types.STRING});

	TableSchema dataSchema = new TableSchema(new String[] {"vector"}, new TypeInformation <?>[] {Types.DOUBLE});

	Params params = new Params()
		.set(IsotonicRegPredictParams.PREDICTION_COL, "pred");

	IsotonicRegressionModelMapper mapper = new IsotonicRegressionModelMapper(modelSchema, dataSchema, params);
	mapper.loadModel(model);

	// assertEquals takes (expected, actual[, delta]); this order keeps failure messages accurate.
	assertEquals(1.0, Double.parseDouble(mapper.map(Row.of("0.81, 0.35")).getField(1).toString()), 0.01);
	assertEquals(
		new TableSchema(new String[] {"vector", "pred"}, new TypeInformation <?>[] {Types.DOUBLE, Types.DOUBLE}),
		mapper.getOutputSchema());
}
 
Example 15
Source Project: Alink   Source File: CsvSourceBatchOp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a CSV source by packing the arguments into a new Params and delegating to the
 * Params-based constructor. Column names and types are combined into a TableSchema and stored
 * as a schema string produced by CsvUtil.schema2SchemaStr.
 *
 * @param filePath   value stored under FILE_PATH.
 * @param colNames   column names of the schema.
 * @param colTypes   column types of the schema.
 * @param fieldDelim value stored under FIELD_DELIMITER.
 * @param rowDelim   value stored under ROW_DELIMITER.
 */
public CsvSourceBatchOp(String filePath, String[] colNames, TypeInformation<?>[] colTypes,
                        String fieldDelim, String rowDelim) {
    this(new Params()
        .set(FILE_PATH, filePath)
        .set(SCHEMA_STR, CsvUtil.schema2SchemaStr(new TableSchema(colNames, colTypes)))
        .set(FIELD_DELIMITER, fieldDelim)
        .set(ROW_DELIMITER, rowDelim)
    );
}
 
Example 16
Source Project: Alink   Source File: HiveDB.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads a Hive table as a batch Table: obtains the Hive batch source, converts its BaseRow
 * data set into Row, turns it into a Table and, when the table has partition columns, selects
 * only the columns returned by getColNames.
 *
 * @param tableName name of the table to read.
 * @param parameter parameters forwarded to getHiveBatchSource.
 * @param sessionId ML environment id used to obtain the execution environment.
 * @return the resulting table.
 * @throws Exception if any of the underlying calls fails.
 */
@Override
public Table getBatchTable(String tableName, Params parameter, Long sessionId) throws Exception {
    ExecutionEnvironment batchEnv = MLEnvironmentFactory.get(sessionId).getExecutionEnvironment();
    HiveBatchSource source = getHiveBatchSource(tableName, parameter);
    DataSet<BaseRow> baseRows = source.getDataSet(batchEnv);
    TableSchema tableSchema = source.getTableSchema();
    final DataType[] fieldDataTypes = tableSchema.getFieldDataTypes();
    DataSet<Row> converted = baseRows.map(new BaseRowToRow(fieldDataTypes));
    Table result = DataSetConversionUtil.toTable(sessionId, converted, tableSchema);

    boolean hasPartitionCols = getPartitionCols(tableName).size() > 0;
    if (!hasPartitionCols) {
        return result;
    }

    // remove static partition columns
    String[] keptColumns = getColNames(tableName);
    return result.select(Strings.join(keptColumns, ","));
}
 
Example 17
/**
 * Emits the model as rows: first, when meta is present, one row with id -1 holding the meta
 * as JSON and a null third field; then one row per data tuple, with the tuple's integer id
 * widened to long.
 *
 * @param modelData tuple of (meta, data tuples).
 * @param collector receives the produced rows.
 */
@Override
public void save(Tuple2<Params, Iterable<Tuple3<Integer, String, Long>>> modelData, Collector<Row> collector) {
    Params meta = modelData.f0;
    if (meta != null) {
        collector.collect(Row.of(-1L, meta.toJson(), null));
    }

    for (Tuple3<Integer, String, Long> entry : modelData.f1) {
        collector.collect(Row.of(entry.f0.longValue(), entry.f1, entry.f2));
    }
}
 
Example 18
Source Project: Alink   Source File: LdaModelDataConverterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips generated LDA model data through serializeModel / deserializeModel and checks
 * that alpha and the document list survive unchanged.
 */
@Test
public void ldaModelDataConverterTest() {

    Tuple2<Params, Iterable<String>> res = converter.serializeModel(generateLdaModelData());
    LdaModelData modelData = converter.deserializeModel(res.f0, res.f1);
    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(new Double[]{0.2, 0.2, 0.2, 0.2, 0.2}, modelData.alpha);
    assertEquals(generateDocData(), modelData.list);

}
 
Example 19
Source Project: Alink   Source File: BinarizerMapperTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs BinarizerMapper on a single DOUBLE column without setting a threshold explicitly and
 * checks that 0.6 is mapped to 1.0 and that the output schema equals the input schema.
 */
@Test
public void test3() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"feature"}, new TypeInformation<?>[] {Types.DOUBLE});

    Params params = new Params()
        .set(BinarizerParams.SELECTED_COL, "feature");

    BinarizerMapper mapper = new BinarizerMapper(schema, params);

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(1.0, mapper.map(Row.of(0.6)).getField(0));
    assertEquals(schema, mapper.getOutputSchema());
}
 
Example 20
Source Project: Alink   Source File: BaseFormatTrans.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a format transformer for the given source and target formats by storing them under
 * FROM_FORMAT and TO_FORMAT and delegating to the Params-based constructor.
 *
 * <p>When {@code params} is non-null, the two formats are set on that very instance; a new
 * Params is created only when {@code params} is null.
 *
 * @param fromFormat format to convert from.
 * @param toFormat   format to convert to.
 * @param params     additional parameters, may be null.
 */
public BaseFormatTrans(FormatType fromFormat, FormatType toFormat, Params params) {
    this(
        (params != null ? params : new Params())
            .set(FormatTransParams.FROM_FORMAT, fromFormat)
            .set(FormatTransParams.TO_FORMAT, toFormat)
    );
}
 
Example 21
Source Project: Alink   Source File: FeatureSplitter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the collaborators and caches the split-related settings read from the parameters:
 * minimum samples per leaf, minimum sample ratio per child, minimum info gain, maximum depth
 * and maximum number of leaves.
 *
 * @param params      parameters the settings are read from.
 * @param data        the dense data.
 * @param featureMeta meta information of the feature.
 * @param partition   the sequential partition.
 */
public FeatureSplitter(Params params, DenseData data, FeatureMeta featureMeta, SequentialPartition partition) {
	// Collaborators.
	this.partition = partition;
	this.featureMeta = featureMeta;
	this.data = data;
	this.params = params;

	// Settings, read once up front in a fixed order.
	this.minSamplesPerLeaf = params.get(HasMinSamplesPerLeaf.MIN_SAMPLES_PER_LEAF);
	this.minSampleRatioPerChild = params.get(HasMinSampleRatioPerChild.MIN_SAMPLE_RATIO_PERCHILD);
	this.minInfoGain = params.get(HasMinInfoGain.MIN_INFO_GAIN);
	this.maxDepth = params.get(HasMaxDepth.MAX_DEPTH);
	this.maxLeaves = params.get(HasMaxLeaves.MAX_LEAVES);
}
 
Example 22
Source Project: Alink   Source File: LinearModelData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Construct function.
 * @param labelType label Type.
 * @param meta meta information of model.
 * @param featureNames the feature column names.
 * @param coefVector
 */
public LinearModelData(TypeInformation labelType, Params meta, String[] featureNames, DenseVector coefVector) {
	this.labelType = labelType;
	this.coefVector = coefVector;
	this.featureNames = featureNames;
	if (meta.contains(ModelParamName.LABEL_VALUES)) {
		this.labelValues = FeatureLabelUtil.recoverLabelType(meta.get(ModelParamName.LABEL_VALUES), this.labelType);
	}
	setMetaInfo(meta);
}
 
Example 23
Source Project: flink   Source File: PipelineStageBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the parameters of this stage, creating an empty Params on first access when none
 * has been set yet.
 *
 * @return the (never null) parameters.
 */
@Override
public Params getParams() {
	if (this.params != null) {
		return this.params;
	}
	this.params = new Params();
	return this.params;
}
 
Example 24
Source Project: Alink   Source File: NumSeqSourceStreamOp.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stream source from the number sequence {@code from..to}, passes each number
 * through a {@code transform} built from {@code timeZones}, and exposes the result as an
 * output table with a single column named {@code colName}.
 *
 * @param from      first number of the sequence.
 * @param to        last number of the sequence.
 * @param colName   name of the output column.
 * @param timeZones argument handed to the {@code transform} map function.
 * @param params    operator parameters, passed to the superclass.
 */
public NumSeqSourceStreamOp(long from, long to, String colName, Double[] timeZones, Params params) {
    super(params);

    DataStreamSource<Long> sequence = MLEnvironmentFactory
        .get(getMLEnvironmentId())
        .getStreamExecutionEnvironment()
        .generateSequence(from, to);
    DataStream<Long> transformed = sequence.map(new transform(timeZones));

    this.setOutputTable(
        MLEnvironmentFactory
            .get(getMLEnvironmentId())
            .getStreamTableEnvironment()
            .fromDataStream(transformed, colName));
}
 
Example 25
Source Project: flink   Source File: Pipeline.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the parameters held by this pipeline.
 *
 * @return the current value of the {@code params} field.
 */
public Params getParams() {
	return this.params;
}
 
Example 26
Source Project: Alink   Source File: HiveSourceStreamOp.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator from the given parameters, passing the name that
 * AnnotationUtils.annotatedName resolves for HiveDB, together with the parameters, to the
 * superclass constructor.
 *
 * @param params operator parameters.
 */
public HiveSourceStreamOp(Params params) {
    super(AnnotationUtils.annotatedName(HiveDB.class), params);
}
 
Example 27
Source Project: Alink   Source File: AppendIdBatchOp.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator with the given parameters, which are passed unchanged to the
 * superclass constructor.
 *
 * @param params operator parameters.
 */
public AppendIdBatchOp(Params params) {
	super(params);
}
 
Example 28
Source Project: Alink   Source File: Pipeline.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the parameters held by this pipeline.
 *
 * @return the current value of the {@code params} field.
 */
public Params getParams() {
	return this.params;
}
 
Example 29
Source Project: Alink   Source File: BaseSourceBatchOp.java    License: Apache License 2.0 4 votes vote down vote up
protected BaseSourceBatchOp(String nameSrcSnk, Params params) {
    super(params);
    this.getParams().set(HasIoType.IO_TYPE, IO_TYPE)
        .set(HasIoName.IO_NAME, nameSrcSnk);

}
 
Example 30
Source Project: Alink   Source File: JsonToKvStreamOp.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator with a new, empty Params by delegating to the Params-based constructor.
 */
public JsonToKvStreamOp() {
    this(new Params());
}