org.apache.flink.ml.api.misc.param.Params Java Examples

The following examples show how to use org.apache.flink.ml.api.misc.param.Params. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BaseLinearModelTrainBatchOp.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the label information used for training: the label values and the label type.
 *
 * <p>For a regression process the label type is {@code Types.DOUBLE} and the label values are a
 * data set holding one placeholder {@code Object}. Otherwise the label type is the type of the
 * label column of {@code in}, and the label values are the distinct values of that column.
 *
 * @param in        input train data in BatchOperator format.
 * @param params    train parameters; the label column name is read from
 *                  {@code LinearTrainParams.LABEL_COL}.
 * @param isRegProc whether this is a regression process.
 * @return a tuple of (label values, label type).
 */
private Tuple2<DataSet<Object>, TypeInformation> getLabelInfo(BatchOperator in,
                                                              Params params,
                                                              boolean isRegProc) {
    // Looked up before branching, so the lookup also happens for regression.
    final String labelColName = params.get(LinearTrainParams.LABEL_COL);

    if (isRegProc) {
        final TypeInformation<?> regressionLabelType = Types.DOUBLE;
        final DataSet<Object> placeholderValues = MLEnvironmentFactory.get(in.getMLEnvironmentId())
            .getExecutionEnvironment().fromElements(new Object());
        return Tuple2.of(placeholderValues, regressionLabelType);
    }

    // Label type is taken from the input schema at the label column's position.
    final TypeInformation<?>[] colTypes = in.getColTypes();
    final TypeInformation<?> labelColType =
        colTypes[TableUtil.findColIndexWithAssertAndHint(in.getColNames(), labelColName)];

    // Distinct values of the label column, unwrapped from their single-field rows.
    final DataSet<Object> distinctLabels = in.select(new String[] {labelColName}).distinct().getDataSet().map(
        new MapFunction<Row, Object>() {
            @Override
            public Object map(Row labelRow) {
                return labelRow.getField(0);
            }
        });
    return Tuple2.of(distinctLabels, labelColType);
}
 
Example #2
Source File: ParamsTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks lookup of a parameter that declares aliases: a JSON holding only the primary name
 * resolves to its value, while a JSON holding both the primary name and one alias makes
 * {@code get} throw an {@link IllegalArgumentException} about duplicate parameters.
 */
@Test
public void testGetAliasParam() {
	final String[] aliases = new String[] {"predColName", "outputColName"};
	final ParamInfo <String> predResultCol = ParamInfoFactory
		.createParamInfo("predResultColName", String.class)
		.setDescription("Column name of predicted result.")
		.setRequired()
		.setAlias(aliases)
		.build();

	// Only the primary name is present.
	final Params primaryOnly = Params.fromJson("{\"predResultColName\":\"\\\"f0\\\"\"}");
	Assert.assertEquals("f0", primaryOnly.get(predResultCol));

	// Primary name and an alias are both present.
	final Params primaryAndAlias =
		Params.fromJson("{\"predResultColName\":\"\\\"f0\\\"\", \"predColName\":\"\\\"f0\\\"\"}");
	try {
		primaryAndAlias.get(predResultCol);
		Assert.fail("failure");
	} catch (IllegalArgumentException duplicateError) {
		final String message = duplicateError.getMessage();
		Assert.assertTrue(message.startsWith("Duplicate parameters of predResultColName and predColName"));
	}
}
 
Example #3
Source File: VectorToColumnsMapperTest.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a row whose only field is {@code null} and checks that both output fields are
 * {@code null} and that the output schema consists of the two DOUBLE columns "f0" and "f1".
 */
@Test
public void testNull() throws Exception {
	TableSchema schema = new TableSchema(new String[] {"vec"}, new TypeInformation <?>[] {Types.STRING});

	Params params = new Params()
		.set(VectorToColumnsParams.SELECTED_COL, "vec")
		.set(VectorToColumnsParams.RESERVED_COLS, new String[] {})
		.set(VectorToColumnsParams.OUTPUT_COLS, new String[] {"f0", "f1"});

	VectorToColumnsMapper mapper = new VectorToColumnsMapper(schema, params);

	Row row = mapper.map(Row.of((Object) null));

	// assertEquals takes (expected, actual); the expected value goes first so that a failure
	// message reports the two sides correctly.
	assertEquals(null, row.getField(0));
	assertEquals(null, row.getField(1));

	TableSchema expectedSchema = new TableSchema(new String[] {"f0", "f1"},
		new TypeInformation <?>[] {Types.DOUBLE, Types.DOUBLE});
	assertEquals(expectedSchema, mapper.getOutputSchema());
}
 
Example #4
Source File: FeatureHasherMapperTest.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Hashes four selected columns into a 10-feature sparse vector appended as the "output" column,
 * and checks the vectors produced for two sample rows as well as the resulting output schema.
 */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});

    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10);

    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);

    // assertEquals takes (expected, actual); expected values go first so failure messages
    // report the two sides correctly.
    assertEquals(
        new SparseVector(10, new int[]{5, 8, 9}, new double[]{2.0, 1.1, 1.0}),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(4));
    assertEquals(
        new SparseVector(10, new int[]{1, 5, 6, 8}, new double[]{1.0, 1.0, 1.0, 2.1}),
        mapper.map(Row.of(2.1, true, "1", "B")).getField(4));
    assertEquals(
        new TableSchema(new String[] {"double", "bool", "number", "str", "output"},
            new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING(),
                VectorTypes.VECTOR}),
        mapper.getOutputSchema());
}
 
Example #5
Source File: BucketizerMapperTest.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Bucketizes two selected columns using {@code cutsArray} (defined elsewhere in the test class)
 * and checks the bucket written to field 1 for several rows, including a {@code null} input,
 * plus that the output schema equals the input schema.
 */
@Test
public void testMultiFeatures() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"featureA", "featureB"},
        new TypeInformation<?>[] {Types.LONG, Types.LONG});

    Params params = new Params()
        .set(BucketizerParams.SELECTED_COLS, new String[] {"featureA", "featureB"})
        .set(BucketizerParams.CUTS_ARRAY, cutsArray);

    BucketizerMapper mapper = new BucketizerMapper(schema, params);

    // assertEquals takes (expected, actual); expected values go first so failure messages
    // report the two sides correctly.
    assertEquals(0L, mapper.map(Row.of(-999.9, -999.9)).getField(1));
    assertEquals(1L, mapper.map(Row.of(-0.5, -0.2)).getField(1));
    assertEquals(0L, mapper.map(Row.of(-0.3, -0.6)).getField(1));
    assertEquals(1L, mapper.map(Row.of(0.0, 0.0)).getField(1));
    assertEquals(3L, mapper.map(Row.of(0.5, 0.4)).getField(1));
    assertEquals(5L, mapper.map(Row.of(0.5, null)).getField(1));
    assertEquals(schema, mapper.getOutputSchema());
}
 
Example #6
Source File: BinarizerMapper.java    From Alink with Apache License 2.0 6 votes vote down vote up
public BinarizerMapper(TableSchema dataSchema, Params params) {
	super(dataSchema, params);
	this.threshold = this.params.get(BinarizerParams.THRESHOLD);

	selectedColType = TableUtil.findColTypeWithAssertAndHint(
		dataSchema,
		this.params.get(BinarizerParams.SELECTED_COL)
	);

	if (TableUtil.isNumber(selectedColType)) {
		try {
			Constructor constructor = selectedColType.getTypeClass().getConstructor(String.class);
			objectValue0 = constructor.newInstance("0");
			objectValue1 = constructor.newInstance("1");
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}
}
 
Example #7
Source File: ClassificationEvaluationUtilTest.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Checks {@code ClassificationEvaluationUtil.judgeEvaluationType} as columns are added and removed:
 * detail column only gives PRED_DETAIL, detail plus prediction column gives PRED_DETAIL,
 * prediction column only gives PRED_RESULT, and neither column raises a
 * {@link RuntimeException} with the expected message.
 */
@Test
public void judgeEvaluationTypeTest(){
    Params params = new Params()
        .set(HasPredictionDetailCol.PREDICTION_DETAIL_COL, "detail");

    // Assert.assertEquals takes (expected, actual); expected values go first so failure
    // messages report the two sides correctly.
    ClassificationEvaluationUtil.Type type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    params.remove(HasPredictionDetailCol.PREDICTION_DETAIL_COL);
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_RESULT, type);

    // With neither column configured the call below is expected to throw.
    params.remove(HasPredictionCol.PREDICTION_COL);
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Error Input, must give either predictionCol or predictionDetailCol!");
    ClassificationEvaluationUtil.judgeEvaluationType(params);
}
 
Example #8
Source File: SelectMapperTest.java    From Alink with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a select clause (plain column, alias, arithmetic, CASE WHEN and UPPER) through
 * {@code SelectMapper} and compares the mapped row against the expected row.
 */
@Test
public void testGeneral() throws Exception {
    TableSchema dataSchema = TableSchema.builder().fields(
        new String[] {"id", "name"},
        new DataType[] {DataTypes.INT(), DataTypes.STRING()}).build();
    Params params = new Params();
    params.set(HasClause.CLAUSE,
        "id, name as eman, id + 1 as id2, CASE WHEN id=1 THEN 'q' ELSE 'p' END as col3, UPPER(name) as col4");
    SelectMapper selectMapper = new SelectMapper(dataSchema, params);
    Row expected = Row.of(1, "'abc'", 2, "q", "'ABC'");

    selectMapper.open();
    try {
        // map() is inside the try so that the opened mapper is closed even when mapping throws.
        Row output = selectMapper.map(Row.of(1, "'abc'"));
        assertEquals(expected, output);
    } finally {
        selectMapper.close();
    }
}
 
Example #9
Source File: CsvSourceBatchOp.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the operator from explicit arguments by packing them into a new {@code Params} and
 * delegating to another constructor of this class.
 *
 * <p>The column names and types are combined into a {@code TableSchema} and converted to a
 * schema string with {@code CsvUtil.schema2SchemaStr} before being stored.
 *
 * @param filePath   value stored under {@code FILE_PATH}.
 * @param colNames   column names used to build the schema stored under {@code SCHEMA_STR}.
 * @param colTypes   column types used to build the schema stored under {@code SCHEMA_STR}.
 * @param fieldDelim value stored under {@code FIELD_DELIMITER}.
 * @param rowDelim   value stored under {@code ROW_DELIMITER}.
 */
public CsvSourceBatchOp(String filePath, String[] colNames, TypeInformation<?>[] colTypes,
                        String fieldDelim, String rowDelim) {
    this(new Params()
        .set(FILE_PATH, filePath)
        .set(SCHEMA_STR, CsvUtil.schema2SchemaStr(new TableSchema(colNames, colTypes)))
        .set(FIELD_DELIMITER, fieldDelim)
        .set(ROW_DELIMITER, rowDelim)
    );
}
 
Example #10
Source File: ParamsTest.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the color under the string key "enumType2" with the value "green" and checks that it is
 * read back as {@code Color.GREEN} through {@code HasEnumTypeColor.ENUM_TYPE_COLOR}, both directly
 * from the params and through a {@code TestBatchOpColor} built from them.
 */
@Test
public void testColorAlias() {
    final Color expectedColor = Color.GREEN;

    final Params rawParams = new Params();
    rawParams.set("enumType2", "green");
    rawParams.set("appendType", "DENSE");

    // Direct lookup on the params.
    Assert.assertEquals(expectedColor, rawParams.get(HasEnumTypeColor.ENUM_TYPE_COLOR));

    // Lookup through an operator built from the same params.
    final TestBatchOpColor op = new TestBatchOpColor(rawParams);
    Assert.assertEquals(expectedColor, op.getEnumTypeColor());
    Assert.assertEquals(expectedColor, op.get(HasEnumTypeColor.ENUM_TYPE_COLOR));
}
 
Example #11
Source File: NaiveBayesTextModelDataConverter.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the prediction-time model data from its serialized form.
 *
 * <p>The first element of {@code data} is parsed as a JSON {@code NaiveBayesTextProbInfo}. For the
 * Bernoulli model type, {@code phi} and {@code minMat} are additionally filled from {@code theta};
 * for other model types they are left as freshly allocated.
 *
 * @param meta           The model meta data.
 * @param data           The model data; only its first element is read.
 * @param distinctLabels The labels.
 * @return The model data used by mapper.
 */
@Override
public NaiveBayesTextPredictModelData deserializeModel(Params meta, Iterable<String> data, Iterable<Object> distinctLabels) {
    final NaiveBayesTextPredictModelData result = new NaiveBayesTextPredictModelData();
    result.meta = meta;

    final NaiveBayesTextProbInfo probInfo =
        JsonConverter.fromJson(data.iterator().next(), NaiveBayesTextProbInfo.class);
    result.pi = probInfo.piArray;
    result.theta = probInfo.theta;
    result.label = Iterables.toArray(distinctLabels, Object.class);
    result.vectorColName = result.meta.get(NaiveBayesTextTrainParams.VECTOR_COL);
    result.modelType = result.meta.get(NaiveBayesTextTrainParams.MODEL_TYPE);
    result.featLen = result.theta.numCols();

    final int numRows = result.theta.numRows();
    result.phi = new double[numRows];
    result.minMat = new DenseMatrix(numRows, result.featLen);

    // Only the bernoulli model needs the extra derived data below.
    if (!ModelType.Bernoulli.equals(result.modelType)) {
        return result;
    }

    for (int row = 0; row < numRows; row++) {
        for (int col = 0; col < result.featLen; col++) {
            final double thetaValue = result.theta.get(row, col);
            final double logOfComplement = Math.log(1 - Math.exp(thetaValue));
            result.phi[row] += logOfComplement;
            result.minMat.set(row, col, thetaValue - logOfComplement);
        }
    }
    return result;
}
 
Example #12
Source File: JdbcDB.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a batch table into {@code tableName} through a JDBC append sink.
 *
 * <p>The target table is first dropped and re-created via {@code dropAndCreateTable}. An
 * {@code INSERT INTO ... VALUES (?,...)} statement with one placeholder per column is then built;
 * the table name and column names are inserted into the statement text verbatim.
 *
 * @param tableName name of the target table.
 * @param in        table whose rows are written.
 * @param parameter parameters forwarded to {@code dropAndCreateTable}.
 * @param sessionId ML environment id used to obtain the data set of {@code in}.
 */
@Override
public void sinkBatch(String tableName, Table in, Params parameter, Long sessionId) {
    dropAndCreateTable(this, tableName, in, parameter);

    final TableSchema tableSchema = in.getSchema();
    final String[] columns = tableSchema.getFieldNames();

    // Column list and placeholder list are built side by side; both start from the first column.
    final StringBuilder columnList = new StringBuilder().append(columns[0]);
    final StringBuilder placeholderList = new StringBuilder().append("?");
    for (int idx = 1; idx < columns.length; idx++) {
        columnList.append(",").append(columns[idx]);
        placeholderList.append(",").append("?");
    }
    final String insertSql =
        "INSERT INTO " + tableName + " (" + columnList + ") VALUES (" + placeholderList + ")";

    final JDBCAppendTableSink sink = JDBCAppendTableSink.builder()
            .setUsername(getUserName())
            .setPassword(getPassword())
            .setDrivername(getDriverName())
            .setDBUrl(getDbUrl())
            .setQuery(insertSql)
            .setParameterTypes(tableSchema.getFieldTypes())
            .build();

    sink.emitDataSet(BatchOperator.fromTable(in).setMLEnvironmentId(sessionId).getDataSet());
}
 
Example #13
Source File: MySqlDB.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the instance from {@code params} by reading {@code DB_NAME}, {@code IP}, {@code PORT},
 * {@code USERNAME} and {@code PASSWORD} (all from {@code MySqlDBParams}) and delegating to the
 * five-argument constructor in that order.
 *
 * @param params parameters holding the connection settings listed above.
 */
public MySqlDB(Params params) {
	this(params.get(MySqlDBParams.DB_NAME),
		params.get(MySqlDBParams.IP),
		params.get(MySqlDBParams.PORT),
		params.get(MySqlDBParams.USERNAME),
		params.get(MySqlDBParams.PASSWORD));
}
 
Example #14
Source File: IsotonicRegressionModelMapperTest.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a three-row isotonic regression model (meta JSON plus two numeric arrays) into the mapper,
 * then checks the prediction for one input row (within a 0.01 tolerance) and the output schema.
 */
@Test
public void testRowData() throws Exception {
	Row[] rows = new Row[] {
		Row.of(0L, "{\"vectorColName\":\"\\\"vector\\\"\",\"modelName\":\"\\\"IsotonicRegressionModel\\\"\","
			+ "\"featureColName\":null,\"featureIndex\":\"0\",\"modelSchema\":\"\\\"model_id bigint,model_info "
			+ "string\\\"\",\"isNewFormat\":\"true\"}\n"),
		Row.of(1048576L, "[0.02,0.1,0.2,0.27,0.3,0.35,0.45,0.5,0.7,0.8,0.9]"),
		Row.of(2097152L,
			"[0.0,0.3333333333333333,0.3333333333333333,0.5,0.5,0.6666666666666666,0.6666666666666666,0.75,0.75,"
				+ "1.0,1.0]")
	};
	List <Row> model = Arrays.asList(rows);
	TableSchema modelSchema = new TableSchema(new String[] {"model_id", "model_info"},
		new TypeInformation <?>[] {Types.LONG, Types.STRING});

	TableSchema dataSchema = new TableSchema(new String[] {"vector"}, new TypeInformation <?>[] {Types.DOUBLE});

	Params params = new Params()
		.set(IsotonicRegPredictParams.PREDICTION_COL, "pred");

	IsotonicRegressionModelMapper mapper = new IsotonicRegressionModelMapper(modelSchema, dataSchema, params);
	mapper.loadModel(model);

	// assertEquals takes (expected, actual[, delta]); expected values go first so failure
	// messages report the two sides correctly.
	assertEquals(1.0, Double.parseDouble(mapper.map(Row.of("0.81, 0.35")).getField(1).toString()), 0.01);
	assertEquals(
		new TableSchema(new String[] {"vector", "pred"},
			new TypeInformation <?>[] {Types.DOUBLE, Types.DOUBLE}),
		mapper.getOutputSchema());
}
 
Example #15
Source File: LabeledModelDataConverter.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a model from its rows: extracts the meta/data pair and the auxiliary label data, then
 * hands all three to {@code deserializeModel}.
 *
 * @param rows the rows holding the serialized model.
 * @return the deserialized model.
 */
@Override
public M2 load(List<Row> rows) {
    final Tuple2<Params, Iterable<String>> parsed = extractModelMetaAndData(rows);
    final Iterable<Object> distinctLabels = extractAuxiliaryData(rows, true);

    final Params meta = parsed.f0;
    final Iterable<String> payload = parsed.f1;
    return deserializeModel(meta, payload, distinctLabels);
}
 
Example #16
Source File: HiveDB.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code tableName} as a batch {@code Table}.
 *
 * <p>The rows from the Hive batch source are converted from {@code BaseRow} to {@code Row} and
 * turned into a table with the source's schema. When the table has partition columns, the result
 * is narrowed to the columns returned by {@code getColNames}.
 *
 * @param tableName name of the table to read.
 * @param parameter parameters forwarded to {@code getHiveBatchSource}.
 * @param sessionId ML environment id.
 * @return the table, restricted to {@code getColNames(tableName)} when partition columns exist.
 * @throws Exception propagated from the calls made here.
 */
@Override
public Table getBatchTable(String tableName, Params parameter, Long sessionId) throws Exception {
    final ExecutionEnvironment batchEnv = MLEnvironmentFactory.get(sessionId).getExecutionEnvironment();
    final HiveBatchSource source = getHiveBatchSource(tableName, parameter);
    final DataSet<BaseRow> baseRows = source.getDataSet(batchEnv);
    final TableSchema sourceSchema = source.getTableSchema();

    final DataSet<Row> convertedRows = baseRows.map(new BaseRowToRow(sourceSchema.getFieldDataTypes()));
    final Table fullTable = DataSetConversionUtil.toTable(sessionId, convertedRows, sourceSchema);

    if (getPartitionCols(tableName).size() == 0) {
        return fullTable;
    }
    // remove static partition columns
    return fullTable.select(Strings.join(getColNames(tableName), ","));
}
 
Example #17
Source File: MultiStringIndexerModelDataConverter.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Emits the model as rows: first, when meta is present, one row {@code (-1L, metaJson, null)};
 * then one row per data tuple, with the tuple's integer index widened to {@code long}.
 *
 * @param modelData pair of (meta, data tuples); the meta may be {@code null}.
 * @param collector receiver of the emitted rows.
 */
@Override
public void save(Tuple2<Params, Iterable<Tuple3<Integer, String, Long>>> modelData, Collector<Row> collector) {
    final Params meta = modelData.f0;
    if (meta != null) {
        collector.collect(Row.of(-1L, meta.toJson(), null));
    }
    for (Tuple3<Integer, String, Long> entry : modelData.f1) {
        collector.collect(Row.of(entry.f0.longValue(), entry.f1, entry.f2));
    }
}
 
Example #18
Source File: LdaModelDataConverterTest.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes generated LDA model data, deserializes it again, and checks that {@code alpha} and
 * {@code list} survive the round trip.
 */
@Test
public void ldaModelDataConverterTest() {

    Tuple2<Params, Iterable<String>> res = converter.serializeModel(generateLdaModelData());
    LdaModelData modelData = converter.deserializeModel(res.f0, res.f1);

    // assertEquals takes (expected, actual); expected values go first so failure messages
    // report the two sides correctly.
    assertEquals(new Double[]{0.2, 0.2, 0.2, 0.2, 0.2}, modelData.alpha);
    assertEquals(generateDocData(), modelData.list);

}
 
Example #19
Source File: BinarizerMapperTest.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Binarizes the "feature" column without setting an explicit threshold and checks that 0.6 maps
 * to 1.0 and that the output schema equals the input schema.
 */
@Test
public void test3() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"feature"}, new TypeInformation<?>[] {Types.DOUBLE});

    Params params = new Params()
        .set(BinarizerParams.SELECTED_COL, "feature");

    BinarizerMapper mapper = new BinarizerMapper(schema, params);

    // assertEquals takes (expected, actual); expected values go first so failure messages
    // report the two sides correctly.
    assertEquals(1.0, mapper.map(Row.of(0.6)).getField(0));
    assertEquals(schema, mapper.getOutputSchema());
}
 
Example #20
Source File: BaseFormatTrans.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the instance for a given source and target format, delegating to another constructor
 * of this class.
 *
 * <p>A {@code null} {@code params} is replaced by a new empty {@code Params}; the two formats are
 * then set under {@code FormatTransParams.FROM_FORMAT} and {@code FormatTransParams.TO_FORMAT}.
 * NOTE(review): for a non-null {@code params}, {@code set} is invoked on the caller's instance
 * rather than on a copy; confirm callers expect that.
 *
 * @param fromFormat format to convert from.
 * @param toFormat   format to convert to.
 * @param params     additional parameters, may be {@code null}.
 */
public BaseFormatTrans(FormatType fromFormat, FormatType toFormat, Params params) {
    this(
        (null == params ? new Params() : params)
            .set(FormatTransParams.FROM_FORMAT, fromFormat)
            .set(FormatTransParams.TO_FORMAT, toFormat)
    );
}
 
Example #21
Source File: FeatureSplitter.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given collaborators and caches the splitting limits read from {@code params}.
 *
 * @param params      parameters; the min-samples-per-leaf, min-sample-ratio-per-child,
 *                    min-info-gain, max-depth and max-leaves settings are read from it.
 * @param data        data kept in the {@code data} field.
 * @param featureMeta feature meta kept in the {@code featureMeta} field.
 * @param partition   partition kept in the {@code partition} field.
 */
public FeatureSplitter(
	Params params, DenseData data, FeatureMeta featureMeta, SequentialPartition partition) {
	this.params = params;
	this.data = data;
	this.featureMeta = featureMeta;
	this.partition = partition;

	// Read the limits once here and keep them in fields.
	this.minSamplesPerLeaf = params.get(HasMinSamplesPerLeaf.MIN_SAMPLES_PER_LEAF);
	this.minSampleRatioPerChild = params.get(HasMinSampleRatioPerChild.MIN_SAMPLE_RATIO_PERCHILD);
	this.minInfoGain = params.get(HasMinInfoGain.MIN_INFO_GAIN);
	this.maxDepth = params.get(HasMaxDepth.MAX_DEPTH);
	this.maxLeaves = params.get(HasMaxLeaves.MAX_LEAVES);
}
 
Example #22
Source File: LinearModelData.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Construct function.
 * @param labelType label Type.
 * @param meta meta information of model.
 * @param featureNames the feature column names.
 * @param coefVector
 */
public LinearModelData(TypeInformation labelType, Params meta, String[] featureNames, DenseVector coefVector) {
	this.labelType = labelType;
	this.coefVector = coefVector;
	this.featureNames = featureNames;
	if (meta.contains(ModelParamName.LABEL_VALUES)) {
		this.labelValues = FeatureLabelUtil.recoverLabelType(meta.get(ModelParamName.LABEL_VALUES), this.labelType);
	}
	setMetaInfo(meta);
}
 
Example #23
Source File: PipelineStageBase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the parameters of this stage, creating an empty {@code Params} on first use when none
 * has been set yet.
 *
 * @return the (never {@code null}) parameters held by this stage.
 */
@Override
public Params getParams() {
	if (this.params != null) {
		return this.params;
	}
	this.params = new Params();
	return this.params;
}
 
Example #24
Source File: NumSeqSourceStreamOp.java    From Alink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the source from a number sequence.
 *
 * <p>A sequence from {@code from} to {@code to} is generated on the stream execution environment
 * of this operator's ML environment, mapped through {@code transform} built from
 * {@code timeZones}, and registered as the output table with a single column named
 * {@code colName}.
 *
 * @param from      first argument passed to {@code generateSequence}.
 * @param to        second argument passed to {@code generateSequence}.
 * @param colName   name of the output column.
 * @param timeZones argument passed to the {@code transform} map function.
 * @param params    parameters forwarded to the superclass constructor.
 */
public NumSeqSourceStreamOp(long from, long to, String colName, Double[] timeZones, Params params) {
    super(params);

    DataStream<Long> mapped = MLEnvironmentFactory.get(getMLEnvironmentId())
        .getStreamExecutionEnvironment()
        .generateSequence(from, to)
        .map(new transform(timeZones));

    this.setOutputTable(
        MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamTableEnvironment()
            .fromDataStream(mapped, colName));
}
 
Example #25
Source File: LinearSvmPredictStreamOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator by forwarding the model, a {@code LinearModelMapper} constructor reference
 * and the parameters to the superclass constructor.
 *
 * @param model  the model, given as a batch operator.
 * @param params the operator parameters.
 */
public LinearSvmPredictStreamOp(BatchOperator model, Params params) {
	super(model, LinearModelMapper::new, params);
}
 
Example #26
Source File: DecisionTreePredictBatchOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator by forwarding a {@code RandomForestModelMapper} constructor reference and
 * the parameters to the superclass constructor.
 *
 * @param params the operator parameters.
 */
public DecisionTreePredictBatchOp(Params params) {
	super(RandomForestModelMapper::new, params);
}
 
Example #27
Source File: KvToColumnsBatchOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator with a new, empty {@code Params} by delegating to the
 * {@code Params}-taking constructor.
 */
public KvToColumnsBatchOp() {
    this(new Params());
}
 
Example #28
Source File: JsonValueStreamOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator by forwarding a {@code JsonPathMapper} constructor reference and the
 * parameters to the superclass constructor.
 *
 * @param param the operator parameters.
 */
public JsonValueStreamOp(Params param) {
	super(JsonPathMapper::new, param);
}
 
Example #29
Source File: AnnotationUtilsTest.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the instance, forwarding {@code params} to the superclass constructor.
 *
 * @param params the parameters handed to the superclass.
 */
public FakeOp2(Params params) {
    super(params);
}
 
Example #30
Source File: KvToCsvStreamOp.java    From Alink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the operator with a new, empty {@code Params} by delegating to the
 * {@code Params}-taking constructor.
 */
public KvToCsvStreamOp() {
    this(new Params());
}