org.datavec.api.transform.schema.Schema Java Examples
The following examples show how to use
org.datavec.api.transform.schema.Schema.
Each example is taken from an open-source project; the source file, originating project, and license are noted above each snippet.
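Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them follow: build a Schema with Schema.Builder, then hand it to a TransformProcess, which derives the output schema from the transformations applied. The column names ("name", "age", "city") are illustrative only; the builder and TransformProcess calls mirror the ones used in the examples below.

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

import java.util.Arrays;

public class SchemaQuickStart {
    public static void main(String[] args) {
        // Define the structure of the raw records: one column per field, in order.
        Schema inputSchema = new Schema.Builder()
                .addColumnString("name")                                    // free-form text column
                .addColumnInteger("age")                                    // integer column
                .addColumnCategorical("city", Arrays.asList("NYC", "SF"))  // fixed set of categories
                .build();

        // A TransformProcess starts from the input schema and tracks how each step changes it.
        TransformProcess tp = new TransformProcess.Builder(inputSchema)
                .removeColumns("name")   // drop a column; the derived schema reflects this
                .build();

        Schema outputSchema = tp.getFinalSchema();
        System.out.println(outputSchema.numColumns() + " columns: " + outputSchema.getColumnNames());
    }
}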
Example #1
Source File: TimeWindowFunction.java From DataVec with Apache License 2.0 | 6 votes |
@Override
public void setInputSchema(Schema schema) {
    if (!(schema instanceof SequenceSchema))
        throw new IllegalArgumentException("Invalid schema: TimeWindowFunction can "
                        + "only operate on SequenceSchema");
    if (!schema.hasColumn(timeColumn))
        throw new IllegalStateException(
                        "Input schema does not have a column with name \"" + timeColumn + "\"");
    if (schema.getMetaData(timeColumn).getColumnType() != ColumnType.Time)
        throw new IllegalStateException("Invalid column: column \"" + timeColumn + "\" is not of type "
                        + ColumnType.Time + "; is " + schema.getMetaData(timeColumn).getColumnType());

    this.inputSchema = schema;
    timeZone = ((TimeMetaData) schema.getMetaData(timeColumn)).getTimeZone();
}
Example #2
Source File: TestPythonTransformProcess.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testNumpyTransform() {
    PythonTransform pythonTransform = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new NDArrayWritable(Nd4j.scalar(1).reshape(1, 1))));

    Schema inputSchema = new Builder()
            .addColumnNDArray("a", new long[]{1, 1})
            .build();

    TransformProcess tp = new TransformProcess.Builder(inputSchema)
            .transform(pythonTransform)
            .build();
    List<List<Writable>> execute = LocalTransformExecutor.execute(inputs, tp);
    assertFalse(execute.isEmpty());
    assertNotNull(execute.get(0));
    assertNotNull(execute.get(0).get(0));
    assertNotNull(execute.get(0).get(1));
    assertEquals(Nd4j.scalar(3).reshape(1, 1), ((NDArrayWritable) execute.get(0).get(0)).get());
    assertEquals("hello world", execute.get(0).get(1).toString());
}
Example #3
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testRemoveAllColumnsExceptForTransform() {
    Schema schema = new Schema.Builder().addColumnDouble("first").addColumnString("second")
                    .addColumnInteger("third").addColumnLong("fourth").build();

    Transform transform = new RemoveAllColumnsExceptForTransform("second", "third");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(2, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());
    TestCase.assertEquals(ColumnType.Integer, out.getMetaData(1).getColumnType());

    assertEquals(Arrays.asList(new Text("one"), new IntWritable(1)),
                    transform.map(Arrays.asList((Writable) new DoubleWritable(1.0), new Text("one"),
                                    new IntWritable(1), new LongWritable(1L))));
}
Example #4
Source File: TransformProcessRecordReaderTests.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void simpleTransformTestSequence() {
    List<List<Writable>> sequence = new ArrayList<>();
    //First window:
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L), new IntWritable(0),
                    new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L + 100L), new IntWritable(1),
                    new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L + 200L), new IntWritable(2),
                    new IntWritable(0)));

    Schema schema = new SequenceSchema.Builder().addColumnTime("timecolumn", DateTimeZone.UTC)
                    .addColumnInteger("intcolumn").addColumnInteger("intcolumn2").build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("intcolumn2").build();
    InMemorySequenceRecordReader inMemorySequenceRecordReader =
                    new InMemorySequenceRecordReader(Arrays.asList(sequence));
    TransformProcessSequenceRecordReader transformProcessSequenceRecordReader =
                    new TransformProcessSequenceRecordReader(inMemorySequenceRecordReader, transformProcess);
    List<List<Writable>> next = transformProcessSequenceRecordReader.sequenceRecord();
    assertEquals(2, next.get(0).size());
}
Example #5
Source File: DuplicateColumnsTransform.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size());

    List<String> oldNames = inputSchema.getColumnNames();

    int dupCount = 0;
    for (int i = 0; i < oldMeta.size(); i++) {
        String current = oldNames.get(i);
        newMeta.add(oldMeta.get(i));

        if (columnsToDuplicateSet.contains(current)) {
            //Duplicate the current columnName, and place it after...
            String dupName = newColumnNames.get(dupCount);
            ColumnMetaData m = oldMeta.get(i).clone();
            m.setName(dupName);
            newMeta.add(m);
            dupCount++;
        }
    }

    return inputSchema.newSchema(newMeta);
}
Example #6
Source File: TestConditions.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testDoubleCondition() {
    Schema schema = TestTransforms.getSchema(ColumnType.Double);

    Condition condition = new DoubleColumnCondition("column", SequenceConditionMode.Or,
                    ConditionOp.GreaterOrEqual, 0);
    condition.setInputSchema(schema);

    assertTrue(condition.condition(Collections.singletonList((Writable) new DoubleWritable(0.0))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new DoubleWritable(0.5))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new DoubleWritable(-0.5))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new DoubleWritable(-1))));

    Set<Double> set = new HashSet<>();
    set.add(0.0);
    set.add(3.0);
    condition = new DoubleColumnCondition("column", SequenceConditionMode.Or, ConditionOp.InSet, set);
    condition.setInputSchema(schema);
    assertTrue(condition.condition(Collections.singletonList((Writable) new DoubleWritable(0.0))));
    assertTrue(condition.condition(Collections.singletonList((Writable) new DoubleWritable(3.0))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new DoubleWritable(1.0))));
    assertFalse(condition.condition(Collections.singletonList((Writable) new DoubleWritable(2.0))));
}
Example #7
Source File: TestTransforms.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testLongColumnsMathOpTransform() {
    Schema schema = new Schema.Builder().addColumnLong("first").addColumnString("second")
                    .addColumnLong("third").build();

    Transform transform = new LongColumnsMathOpTransform("out", MathOp.Add, "first", "third");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.Long, ColumnType.String, ColumnType.Long, ColumnType.Long),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new LongWritable(1), new Text("something"), new LongWritable(2),
                    new LongWritable(3)),
                    transform.map(Arrays.asList((Writable) new LongWritable(1), new Text("something"),
                                    new LongWritable(2))));
    assertEquals(Arrays.asList((Writable) new LongWritable(100), new Text("something2"), new LongWritable(21),
                    new LongWritable(121)),
                    transform.map(Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
                                    new LongWritable(21))));
}
Example #8
Source File: CustomerRetentionPredictionExample.java From Java-Deep-Learning-Cookbook with MIT License | 6 votes |
private static Schema generateSchema() {
    final Schema schema = new Schema.Builder()
            .addColumnString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France", "Germany", "Spain"))
            .addColumnCategorical("Gender", Arrays.asList("Male", "Female"))
            .addColumnsInteger("Age", "Tenure")
            .addColumnDouble("Balance")
            .addColumnsInteger("NumOfProducts", "HasCrCard", "IsActiveMember")
            .addColumnDouble("EstimatedSalary")
            .addColumnInteger("Exited")
            .build();
    return schema;
}
Example #9
Source File: LocalTransformProcessRecordReaderTests.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void simpleTransformTestSequence() {
    List<List<Writable>> sequence = new ArrayList<>();
    //First window:
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L), new IntWritable(0),
                    new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L + 100L), new IntWritable(1),
                    new IntWritable(0)));
    sequence.add(Arrays.asList((Writable) new LongWritable(1451606400000L + 200L), new IntWritable(2),
                    new IntWritable(0)));

    Schema schema = new SequenceSchema.Builder().addColumnTime("timecolumn", DateTimeZone.UTC)
                    .addColumnInteger("intcolumn").addColumnInteger("intcolumn2").build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("intcolumn2").build();
    InMemorySequenceRecordReader inMemorySequenceRecordReader =
                    new InMemorySequenceRecordReader(Arrays.asList(sequence));
    LocalTransformProcessSequenceRecordReader transformProcessSequenceRecordReader =
                    new LocalTransformProcessSequenceRecordReader(inMemorySequenceRecordReader, transformProcess);
    List<List<Writable>> next = transformProcessSequenceRecordReader.sequenceRecord();
    assertEquals(2, next.get(0).size());
}
Example #10
Source File: CategoricalToOneHotTransform.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData))
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: "
                        + meta.getColumnType() + ")");
    this.stateNames = ((CategoricalMetaData) meta).getStateNames();

    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
Example #11
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testStringToCategoricalTransform() {
    Schema schema = getSchema(ColumnType.String);

    Transform transform = new StringToCategoricalTransform("column", Arrays.asList("zero", "one", "two"));
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.Categorical, out.getMetaData(0).getColumnType());
    CategoricalMetaData meta = (CategoricalMetaData) out.getMetaData(0);
    assertEquals(Arrays.asList("zero", "one", "two"), meta.getStateNames());

    assertEquals(Collections.singletonList((Writable) new Text("zero")),
                    transform.map(Collections.singletonList((Writable) new Text("zero"))));
    assertEquals(Collections.singletonList((Writable) new Text("one")),
                    transform.map(Collections.singletonList((Writable) new Text("one"))));
    assertEquals(Collections.singletonList((Writable) new Text("two")),
                    transform.map(Collections.singletonList((Writable) new Text("two"))));
}
Example #12
Source File: ArrowConverterTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testToArrayFromINDArray() {
    Schema.Builder schemaBuilder = new Schema.Builder();
    schemaBuilder.addColumnNDArray("outputArray", new long[]{1, 4});
    Schema schema = schemaBuilder.build();
    int numRows = 4;
    List<List<Writable>> ret = new ArrayList<>(numRows);
    for (int i = 0; i < numRows; i++) {
        ret.add(Arrays.<Writable>asList(new NDArrayWritable(Nd4j.linspace(1, 4, 4).reshape(1, 4))));
    }

    List<FieldVector> fieldVectors = ArrowConverter.toArrowColumns(bufferAllocator, schema, ret);
    ArrowWritableRecordBatch arrowWritableRecordBatch = new ArrowWritableRecordBatch(fieldVectors, schema);
    INDArray array = ArrowConverter.toArray(arrowWritableRecordBatch);
    assertArrayEquals(new long[]{4, 4}, array.shape());

    INDArray assertion = Nd4j.repeat(Nd4j.linspace(1, 4, 4), 4).reshape(4, 4);
    assertEquals(assertion, array);
}
Example #13
Source File: TestTransforms.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testCategoricalToOneHotTransform() {
    Schema schema = getSchema(ColumnType.Categorical, "zero", "one", "two");

    Transform transform = new CategoricalToOneHotTransform("column");
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    assertEquals(3, out.getColumnMetaData().size());
    for (int i = 0; i < 3; i++) {
        TestCase.assertEquals(ColumnType.Integer, out.getMetaData(i).getColumnType());
        IntegerMetaData meta = (IntegerMetaData) out.getMetaData(i);
        assertNotNull(meta.getMinAllowedValue());
        assertEquals(0, (int) meta.getMinAllowedValue());

        assertNotNull(meta.getMaxAllowedValue());
        assertEquals(1, (int) meta.getMaxAllowedValue());
    }

    assertEquals(Arrays.asList(new IntWritable(1), new IntWritable(0), new IntWritable(0)),
                    transform.map(Collections.singletonList((Writable) new Text("zero"))));
    assertEquals(Arrays.asList(new IntWritable(0), new IntWritable(1), new IntWritable(0)),
                    transform.map(Collections.singletonList((Writable) new Text("one"))));
    assertEquals(Arrays.asList(new IntWritable(0), new IntWritable(0), new IntWritable(1)),
                    transform.map(Collections.singletonList((Writable) new Text("two"))));
}
Example #14
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testLongColumnsMathOpTransform() {
    Schema schema = new Schema.Builder().addColumnLong("first").addColumnString("second")
                    .addColumnLong("third").build();

    Transform transform = new LongColumnsMathOpTransform("out", MathOp.Add, "first", "third");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.Long, ColumnType.String, ColumnType.Long, ColumnType.Long),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new LongWritable(1), new Text("something"), new LongWritable(2),
                    new LongWritable(3)),
                    transform.map(Arrays.asList((Writable) new LongWritable(1), new Text("something"),
                                    new LongWritable(2))));
    assertEquals(Arrays.asList((Writable) new LongWritable(100), new Text("something2"), new LongWritable(21),
                    new LongWritable(121)),
                    transform.map(Arrays.asList((Writable) new LongWritable(100), new Text("something2"),
                                    new LongWritable(21))));
}
Example #15
Source File: LocalTransformExecutor.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Convert a string time series to
 * the proper writable set based on the schema.
 * Note that this does not use arrow.
 * This just uses normal writable objects.
 *
 * @param stringInput the string input
 * @param schema      the schema to use
 * @return the converted records
 */
public static List<List<Writable>> convertStringInput(List<List<String>> stringInput, Schema schema) {
    List<List<Writable>> ret = new ArrayList<>();
    for (int j = 0; j < stringInput.size(); j++) {
        List<String> record = stringInput.get(j);
        List<Writable> recordAdd = new ArrayList<>();
        for (int k = 0; k < record.size(); k++) {
            //Parse each value according to the column type declared in the schema
            switch (schema.getType(k)) {
                case Double: recordAdd.add(new DoubleWritable(Double.parseDouble(record.get(k)))); break;
                case Float: recordAdd.add(new FloatWritable(Float.parseFloat(record.get(k)))); break;
                case Integer: recordAdd.add(new IntWritable(Integer.parseInt(record.get(k)))); break;
                case Long: recordAdd.add(new LongWritable(Long.parseLong(record.get(k)))); break;
                case String: recordAdd.add(new Text(record.get(k))); break;
                case Time: recordAdd.add(new LongWritable(Long.parseLong(record.get(k)))); break;
            }
        }
        //Collect the converted record so the returned list is actually populated
        ret.add(recordAdd);
    }

    return ret;
}
Example #16
Source File: TestPythonTransformProcess.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testPythonTransformNoOutputSpecified() throws Exception {
    PythonTransform pythonTransform = PythonTransform.builder()
            .code("a += 2; b = 'hello world'")
            .returnAllInputs(true)
            .build();
    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new IntWritable(1)));
    Schema inputSchema = new Builder()
            .addColumnInteger("a")
            .build();

    TransformProcess tp = new TransformProcess.Builder(inputSchema)
            .transform(pythonTransform)
            .build();
    List<List<Writable>> execute = LocalTransformExecutor.execute(inputs, tp);
    assertEquals(3, execute.get(0).get(0).toInt());
    assertEquals("hello world", execute.get(0).get(1).toString());
}
Example #17
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testAppendStringColumnTransform() {
    Schema schema = getSchema(ColumnType.String);

    Transform transform = new AppendStringColumnTransform("column", "_AppendThis");
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    assertEquals(Collections.singletonList((Writable) new Text("one_AppendThis")),
                    transform.map(Collections.singletonList((Writable) new Text("one"))));
    assertEquals(Collections.singletonList((Writable) new Text("two_AppendThis")),
                    transform.map(Collections.singletonList((Writable) new Text("two"))));
    assertEquals(Collections.singletonList((Writable) new Text("three_AppendThis")),
                    transform.map(Collections.singletonList((Writable) new Text("three"))));
}
Example #18
Source File: TestTransforms.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testDoubleColumnsMathOpTransform() {
    Schema schema = new Schema.Builder().addColumnString("first").addColumnDouble("second")
                    .addColumnDouble("third").build();

    Transform transform = new DoubleColumnsMathOpTransform("out", MathOp.Add, "second", "third");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("first", "second", "third", "out"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Double, ColumnType.Double),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new Text("something"), new DoubleWritable(1.0), new DoubleWritable(2.1),
                    new DoubleWritable(3.1)),
                    transform.map(Arrays.asList((Writable) new Text("something"), new DoubleWritable(1.0),
                                    new DoubleWritable(2.1))));
    assertEquals(Arrays.asList((Writable) new Text("something2"), new DoubleWritable(100.0),
                    new DoubleWritable(21.1), new DoubleWritable(121.1)),
                    transform.map(Arrays.asList((Writable) new Text("something2"), new DoubleWritable(100.0),
                                    new DoubleWritable(21.1))));
}
Example #19
Source File: BaseSequenceExpansionTransform.java From DataVec with Apache License 2.0 | 6 votes |
@Override
public Schema transform(Schema inputSchema) {
    //Same schema *except* for the expanded columns

    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for (String s : requiredColumns) {
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    int modColumnIdx = 0;
    for (ColumnMetaData m : inputSchema.getColumnMetaData()) {
        if (requiredColumns.contains(m.getName())) {
            //Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            //Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
Example #20
Source File: DuplicateColumnsTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public void setInputSchema(Schema inputSchema) {
    columnIndexesToDuplicateSet.clear();

    List<String> schemaColumnNames = inputSchema.getColumnNames();
    for (String s : columnsToDuplicate) {
        int idx = schemaColumnNames.indexOf(s);
        if (idx == -1)
            throw new IllegalStateException("Invalid state: column to duplicate \"" + s + "\" does not appear "
                            + "in input schema");
        columnIndexesToDuplicateSet.add(idx);
    }

    this.inputSchema = inputSchema;
}
Example #21
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 5 votes |
public static Schema getSchema(ColumnType type, String... colNames) {
    Schema.Builder schema = new Schema.Builder();

    switch (type) {
        case String:
            schema.addColumnString("column");
            break;
        case Integer:
            schema.addColumnInteger("column");
            break;
        case Long:
            schema.addColumnLong("column");
            break;
        case Double:
            schema.addColumnDouble("column");
            break;
        case Categorical:
            schema.addColumnCategorical("column", colNames);
            break;
        case Time:
            schema.addColumnTime("column", DateTimeZone.UTC);
            break;
        default:
            throw new RuntimeException();
    }
    return schema.build();
}
Example #22
Source File: LegacyMappingHelper.java From DataVec with Apache License 2.0 | 5 votes |
private static Map<String, String> getLegacyMappingSchema() {
    if (mapSchema == null) {
        Map<String, String> m = new HashMap<>();
        m.put("Schema", Schema.class.getName());
        m.put("SequenceSchema", SequenceSchema.class.getName());

        mapSchema = m;
    }
    return mapSchema;
}
Example #23
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void testReplaceStringTransform() {
    Schema schema = getSchema(ColumnType.String);

    // Linked
    Map<String, String> map = new LinkedHashMap<>();
    map.put("mid", "C2");
    map.put("\\d", "one");
    Transform transform = new ReplaceStringTransform("column", map);
    transform.setInputSchema(schema);
    Schema out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    assertEquals(Collections.singletonList((Writable) new Text("BoneConeTone")),
                    transform.map(Collections.singletonList((Writable) new Text("B1midT3"))));

    // No link
    map = new HashMap<>();
    map.put("^\\s+|\\s+$", "");
    transform = new ReplaceStringTransform("column", map);
    transform.setInputSchema(schema);
    out = transform.transform(schema);

    assertEquals(1, out.getColumnMetaData().size());
    TestCase.assertEquals(ColumnType.String, out.getMetaData(0).getColumnType());

    assertEquals(Collections.singletonList((Writable) new Text("4.25")),
                    transform.map(Collections.singletonList((Writable) new Text(" 4.25 "))));
}
Example #24
Source File: CalculateSortedRank.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Schema transform(Schema inputSchema) {
    if (inputSchema instanceof SequenceSchema)
        throw new IllegalStateException("Calculating sorted rank on sequences: not yet supported");

    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(origMeta);

    newMeta.add(new LongMetaData(newColumnName, 0L, null));

    return inputSchema.newSchema(newMeta);
}
Example #25
Source File: Normalization.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Normalize the sequence by zero mean unit variance
 *
 * @param schema         Schema of the data to normalize
 * @param sequence       Sequence data
 * @param excludeColumns List of columns to exclude from the normalization
 * @return Normalized sequence
 */
public static JavaRDD<List<List<Writable>>> zeroMeanUnitVarianceSequence(Schema schema,
                JavaRDD<List<List<Writable>>> sequence, List<String> excludeColumns) {
    DataRowsFacade frame = DataFrames.toDataFrameSequence(schema, sequence);
    if (excludeColumns == null)
        excludeColumns = Arrays.asList(DataFrames.SEQUENCE_UUID_COLUMN, DataFrames.SEQUENCE_INDEX_COLUMN);
    else {
        excludeColumns = new ArrayList<>(excludeColumns);
        excludeColumns.add(DataFrames.SEQUENCE_UUID_COLUMN);
        excludeColumns.add(DataFrames.SEQUENCE_INDEX_COLUMN);
    }
    frame = zeromeanUnitVariance(frame, excludeColumns);
    return DataFrames.toRecordsSequence(frame).getSecond();
}
Example #26
Source File: LocalTransformProcessRecordReaderTests.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void simpleTransformTest() throws Exception {
    Schema schema = new Schema.Builder().addColumnDouble("0").addColumnDouble("1").addColumnDouble("2")
                    .addColumnDouble("3").addColumnDouble("4").build();
    TransformProcess transformProcess = new TransformProcess.Builder(schema).removeColumns("0").build();
    CSVRecordReader csvRecordReader = new CSVRecordReader();
    csvRecordReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));
    LocalTransformProcessRecordReader transformProcessRecordReader =
                    new LocalTransformProcessRecordReader(csvRecordReader, transformProcess);
    assertEquals(4, transformProcessRecordReader.next().size());
}
Example #27
Source File: SequenceMovingWindowReduceTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema...
    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> meta = new ArrayList<>(oldMeta);

    ColumnMetaData m;
    switch (op) {
        case Min:
        case Max:
        case Range:
        case TakeFirst:
        case TakeLast:
            //Same type as input
            m = oldMeta.get(colIdx);
            m = m.clone();
            m.setName(newColumnName);
            break;
        case Prod:
        case Sum:
        case Mean:
        case Stdev:
            //Double type
            m = new DoubleMetaData(newColumnName);
            break;
        case Count:
        case CountUnique:
            //Integer type
            m = new IntegerMetaData(newColumnName);
            break;
        default:
            throw new UnsupportedOperationException("Unknown op type: " + op);
    }
    meta.add(m);

    return new SequenceSchema(meta);
}
Example #28
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void testTextToCharacterIndexTransform() {
    Schema s = new Schema.Builder().addColumnString("col").addColumnDouble("d").build();

    List<List<Writable>> inSeq = Arrays.asList(
                    Arrays.<Writable>asList(new Text("text"), new DoubleWritable(1.0)),
                    Arrays.<Writable>asList(new Text("ab"), new DoubleWritable(2.0)));

    Map<Character, Integer> map = new HashMap<>();
    map.put('a', 0);
    map.put('b', 1);
    map.put('e', 2);
    map.put('t', 3);
    map.put('x', 4);

    List<List<Writable>> exp = Arrays.asList(
                    Arrays.<Writable>asList(new IntWritable(3), new DoubleWritable(1.0)),
                    Arrays.<Writable>asList(new IntWritable(2), new DoubleWritable(1.0)),
                    Arrays.<Writable>asList(new IntWritable(4), new DoubleWritable(1.0)),
                    Arrays.<Writable>asList(new IntWritable(3), new DoubleWritable(1.0)),
                    Arrays.<Writable>asList(new IntWritable(0), new DoubleWritable(2.0)),
                    Arrays.<Writable>asList(new IntWritable(1), new DoubleWritable(2.0)));

    Transform t = new TextToCharacterIndexTransform("col", "newName", map, false);
    t.setInputSchema(s);

    Schema outputSchema = t.transform(s);
    assertEquals(2, outputSchema.getColumnNames().size());
    assertEquals(ColumnType.Integer, outputSchema.getType(0));
    assertEquals(ColumnType.Double, outputSchema.getType(1));

    IntegerMetaData intMetadata = (IntegerMetaData) outputSchema.getMetaData(0);
    assertEquals(0, (int) intMetadata.getMinAllowedValue());
    assertEquals(4, (int) intMetadata.getMaxAllowedValue());

    List<List<Writable>> out = t.mapSequence(inSeq);
    assertEquals(exp, out);
}
Example #29
Source File: ArrowWritableRecordBatch.java From DataVec with Apache License 2.0 | 5 votes |
public ArrowWritableRecordBatch(List<FieldVector> list, Schema schema, int offset, int rows) {
    this.list = list;
    this.schema = schema;
    //each column should have same number of rows
    this.offset = offset;
    this.size = rows;
}
Example #30
Source File: TestTransforms.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testStringListToCategoricalSetTransform() {
    //Idea: String list to a set of categories... "a,c" for categories {a,b,c} -> "true","false","true"

    Schema schema = getSchema(ColumnType.String);

    Transform transform = new StringListToCategoricalSetTransform("column", Arrays.asList("a", "b", "c"),
                    Arrays.asList("a", "b", "c"), ",");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(3, out.getColumnMetaData().size());
    for (int i = 0; i < 3; i++) {
        TestCase.assertEquals(ColumnType.Categorical, out.getType(i));
        CategoricalMetaData meta = (CategoricalMetaData) out.getMetaData(i);
        assertEquals(Arrays.asList("true", "false"), meta.getStateNames());
    }

    assertEquals(Arrays.asList(new Text("false"), new Text("false"), new Text("false")),
                    transform.map(Collections.singletonList((Writable) new Text(""))));
    assertEquals(Arrays.asList(new Text("true"), new Text("false"), new Text("false")),
                    transform.map(Collections.singletonList((Writable) new Text("a"))));
    assertEquals(Arrays.asList(new Text("false"), new Text("true"), new Text("false")),
                    transform.map(Collections.singletonList((Writable) new Text("b"))));
    assertEquals(Arrays.asList(new Text("false"), new Text("false"), new Text("true")),
                    transform.map(Collections.singletonList((Writable) new Text("c"))));
    assertEquals(Arrays.asList(new Text("true"), new Text("false"), new Text("true")),
                    transform.map(Collections.singletonList((Writable) new Text("a,c"))));
    assertEquals(Arrays.asList(new Text("true"), new Text("true"), new Text("true")),
                    transform.map(Collections.singletonList((Writable) new Text("a,b,c"))));
}