org.datavec.api.writable.Text Java Examples

The following examples show how to use org.datavec.api.writable.Text. The examples are drawn from open-source projects; the source file and project are noted above each one.
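
Before the individual examples, a minimal sketch of the class itself may help. Text wraps a String as a DataVec Writable and compares by value; the numeric conversion shown is inferred from how the tests below use Writable values (e.g. toDouble() in Example #10) and assumes the text holds a parseable number. The wrapper class is illustrative only.

import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;

public class TextBasics {
    public static void main(String[] args) {
        // Text wraps a String value as a DataVec Writable
        Writable w = new Text("2.5");

        // Equality is by value, which the assertEquals calls in the tests below rely on
        System.out.println(w.equals(new Text("2.5"))); // true

        // String and (assumed) numeric conversions
        System.out.println(w.toString()); // 2.5
        System.out.println(w.toDouble()); // 2.5
    }
}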
Example #1
Source File: CSVSparkTransformTest.java    From DataVec with Apache License 2.0
@Test
public void testTransformerBatch() throws Exception {
    List<Writable> input = new ArrayList<>();
    input.add(new DoubleWritable(1.0));
    input.add(new DoubleWritable(2.0));

    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    List<Writable> output = new ArrayList<>();
    output.add(new Text("1.0"));
    output.add(new Text("2.0"));

    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);
    String[] values = new String[] {"1.0", "2.0"};
    SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values));
    BatchCSVRecord batchCSVRecord = new BatchCSVRecord();
    for (int i = 0; i < 3; i++)
        batchCSVRecord.add(record);
    //data type is string, so the batch cannot be converted to an NDArray; the check below is left disabled
    BatchCSVRecord batchCSVRecord1 = csvSparkTransform.transform(batchCSVRecord);
  /*  Base64NDArrayBody body = csvSparkTransform.toArray(batchCSVRecord1);
    INDArray fromBase64 = Nd4jBase64.fromBase64(body.getNdarray());
    assertTrue(fromBase64.isMatrix());
    System.out.println("Base 64ed array " + fromBase64); */
}
 
Example #2
Source File: TestGeoTransforms.java    From DataVec with Apache License 2.0
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema schema = new Schema.Builder().addColumnString("point").addColumnString("mean").addColumnString("stddev")
                    .build();

    Transform transform = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    transform.setInputSchema(schema);

    Schema out = transform.transform(schema);
    assertEquals(4, out.numColumns());
    assertEquals(Arrays.asList("point", "mean", "stddev", "dist"), out.getColumnNames());
    assertEquals(Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    out.getColumnTypes());

    assertEquals(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"), new DoubleWritable(5.0)),
                    transform.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));
    assertEquals(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
                    transform.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}
 
Example #3
Source File: JacksonRecordReaderTest.java    From deeplearning4j with Apache License 2.0
private static void testJacksonRecordReader(RecordReader rr) {

        List<Writable> json0 = rr.next();
        List<Writable> exp0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
        assertEquals(exp0, json0);

        List<Writable> json1 = rr.next();
        List<Writable> exp1 =
                        Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
        assertEquals(exp1, json1);

        List<Writable> json2 = rr.next();
        List<Writable> exp2 =
                        Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));
        assertEquals(exp2, json2);

        assertFalse(rr.hasNext());

        //Test reset
        rr.reset();
        assertEquals(exp0, rr.next());
        assertEquals(exp1, rr.next());
        assertEquals(exp2, rr.next());
        assertFalse(rr.hasNext());
    }
 
Example #4
Source File: CSVRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCsvSkipAllButOneLine() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.<Writable>asList(new Text(Integer.toString(numLines - 1)),
            new Text("one"), new Text("two"), new Text("three"));
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines - 1, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(rr.hasNext());
    assertEquals(lineList, rr.next());
}
 
Example #5
Source File: JDBCRecordReaderTest.java    From DataVec with Apache License 2.0
@Test
public void testSimpleIter() throws Exception {
    try (JDBCRecordReader reader = getInitializedReader("SELECT * FROM Coffee")) {
        List<List<Writable>> records = new ArrayList<>();
        while (reader.hasNext()) {
            List<Writable> values = reader.next();
            records.add(values);
        }

        assertFalse(records.isEmpty());

        List<Writable> first = records.get(0);
        assertEquals(new Text("Bolivian Dark"), first.get(0));
        assertEquals(new Text("14-001"), first.get(1));
        assertEquals(new DoubleWritable(8.95), first.get(2));
    }
}
 
Example #6
Source File: ExcelRecordReader.java    From deeplearning4j with Apache License 2.0
private List<Writable> rowToRecord(Row currRow) {
    if(numColumns < 0) {
        numColumns = currRow.getLastCellNum();
    }

    if(currRow.getLastCellNum() != numColumns) {
        throw new IllegalStateException("Invalid number of columns for row. First number of columns found was " + numColumns + " but row " + currRow.getRowNum() + " was " + currRow.getLastCellNum());
    }

    List<Writable> ret = new ArrayList<>(currRow.getLastCellNum());
    for(Cell cell: currRow) {
        String cellValue = dataFormatter.formatCellValue(cell);
        switch(cell.getCellTypeEnum()) {
            case BLANK: ret.add(new Text("")); break;
            case STRING: ret.add(new Text("")); break;
            case BOOLEAN: ret.add(new BooleanWritable(Boolean.valueOf(cellValue))); break;
            case NUMERIC: ret.add(new DoubleWritable(Double.parseDouble(cellValue))); break;
            default: ret.add(new Text(cellValue));
        }
    }

    return ret;

}
 
Example #7
Source File: CategoricalQualityAddFunction.java    From deeplearning4j with Apache License 2.0
@Override
public CategoricalQuality apply(CategoricalQuality v1, Writable writable) {

    long valid = v1.getCountValid();
    long invalid = v1.getCountInvalid();
    long countMissing = v1.getCountMissing();
    long countTotal = v1.getCountTotal() + 1;

    if (meta.isValid(writable))
        valid++;
    else if (writable instanceof NullWritable
                    || writable instanceof Text && (writable.toString() == null || writable.toString().isEmpty()))
        countMissing++;
    else
        invalid++;

    return new CategoricalQuality(valid, invalid, countMissing, countTotal);
}
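
The missing-value test above (a NullWritable, or a Text whose string form is null or empty) recurs in Example #19 and Example #26 below. A small helper, shown here as a hypothetical sketch rather than DataVec API, makes the idiom explicit:

import org.datavec.api.writable.NullWritable;
import org.datavec.api.writable.Text;
import org.datavec.api.writable.Writable;

final class MissingValues {
    // A value counts as "missing" when it is a NullWritable,
    // or a Text that renders as a null or empty String
    static boolean isMissing(Writable w) {
        return w instanceof NullWritable
                || (w instanceof Text && (w.toString() == null || w.toString().isEmpty()));
    }

    public static void main(String[] args) {
        System.out.println(isMissing(new Text("")));  // true
        System.out.println(isMissing(new Text("x"))); // false
    }
}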
 
Example #8
Source File: TestUI.java    From deeplearning4j with Apache License 2.0
@Test
@Ignore
public void testSequencePlot() throws Exception {

    Schema schema = new SequenceSchema.Builder().addColumnDouble("sinx")
                    .addColumnCategorical("cat", "s0", "s1", "s2").addColumnString("stringcol").build();

    int nSteps = 100;
    List<List<Writable>> sequence = new ArrayList<>(nSteps);
    for (int i = 0; i < nSteps; i++) {
        String c = "s" + i % 3;
        sequence.add(Arrays.<Writable>asList(new DoubleWritable(Math.sin(i / 10.0)), new Text(c),
                        new Text(String.valueOf(i))));
    }

    String tempDir = System.getProperty("java.io.tmpdir");
    String outPath = FilenameUtils.concat(tempDir, "datavec_seqplot_test.html");
    //        System.out.println(outPath);
    File f = new File(outPath);
    f.deleteOnExit();
    HtmlSequencePlotting.createHtmlSequencePlotFile("Title!", schema, sequence, f);


}
 
Example #9
Source File: CSVRegexRecordReader.java    From deeplearning4j with Apache License 2.0
protected List<Writable> parseLine(String line) {
    String[] split = line.split(delimiter, -1);
    List<Writable> ret = new ArrayList<>();
    for (int i = 0; i < split.length; i++) {
        String s = split[i];
        if (quote != null && s.startsWith(quote) && s.endsWith(quote)) {
            int n = quote.length();
            s = s.substring(n, s.length() - n).replace(quote + quote, quote);
        }
        if (regexs != null && regexs[i] != null) {
            Matcher m = patterns[i].matcher(s);
            if (m.matches()) {
                for (int j = 1; j <= m.groupCount(); j++) { //Note: Matcher.group(0) is the entire sequence; we only care about groups 1 onward
                    ret.add(new Text(m.group(j)));
                }
            } else {
                throw new IllegalStateException("Invalid line: value does not match regex (regex=\"" + regexs[i]
                                + "\"; value=\"" + s + "\"");
            }
        } else {
            ret.add(new Text(s));
        }
    }
    return ret;
}
 
Example #10
Source File: JsonExpanderTransformStepStepRunnerTest.java    From konduit-serving with Apache License 2.0
@Test
public void testJsonExpansionObjectArray() {
    JsonExpanderTransformStepRunner runner = new JsonExpanderTransformStepRunner(new JsonExpanderTransformStep());
    Record[] input = new Record[1];
    JsonArray inputArraysJson = new JsonArray();
    JsonObject jsonObject = new JsonObject();
    jsonObject.put("first", 1.0);
    jsonObject.put("second", "hello world");
    inputArraysJson.add(jsonObject);

    input[0] = new org.datavec.api.records.impl.Record(
            Arrays.asList(new Text(inputArraysJson.encodePrettily()))
            , null);

    Record[] transform = runner.transform(input);
    assertEquals(1, transform.length);
    assertEquals(2, transform[0].getRecord().size());
    assertEquals(1.0, transform[0].getRecord().get(0).toDouble(), 1e-1);
    assertEquals("hello world", transform[0].getRecord().get(1).toString());
}
 
Example #11
Source File: WordPieceTokenizerStepTest.java    From konduit-serving with Apache License 2.0
@Test
public void testWordPieceStepInference() throws Exception
{
    String sampleText = "These pages provide further information about the dictionary, its content and how it's kept up-to-date.";

    WordPieceTokenizerStepRunner step = new WordPieceTokenizerStepRunner(wordPieceTokenizerStep);

    BertIterator iterator = step.getToken(sampleText);

    assertNotEquals(0, iterator.next().getFeatures(0).length());

    List<Writable> ret = new ArrayList<>();
    ret.add(new Text(sampleText));

    Record[] tokenizedSentence = step.transform(new Record[]{
            new org.datavec.api.records.impl.Record(ret, null)
    });

    assertEquals(1, tokenizedSentence.length);
}
 
Example #12
Source File: TestTransformProcess.java    From DataVec with Apache License 2.0
@Test
public void testExecution(){

    Schema schema = new Schema.Builder()
            .addColumnsString("col")
            .addColumnsDouble("col2")
            .build();

    Map<Character,Integer> m = defaultCharIndex();
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .doubleMathOp("col2", MathOp.Add, 1.0)
            .build();

    List<Writable> in = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(2.0));
    List<Writable> exp = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(3.0));

    List<Writable> out = transformProcess.execute(in);
    assertEquals(exp, out);
}
 
Example #13
Source File: RegexRecordReaderTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRegexLineRecordReader() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    RecordReader rr = new RegexLineRecordReader(regex, 1);
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/logtestdata/logtestfile0.txt").getFile()));

    List<Writable> exp0 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"),
                    new Text("DEBUG"), new Text("First entry message!"));
    List<Writable> exp1 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"),
                    new Text("INFO"), new Text("Second entry message!"));
    List<Writable> exp2 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"),
                    new Text("WARN"), new Text("Third entry message!"));
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());

    //Test reset:
    rr.reset();
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());
}
 
Example #14
Source File: ExecutionTest.java    From DataVec with Apache License 2.0
@Test
public void testExecutionSimple() {
    Schema schema = new Schema.Builder().addColumnInteger("col0")
                    .addColumnCategorical("col1", "state0", "state1", "state2").addColumnDouble("col2").build();

    TransformProcess tp = new TransformProcess.Builder(schema).categoricalToInteger("col1")
                    .doubleMathOp("col2", MathOp.Add, 10.0).build();

    List<List<Writable>> inputData = new ArrayList<>();
    inputData.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    JavaRDD<List<Writable>> rdd = sc.parallelize(inputData);

    List<List<Writable>> out = new ArrayList<>(SparkTransformExecutor.execute(rdd, tp).collect());

    Collections.sort(out, new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> o1, List<Writable> o2) {
            return Integer.compare(o1.get(0).toInt(), o2.get(0).toInt());
        }
    });

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    assertEquals(expected, out);
}
 
Example #15
Source File: TestConvertToSequence.java    From DataVec with Apache License 2.0
@Test
public void testConvertToSequenceLength1(){

    Schema s = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<List<Writable>> allExamples = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    TransformProcess tp = new TransformProcess.Builder(s)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> rdd = sc.parallelize(allExamples);

    JavaRDD<List<List<Writable>>> out = SparkTransformExecutor.executeToSequence(rdd, tp);

    List<List<List<Writable>>> out2 = out.collect();

    assertEquals(3, out2.size());

    for( int i=0; i<3; i++ ){
        assertTrue(out2.contains(Collections.singletonList(allExamples.get(i))));
    }
}
 
Example #16
Source File: TestWritablesToStringFunctions.java    From DataVec with Apache License 2.0
@Test
public void testSequenceWritablesToString() throws Exception {

    List<List<Writable>> l = Arrays.asList(Arrays.<Writable>asList(new DoubleWritable(1.5), new Text("someValue")),
                    Arrays.<Writable>asList(new DoubleWritable(2.5), new Text("otherValue")));

    String expected = l.get(0).get(0).toString() + "," + l.get(0).get(1).toString() + "\n"
                    + l.get(1).get(0).toString() + "," + l.get(1).get(1).toString();

    assertEquals(expected, new SequenceWritablesToStringFunction(",").apply(l));
}
 
Example #17
Source File: StringMapTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Text map(Writable writable) {
    String orig = writable.toString();
    if (map.containsKey(orig)) {
        return new Text(map.get(orig));
    }

    if (writable instanceof Text)
        return (Text) writable;
    else
        return new Text(writable.toString());
}
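
Assuming StringMapTransform's two-argument constructor (the column name and a replacement map; the names below are illustrative), applying the transform to individual values might look like this sketch:

import java.util.Collections;
import java.util.Map;
import org.datavec.api.transform.transform.string.StringMapTransform;
import org.datavec.api.writable.Text;

public class StringMapExample {
    public static void main(String[] args) {
        // Hypothetical column name and mapping, for illustration only
        Map<String, String> mapping = Collections.singletonMap("NYC", "New York City");
        StringMapTransform t = new StringMapTransform("city", mapping);

        System.out.println(t.map(new Text("NYC")));    // mapped: New York City
        System.out.println(t.map(new Text("Boston"))); // no mapping entry: returned unchanged
    }
}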
 
Example #18
Source File: FileRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<Writable> record(URI uri, DataInputStream dataInputStream) throws IOException {
    invokeListeners(uri);
    //Here: reading the entire file to a Text writable
    BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
    StringBuilder sb = new StringBuilder();
    String line;
    while ((line = br.readLine()) != null) {
        sb.append(line).append("\n");
    }
    return Collections.singletonList((Writable) new Text(sb.toString()));
}
 
Example #19
Source File: RealQualityAddFunction.java    From DataVec with Apache License 2.0
@Override
public DoubleQuality call(DoubleQuality v1, Writable writable) throws Exception {

    long valid = v1.getCountValid();
    long invalid = v1.getCountInvalid();
    long countMissing = v1.getCountMissing();
    long countTotal = v1.getCountTotal() + 1;
    long nonReal = v1.getCountNonReal();
    long nan = v1.getCountNaN();
    long infinite = v1.getCountInfinite();

    if (meta.isValid(writable))
        valid++;
    else if (writable instanceof NullWritable
                    || writable instanceof Text && (writable.toString() == null || writable.toString().isEmpty()))
        countMissing++;
    else
        invalid++;

    String str = writable.toString();
    double d;
    try {
        d = Double.parseDouble(str);
        if (Double.isNaN(d))
            nan++;
        if (Double.isInfinite(d))
            infinite++;
    } catch (NumberFormatException e) {
        nonReal++;
    }

    return new DoubleQuality(valid, invalid, countMissing, countTotal, nonReal, nan, infinite);
}
 
Example #20
Source File: TestTransformProcess.java    From DataVec with Apache License 2.0
@Test
public void testExecuteToSequence() {

    Schema schema = new Schema.Builder()
            .addColumnsString("action")
            .build();

    Map<Character,Integer> m = defaultCharIndex();
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeAllColumnsExceptFor("action")
            .convertToSequence()
            .transform(new TextToCharacterIndexTransform("action", "action_sequence", m, true))
            .build();

    String s = "in text";
    List<Writable> input = Collections.<Writable>singletonList(new Text(s));

    List<List<Writable>> expSeq = new ArrayList<>(s.length());
    for( int i = 0; i<s.length(); i++) {
        expSeq.add(Collections.<Writable>singletonList(new IntWritable(m.get(s.charAt(i)))));
    }


    List<List<Writable>> out = transformProcess.executeToSequence(input);

    assertEquals(expSeq, out);
}
 
Example #21
Source File: GeographicMidpointReduction.java    From deeplearning4j with Apache License 2.0
@Override
public List<Writable> get() {
    //Guard before dividing: count == 0 would otherwise produce NaN/Infinity below
    if(count == 0){
        throw new IllegalStateException("Cannot calculate geographic midpoint: no datapoints were added to be reduced");
    }

    double x = sumx / count;
    double y = sumy / count;
    double z = sumz / count;

    if(Math.abs(x) < EDGE_CASE_EPS && Math.abs(y) < EDGE_CASE_EPS && Math.abs(z) < EDGE_CASE_EPS ){
        throw new IllegalStateException("No Geographic midpoint exists: midpoint is center of the earth");
    }

    double longRad = Math.atan2(y,x);
    double hyp = Math.sqrt(x*x + y*y);
    double latRad = Math.atan2(z, hyp);

    double latDeg = latRad / PI_180;
    double longDeg = longRad / PI_180;

    Preconditions.checkState(!Double.isNaN(latDeg), "Final latitude is NaN");
    Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");

    String str = latDeg + delim + longDeg;
    return Collections.<Writable>singletonList(new Text(str));
}
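
In effect, the reduction averages the accumulated unit vectors (sumx, sumy, sumz) and converts the mean vector back to spherical coordinates: longitude = atan2(y, x) and latitude = atan2(z, sqrt(x^2 + y^2)), with the division by PI_180 converting radians to degrees. When the mean vector is (near) zero the points cancel out and no midpoint exists, hence the center-of-the-earth check.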
 
Example #22
Source File: ConcatenateStringColumns.java    From deeplearning4j with Apache License 2.0
@Override
public List<Writable> map(List<Writable> writables) {
    StringBuilder newColumnText = new StringBuilder();
    List<Writable> out = new ArrayList<>(writables);
    int i = 0;
    for (String columnName : columnsToConcatenate) {
        if (i++ > 0)
            newColumnText.append(delimiter);
        int columnIdx = inputSchema.getIndexOfColumn(columnName);
        newColumnText.append(writables.get(columnIdx));
    }
    out.add(new Text(newColumnText.toString()));
    return out;
}
 
Example #23
Source File: RecordReaderBytesFunction.java    From deeplearning4j with Apache License 2.0
@Override
public List<Writable> apply(Pair<Text, BytesWritable> v1) {
    URI uri = URI.create(v1.getLeft().toString()); //left of the pair is the URI (Text); right is the file content (BytesWritable)
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(v1.getRight().getContent()));
    try {
        return recordReader.record(uri, dis);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }

}
 
Example #24
Source File: CSVSparkTransformTest.java    From DataVec with Apache License 2.0
@Test
public void testSingleBatchSequence() throws Exception {
    List<Writable> input = new ArrayList<>();
    input.add(new DoubleWritable(1.0));
    input.add(new DoubleWritable(2.0));

    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    List<Writable> output = new ArrayList<>();
    output.add(new Text("1.0"));
    output.add(new Text("2.0"));

    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);
    String[] values = new String[] {"1.0", "2.0"};
    SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values));
    BatchCSVRecord batchCSVRecord = new BatchCSVRecord();
    for (int i = 0; i < 3; i++)
        batchCSVRecord.add(record);
    BatchCSVRecord batchCSVRecord1 = csvSparkTransform.transform(batchCSVRecord);
    SequenceBatchCSVRecord sequenceBatchCSVRecord = new SequenceBatchCSVRecord();
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    Base64NDArrayBody sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    INDArray outputBody = Nd4jBase64.fromBase64(sequenceArray.getNdarray());


     //ensure accumulation
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    assertArrayEquals(new long[]{2,2,3},Nd4jBase64.fromBase64(sequenceArray.getNdarray()).shape());

    SequenceBatchCSVRecord transformed = csvSparkTransform.transformSequence(sequenceBatchCSVRecord);
    assertNotNull(transformed.getRecords());
    System.out.println(transformed);


}
 
Example #25
Source File: TestGeoReduction.java    From deeplearning4j with Apache License 2.0
@Test
public void testCustomReductions() {

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8")));

    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0"));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key")
                    .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> aggregableReduceOp = reducer.aggregableReducer();
    for (List<Writable> l : inputs)
        aggregableReduceOp.accept(l);
    List<Writable> out = aggregableReduceOp.get();

    assertEquals(2, out.size());
    assertEquals(expected, out);

    //Check schema:
    String[] expNames = new String[] {"key", "coordSum"};
    ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(2, outSchema.numColumns());
    for (int i = 0; i < 2; i++) {
        assertEquals(expNames[i], outSchema.getName(i));
        assertEquals(expTypes[i], outSchema.getType(i));
    }
}
 
Example #26
Source File: FilterWritablesBySchemaFunction.java    From DataVec with Apache License 2.0
@Override
public Boolean call(Writable v1) throws Exception {
    boolean valid = meta.isValid(v1);
    if (excludeMissing && (v1 instanceof NullWritable
                    || v1 instanceof Text && (v1.toString() == null || v1.toString().isEmpty())))
        return false; //Remove (spark)
    if (keepValid)
        return valid; //Spark: return true to keep
    else
        return !valid;
}