Java Code Examples for org.pentaho.di.core.RowMetaAndData#setRowMeta()
The following examples show how to use org.pentaho.di.core.RowMetaAndData#setRowMeta(). The examples are taken from open source projects; the source file and license are noted above each example.
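All of the examples below follow the same basic pattern: build a RowMeta describing the fields, attach it to a RowMetaAndData with setRowMeta(), and, where data is needed, supply a matching Object[] with setData(). The following minimal sketch distills that pattern; the field names and values echo Example 1 and are illustrative only, not taken verbatim from any project.

import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.value.ValueMetaString;

public class SetRowMetaSketch {
  public static void main( String[] args ) {
    // Describe the row layout: one value meta per field, in order.
    RowMeta rowMeta = new RowMeta();
    rowMeta.addValueMeta( new ValueMetaString( "FirstName" ) );
    rowMeta.addValueMeta( new ValueMetaString( "Phone" ) );

    // Attach the metadata and provide a data array whose length and
    // positions match the value metas added above.
    RowMetaAndData row = new RowMetaAndData();
    row.setRowMeta( rowMeta );
    row.setData( new Object[] { "John", "4074549921" } ); // illustrative values
  }
}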
Example 1
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 6 votes |
@Test
public void testAvroNestedReadLocalFileSystem() throws Exception {
  List<String> expectedRows = Arrays.asList( "John;4074549921", "Leslie;4079302194" );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setInputSchemaFile( getFilePath( "/sample-schema.avro" ) );
  avroInputFormat.setInputFile( getFilePath( "/sample-data.avro" ) );
  avroInputFormat.setUseFieldAsInputStream( false );
  avroInputFormat.setIsDataBinaryEncoded( true );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "FirstName" );
  addStringField( inputFields, "Phone" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "FirstName" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Phone" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 2
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void testAvroArrayAndMapComplexTypes() throws Exception {
  List<String> expectedRows = Arrays.asList( "string1;string2;string4", "string101;string102;string104" );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setInputFile( getFilePath( "/sampledata1.avro" ) );
  avroInputFormat.setUseFieldAsInputStream( false );
  avroInputFormat.setIsDataBinaryEncoded( true );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "parentString" );
  addStringField( inputFields, "parentStringMap[key1]" );
  addStringField( inputFields, "parentStringArray[0]" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "parentString" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentStringMap[key1]" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentStringArray[0]" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 3
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void testAvroDatumReadFromField() throws Exception {
  List<String> expectedRows = singletonList( "1;string1;string6" );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setDatum( true );
  avroInputFormat.setUseFieldAsInputStream( true );
  avroInputFormat.setUseFieldAsSchema( true );
  avroInputFormat.setInputStreamFieldName( "data" );
  avroInputFormat.setSchemaFieldName( "schema" );
  avroInputFormat.setIncomingFields( new Object[] { jsonDatumData, jsonSchema } );
  avroInputFormat.setIsDataBinaryEncoded( false );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "parentInt" );
  addStringField( inputFields, "parentString" );
  addStringField( inputFields, "childData.childString" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "parentInt" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentString" ) );
  rowMeta.addValueMeta( new ValueMetaString( "childData.childString" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  RowMeta inRowMeta = new RowMeta();
  inRowMeta.addValueMeta( new ValueMetaString( "data" ) );
  inRowMeta.addValueMeta( new ValueMetaString( "schema" ) );
  avroInputFormat.setIncomingRowMeta( inRowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 4
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void testAvroJsonDatumReadFromFile() throws Exception {
  String tempDir = Files.createTempDirectory( "avro" ).toString();
  String datumFile = tempDir + File.separator + "datum";
  FileUtils.writeStringToFile( new File( datumFile ), jsonDatumData, "utf8" );
  List<String> expectedRows = singletonList( "1;string1;string6" );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setDatum( true );
  avroInputFormat.setUseFieldAsInputStream( false );
  avroInputFormat.setUseFieldAsSchema( true );
  avroInputFormat.setInputFile( datumFile );
  avroInputFormat.setSchemaFieldName( "schema" );
  avroInputFormat.setIncomingFields( new Object[] { jsonSchema } );
  avroInputFormat.setIsDataBinaryEncoded( false );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "parentInt" );
  addStringField( inputFields, "parentString" );
  addStringField( inputFields, "childData.childString" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "parentInt" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentString" ) );
  rowMeta.addValueMeta( new ValueMetaString( "childData.childString" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  RowMeta inRowMeta = new RowMeta();
  inRowMeta.addValueMeta( new ValueMetaString( "schema" ) );
  avroInputFormat.setIncomingRowMeta( inRowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 5
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void testAvroBinaryDatumReadFromFile() throws Exception {
  List<String> expectedRows = singletonList( "1;aString" );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setDatum( true );
  avroInputFormat.setUseFieldAsInputStream( false );
  avroInputFormat.setUseFieldAsSchema( false );
  avroInputFormat.setInputFile( getFilePath( "/avro/flatschema.datum" ) );
  avroInputFormat.setInputSchemaFile( getFilePath( "/avro/flatschema.avsc" ) );
  avroInputFormat.setIsDataBinaryEncoded( true );
  avroInputFormat.setIncomingFields( new Object[] {} );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "parentInt" );
  addStringField( inputFields, "parentString" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "parentInt" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentString" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 6
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 5 votes |
@Test
public void testAvroBinaryDatumReadFromField() throws Exception {
  List<String> expectedRows = singletonList( "1;aString" );
  byte[] datumBytes = Files.readAllBytes( new File( getFilePath( "/avro/flatschema.datum" ) ).toPath() );
  PentahoAvroInputFormat avroInputFormat = new PentahoAvroInputFormat( mock( NamedCluster.class ) );
  avroInputFormat.setDatum( true );
  avroInputFormat.setUseFieldAsInputStream( true );
  avroInputFormat.setInputStreamFieldName( "binaryData" );
  avroInputFormat.setUseFieldAsSchema( false );
  avroInputFormat.setInputSchemaFile( getFilePath( "/avro/flatschema.avsc" ) );
  avroInputFormat.setIsDataBinaryEncoded( true );
  avroInputFormat.setIncomingFields( new Object[] { datumBytes } );
  List<AvroInputField> inputFields = new ArrayList<>();
  addStringField( inputFields, "parentInt" );
  addStringField( inputFields, "parentString" );
  avroInputFormat.setInputFields( inputFields );
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "parentInt" ) );
  rowMeta.addValueMeta( new ValueMetaString( "parentString" ) );
  row.setRowMeta( rowMeta );
  avroInputFormat.setOutputRowMeta( rowMeta );
  RowMeta inRowMeta = new RowMeta();
  inRowMeta.addValueMeta( new ValueMetaBinary( "binaryData" ) );
  avroInputFormat.setIncomingRowMeta( inRowMeta );
  IPentahoRecordReader recordReader = avroInputFormat.createRecordReader( null );
  assertEquals( expectedRows, generateDataSample( recordReader, inputFields ) );
}
Example 7
Source File: MetaInjectTest.java From pentaho-kettle with Apache License 2.0 | 5 votes |
private static RowMetaAndData createRowMetaAndData( ValueMetaInterface valueMeta, Object data ) {
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( valueMeta );
  row.setRowMeta( rowMeta );
  row.setData( new Object[] { data } );
  return row;
}
Example 8
Source File: CommonFormatShimTest.java From pentaho-hadoop-shims with Apache License 2.0 | 4 votes |
@Test
public void testParquetWriteSuccessLocalFileSystem() throws Exception {
  final String PARQUET_FILE_NAME = "test.parquet";
  String tempFile = Files.createTempDirectory( "parquet" ).toUri().toString();
  ConfigurationProxy jobConfiguration = new ConfigurationProxy();
  jobConfiguration.set( FileOutputFormat.OUTDIR, tempFile );
  String parquetFilePath = jobConfiguration.get( FileOutputFormat.OUTDIR ) + PARQUET_FILE_NAME;
  IPentahoParquetOutputFormat pentahoParquetOutputFormat = null;
  switch ( provider ) {
    case "APACHE":
      pentahoParquetOutputFormat = new PentahoApacheOutputFormat();
      break;
    case "TWITTER":
      pentahoParquetOutputFormat = new PentahoTwitterOutputFormat();
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }
  pentahoParquetOutputFormat.setOutputFile( parquetFilePath, true );
  pentahoParquetOutputFormat.setFields( ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );
  IPentahoRecordWriter recordWriter = pentahoParquetOutputFormat.createRecordWriter();
  RowMetaAndData rowInput = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  rowInput.setRowMeta( rowMeta );
  rowInput.setData( new Object[] { "Andrey", "11" } );
  recordWriter.write( rowInput );
  recordWriter.close();
  IPentahoRecordReader recordReader = readCreatedParquetFile( parquetFilePath );
  Object[] rowInputArr = new Object[] {
    rowInput.getData()[ 0 ].toString(), Long.parseLong( rowInput.getData()[ 1 ].toString() ) };
  recordReader.forEach(
    rowMetaAndData -> org.junit.Assert.assertArrayEquals( rowMetaAndData.getData(), rowInputArr ) );
}
Example 9
Source File: PentahoParquetRecordWriterTest.java From pentaho-hadoop-shims with Apache License 2.0 | 4 votes |
@Test
public void recordWriterCreateFile() throws Exception {
  IPentahoOutputFormat.IPentahoRecordWriter writer = null;
  Object recordWriterObject = null;
  switch ( provider ) {
    case "APACHE":
      org.apache.parquet.hadoop.api.WriteSupport apacheSupport =
        new org.pentaho.hadoop.shim.common.format.parquet.delegate.apache.PentahoParquetWriteSupport(
          ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );
      org.apache.parquet.hadoop.ParquetOutputFormat apacheNativeParquetOutputFormat =
        new org.apache.parquet.hadoop.ParquetOutputFormat<>( apacheSupport );
      org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData> apacheRecordWriter =
        (org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) apacheNativeParquetOutputFormat
          .getRecordWriter( task );
      recordWriterObject = apacheRecordWriter;
      writer = new org.pentaho.hadoop.shim.common.format.parquet.delegate.apache.PentahoParquetRecordWriter(
        apacheRecordWriter, task );
      break;
    case "TWITTER":
      parquet.hadoop.api.WriteSupport twitterSupport =
        new org.pentaho.hadoop.shim.common.format.parquet.delegate.twitter.PentahoParquetWriteSupport(
          ParquetUtils.createOutputFields( ParquetSpec.DataType.INT_64 ) );
      parquet.hadoop.ParquetOutputFormat twitterNativeParquetOutputFormat =
        new parquet.hadoop.ParquetOutputFormat<>( twitterSupport );
      parquet.hadoop.ParquetRecordWriter<RowMetaAndData> twitterRecordWriter =
        (parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) twitterNativeParquetOutputFormat.getRecordWriter( task );
      recordWriterObject = twitterRecordWriter;
      writer = new org.pentaho.hadoop.shim.common.format.parquet.delegate.twitter.PentahoParquetRecordWriter(
        twitterRecordWriter, task );
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }
  RowMetaAndData row = new RowMetaAndData();
  RowMeta rowMeta = new RowMeta();
  rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
  rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
  row.setRowMeta( rowMeta );
  switch ( testType ) {
    case "DATA":
      row.setData( new Object[] { "Alex", "87" } );
      break;
    case "NULL":
      row.setData( new Object[] { null, null } );
      break;
    default:
      org.junit.Assert.fail( "Invalid test type used." );
  }
  writer.write( row );
  switch ( provider ) {
    case "APACHE":
      ( (org.apache.parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) recordWriterObject ).close( task );
      break;
    case "TWITTER":
      ( (parquet.hadoop.ParquetRecordWriter<RowMetaAndData>) recordWriterObject ).close( task );
      break;
    default:
      org.junit.Assert.fail( "Invalid provider name used." );
  }
  Files.walk( Paths.get( tempFile.toString() ) )
    .filter( Files::isRegularFile )
    .forEach( ( f ) -> {
      String file = f.toString();
      if ( file.endsWith( "parquet" ) ) {
        try {
          switch ( testType ) {
            case "DATA":
              IPentahoInputFormat.IPentahoRecordReader recordReader =
                readCreatedParquetFile( Paths.get( file ).toUri().toString() );
              recordReader.forEach( rowMetaAndData -> Assert.assertTrue( rowMetaAndData.size() == 2 ) );
              break;
            case "NULL":
              Assert.assertTrue( Files.size( Paths.get( file ) ) > 0 );
              break;
            default:
              org.junit.Assert.fail( "Invalid test type used." );
          }
        } catch ( Exception e ) {
          e.printStackTrace();
        }
      }
    } );
}