org.apache.hadoop.io.ArrayWritable Java Examples

The following examples show how to use org.apache.hadoop.io.ArrayWritable. Each example is taken from an open-source project; the source file, originating project, and license are noted above each example.
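Before working through the project examples, here is a minimal sketch of the basic ArrayWritable round trip that most of them build on: wrapping a Writable[] together with its value class, serializing it, and reading it back. The class name ArrayWritableRoundTrip and the use of in-memory buffers are illustrative assumptions, not code from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ArrayWritableRoundTrip {

  public static void main(String[] args) throws IOException {
    // Wrap a Writable[] in an ArrayWritable; the value class is needed so that
    // elements can be instantiated during deserialization.
    ArrayWritable original = new ArrayWritable(Text.class,
        new Writable[] { new Text("a"), new Text("b"), new Text("c") });

    // Serialize into an in-memory buffer (any DataOutput behaves the same way).
    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);

    // Deserialize into a fresh instance constructed with the same value class.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    ArrayWritable copy = new ArrayWritable(Text.class);
    copy.readFields(in);

    // get() exposes the underlying Writable[]; toStrings() converts each element via toString().
    for (String s : copy.toStrings()) {
      System.out.println(s); // prints a, b, c
    }
  }
}

Note that when the MapReduce framework has to instantiate the type reflectively, for example as a reducer input value, the documented approach is a small subclass whose no-argument constructor fixes the value class; several examples below (such as Example #5) achieve the same effect by calling new ArrayWritable(SomeType.class) by hand before readFields.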
Example #1
Source File: AbstractParquetMapInspector.java    From parquet-mr with Apache License 2.0
@Override
public int getMapSize(final Object data) {
  if (data == null) {
    return -1;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return -1;
    } else {
      return ((ArrayWritable) mapContainer[0]).get().length;
    }
  }

  if (data instanceof Map) {
    return ((Map) data).size();
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #2
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint()
		throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2003encrypt.xls";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "de");

	// low footprint
	conf.set("hadoopoffice.read.lowFootprint", "true");
	// for decryption simply set the password
	conf.set("hadoopoffice.read.security.crypt.password", "test2");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);

	InterruptedException ex = assertThrows(InterruptedException.class,
			() -> reader.initialize(splits.get(0), context), "Exception is thrown in case of wrong password");
}
 
Example #3
Source File: FSEditLogOp.java    From hadoop with Apache License 2.0
@Override
public void writeFields(DataOutputStream out) throws IOException {
  FSImageSerialization.writeLong(inodeId, out);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShort(replication, out);
  FSImageSerialization.writeLong(mtime, out);
  FSImageSerialization.writeLong(atime, out);
  FSImageSerialization.writeLong(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    AclEditLogUtil.write(aclEntries, out);
    XAttrEditLogProto.Builder b = XAttrEditLogProto.newBuilder();
    b.addAllXAttrs(PBHelper.convertXAttrProto(xAttrs));
    b.build().writeDelimitedTo(out);
    FSImageSerialization.writeString(clientName,out);
    FSImageSerialization.writeString(clientMachine,out);
    FSImageSerialization.writeBoolean(overwrite, out);
    FSImageSerialization.writeByte(storagePolicyId, out);
    // write clientId and callId
    writeRpcIds(rpcClientId, rpcCallId, out);
  }
}
 
Example #4
Source File: RealtimeUnmergedRecordReader.java    From hudi with Apache License 2.0
/**
 * Constructs an unmerged record reader that consumes both Parquet and log records in parallel and buffers them
 * for upstream clients to consume.
 *
 * @param split File split
 * @param job Job Configuration
 * @param realReader Parquet Reader
 */
public RealtimeUnmergedRecordReader(HoodieRealtimeFileSplit split, JobConf job,
    RecordReader<NullWritable, ArrayWritable> realReader) {
  super(split, job);
  this.parquetReader = new SafeParquetRecordReaderWrapper(realReader);
  // Iterator for consuming records from parquet file
  this.parquetRecordsIterator = new RecordReaderValueIterator<>(this.parquetReader);
  this.executor = new BoundedInMemoryExecutor<>(getMaxCompactionMemoryInBytes(), getParallelProducers(),
      Option.empty(), x -> x, new DefaultSizeEstimator<>());
  // Consumer of this record reader
  this.iterator = this.executor.getQueue().iterator();
  this.logRecordScanner = new HoodieUnMergedLogRecordScanner(FSUtils.getFs(split.getPath().toString(), jobConf),
      split.getBasePath(), split.getDeltaLogPaths(), getReaderSchema(), split.getMaxCommitTime(),
      Boolean.parseBoolean(jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
      false, jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE), record -> {
        // convert Hoodie log record to Hadoop AvroWritable and buffer
        GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema()).get();
        ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(rec, getHiveSchema());
        this.executor.getQueue().insertRecord(aWritable);
  });
  // Start reading and buffering
  this.executor.startProducers();
}
 
Example #5
Source File: DiscoveryLogic.java    From datawave with Apache License 2.0
/**
 * Takes in a batch scanner and returns an iterator over the DiscoveredThing objects contained in the values.
 *
 * @param scanner the batch scanner whose values encode serialized DiscoveredThing arrays
 * @return an iterator over the DiscoveredThing objects decoded from the scanner's values
 */
public static Iterator<DiscoveredThing> transformScanner(final BatchScanner scanner) {
    return concat(transform(scanner.iterator(), new Function<Entry<Key,Value>,Iterator<DiscoveredThing>>() {
        DataInputBuffer in = new DataInputBuffer();
        
        @Override
        public Iterator<DiscoveredThing> apply(Entry<Key,Value> from) {
            Value value = from.getValue();
            in.reset(value.get(), value.getSize());
            ArrayWritable aw = new ArrayWritable(DiscoveredThing.class);
            try {
                aw.readFields(in);
            } catch (IOException e) {
                log.error(e);
                return null;
            }
            ArrayList<DiscoveredThing> thangs = Lists.newArrayListWithCapacity(aw.get().length);
            for (Writable w : aw.get()) {
                thangs.add((DiscoveredThing) w);
            }
            return thangs.iterator();
        }
    }));
}
 
Example #6
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
		throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "de");

	// low footprint
	conf.set("hadoopoffice.read.lowFootprint", "true");
	conf.set("hadoopoffice.read.lowFootprint.parser", "stax");
	// for decryption simply set the password
	conf.set("hadoopoffice.read.security.crypt.password", "test2");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
	InterruptedException ex = assertThrows(InterruptedException.class,
			() -> reader.initialize(splits.get(0), context), "Exception is thrown in case of wrong password");
}
 
Example #7
Source File: StandardParquetHiveMapInspector.java    From parquet-mr with Apache License 2.0
@Override
public Object getMapValueElement(final Object data, final Object key) {
  if (data == null || key == null) {
    return null;
  }
  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return null;
    }
    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    for (final Writable obj : mapArray) {
      final ArrayWritable mapObj = (ArrayWritable) obj;
      final Writable[] arr = mapObj.get();
      if (key.equals(arr[0])) {
        return arr[1];
      }
    }
    return null;
  }
  if (data instanceof Map) {
    return ((Map) data).get(key);
  }
  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #8
Source File: ArrayWritableObjectInspector.java    From indexr with Apache License 2.0
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
    if (data == null) {
        return null;
    }

    if (data instanceof ArrayWritable) {
        final ArrayWritable arr = (ArrayWritable) data;
        return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
    }

    //since setStructFieldData and create return a list, getStructFieldData should be able to
    //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
    //is something else.
    if (data instanceof List) {
        return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
    }

    throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #9
Source File: QueryUtils.java    From incubator-retired-pirk with Apache License 2.0
/**
 * Pulls the correct selector from the MapWritable data element given the queryType
 * <p>
 * Pulls the first element of the array if the element is an array type.
 */
public static String getSelectorByQueryType(MapWritable dataMap, QuerySchema qSchema, DataSchema dSchema)
{
  String selector;

  String fieldName = qSchema.getSelectorName();
  if (dSchema.isArrayElement(fieldName))
  {
    if (dataMap.get(dSchema.getTextName(fieldName)) instanceof WritableArrayWritable)
    {
      String[] selectorArray = ((WritableArrayWritable) dataMap.get(dSchema.getTextName(fieldName))).toStrings();
      selector = selectorArray[0];
    }
    else
    {
      String[] elementArray = ((ArrayWritable) dataMap.get(dSchema.getTextName(fieldName))).toStrings();
      selector = elementArray[0];
    }
  }
  else
  {
    selector = dataMap.get(dSchema.getTextName(fieldName)).toString();
  }

  return selector;
}
 
Example #10
Source File: FSEditLogOp.java    From big-c with Apache License 2.0
@Override
public void writeFields(DataOutputStream out) throws IOException {
  FSImageSerialization.writeLong(inodeId, out);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShort(replication, out);
  FSImageSerialization.writeLong(mtime, out);
  FSImageSerialization.writeLong(atime, out);
  FSImageSerialization.writeLong(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    AclEditLogUtil.write(aclEntries, out);
    XAttrEditLogProto.Builder b = XAttrEditLogProto.newBuilder();
    b.addAllXAttrs(PBHelper.convertXAttrProto(xAttrs));
    b.build().writeDelimitedTo(out);
    FSImageSerialization.writeString(clientName,out);
    FSImageSerialization.writeString(clientMachine,out);
    FSImageSerialization.writeBoolean(overwrite, out);
    FSImageSerialization.writeByte(storagePolicyId, out);
    // write clientId and callId
    writeRpcIds(rpcClientId, rpcCallId, out);
  }
}
 
Example #11
Source File: HoodieParquetInputFormat.java    From hudi with Apache License 2.0
@Override
public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSplit split, final JobConf job,
    final Reporter reporter) throws IOException {
  // TODO enable automatic predicate pushdown after fixing issues
  // FileSplit fileSplit = (FileSplit) split;
  // HoodieTableMetadata metadata = getTableMetadata(fileSplit.getPath().getParent());
  // String tableName = metadata.getTableName();
  // String mode = HoodieHiveUtil.readMode(job, tableName);

  // if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) {
  // FilterPredicate predicate = constructHoodiePredicate(job, tableName, split);
  // LOG.info("Setting parquet predicate push down as " + predicate);
  // ParquetInputFormat.setFilterPredicate(job, predicate);
  // clearOutExistingPredicate(job);
  // }
  return super.getRecordReader(split, job, reporter);
}
 
Example #12
Source File: FSEditLogOp.java    From RDFS with Apache License 2.0
@Override 
void writeFields(DataOutputStream out) throws IOException {
  out.writeInt(5);
  FSImageSerialization.writeString(path, out);
  FSImageSerialization.writeShortAsString(replication, out);
  FSImageSerialization.writeLongAsString(mtime, out);
  FSImageSerialization.writeLongAsString(atime, out);
  FSImageSerialization.writeLongAsString(blockSize, out);
  new ArrayWritable(Block.class, blocks).write(out);
  permissions.write(out);

  if (this.opCode == OP_ADD) {
    FSImageSerialization.writeString(clientName,out);
    FSImageSerialization.writeString(clientMachine,out);
  }
}
 
Example #13
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");
	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "stax");
	// for decryption simply set the password

	job.set("hadoopoffice.read.security.crypt.password", "test2");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNull(reader, "Null record reader implies invalid password");
}
 
Example #14
Source File: AbstractSpreadSheetDocumentRecordWriter.java    From hadoopoffice with Apache License 2.0
/**
*
* Writes a SpreadSheetDAO into a table document. Note that this does not necessarily mean it has already been written to the OutputStream; usually it is only added to the in-memory representation.
* @param key is ignored
* @param value is a SpreadSheet Cell to be inserted into the table document
*
*/
@Override
public synchronized void write(NullWritable key, K value) throws IOException {
		try {
			if (value==null) {
				return;
			}
			if (value instanceof ArrayWritable) {
				ArrayWritable row = (ArrayWritable)value;
				Writable[] rowCellDAO = row.get();
				for (int i=0;i<rowCellDAO.length;i++) {
					this.officeWriter.write(rowCellDAO[i]);
				}
			} else {
				this.officeWriter.write(value);
			}
		} catch (OfficeWriterException e) {
			LOG.error(e);
		}
}
 
Example #15
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2003SingleSheetEncryptedNegativeLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2003encrypt.xls";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");

	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");
	// for decryption simply set the password
	job.set("hadoopoffice.read.security.crypt.password", "test2");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNull(reader, "Null record reader implies invalid password");
}
 
Example #16
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");
	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "sax");
	// for decryption simply set the password

	job.set("hadoopoffice.read.security.crypt.password", "test2");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNull(reader, "Null record reader implies invalid password");
}
 
Example #17
Source File: TestHoodieParquetInputFormat.java    From hudi with Apache License 2.0
private void ensureRecordsInCommit(String msg, String commit, int expectedNumberOfRecordsInCommit,
    int totalExpected) throws IOException {
  int actualCount = 0;
  int totalCount = 0;
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
  for (InputSplit split : splits) {
    RecordReader<NullWritable, ArrayWritable> recordReader = inputFormat.getRecordReader(split, jobConf, null);
    NullWritable key = recordReader.createKey();
    ArrayWritable writable = recordReader.createValue();

    while (recordReader.next(key, writable)) {
      // writable returns an array with [field1, field2, _hoodie_commit_time,
      // _hoodie_commit_seqno]
      // Take the commit time and compare with the one we are interested in
      if (commit.equals((writable.get()[2]).toString())) {
        actualCount++;
      }
      totalCount++;
    }
  }
  assertEquals(expectedNumberOfRecordsInCommit, actualCount, msg);
  assertEquals(totalExpected, totalCount, msg);
}
 
Example #18
Source File: LinkedMapWritable.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public String toString() {
    Iterator<Entry<Writable, Writable>> i = entrySet().iterator();
    if (!i.hasNext())
        return "{}";

    StringBuilder sb = new StringBuilder();
    sb.append('{');
    for (;;) {
        Entry<Writable, Writable> e = i.next();
        Writable key = e.getKey();
        Writable value = e.getValue();
        sb.append(key == this ? "(this Map)" : key);
        sb.append('=');
        if (value instanceof ArrayWritable) {
            sb.append(Arrays.toString(((ArrayWritable) value).get()));
        }
        else {
            sb.append(value == this ? "(this Map)" : value);
        }
        if (!i.hasNext())
            return sb.append('}').toString();
        sb.append(", ");
    }
}
 
Example #19
Source File: ArrayWritableObjectInspector.java    From parquet-mr with Apache License 2.0
@Override
public Object getStructFieldData(final Object data, final StructField fieldRef) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final ArrayWritable arr = (ArrayWritable) data;
    return arr.get()[((StructFieldImpl) fieldRef).getIndex()];
  }

  //since setStructFieldData and create return a list, getStructFieldData should be able to 
  //handle list data. This is required when table serde is ParquetHiveSerDe and partition serde
  //is something else.
  if (data instanceof List) {
    return ((List) data).get(((StructFieldImpl) fieldRef).getIndex());
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #20
Source File: DataWritableGroupConverter.java    From parquet-mr with Apache License 2.0
public final ArrayWritable getCurrentArray() {
  final Writable[] writableArr;
  if (this.rootMap != null) { // We're at the root : we can safely re-use the same map to save perf
    writableArr = this.rootMap;
  } else {
    writableArr = new Writable[currentArr.length];
  }

  for (int i = 0; i < currentArr.length; i++) {
    final Object obj = currentArr[i];
    if (obj instanceof List) {
      final List<?> objList = (List<?>)obj;
      final ArrayWritable arr = new ArrayWritable(Writable.class,
          objList.toArray(new Writable[objList.size()]));
      writableArr[i] = arr;
    } else {
      writableArr[i] = (Writable) obj;
    }
  }
  return new ArrayWritable(Writable.class, writableArr);
}
 
Example #21
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "multisheetheader.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "us");
	conf.set("hadoopoffice.read.header.read", "true");
	conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
	conf.set("hadoopoffice.read.header.column.names.regex","column");
	conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
	conf.set("hadoopoffice.read.lowFootprint", "true");
	conf.set("hadoopoffice.read.lowFootprint.parser", "sax");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull(reader, "Format returned  null RecordReader");
	reader.initialize(splits.get(0), context);

	assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
			" header column 1 correctly read");
	assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
			" header column 2 correctly read");
	assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
			" header column 3 correctly read");
}
 
Example #22
Source File: ParquetHiveArrayInspector.java    From parquet-mr with Apache License 2.0
@Override
public List<?> getList(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] listContainer = ((ArrayWritable) data).get();

    if (listContainer == null || listContainer.length == 0) {
      return null;
    }

    final Writable subObj = listContainer[0];

    if (subObj == null) {
      return null;
    }

    final Writable[] array = ((ArrayWritable) subObj).get();
    final List<Writable> list = new ArrayList<Writable>();

    for (final Writable obj : array) {
      list.add(obj);
    }

    return list;
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #23
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java    From hadoopoffice with Apache License 2.0 5 votes vote down vote up
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedPositiveLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "excel2013encrypt.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.read.locale.bcp47", "de");
	// low footprint
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "sax");
	// for decryption simply set the password
	job.set("hadoopoffice.read.security.crypt.password", "test");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNotNull(reader, "Format returned  null RecordReader");
	Text spreadSheetKey = new Text();
	ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);
	assertTrue(reader.next(spreadSheetKey, spreadSheetValue), "Input Split for Excel file contains row 1");
	assertEquals("[excel2013encrypt.xlsx]Sheet1!A1", spreadSheetKey.toString(),
			"Input Split for Excel file has keyname == \"[excel2013encrypt.xlsx]Sheet1!A1\"");
	assertEquals(3, spreadSheetValue.get().length, "Input Split for Excel file contains row 1 with 3 columns");
	assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
	assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
			"Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
	assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
			"Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
	assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
	assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
			"Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
}
 
Example #24
Source File: TestStandardParquetHiveMapInspector.java    From parquet-mr with Apache License 2.0
@Test
public void testRegularMap() {
  final Writable[] entry1 = new Writable[]{new IntWritable(0), new IntWritable(1)};
  final Writable[] entry2 = new Writable[]{new IntWritable(2), new IntWritable(3)};

  final ArrayWritable internalMap = new ArrayWritable(ArrayWritable.class, new Writable[]{
    new ArrayWritable(Writable.class, entry1), new ArrayWritable(Writable.class, entry2)});

  final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new Writable[]{internalMap});

  assertEquals("Wrong result of inspection", new IntWritable(1), inspector.getMapValueElement(map, new IntWritable(0)));
  assertEquals("Wrong result of inspection", new IntWritable(3), inspector.getMapValueElement(map, new IntWritable(2)));
  assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 0)));
  assertNull("Wrong result of inspection", inspector.getMapValueElement(map, new ShortWritable((short) 2)));
}
 
Example #25
Source File: ArrayWritableObjectInspector.java    From parquet-mr with Apache License 2.0
@Override
public List<Object> getStructFieldsDataAsList(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final ArrayWritable arr = (ArrayWritable) data;
    final Object[] arrWritable = arr.get();
    return new ArrayList<Object>(Arrays.asList(arrWritable));
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #26
Source File: ParquetRecordReaderWrapper.java    From parquet-mr with Apache License 2.0
public ParquetRecordReaderWrapper(
    final ParquetInputFormat<ArrayWritable> newInputFormat,
    final InputSplit oldSplit,
    final JobConf oldJobConf,
    final Reporter reporter)
        throws IOException, InterruptedException {
  this(newInputFormat, oldSplit, oldJobConf, reporter,
      (new HiveBindingFactory()).create());
}
 
Example #27
Source File: TestParquetHiveArrayInspector.java    From parquet-mr with Apache License 2.0
@Test
public void testNullContainer() {
  final ArrayWritable list = new ArrayWritable(ArrayWritable.class, null);
  assertEquals("Wrong size", -1, inspector.getListLength(list));
  assertNull("Should be null", inspector.getList(list));
  assertNull("Should be null", inspector.getListElement(list, 0));
}
 
Example #28
Source File: AbstractParquetMapInspector.java    From parquet-mr with Apache License 2.0
@Override
public Map<?, ?> getMap(final Object data) {
  if (data == null) {
    return null;
  }

  if (data instanceof ArrayWritable) {
    final Writable[] mapContainer = ((ArrayWritable) data).get();

    if (mapContainer == null || mapContainer.length == 0) {
      return null;
    }

    final Writable[] mapArray = ((ArrayWritable) mapContainer[0]).get();
    final Map<Writable, Writable> map = new HashMap<Writable, Writable>();

    for (final Writable obj : mapArray) {
      final ArrayWritable mapObj = (ArrayWritable) obj;
      final Writable[] arr = mapObj.get();
      map.put(arr[0], arr[1]);
    }

    return map;
  }

  if (data instanceof Map) {
    return (Map) data;
  }

  throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName());
}
 
Example #29
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException, InterruptedException {
	Configuration conf = new Configuration(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "multisheetheader.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	// set locale to the one of the test data
	conf.set("hadoopoffice.read.locale.bcp47", "us");
	conf.set("hadoopoffice.read.header.read", "true");
	conf.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
	conf.set("hadoopoffice.read.header.column.names.regex","column");
	conf.set("hadoopoffice.read.header.column.names.replace", "spalte");
	conf.set("hadoopoffice.read.lowFootprint", "true");
	conf.set("hadoopoffice.read.lowFootprint.parser", "stax");
	Job job = Job.getInstance(conf);
	FileInputFormat.setInputPaths(job, file);
	TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	List<InputSplit> splits = format.getSplits(job);
	assertEquals(1, splits.size(), "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
	assertNotNull(reader, "Format returned  null RecordReader");
	reader.initialize(splits.get(0), context);

	assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
			" header column 1 correctly read");
	assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
			" header column 2 correctly read");
	assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
			" header column 3 correctly read");
}
 
Example #30
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java    From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetHeaderRegExLowFootprint() throws IOException {
	JobConf job = new JobConf(defaultConf);
	ClassLoader classLoader = getClass().getClassLoader();
	String fileName = "multisheetheader.xlsx";
	String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
	Path file = new Path(fileNameSpreadSheet);
	FileInputFormat.setInputPaths(job, file);
	// set locale to the one of the test data
	job.set("hadoopoffice.read.locale.bcp47", "us");
	job.set("hadoopoffice.read.header.read", "true");
	job.set("hadoopoffice.read.header.skipheaderinallsheets", "true");
	job.set("hadoopoffice.read.header.column.names.regex", "column");
	job.set("hadoopoffice.read.header.column.names.replace", "spalte");
	job.set("hadoopoffice.read.lowFootprint", "true");

	job.set("hadoopoffice.read.lowFootprint.parser", "stax");
	ExcelFileInputFormat format = new ExcelFileInputFormat();
	format.configure(job);
	InputSplit[] inputSplits = format.getSplits(job, 1);
	assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
	RecordReader<Text, ArrayWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
	assertNotNull(reader, "Format returned  null RecordReader");

	assertEquals("spalte1", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[0],
			" header column 1 correctly read");
	assertEquals("spalte2", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[1],
			" header column 2 correctly read");
	assertEquals("spalte3", ((ExcelRecordReader) reader).getOfficeReader().getCurrentParser().getHeader()[2],
			" header column 3 correctly read");
}