/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.micmiu.hive.fotmater;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Properties;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.util.Progressable;

/**
 * FileOutputFormat for base64 encoded text files.
 *
 * Each line is a base64-encoded record. The key is ignored; the value is a
 * Text or BytesWritable whose bytes are base64-encoded before being written.
 *
 * This class accepts a configurable parameter:
 * "base64.text.output.format.signature"
 *
 * The UTF-8 encoded signature will be prepended to the bytes of each record
 * before we do base64 encoding.
 */
public class Base64TextOutputFormat<K extends WritableComparable, V extends Writable>
    extends HiveIgnoreKeyTextOutputFormat<K, V> {

   * Base64RecordWriter.
  public static class Base64RecordWriter implements RecordWriter,
      JobConfigurable {

    RecordWriter writer;
    BytesWritable bytesWritable;

    public Base64RecordWriter(RecordWriter writer) {
      this.writer = writer;
      bytesWritable = new BytesWritable();

    public void write(Writable w) throws IOException {

      // Get input data
      byte[] input;
      int inputLength;
      if (w instanceof Text) {
        input = ((Text) w).getBytes();
        inputLength = ((Text) w).getLength();
      } else {
        assert (w instanceof BytesWritable);
        input = ((BytesWritable) w).getBytes();
        inputLength = ((BytesWritable) w).getLength();

      // Add signature
      byte[] wrapped = new byte[signature.length + inputLength];
      for (int i = 0; i < signature.length; i++) {
        wrapped[i] = signature[i];
      for (int i = 0; i < inputLength; i++) {
        wrapped[i + signature.length] = input[i];

      // Encode
      byte[] output = base64.encode(wrapped);
      bytesWritable.set(output, 0, output.length);


    public void close(boolean abort) throws IOException {

    private byte[] signature;
    private final Base64 base64 = Base64TextInputFormat.createBase64();

    public void configure(JobConf job) {
      try {
        String signatureString = job.get("base64.text.output.format.signature");
        if (signatureString != null) {
          signature = signatureString.getBytes("UTF-8");
        } else {
          signature = new byte[0];
      } catch (UnsupportedEncodingException e) {

  public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
      Class<? extends Writable> valueClass, boolean isCompressed,
      Properties tableProperties, Progressable progress) throws IOException {

    Base64RecordWriter writer = new Base64RecordWriter(super
        .getHiveRecordWriter(jc, finalOutPath, BytesWritable.class,
        isCompressed, tableProperties, progress));
    return writer;
