cascading.tap.Tap Java Examples

The following examples show how to use cascading.tap.Tap. They are drawn from several open-source projects; the source file and project license are noted above each example.
Example #1
Source File: TapDataWriter.java    From plunger with Apache License 2.0
/** Writes the {@link Tuple Tuples} provided in the {@link Data} instance to the supplied {@link Tap}. */
public Tap<?, ?, ?> toTap(Tap<?, ?, ?> tap) throws IOException {
  Class<?> tapConfigClass = TapTypeUtil.getTapConfigClass(tap);
  if (Configuration.class.equals(tapConfigClass)) {
    if (tap instanceof BasePartitionTap) {
      writeToHadoopPartitionTap(tap);
    } else {
      writeToHadoopTap(tap);
    }
  } else if (Properties.class.equals(tapConfigClass)) {
    writeToLocalTap(tap);
  } else {
    throw new IllegalArgumentException("Unsupported tap type: " + tap.getClass());
  }
  return tap;
}
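
For orientation, here is a hedged sketch of how this entry point is typically driven. The Data construction mirrors Example #9 below; the field names, path, and Hfs/TextDelimited choices are illustrative assumptions, not part of the plunger API contract:

// Illustrative usage only; fields, scheme, and path are assumptions.
Fields fields = new Fields("letter", "number", "word");
Data data = new Data(fields, Arrays.asList(new Tuple("X", 1, "hello"), new Tuple("Y", 2, "world")));

// Hfs is configured by a Hadoop Configuration, so toTap(...) dispatches to writeToHadoopTap(...).
Tap<?, ?, ?> sink = new cascading.tap.hadoop.Hfs(
    new cascading.scheme.hadoop.TextDelimited(fields), "/tmp/plunger-out");
new TapDataWriter(data).toTap(sink);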
 
Example #2
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToHadoopPartitionTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  BasePartitionTap<JobConf, ?, ?> hadoopTap = (BasePartitionTap<JobConf, ?, ?>) tap;
  JobConf conf = new JobConf();

  // Avoids deletion of results when using a partition tap (close() will delete the _temporary before the copy has
  // been done if not in a flow)
  HadoopUtil.setIsInflow(conf);

  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();

  // We need to clean up the '_temporary' folder
  String basePath = hadoopTap.getParent().getFullIdentifier(flowProcess);
  deleteTemporaryPath(new Path(basePath), FileSystem.get(conf));
}
 
Example #3
Source File: BottomUpBoundariesNodePartitioner.java    From cascading-flink with Apache License 2.0
public BottomUpNoSplitConsecutiveBoundariesExpressionGraph()
{
	super( SearchOrder.ReverseTopological );

	this.arc(
			or(
					new FlowElementExpression( Boundary.class, TypeExpression.Topo.LinearOut ),
					new FlowElementExpression( Tap.class, TypeExpression.Topo.LinearOut ),
					new FlowElementExpression( Group.class, TypeExpression.Topo.LinearOut )
			),

			PathScopeExpression.ANY,

			new BoundariesElementExpression( ElementCapture.Primary )
	);
}
 
Example #4
Source File: BoundaryBeforeSinkTapTransformer.java    From cascading-flink with Apache License 2.0
public SinkTapGraph() {

	super(SearchOrder.ReverseTopological);

	arc(
			not(
					OrElementExpression.or(
							new FlowElementExpression(Extent.class),
							new FlowElementExpression(Boundary.class)
					)
			),
			ScopeExpression.ANY,
			new FlowElementExpression(ElementCapture.Primary, Tap.class)
	);
}
 
Example #5
Source File: JDBCScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sourceConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf conf ) {
    int concurrentReads = ( (JDBCTap) tap ).concurrentReads;

    if( selectQuery != null )
        DBInputFormat.setInput( conf, TupleRecord.class, selectQuery, countQuery, limit, concurrentReads );
    else {
        String tableName = ( (JDBCTap) tap ).getTableName();
        String joinedOrderBy = orderBy != null ? Util.join( orderBy, ", " ) : null;
        DBInputFormat.setInput( conf, TupleRecord.class, tableName, conditions, joinedOrderBy, limit, concurrentReads, columns );
    }

    if( inputFormatClass != null )
        conf.setInputFormat( inputFormatClass );
}
 
Example #6
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {

	Tap tap = this.getSingle(node.getSourceTaps());
	JobConf tapConfig = new JobConf(this.getNodeConfig(node));
	tap.sourceConfInit(flowProcess, tapConfig);
	tapConfig.set( "cascading.step.source", Tap.id( tap ) );

	Fields outFields = tap.getSourceFields();
	registerKryoTypes(outFields);

	JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
	MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

	DataSet<Tuple> src = env
			.createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
			.name(tap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

	return src;
}
 
Example #7
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToLocalTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) tap;
  Properties conf = new Properties();
  LocalFlowProcess flowProcess = new LocalFlowProcess(conf);

  flowProcess.setStepStats(new LocalStepStats(new NullFlowStep(), NullClientState.INSTANCE));

  localTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = localTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();
  localTap.commitResource(conf);
}
 
Example #8
Source File: TapDataWriterTest.java    From plunger with Apache License 2.0
@Test
public void writeMultiSink() throws IOException {
  File tsvFolder1 = temporaryFolder.newFolder("data1");
  File tsvFolder2 = temporaryFolder.newFolder("data2");

  Tap<?, ?, ?> tap1 = new cascading.tap.hadoop.Hfs(new cascading.scheme.hadoop.TextDelimited(fields),
      tsvFolder1.getAbsolutePath());
  Tap<?, ?, ?> tap2 = new cascading.tap.hadoop.Hfs(new cascading.scheme.hadoop.TextDelimited(valueFields),
      tsvFolder2.getAbsolutePath());

  @SuppressWarnings("unchecked")
  cascading.tap.MultiSinkTap<?, ?, ?> multiTap = new cascading.tap.MultiSinkTap<>(tap1, tap2);
  Tap<?, ?, ?> returnedTap = new TapDataWriter(data).toTap(multiTap);

  assertThat(returnedTap == multiTap, is(true));

  String written1 = FileUtils.readFileToString(new File(tsvFolder1, "part-00000"), Charset.forName("UTF-8"));
  assertThat(written1, is("X\t1\thello\nY\t2\tworld\n"));

  String written2 = FileUtils.readFileToString(new File(tsvFolder2, "part-00000"), Charset.forName("UTF-8"));
  assertThat(written2, is("1\thello\n2\tworld\n"));
}
 
Example #9
Source File: TapDataWriterTest.java    From plunger with Apache License 2.0
@Test
public void writeHadoopPartition() throws IOException {
  File tsvFolder = temporaryFolder.newFolder("data");
  cascading.tap.hadoop.PartitionTap partitionTap = new cascading.tap.hadoop.PartitionTap(
      new cascading.tap.hadoop.Hfs(new cascading.scheme.hadoop.TextDelimited(valueFields),
          tsvFolder.getAbsolutePath()), new DelimitedPartition(partitionFields));

  Data data = new Data(fields, Arrays.asList(new Tuple("X", 1, "hello"), new Tuple("Y", 2, "world")));
  Tap<?, ?, ?> returnedTap = new TapDataWriter(data).toTap(partitionTap);

  assertThat((cascading.tap.hadoop.PartitionTap) returnedTap, is(partitionTap));

  File tsvFileX = new File(new File(tsvFolder, "X"), "part-00000-00000");
  String writtenX = FileUtils.readFileToString(tsvFileX, Charset.forName("UTF-8"));

  assertThat(writtenX, is("1\thello\n"));

  File tsvFileY = new File(new File(tsvFolder, "Y"), "part-00000-00001");
  String writtenY = FileUtils.readFileToString(tsvFileY, Charset.forName("UTF-8"));

  assertThat(writtenY, is("2\tworld\n"));

  assertThat(new File(tsvFolder, Hadoop18TapUtil.TEMPORARY_PATH).exists(), is(false));
}
 
Example #10
Source File: TapDataWriterTest.java    From plunger with Apache License 2.0
@Test
public void writeLocalPartition() throws IOException {
  File tsvFolder = temporaryFolder.newFolder("data");
  cascading.tap.local.PartitionTap partitionTap = new cascading.tap.local.PartitionTap(
      new cascading.tap.local.FileTap(new cascading.scheme.local.TextDelimited(valueFields),
          tsvFolder.getAbsolutePath()), new DelimitedPartition(partitionFields));
  Tap<?, ?, ?> returnedTap = new TapDataWriter(data).toTap(partitionTap);

  assertThat((cascading.tap.local.PartitionTap) returnedTap, is(partitionTap));

  File tsvFileX = new File(tsvFolder, "X");
  String writtenX = FileUtils.readFileToString(tsvFileX, Charset.forName("UTF-8"));

  assertThat(writtenX, is("1\thello\n"));

  File tsvFileY = new File(tsvFolder, "Y");
  String writtenY = FileUtils.readFileToString(tsvFileY, Charset.forName("UTF-8"));

  assertThat(writtenY, is("2\tworld\n"));
}
 
Example #11
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {

	if (trap instanceof Hfs) {

		JobConf jobConf = new JobConf(this.getConfigCopy());

		int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 );
		int nodeNum = jobConf.getInt( "cascading.flow.node.num", 0 );

		String partname = String.format( "-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum() );
		jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname );

		String value = String.format( "attempt_%012d_0000_m_%06d_0", (int) Math.rint( System.currentTimeMillis() ), this.getCurrentSliceNum() );
		jobConf.set( "mapred.task.id", value );
		jobConf.set( "mapreduce.task.id", value );

		return trap.openForWrite( new FlinkFlowProcess( jobConf ), null);
	}
	else {
		throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
	}
}
 
Example #12
Source File: TapTypeUtil.java    From plunger with Apache License 2.0
/** Determines the type of the configuration type argument of the supplied {@link Tap}. */
static Class<?> getTapConfigClass(Tap<?, ?, ?> tap) {
  Class<?> currentClass = tap.getClass();
  if (CompositeTap.class.isAssignableFrom(currentClass)) {
    currentClass = ((CompositeTap<?>) tap).getChildTaps().next().getClass();
  }
  while (currentClass != null) {
    if (Tap.class.isAssignableFrom(currentClass)) {
      Type genericSuperclass = currentClass.getGenericSuperclass();
      if (genericSuperclass instanceof ParameterizedType) {
        ParameterizedType tapType = (ParameterizedType) genericSuperclass;
        Type[] typeParameters = tapType.getActualTypeArguments();
        Type configTypeParameter = typeParameters[0];
        if (configTypeParameter instanceof Class) {
          Class<?> configClassParameter = (Class<?>) configTypeParameter;
          return configClassParameter;
        }
      }
    }
    currentClass = currentClass.getSuperclass();
  }
  return null;
}
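
Concretely, the method walks up the class hierarchy until it reaches the generic Tap superclass declaration and returns its first type argument. A minimal sketch of the expected results, assuming the Hfs and FileTap taps used elsewhere on this page (the scheme variables are assumed, not defined here):

// Hadoop taps are parameterized on org.apache.hadoop.conf.Configuration ...
Tap<?, ?, ?> hadoopTap = new cascading.tap.hadoop.Hfs(hadoopScheme, "/tmp/in"); // hadoopScheme is assumed
assert Configuration.class.equals(TapTypeUtil.getTapConfigClass(hadoopTap));

// ... while local taps are parameterized on java.util.Properties.
Tap<?, ?, ?> localTap = new cascading.tap.local.FileTap(localScheme, "in.tsv"); // localScheme is assumed
assert Properties.class.equals(TapTypeUtil.getTapConfigClass(localTap));

This is exactly the distinction TapDataWriter.toTap(...) in Example #1 relies on to route writes.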
 
Example #13
Source File: CommonCrawlIndexTest.java    From aws-big-data-blog with Apache License 2.0
@Test
public void testCreateCommonCrawlFlowDef() throws Exception {
    Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);

    String sourcePath = properties.getProperty("inPath");
    String sinkPath = properties.getProperty("testCreateCommonCrawlFlowDefOutput");
    String sinkValidationPath = properties.getProperty("testCreateCommonCrawlFlowDefOutputValidation");

    // create the Cascading "source" (input) tap to read the commonCrawl WAT file(s)
    Tap source = new FileTap(new TextLine(new Fields("line")), sourcePath);

    // create the Cascading "sink" (output) tap to dump the results
    Tap sink = new FileTap(new TextLine(new Fields("line")), sinkPath);

    // build the Cascading flow definition
    FlowDef flowDef = CommonCrawlIndex.createCommonCrawlFlowDef(source, sink);
    new LocalFlowConnector(properties).connect(flowDef).complete();

    Assert.sameContent(sinkPath, sinkValidationPath);
}
 
Example #14
Source File: JDBCScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sinkConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf conf ) {
    if( selectQuery != null )
        throw new TapException( "cannot sink to this Scheme" );

    String tableName = ( (JDBCTap) tap ).getTableName();
    int batchSize = ( (JDBCTap) tap ).getBatchSize();
    DBOutputFormat.setOutput( conf, DBOutputFormat.class, tableName, columns, updateBy, batchSize );

    if( outputFormatClass != null )
        conf.setOutputFormat( outputFormatClass );
}
 
Example #15
Source File: TapDataWriterTest.java    From plunger with Apache License 2.0
@Test
public void writeHfs() throws IOException {
  File tsvFolder = temporaryFolder.newFolder("data");
  cascading.tap.hadoop.Hfs hfsTap = new cascading.tap.hadoop.Hfs(new cascading.scheme.hadoop.TextDelimited(),
      tsvFolder.getAbsolutePath());
  Tap<?, ?, ?> returnedTap = new TapDataWriter(data).toTap(hfsTap);

  assertThat((cascading.tap.hadoop.Hfs) returnedTap, is(hfsTap));
  String written = FileUtils.readFileToString(new File(tsvFolder, "part-00000"), Charset.forName("UTF-8"));

  assertThat(written, is("X\t1\thello\nY\t2\tworld\n"));
  assertThat(new File(tsvFolder, Hadoop18TapUtil.TEMPORARY_PATH).exists(), is(false));
}
 
Example #16
Source File: HBaseScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<JobConf> process,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(HBaseOutputFormat.class);

  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);
  
  String tableName = conf.get(HBaseOutputFormat.OUTPUT_TABLE);
  useSalt = conf.getBoolean(String.format(HBaseConstants.USE_SALT, tableName), false);
}
 
Example #17
Source File: HBaseRawScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
		JobConf conf) {

	DeprecatedInputFormatWrapper.setInputFormat(org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf,
			ValueCopier.class);
	if (null != familyNames) {
		String columns = Util.join(this.familyNames, " ");
		LOG.debug("sourcing from column families: {}", columns);
		conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, columns);
	}
}
 
Example #18
Source File: TapDataReader.java    From plunger with Apache License 2.0
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) source;
  JobConf conf = new JobConf();
  FlowProcess<JobConf> flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sourceConfInit(flowProcess, conf);
  return hadoopTap.openForRead(flowProcess);
}
 
Example #19
Source File: JDBCTapCollector.java    From SpyGlass with Apache License 2.0
/**
 * Constructor JDBCTapCollector creates a new JDBCTapCollector instance.
 *
 * @param flowProcess of type FlowProcess<JobConf>
 * @param tap         of type Tap
 * @throws IOException if the collector fails to initialize
 */
public JDBCTapCollector( FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap ) throws IOException {
    super( flowProcess, tap.getScheme() );
    this.hadoopFlowProcess = flowProcess;

    this.tap = tap;
    this.conf = new JobConf( flowProcess.getConfigCopy() );

    this.setOutput( this );
}
 
Example #20
Source File: TapDataReader.java    From plunger with Apache License 2.0
private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) source;
  Properties properties = new Properties();
  FlowProcess<Properties> flowProcess = new LocalFlowProcess(properties);
  localTap.sourceConfInit(flowProcess, properties);
  return localTap.openForRead(flowProcess);
}
 
Example #21
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToHadoopTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) tap;
  JobConf conf = new JobConf();

  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();
}
 
Example #22
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
 
Example #23
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@Override
public Fields retrieveSourceFields(FlowProcess<? extends JobConf> flowProcess, Tap tap) {
  MessageType schema = readSchema(flowProcess, tap);
  SchemaIntersection intersection = new SchemaIntersection(schema, getSourceFields());

  setSourceFields(intersection.getSourceFields());

  return getSourceFields();
}
 
Example #24
Source File: WordCount.java    From cascading-flink with Apache License 2.0
public static void main(String[] args) {

	if (args.length < 2) {
		throw new IllegalArgumentException("Please specify input and output paths as arguments.");
	}

	Fields token = new Fields( "token", String.class );
	Fields text = new Fields( "text" );
	RegexSplitGenerator splitter = new RegexSplitGenerator( token, "\\s+" );
	// only returns "token"
	Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );

	Pipe wcPipe = new Pipe( "wc", docPipe );
	wcPipe = new AggregateBy( wcPipe, token, new CountBy( new Fields( "count" ) ) );

	Tap inTap = new Hfs( new TextDelimited( text, "\n" ), args[0] );
	Tap outTap = new Hfs( new TextDelimited( false, "\n" ), args[1], SinkMode.REPLACE );

	FlowDef flowDef = FlowDef.flowDef().setName( "wc" )
			.addSource( docPipe, inTap )
			.addTailSink( wcPipe, outTap );

	FlowConnector flowConnector = new FlinkConnector();
	Flow wcFlow = flowConnector.connect( flowDef );
	wcFlow.complete();
}
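
The FlowDef itself is platform-neutral; swapping the connector and taps retargets the same pipe assembly. As a point of comparison, a minimal sketch of wiring the equivalent flow through Cascading's local mode (the FileTap paths and local TextDelimited schemes are assumptions; compare Example #13):

// Local-mode equivalent: FileTap + LocalFlowConnector instead of Hfs + FlinkConnector.
Tap localIn = new FileTap(new cascading.scheme.local.TextDelimited(text, "\n"), args[0]);
Tap localOut = new FileTap(new cascading.scheme.local.TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

FlowDef localDef = FlowDef.flowDef().setName("wc-local")
    .addSource(docPipe, localIn)
    .addTailSink(wcPipe, localOut);

new LocalFlowConnector().connect(localDef).complete();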
 
Example #25
Source File: ParquetValueScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> jobConfFlowProcess, Tap<JobConf, RecordReader, OutputCollector> jobConfRecordReaderOutputCollectorTap, JobConf jobConf) {
  setPredicatePushdown(jobConf);
  setProjectionPushdown(jobConf);
  setStrictProjectionPushdown(jobConf);
  setRecordClass(jobConf);
}
 
Example #26
Source File: ParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {

  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }

  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}
 
Example #27
Source File: ParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
 
Example #28
Source File: ParquetScroogeScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ScroogeReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, ScroogeRecordConverter.class);
}
 
Example #29
Source File: ParquetScroogeScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  ParquetOutputFormat.setWriteSupportClass(jobConf, ScroogeWriteSupport.class);
  ScroogeWriteSupport.setScroogeClass(jobConf, this.config.getKlass());
}
 
Example #30
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("rawtypes")
 @Override
 public void sourceConfInit(FlowProcess<? extends JobConf> fp,
     Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {

   if (filterPredicate != null) {
     ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
   }

   jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
   ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
   TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}