cascading.flow.hadoop.util.HadoopUtil Java Examples

The following examples show how to use cascading.flow.hadoop.util.HadoopUtil. They are extracted from open source projects; the originating project and source file are noted above each example.
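As the examples below illustrate, the most common pattern is to copy a Hadoop Configuration (or JobConf) through HadoopUtil before tailoring it for a single step, so that per-step changes do not leak back into the parent configuration. The following is a minimal sketch of that pattern; the class name and the step number are illustrative only, and the exact behaviour of copyJobConf should be checked against the Cascading version in use.

import org.apache.hadoop.conf.Configuration;

import cascading.flow.hadoop.util.HadoopUtil;

public class HadoopUtilSketch {

	// Copy the parent configuration so per-step settings do not leak back into it.
	public static Configuration copyAndTagStep(Configuration parentConfig, int stepNum) {
		Configuration stepConfig = HadoopUtil.copyJobConf(parentConfig);
		stepConfig.set("cascading.flow.step.num", Integer.toString(stepNum));
		return stepConfig;
	}

	public static void main(String[] args) {
		Configuration parent = new Configuration();
		Configuration step = copyAndTagStep(parent, 1);
		// The parent remains untouched; only the copy carries the step number.
		System.out.println(step.get("cascading.flow.step.num"));
	}
}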
Example #1
Source File: TapInputFormat.java    From cascading-flink with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void configure(Configuration config) {

	this.jobConf = HadoopUtil.asJobConfInstance(FlinkConfigConverter.toHadoopConfig(config));

	// set the correct class loader
	// not necessary for Flink versions >= 0.10 but we set this anyway to be on the safe side
	jobConf.setClassLoader(this.getClass().getClassLoader());

	this.mapredInputFormat = jobConf.getInputFormat();

	if (this.mapredInputFormat instanceof JobConfigurable) {
		((JobConfigurable) this.mapredInputFormat).configure(jobConf);
	}
}
 
Example #2
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToHadoopPartitionTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  BasePartitionTap<JobConf, ?, ?> hadoopTap = (BasePartitionTap<JobConf, ?, ?>) tap;
  JobConf conf = new JobConf();

  // Avoids deletion of results when using a partition tap (close() will delete the _temporary before the copy has
  // been done if not in a flow)
  HadoopUtil.setIsInflow(conf);

  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();

  // We need to clean up the '_temporary' folder
  BasePartitionTap<JobConf, ?, ?> partitionTap = hadoopTap;
  @SuppressWarnings("unchecked")
  String basePath = partitionTap.getParent().getFullIdentifier(flowProcess);
  deleteTemporaryPath(new Path(basePath), FileSystem.get(conf));
}
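The deleteTemporaryPath helper is not part of the snippet above. A plausible sketch, assuming it simply removes Hadoop's _temporary staging directory beneath the tap's base path (an assumption for illustration, not the plunger source), could look like this:

private void deleteTemporaryPath(Path basePath, FileSystem fileSystem) throws IOException {
  // Hypothetical reconstruction: drop the "_temporary" staging directory if it exists.
  Path temporaryPath = new Path(basePath, "_temporary");
  if (fileSystem.exists(temporaryPath)) {
    fileSystem.delete(temporaryPath, true); // recursive delete
  }
}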
 
Example #3
Source File: FlinkFlow.java    From cascading-flink with Apache License 2.0
@Override
protected void initConfig(Map<Object, Object> properties, Configuration parentConfig) {
	if( properties != null ) {
		parentConfig = createConfig( properties, parentConfig );
	}

	if( parentConfig == null ) {
		// this is ok, getJobConf will pass a default parent in
		return;
	}

	config = HadoopUtil.copyJobConf(parentConfig);
}
 
Example #4
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private Configuration getNodeConfig(FlowNode node) {

	Configuration nodeConfig = HadoopUtil.copyConfiguration(this.getConfig());
	ConfigurationSetter configSetter = new ConfigurationSetter(nodeConfig);
	this.initConfFromNodeConfigDef(node.getElementGraph(), configSetter);
	this.initConfFromStepConfigDef(configSetter);
	nodeConfig.set("cascading.flow.node.num", Integer.toString(node.getOrdinal()));

	return nodeConfig;
}
 
Example #5
Source File: TapOutputFormat.java    From cascading-flink with Apache License 2.0
@Override
public void finalizeGlobal(int parallelism) throws IOException {

	org.apache.hadoop.conf.Configuration config = HadoopUtil.copyConfiguration(this.config);
	Tap tap = this.flowNode.getSinkTaps().iterator().next();

	config.setBoolean(HadoopUtil.CASCADING_FLOW_EXECUTING, false);
	HadoopUtil.setOutputPath(config, new Path(tap.getIdentifier()));

	Hadoop18TapUtil.cleanupJob( config );
}
 
Example #6
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public Configuration mergeMapIntoConfig(Configuration defaultConfig, Map<String, String> map) {

	Configuration mergedConf = HadoopUtil.copyJobConf(defaultConfig);
	for(String key : map.keySet()) {
		mergedConf.set(key, map.get(key));
	}
	return mergedConf;
}
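For illustration, a merged copy like the one above might be requested as follows; the variable names are hypothetical, "flowProcess" is assumed to be the FlinkFlowProcess shown above, and the property key is taken from Example #4:

// Hypothetical usage sketch: overlay node-specific settings onto a copy of the defaults.
// Requires java.util.HashMap and java.util.Map in addition to the Hadoop Configuration.
Map<String, String> overrides = new HashMap<>();
overrides.put("cascading.flow.node.num", "0");
Configuration nodeConfig = flowProcess.mergeMapIntoConfig(defaultConfig, overrides);
// defaultConfig is left unchanged; only the returned copy carries the overrides.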
 
Example #7
Source File: FlinkFlow.java    From cascading-flink with Apache License 2.0
@Override
protected Configuration newConfig(Configuration defaultConfig) {
	return defaultConfig == null ? new Configuration() : HadoopUtil.copyJobConf(defaultConfig);
}
 
Example #8
Source File: FlinkFlow.java    From cascading-flink with Apache License 2.0
@ProcessConfiguration
@Override
public Configuration getConfigCopy() {
	return HadoopUtil.copyJobConf(getConfig());
}
 
Example #9
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public Configuration getConfigCopy() {
	return HadoopUtil.copyJobConf(this.conf);
}
 
Example #10
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public <C> C copyConfig(C conf) {
	return HadoopUtil.copyJobConf(conf);
}
 
Example #11
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
/**
 * Configures the Flink program for this step
 */
public Configuration createInitializedConfig( FlowProcess<Configuration> flowProcess, Configuration parentConfig ) {

	this.env.getConfig().registerKryoType(Tuple.class);

	Configuration config = parentConfig == null ? new JobConf() : HadoopUtil.copyJobConf( parentConfig );
	config.set( "cascading.flow.step.num", Integer.toString( getOrdinal() ) );
	HadoopUtil.setIsInflow(config);

	this.setConfig(config);

	return config;
}