org.apache.hadoop.mapred.TaskAttemptContextImpl Java Examples
The following examples show how to use
org.apache.hadoop.mapred.TaskAttemptContextImpl.
Each example lists its source file, the project it comes from, and that project's license.
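Most of the examples below share one basic pattern: create a JobConf (or wrap an existing Configuration in one), build a TaskAttemptID, and pass both to the TaskAttemptContextImpl constructor. The following minimal sketch, which is not taken from any of the projects below, illustrates just that pattern; the class name TaskAttemptContextSketch and the "jt" job-tracker identifier and task/attempt numbers are arbitrary illustrative values.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptContextSketch {
  public static void main(String[] args) {
    // Job configuration; in a real job this usually wraps an existing Configuration.
    JobConf conf = new JobConf();
    // Arbitrary illustrative ids: job tracker "jt", job 0, map task 0, attempt 0.
    TaskAttemptID attemptId = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    // The old-API (mapred) task attempt context that the examples below construct.
    TaskAttemptContextImpl context = new TaskAttemptContextImpl(conf, attemptId);
    System.out.println("Task attempt: " + context.getTaskAttemptID());
  }
}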
Example #1
Source File: HadoopSource.java From twister2 with Apache License 2.0
@Override
public boolean hasNext() {
  if (currentReader != null) {
    try {
      boolean current = currentReader.nextKeyValue();
      while (!current && consumingSplit < assignedSplits.size() - 1) {
        TaskID taskID = new TaskID(context.getId(), context.getIndex(),
            TaskType.MAP, context.getIndex());
        TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
        consumingSplit++;
        TaskAttemptContextImpl taskAttemptContext =
            new TaskAttemptContextImpl(jconf, taskAttemptID);
        currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
            taskAttemptContext);
        currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        current = currentReader.nextKeyValue();
      }
      return current;
    } catch (IOException | InterruptedException e) {
      throw new RuntimeException("Failed to read the next key value", e);
    }
  }
  return false;
}
Example #2
Source File: HadoopSourceWithMap.java From twister2 with Apache License 2.0
@Override
public boolean hasNext() {
  if (currentReader != null) {
    try {
      boolean current = currentReader.nextKeyValue();
      while (!current && consumingSplit < assignedSplits.size() - 1) {
        TaskID taskID = new TaskID(context.getId(), context.getIndex(),
            TaskType.MAP, context.getIndex());
        TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
        consumingSplit++;
        TaskAttemptContextImpl taskAttemptContext =
            new TaskAttemptContextImpl(jconf, taskAttemptID);
        currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
            taskAttemptContext);
        currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
        current = currentReader.nextKeyValue();
      }
      return current;
    } catch (IOException | InterruptedException e) {
      throw new RuntimeException("Failed to read the next key value", e);
    }
  }
  return false;
}
Example #3
Source File: TaskAttemptImpl.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { TaskAttemptContext taskContext = new TaskAttemptContextImpl(taskAttempt.conf, TypeConverter.fromYarn(taskAttempt.attemptId)); taskAttempt.eventHandler.handle(new CommitterTaskAbortEvent( taskAttempt.attemptId, taskContext)); }
Example #4
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {

    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    this.outputCommitter = this.jobConf.getOutputCommitter();

    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

    this.outputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
Example #5
Source File: HadoopV1OutputCollector.java From ignite with Apache License 2.0
/**
 * Abort task.
 */
public void abort() {
  try {
    if (writer != null)
      jobConf.getOutputCommitter().abortTask(new TaskAttemptContextImpl(jobConf, attempt));
  }
  catch (IOException ignore) {
    // No-op.
  }
}
Example #6
Source File: HadoopV1OutputCollector.java From ignite with Apache License 2.0
/**
 * Commit task.
 *
 * @throws IOException If failed.
 */
public void commit() throws IOException {
  if (writer != null) {
    OutputCommitter outputCommitter = jobConf.getOutputCommitter();

    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

    if (outputCommitter.needsTaskCommit(taskCtx))
      outputCommitter.commitTask(taskCtx);
  }
}
Example #7
Source File: MneMapredPersonDataTest.java From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "person-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.DURABLE});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{PersonListEFProxy.class});
}
Example #8
Source File: MneMapredLongDataTest.java From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "long-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 2);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.LONG});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
Example #9
Source File: MneMapredChunkDataTest.java From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws Exception {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  unsafe = Utils.getUnsafe();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
  m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

  MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[]{});
  MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[]{DurableType.CHUNK});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[]{});
}
Example #10
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
public void testAbort() throws IOException {
  JobConf job = new JobConf();
  setConfForFileOutputCommitter(job);
  JobContext jContext = new JobContextImpl(job, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(job, committer
      .getTaskAttemptPath(tContext));

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  String file = "test.txt";

  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;
  // write output
  FileSystem localFs = FileSystem.getLocal(job);
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter =
      theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  writeOutput(theRecordWriter, reporter);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer
      .getTaskAttemptPath(tContext), file).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME)
      .toString());
  assertFalse("job temp dir "+expectedFile+" still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString())
      .listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #11
Source File: HadoopOutputFormatBase.java From Flink-CEPplus with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {

    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    this.outputCommitter = this.jobConf.getOutputCommitter();

    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

    this.outputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
Example #12
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0
public void testAbort() throws IOException {
  JobConf job = new JobConf();
  setConfForFileOutputCommitter(job);
  JobContext jContext = new JobContextImpl(job, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID);
  FileOutputCommitter committer = new FileOutputCommitter();
  FileOutputFormat.setWorkOutputPath(job, committer
      .getTaskAttemptPath(tContext));

  // do setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  String file = "test.txt";

  // A reporter that does nothing
  Reporter reporter = Reporter.NULL;
  // write output
  FileSystem localFs = FileSystem.getLocal(job);
  TextOutputFormat theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter =
      theOutputFormat.getRecordWriter(localFs, job, file, reporter);
  writeOutput(theRecordWriter, reporter);

  // do abort
  committer.abortTask(tContext);
  File expectedFile = new File(new Path(committer
      .getTaskAttemptPath(tContext), file).toString());
  assertFalse("task temp dir still exists", expectedFile.exists());

  committer.abortJob(jContext, JobStatus.State.FAILED);
  expectedFile = new File(new Path(outDir, FileOutputCommitter.TEMP_DIR_NAME)
      .toString());
  assertFalse("job temp dir "+expectedFile+" still exists", expectedFile.exists());
  assertEquals("Output directory not empty", 0, new File(outDir.toString())
      .listFiles().length);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example #13
Source File: TaskAttemptImpl.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { TaskAttemptContext taskContext = new TaskAttemptContextImpl(taskAttempt.conf, TypeConverter.fromYarn(taskAttempt.attemptId)); taskAttempt.eventHandler.handle(new CommitterTaskAbortEvent( taskAttempt.attemptId, taskContext)); }
Example #14
Source File: HadoopSourceWithMap.java From twister2 with Apache License 2.0
@Override
public void prepare(TSetContext ctx) {
  this.context = ctx;
  Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
  jconf = new JobConf(hadoopConf);
  try {
    format = inputClazz.newInstance();
    JobContext jobContext = new JobContextImpl(hadoopConf,
        new JobID(context.getId(), context.getIndex()));
    List<InputSplit> splits = format.getSplits(jobContext);
    for (int i = 0; i < splits.size(); i++) {
      if (i % context.getParallelism() == context.getIndex()) {
        assignedSplits.add(splits.get(i));
      }
    }

    if (assignedSplits.size() > 0) {
      TaskID taskID = new TaskID(context.getId(), context.getIndex(),
          TaskType.MAP, context.getIndex());
      TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
      TaskAttemptContextImpl taskAttemptContext =
          new TaskAttemptContextImpl(jconf, taskAttemptID);
      currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
          taskAttemptContext);
      currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
    }
  } catch (InstantiationException | IllegalAccessException
      | InterruptedException | IOException e) {
    throw new RuntimeException("Failed to initialize hadoop input", e);
  }
}
Example #15
Source File: HadoopSource.java From twister2 with Apache License 2.0
@Override
public void prepare(TSetContext ctx) {
  this.context = ctx;
  Configuration hadoopConf = this.wrappedConfiguration.getConfiguration();
  jconf = new JobConf(hadoopConf);
  try {
    format = inputClazz.newInstance();
    JobContext jobContext = new JobContextImpl(hadoopConf,
        new JobID(context.getId(), context.getIndex()));
    List<InputSplit> splits = format.getSplits(jobContext);
    for (int i = 0; i < splits.size(); i++) {
      if (i % context.getParallelism() == context.getIndex()) {
        assignedSplits.add(splits.get(i));
      }
    }

    if (assignedSplits.size() > 0) {
      TaskID taskID = new TaskID(context.getId(), context.getIndex(),
          TaskType.MAP, context.getIndex());
      TaskAttemptID taskAttemptID = new TaskAttemptID(taskID, context.getIndex());
      TaskAttemptContextImpl taskAttemptContext =
          new TaskAttemptContextImpl(jconf, taskAttemptID);
      currentReader = format.createRecordReader(assignedSplits.get(consumingSplit),
          taskAttemptContext);
      currentReader.initialize(assignedSplits.get(consumingSplit), taskAttemptContext);
    }
  } catch (InstantiationException | IllegalAccessException
      | InterruptedException | IOException e) {
    throw new RuntimeException("Failed to initialize hadoop input", e);
  }
}
Example #16
Source File: HadoopOutputFormatBase.java From flink with Apache License 2.0
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {

    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

    this.outputCommitter = this.jobConf.getOutputCommitter();

    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

    this.outputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
        Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
Example #17
Source File: TestMRCJCFileOutputCommitter.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext)); committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, file).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
Example #18
Source File: TestMRCJCFileOutputCommitter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") public void testCommitter() throws Exception { JobConf job = new JobConf(); setConfForFileOutputCommitter(job); JobContext jContext = new JobContextImpl(job, taskID.getJobID()); TaskAttemptContext tContext = new TaskAttemptContextImpl(job, taskID); FileOutputCommitter committer = new FileOutputCommitter(); FileOutputFormat.setWorkOutputPath(job, committer.getTaskAttemptPath(tContext)); committer.setupJob(jContext); committer.setupTask(tContext); String file = "test.txt"; // A reporter that does nothing Reporter reporter = Reporter.NULL; // write output FileSystem localFs = FileSystem.getLocal(job); TextOutputFormat theOutputFormat = new TextOutputFormat(); RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(localFs, job, file, reporter); writeOutput(theRecordWriter, reporter); // do commit committer.commitTask(tContext); committer.commitJob(jContext); // validate output File expectedFile = new File(new Path(outDir, file).toString()); StringBuffer expectedOutput = new StringBuffer(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); expectedOutput.append(key2).append("\n"); expectedOutput.append(key1).append("\n"); expectedOutput.append(key2).append('\t').append(val2).append("\n"); String output = UtilsForTests.slurp(expectedFile); assertEquals(output, expectedOutput.toString()); FileUtil.fullyDelete(new File(outDir.toString())); }
Example #19
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
  try {
    StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
    Object serdeLib = Class.forName(sd.getSerdeInfo().getSerializationLib()).newInstance();
    Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
        "Expect a SerDe lib implementing both Serializer and Deserializer, but actually got "
            + serdeLib.getClass().getName());
    recordSerDe = (Serializer) serdeLib;
    ReflectionUtils.setConf(recordSerDe, jobConf);
    // TODO: support partition properties, for now assume they're same as table properties
    SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);
    outputClass = recordSerDe.getSerializedClass();
  } catch (IllegalAccessException | SerDeException | InstantiationException | ClassNotFoundException e) {
    throw new FlinkRuntimeException("Error initializing Hive serializer", e);
  }

  TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
      + String.format("%" + (6 - Integer.toString(taskNumber).length()) + "s", " ").replace(" ", "0")
      + taskNumber + "_0");

  this.jobConf.set("mapred.task.id", taskAttemptID.toString());
  this.jobConf.setInt("mapred.task.partition", taskNumber);
  // for hadoop 2.2
  this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
  this.jobConf.setInt("mapreduce.task.partition", taskNumber);

  this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

  if (!isDynamicPartition) {
    staticWriter = writerForLocation(hiveTablePartition.getStorageDescriptor().getLocation());
  } else {
    dynamicPartitionOffset = fieldNames.length - partitionColumns.size()
        + hiveTablePartition.getPartitionSpec().size();
  }

  numNonPartitionColumns = isPartitioned ? fieldNames.length - partitionColumns.size() : fieldNames.length;
  hiveConversions = new HiveObjectConversion[numNonPartitionColumns];
  List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
  for (int i = 0; i < numNonPartitionColumns; i++) {
    ObjectInspector objectInspector = HiveInspectors.getObjectInspector(fieldTypes[i]);
    objectInspectors.add(objectInspector);
    hiveConversions[i] = HiveInspectors.getConversion(objectInspector, fieldTypes[i].getLogicalType());
  }

  if (!isPartitioned) {
    rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList(fieldNames),
        objectInspectors);
  } else {
    rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList(fieldNames).subList(0, fieldNames.length - partitionColumns.size()),
        objectInspectors);
    defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
        HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
  }
}
Example #20
Source File: HadoopV1OutputCollector.java From ignite with Apache License 2.0
/**
 * Setup task.
 *
 * @throws IOException If failed.
 */
public void setup() throws IOException {
  if (writer != null)
    jobConf.getOutputCommitter().setupTask(new TaskAttemptContextImpl(jobConf, attempt));
}