Java Code Examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getWorkOutputPath()

The following examples show how to use org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getWorkOutputPath(). Each example comes from an open-source project; the source file, project, and license are listed above the code.
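
Before the project examples, here is a minimal, self-contained sketch (illustrative only; the class and file names are not taken from any of the projects below) of the typical usage pattern: a task obtains its attempt-scoped work directory from FileOutputFormat.getWorkOutputPath() and writes a side file there, so the file is only promoted to the job's final output directory when the task attempt commits.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SideFileMapper extends Mapper<LongWritable, Text, Text, Text> {

  private FSDataOutputStream sideFile;

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    // Attempt-scoped work directory managed by the output committer
    Path workDir = FileOutputFormat.getWorkOutputPath(context);
    // getUniqueFile() avoids name collisions between task attempts
    String fileName = FileOutputFormat.getUniqueFile(context, "sideFile", ".txt");
    FileSystem fs = workDir.getFileSystem(context.getConfiguration());
    sideFile = fs.create(new Path(workDir, fileName));
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    if (sideFile != null) {
      sideFile.close();
    }
  }
}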
Example 1
Source File: AvroImportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void setup(Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  schema = AvroJob.getMapOutputSchema(conf);
  lobLoader = new LargeObjectLoader(conf, FileOutputFormat.getWorkOutputPath(context));
  bigDecimalFormatString = conf.getBoolean(
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT,
      ImportJobBase.PROPERTY_BIGDECIMAL_FORMAT_DEFAULT);
}
 
Example 2
Source File: SegmentCreationPhaseMapReduceJob.java    From incubator-pinot with Apache License 2.0
@Override
public void setup(Context context) throws IOException, InterruptedException {

  currentHdfsWorkDir = FileOutputFormat.getWorkOutputPath(context);
  currentDiskWorkDir = "pinot_hadoop_tmp";

  // Temporary HDFS path for local machine
  localHdfsSegmentTarPath = currentHdfsWorkDir + "/segmentTar";

  // Temporary DISK path for local machine
  localDiskSegmentDirectory = currentDiskWorkDir + "/segments/";
  localDiskSegmentTarPath = currentDiskWorkDir + "/segmentsTar/";
  new File(localDiskSegmentTarPath).mkdirs();

  LOGGER.info("*********************************************************************");
  LOGGER.info("Configurations : {}", context.getConfiguration().toString());
  LOGGER.info("*********************************************************************");
  LOGGER.info("Current HDFS working dir : {}", currentHdfsWorkDir);
  LOGGER.info("Current DISK working dir : {}", new File(currentDiskWorkDir).getAbsolutePath());
  LOGGER.info("*********************************************************************");
  properties = context.getConfiguration();

  outputPath = properties.get(SEGMENT_CREATION_OUTPUT_PATH.toString());

  thirdeyeConfig = OBJECT_MAPPER.readValue(properties.get(SEGMENT_CREATION_THIRDEYE_CONFIG.toString()), ThirdEyeConfig.class);
  LOGGER.info(thirdeyeConfig.encode());
  schema = ThirdeyePinotSchemaUtils.createSchema(thirdeyeConfig);
  tableName = thirdeyeConfig.getCollection();

  segmentWallClockStartTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_START_TIME.toString()));
  segmentWallClockEndTime = Long.valueOf(properties.get(SEGMENT_CREATION_WALLCLOCK_END_TIME.toString()));
  segmentSchedule = properties.get(SEGMENT_CREATION_SCHEDULE.toString());
  isBackfill = Boolean.valueOf(properties.get(SEGMENT_CREATION_BACKFILL.toString()));
}
 
Example 3
Source File: SequenceFileImportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void setup(Context context)
    throws IOException, InterruptedException {
  this.lobLoader = new LargeObjectLoader(context.getConfiguration(), FileOutputFormat.getWorkOutputPath(context));
}
 
Example 4
Source File: TextImportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
@Override
protected void setup(Context context)
    throws IOException, InterruptedException {
  this.lobLoader = new LargeObjectLoader(context.getConfiguration(), FileOutputFormat.getWorkOutputPath(context));
}
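
Examples 1, 3, and 4 appear to be derived from Apache Sqoop's import mappers and only show setup(): the task's work output path is handed to a LargeObjectLoader so that large objects land in per-task files under the committer-managed directory. A matching cleanup() would normally release the loader; a hedged sketch of what that could look like (assuming LargeObjectLoader exposes a close() method, as it does in Sqoop):

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  // Close any large-object files written under the task's work output path
  if (lobLoader != null) {
    lobLoader.close();
  }
}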
 
Example 5
Source File: Purge.java    From Cubert with Apache License 2.0
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
        InterruptedException
{
    block = input.values().iterator().next();
    conf = PhaseContext.getConf();
    output = TupleFactory.getInstance().newTuple(3);
    purgeFileName = FileCache.get(filesToCache.get(0));

    if (purgeFileName == null)
    {
        throw new IOException("purgeFileName is null");
    }

    loadMembersToPurge(purgeFileName);

    String columnName = JsonUtils.getText(json.get("args"), "purgeColumnName");
    setColumnName(columnName);

    // Create temp file
    Path root = null;
    String filename = null;
    tempFileName = null;

    if (PhaseContext.isMapper())
    {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename =
                FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(),
                                               "tempFileForPurge",
                                               "");
    }
    else
    {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename =
                FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(),
                                               "tempFileForPurge",
                                               "");
    }

    tempFileName = root + "/" + filename;
}
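
Example 5 only builds the path string; a hypothetical continuation (not part of Purge.java) would create the file through the FileSystem API. Because the path lives under getWorkOutputPath(), the temporary file is moved to the job's final output directory only if the task attempt commits, so output from failed or speculative attempts is discarded automatically:

    FileSystem fs = root.getFileSystem(conf);
    try (FSDataOutputStream out = fs.create(new Path(tempFileName)))
    {
        // Placeholder payload; the real operator would stream purged rows here
        out.writeBytes("purged rows would be written here\n");
    }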
 
Example 6
Source File: TeeOperator.java    From Cubert with Apache License 2.0
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
        InterruptedException
{
    block = input.values().iterator().next();
    String prefix = JsonUtils.getText(json, "prefix");

    passthrough = json.get("passthrough").getBooleanValue();

    BlockSchema teeSchema = new BlockSchema(json.get("teeSchema"));

    if (json.has("generate") && !json.get("generate").isNull())
    {
        ObjectNode generateJson =
                JsonUtils.createObjectNode("name",
                                           "GENERATE",
                                           "input",
                                           json.get("input"),
                                           "output",
                                           json.get("input"),
                                           "outputTuple",
                                           json.get("generate"));

        generateOperator = new GenerateOperator();

        BlockProperties generateProps =
                new BlockProperties("teeGenerate", teeSchema, props);
        generateOperator.setInput(input, generateJson, generateProps);
    }

    Configuration conf = PhaseContext.getConf();

    Path root = null;
    String filename = null;

    if (PhaseContext.isMapper())
    {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getMapContext());
        filename =
                FileOutputFormat.getUniqueFile(PhaseContext.getMapContext(),
                                               prefix,
                                               "");
    }
    else
    {
        root = FileOutputFormat.getWorkOutputPath(PhaseContext.getRedContext());
        filename =
                FileOutputFormat.getUniqueFile(PhaseContext.getRedContext(),
                                               prefix,
                                               "");
    }

    writer = openedWriters.get(prefix);

    if (writer == null)
    {
        writer = StorageFactory.get(JsonUtils.getText(json, "type")).getTeeWriter();
        writer.open(conf, json, teeSchema, root, filename);
        openedWriters.put(prefix, writer);
    }

    if (json.has("filter") && json.get("filter") != null
            && !json.get("filter").isNull())
    {
        JsonNode filterJson = json.get("filter");
        filterTree = new FunctionTree(block);
        try
        {
            filterTree.addFunctionTree(filterJson);
        }
        catch (PreconditionException e)
        {
            throw new RuntimeException(e);
        }

    }
}
 
Example 7
Source File: SegmentCreationMapper.java    From incubator-pinot with Apache License 2.0
@Override
public void setup(Context context)
    throws IOException, InterruptedException {
  _jobConf = context.getConfiguration();
  logConfigurations();

  _useRelativePath = _jobConf.getBoolean(JobConfigConstants.USE_RELATIVE_PATH, false);
  _rawTableName = _jobConf.get(JobConfigConstants.SEGMENT_TABLE_NAME);
  _schema = Schema.fromString(_jobConf.get(JobConfigConstants.SCHEMA));

  // Optional.
  // Once we move to dateTimeFieldSpec, check that table config (w/ valid timeColumnName) is provided if multiple dateTimeFieldSpecs are configured
  String tableConfigString = _jobConf.get(JobConfigConstants.TABLE_CONFIG);
  if (tableConfigString != null) {
    _tableConfig = JsonUtils.stringToObject(tableConfigString, TableConfig.class);
  }
  String readerConfigFile = _jobConf.get(JobConfigConstants.PATH_TO_READER_CONFIG);
  if (readerConfigFile != null) {
    _readerConfigFile = new Path(readerConfigFile);
  }
  _recordReaderPath = _jobConf.get(JobConfigConstants.RECORD_READER_PATH);

  // Set up segment name generator
  String segmentNameGeneratorType =
      _jobConf.get(JobConfigConstants.SEGMENT_NAME_GENERATOR_TYPE, JobConfigConstants.DEFAULT_SEGMENT_NAME_GENERATOR);
  switch (segmentNameGeneratorType) {
    case JobConfigConstants.SIMPLE_SEGMENT_NAME_GENERATOR:
      _segmentNameGenerator =
          new SimpleSegmentNameGenerator(_rawTableName, _jobConf.get(JobConfigConstants.SEGMENT_NAME_POSTFIX));
      break;
    case JobConfigConstants.NORMALIZED_DATE_SEGMENT_NAME_GENERATOR:
      Preconditions.checkState(_tableConfig != null,
          "In order to use NormalizedDateSegmentNameGenerator, table config must be provided");
      SegmentsValidationAndRetentionConfig validationConfig = _tableConfig.getValidationConfig();
      DateTimeFormatSpec dateTimeFormatSpec = null;
      String timeColumnName = _tableConfig.getValidationConfig().getTimeColumnName();

      if (timeColumnName != null) {
        DateTimeFieldSpec dateTimeFieldSpec = _schema.getSpecForTimeColumn(timeColumnName);
        if (dateTimeFieldSpec != null) {
          dateTimeFormatSpec = new DateTimeFormatSpec(dateTimeFieldSpec.getFormat());
        }
      }
      _segmentNameGenerator =
          new NormalizedDateSegmentNameGenerator(_rawTableName, _jobConf.get(JobConfigConstants.SEGMENT_NAME_PREFIX),
              _jobConf.getBoolean(JobConfigConstants.EXCLUDE_SEQUENCE_ID, false),
              validationConfig.getSegmentPushType(), validationConfig.getSegmentPushFrequency(), dateTimeFormatSpec);
      break;
    default:
      throw new UnsupportedOperationException("Unsupported segment name generator type: " + segmentNameGeneratorType);
  }

  // Working directories
  _hdfsSegmentTarDir = new Path(FileOutputFormat.getWorkOutputPath(context), JobConfigConstants.SEGMENT_TAR_DIR);
  _localStagingDir = new File(LOCAL_TEMP_DIR);
  _localInputDir = new File(_localStagingDir, "inputData");
  _localSegmentDir = new File(_localStagingDir, "segments");
  _localSegmentTarDir = new File(_localStagingDir, JobConfigConstants.SEGMENT_TAR_DIR);

  if (_localStagingDir.exists()) {
    _logger.warn("Deleting existing file: {}", _localStagingDir);
    FileUtils.forceDelete(_localStagingDir);
  }
  _logger
      .info("Making local temporary directories: {}, {}, {}", _localStagingDir, _localInputDir, _localSegmentTarDir);
  Preconditions.checkState(_localStagingDir.mkdirs());
  Preconditions.checkState(_localInputDir.mkdir());
  Preconditions.checkState(_localSegmentDir.mkdir());
  Preconditions.checkState(_localSegmentTarDir.mkdir());

  _logger.info("*********************************************************************");
  _logger.info("Raw Table Name: {}", _rawTableName);
  _logger.info("Schema: {}", _schema);
  _logger.info("Segment Name Generator: {}", _segmentNameGenerator);
  _logger.info("Table Config: {}", _tableConfig);
  _logger.info("Reader Config File: {}", _readerConfigFile);
  _logger.info("*********************************************************************");
  _logger.info("HDFS Segment Tar Directory: {}", _hdfsSegmentTarDir);
  _logger.info("Local Staging Directory: {}", _localStagingDir);
  _logger.info("Local Input Directory: {}", _localInputDir);
  _logger.info("Local Segment Tar Directory: {}", _localSegmentTarDir);
  _logger.info("*********************************************************************");
}