Java Code Examples for org.apache.hadoop.io.DefaultStringifier#store()

The following examples show how to use org.apache.hadoop.io.DefaultStringifier#store(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JdbcExportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the column-name-to-Java-type mapping for the export in the job
 * configuration (under {@code AvroExportMapper.AVRO_COLUMN_TYPES_MAP}) so
 * the Avro export mapper can look it up at runtime.
 *
 * @param job       export job whose configuration receives the type map
 * @param tableName table being exported; passed through to the connection
 *                  manager for type resolution
 * @throws IOException if the column types cannot be determined or stored
 */
private void configureGenericRecordExportInputFormat(Job job, String tableName)
    throws IOException {
  ConnManager mgr = context.getConnManager();
  // Column SQL types come from the stored procedure when --call is used,
  // otherwise from the table/query metadata.
  Map<String, Integer> sqlTypeByColumn;
  if (options.getCall() != null) {
    sqlTypeByColumn = mgr.getColumnTypesForProcedure(options.getCall());
  } else {
    sqlTypeByColumn = mgr.getColumnTypes(tableName, options.getSqlQuery());
  }
  String[] userColumns = options.getColumns();
  MapWritable javaTypeByColumn = new MapWritable();
  for (Map.Entry<String, Integer> entry : sqlTypeByColumn.entrySet()) {
    String name = entry.getKey();
    // When the user listed explicit columns, map the database name back to
    // the user's spelling (case-insensitive); columns that don't match any
    // user-specified column resolve to null and are skipped.
    if (userColumns != null) {
      name = options.getColumnNameCaseInsensitive(name);
    }
    if (name == null) {
      continue;
    }
    String javaType = mgr.toJavaType(tableName, name, entry.getValue());
    javaTypeByColumn.put(new Text(name), new Text(javaType));
  }
  DefaultStringifier.store(job.getConfiguration(), javaTypeByColumn,
      AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
}
 
Example 2
Source File: KeyValueOutputFormat.java    From marklogic-contentpump with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the output configuration for key-value output: the output query
 * must be present, an unsupported batch-size entry produces a warning, and a
 * configured query language is checked for validity. Finally the forest host
 * list is cached in the configuration.
 *
 * @param conf job configuration to validate
 * @param cs   content source used to discover forest hosts
 * @throws IOException              if the host list cannot be stored
 * @throws IllegalArgumentException if the output query is missing
 */
@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) 
throws IOException {
    // The output query is mandatory for this format; fail fast without it.
    String outputQuery = conf.get(OUTPUT_QUERY);
    if (outputQuery == null) {
        throw new IllegalArgumentException(OUTPUT_QUERY + 
        " is not specified.");
    }
    // Batch size is not honored by this format; warn rather than silently
    // ignoring the user's setting.
    if (conf.get(BATCH_SIZE) != null) {
        LOG.warn("Config entry for " +
                "\"mapreduce.marklogic.output.batchsize\" is not " +
                "supported for " + this.getClass().getName() + 
                " and will be ignored.");
    }
    // Validate the query language only when one was configured.
    String language = conf.get(OUTPUT_QUERY_LANGUAGE);
    if (language != null) {
        InternalUtilities.checkQueryLanguage(language);
    }
    // Cache the forest host list in the configuration for later use.
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}
 
Example 3
Source File: TransformOutputFormat.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the base-class output checks, then records the mimetypes map in the
 * configuration so downstream tasks can retrieve it.
 *
 * @param conf job configuration receiving the mimetypes map
 * @param cs   content source passed through to the base-class check
 * @throws IOException if validation fails or the map cannot be stored
 */
@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs)
    throws IOException {
    // Base-class validation first.
    super.checkOutputSpecs(conf, cs);

    // Persist the mimetypes map into the config system.
    DefaultStringifier.store(
        conf, getMimetypesMap(), ConfigConstants.CONF_MIMETYPES);
}
 
Example 4
Source File: NodeOutputFormat.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the output configuration for node output: an unsupported
 * batch-size entry produces a warning, then the forest host list is cached
 * in the configuration.
 *
 * @param conf job configuration to validate
 * @param cs   content source used to discover forest hosts
 * @throws IOException if the host list cannot be stored
 */
@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) 
throws IOException {
    // Batch size is not honored by this format; warn rather than silently
    // ignoring the user's setting.
    String batchSize = conf.get(BATCH_SIZE);
    if (batchSize != null) {
        LOG.warn("Config entry for " +
                "\"mapreduce.marklogic.output.batchsize\" is not " +
                "supported for " + this.getClass().getName() + 
                " and will be ignored.");
    }
    // Cache the forest host list in the configuration for later use.
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}
 
Example 5
Source File: PropertyOutputFormat.java    From marklogic-contentpump with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the output configuration for property output: an unsupported
 * batch-size entry produces a warning, then the forest host list is cached
 * in the configuration.
 *
 * @param conf job configuration to validate
 * @param cs   content source used to discover forest hosts
 * @throws IOException if the host list cannot be stored
 */
@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) 
throws IOException {
    // Batch size is not honored by this format; warn rather than silently
    // ignoring the user's setting.
    String batchSize = conf.get(BATCH_SIZE);
    if (batchSize != null) {
        LOG.warn("Config entry for " +
                "\"mapreduce.marklogic.output.batchsize\" is not " +
                "supported for " + this.getClass().getName() + 
                " and will be ignored.");
    }
    // Cache the forest host list in the configuration for later use.
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}
 
Example 6
Source File: DelimitedTextInputFormat.java    From marklogic-contentpump with Apache License 2.0 4 votes vote down vote up
/**
 * Generates input splits and, when delimited-input splitting is enabled,
 * wraps each split in a {@code DelimitedSplit} carrying the header row
 * parsed from the first split (start offset 0) of its file.
 *
 * Fixes over the previous version: the bad-delimiter error now reports the
 * offending configured string (previously it logged the stale {@code char}
 * variable, which could be 0, and misspelled "delimiter"); the header
 * reader is closed via try-with-resources even when parsing throws; and the
 * carried-over header list is cleared whenever a new file begins, so an
 * empty file can no longer inherit the previous file's header.
 *
 * @param job job context supplying the configuration
 * @return the (possibly header-augmented) input splits
 * @throws IOException if a file cannot be read or a header record is
 *                     shorter than its declared size
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    // If delimSplit is true, the size of each split is determined by
    // Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat.
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        // With this many splits there is already enough parallelism, so
        // turn delimited splitting off for the rest of the job.
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:"
            + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false,
            ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // Add header info into each split.
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    // Header of the file currently being processed; splits with a non-zero
    // start offset reuse the header parsed from their file's first split.
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = (FileSplit) file;
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // First split of a file: parse its header row. Reset any header
            // carried over from a previous file so an empty file cannot
            // inherit the wrong header.
            hlist.clear();

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER,
                ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                // Report the offending configured value, not the stale
                // char variable.
                LOG.error("Incorrect delimiter: " + delimStr
                    + ". Expects single character.");
            }
            String encoding = conf.get(
                MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            FSDataInputStream fileIn = fs.open(path);
            // try-with-resources closes the reader (and the underlying
            // stream) even if header parsing throws.
            try (InputStreamReader instream =
                     new InputStreamReader(fileIn, encoding)) {
                CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter,
                        DelimitedTextReader.encapsulator, true, true));
                Iterator<CSVRecord> it = parser.iterator();

                if (it.hasNext()) {
                    CSVRecord record = it.next();
                    Iterator<String> recordIterator = record.iterator();
                    int recordSize = record.size();
                    String[] header = new String[recordSize];
                    for (int i = 0; i < recordSize; i++) {
                        if (!recordIterator.hasNext()) {
                            throw new IOException(
                                "Record size doesn't match the real size");
                        }
                        header[i] = recordIterator.next();
                    }

                    // Presumably strips a UTF-8 byte-order mark from the
                    // first header field — TODO confirm in EncodingUtil.
                    EncodingUtil.handleBOMUTF8(header, 0);

                    for (String s : header) {
                        hlist.add(new Text(s));
                    }
                }
            }
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(
            hlist.toArray(new Text[hlist.size()])), path,
            fsplit.getStart(), fsplit.getLength(),
            fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}