Java Code Examples for org.apache.hadoop.io.DefaultStringifier#store()
The following examples show how to use
org.apache.hadoop.io.DefaultStringifier#store().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JdbcExportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 6 votes |
/**
 * Configures the input format for generic-record (Avro) export by resolving
 * each exported column to its Java type name and serializing that mapping
 * into the job configuration under {@code AvroExportMapper.AVRO_COLUMN_TYPES_MAP}.
 *
 * @param job       the job whose configuration receives the column-type map
 * @param tableName the table being exported; passed through to type resolution
 * @throws IOException if the type map cannot be stored in the configuration
 */
private void configureGenericRecordExportInputFormat(Job job, String tableName)
    throws IOException {
  ConnManager mgr = context.getConnManager();

  // Stored-procedure exports expose their own type metadata; table/query
  // exports resolve column types from the table name and SQL query instead.
  Map<String, Integer> sqlTypeByColumn;
  if (options.getCall() != null) {
    sqlTypeByColumn = mgr.getColumnTypesForProcedure(options.getCall());
  } else {
    sqlTypeByColumn = mgr.getColumnTypes(tableName, options.getSqlQuery());
  }

  String[] userColumns = options.getColumns();
  MapWritable javaTypeByColumn = new MapWritable();
  for (Map.Entry<String, Integer> entry : sqlTypeByColumn.entrySet()) {
    String column = entry.getKey();
    if (userColumns != null) {
      // Normalize to the user-specified spelling (case-insensitive match);
      // a null result means the column was not requested and is skipped.
      column = options.getColumnNameCaseInsensitive(column);
    }
    if (column != null) {
      String javaType = mgr.toJavaType(tableName, column, entry.getValue());
      javaTypeByColumn.put(new Text(column), new Text(javaType));
    }
  }

  DefaultStringifier.store(job.getConfiguration(), javaTypeByColumn,
      AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
}
Example 2
Source File: KeyValueOutputFormat.java From marklogic-contentpump with Apache License 2.0 | 6 votes |
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { // check for required configuration if (conf.get(OUTPUT_QUERY) == null) { throw new IllegalArgumentException(OUTPUT_QUERY + " is not specified."); } // warn against unsupported configuration if (conf.get(BATCH_SIZE) != null) { LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for " + this.getClass().getName() + " and will be ignored."); } String queryLanguage = conf.get(OUTPUT_QUERY_LANGUAGE); if (queryLanguage != null) { InternalUtilities.checkQueryLanguage(queryLanguage); } // store hosts into config system DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST); }
Example 3
Source File: TransformOutputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
/**
 * Runs the superclass output checks, then stores the mimetype map in the
 * configuration under {@code ConfigConstants.CONF_MIMETYPES} so tasks can
 * retrieve it. (The map's exact contents come from getMimetypesMap(), which
 * is defined elsewhere in this class — not visible here.)
 *
 * @param conf job configuration being validated and populated
 * @param cs   content source, forwarded to the superclass check
 * @throws IOException if the superclass check fails or the map cannot be stored
 */
@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs)
    throws IOException {
    super.checkOutputSpecs(conf, cs);
    // store mimetypes map into config system
    DefaultStringifier.store(conf, getMimetypesMap(),
        ConfigConstants.CONF_MIMETYPES);
}
Example 4
Source File: NodeOutputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { // warn against unsupported configuration if (conf.get(BATCH_SIZE) != null) { LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for " + this.getClass().getName() + " and will be ignored."); } // store hosts into config system DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST); }
Example 5
Source File: PropertyOutputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { // warn against unsupported configuration if (conf.get(BATCH_SIZE) != null) { LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for " + this.getClass().getName() + " and will be ignored."); } // store hosts into config system DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST); }
Example 6
Source File: DelimitedTextInputFormat.java From marklogic-contentpump with Apache License 2.0 | 4 votes |
public List<InputSplit> getSplits(JobContext job) throws IOException { boolean delimSplit = isSplitInput(job.getConfiguration()); //if delimSplit is true, size of each split is determined by //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat List<InputSplit> splits = super.getSplits(job); if (!delimSplit) { return splits; } if (splits.size()>= SPLIT_COUNT_LIMIT) { //if #splits > 1 million, there is enough parallelism //therefore no point to split LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT); DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT); return splits; } // add header info into splits List<InputSplit> populatedSplits = new ArrayList<InputSplit>(); LOG.info(splits.size() + " DelimitedSplits generated"); Configuration conf = job.getConfiguration(); char delimiter =0; ArrayList<Text> hlist = new ArrayList<Text>(); for (InputSplit file: splits) { FileSplit fsplit = ((FileSplit)file); Path path = fsplit.getPath(); FileSystem fs = path.getFileSystem(conf); if (fsplit.getStart() == 0) { // parse the inSplit, get the header FSDataInputStream fileIn = fs.open(path); String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER); if (delimStr.length() == 1) { delimiter = delimStr.charAt(0); } else { LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character."); } String encoding = conf.get( MarkLogicConstants.OUTPUT_CONTENT_ENCODING, MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING); InputStreamReader instream = new InputStreamReader(fileIn, encoding); CSVParser parser = new CSVParser(instream, CSVParserFormatter. 
getFormat(delimiter, DelimitedTextReader.encapsulator, true, true)); Iterator<CSVRecord> it = parser.iterator(); String[] header = null; if (it.hasNext()) { CSVRecord record = (CSVRecord)it.next(); Iterator<String> recordIterator = record.iterator(); int recordSize = record.size(); header = new String[recordSize]; for (int i = 0; i < recordSize; i++) { if (recordIterator.hasNext()) { header[i] = (String)recordIterator.next(); } else { throw new IOException("Record size doesn't match the real size"); } } EncodingUtil.handleBOMUTF8(header, 0); hlist.clear(); for (String s : header) { hlist.add(new Text(s)); } } instream.close(); } DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable( hlist.toArray(new Text[hlist.size()])), path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations()); populatedSplits.add(ds); } return populatedSplits; }