Java Code Examples for org.apache.hadoop.util.StringUtils#unEscapeString()

The following examples show how to use org.apache.hadoop.util.StringUtils#unEscapeString(). They are extracted from open source projects; the project and license for each example are noted above its code.
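As a quick orientation before the examples: unEscapeString() is the inverse of StringUtils.escapeString(). The one-argument forms use StringUtils.ESCAPE_CHAR ('\') and treat the comma as the escaped character, while the three-argument overloads take an explicit escape character and an explicit set of characters to unescape. A minimal round-trip sketch (the sample value is made up):

import org.apache.hadoop.util.StringUtils;

public class UnEscapeDemo {
  public static void main(String[] args) {
    String raw = "/data/a,b";                            // a value containing a comma
    String escaped = StringUtils.escapeString(raw);      // "/data/a\,b"
    String restored = StringUtils.unEscapeString(escaped);
    System.out.println(raw.equals(restored));            // prints "true"
  }
}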
Example 1
Source File: JobHistory.java    From hadoop-gpu with Apache License 2.0
/**
 * Parse a single line of history.
 * @param line the history line to parse
 * @param l the listener to call back with the parsed record
 * @param isEscaped whether field values in the line were escaped when written
 * @throws IOException if the listener fails to handle the record
 */
private static void parseLine(String line, Listener l, boolean isEscaped)
    throws IOException {
  // Extract the record type (the token before the first space).
  int idx = line.indexOf(' ');
  String recType = line.substring(0, idx);
  String data = line.substring(idx + 1);
  
  Matcher matcher = pattern.matcher(data); 

  while (matcher.find()) {
    String tuple = matcher.group(0);
    String[] parts = StringUtils.split(tuple, StringUtils.ESCAPE_CHAR, '=');
    // Strip the double quotes surrounding the value, then undo any escaping.
    String value = parts[1].substring(1, parts[1].length() - 1);
    if (isEscaped) {
      value = StringUtils.unEscapeString(value, StringUtils.ESCAPE_CHAR,
                                         charsToEscape);
    }
    parseBuffer.put(Keys.valueOf(parts[0]), value);
  }

  l.handle(RecordTypes.valueOf(recType), parseBuffer); 
  
  parseBuffer.clear(); 
}
 
Example 2
Source File: JobHistory.java    From RDFS with Apache License 2.0
/**
 * Parse a single line of history.
 * @param line the history line to parse
 * @param l the listener to call back with the parsed record
 * @param isEscaped whether field values in the line were escaped when written
 * @throws IOException if the listener fails to handle the record
 */
private static void parseLine(String line, Listener l, boolean isEscaped)
    throws IOException {
  // Extract the record type (the token before the first space).
  int idx = line.indexOf(' ');
  String recType = line.substring(0, idx);
  String data = line.substring(idx + 1);
  
  Matcher matcher = pattern.matcher(data); 
  Map<Keys,String> parseBuffer = new HashMap<Keys, String>();

  while (matcher.find()) {
    String tuple = matcher.group(0);
    String[] parts = StringUtils.split(tuple, StringUtils.ESCAPE_CHAR, '=');
    // Strip the double quotes surrounding the value, then undo any escaping.
    String value = parts[1].substring(1, parts[1].length() - 1);
    if (isEscaped) {
      value = StringUtils.unEscapeString(value, StringUtils.ESCAPE_CHAR,
                                         charsToEscape);
    }
    parseBuffer.put(Keys.valueOf(parts[0]), value);
  }

  l.handle(RecordTypes.valueOf(recType), parseBuffer); 
  
  parseBuffer.clear(); 
}
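In both versions above, charsToEscape is a field of the enclosing JobHistory class rather than a local variable. A plausible declaration, assuming the delimiters this line-oriented format must protect (the authoritative definition is in the linked source files):

// Assumed field of JobHistory: quotes, '=' and the record delimiter must not appear unescaped in values.
private static final char[] charsToEscape = new char[] {'"', '=', '.'};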
 
Example 3
Source File: QueryInputFormat.java    From Halyard with Apache License 2.0
public static void setQueriesFromDirRecursive(Configuration conf, String dirs, boolean sparqlUpdate, int stage) throws IOException {
    for (String dir : StringUtils.split(dirs)) {
        Path p = new Path(StringUtils.unEscapeString(dir));
        FileStatus[] matches = p.getFileSystem(conf).globStatus(p);
        if (matches == null) {
            throw new IOException("Input path does not exist: " + p);
        } else if (matches.length == 0) {
            throw new IOException("Input Pattern " + p + " matches 0 files");
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    addQueryRecursively(conf, p, sparqlUpdate, stage);
                } else {
                    addQuery(conf, globStat, sparqlUpdate, stage);
                }
            }
        }
    }
}
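A hypothetical call, with illustrative paths and flag values; because each directory is passed through unEscapeString(), a directory name may itself contain an escaped comma:

Configuration conf = new Configuration();
// Register every query file under both directories (sparqlUpdate=false, stage=0 are illustrative).
QueryInputFormat.setQueriesFromDirRecursive(conf, "/queries/batch1,/queries/batch2", false, 0);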
 
Example 4
Source File: FileInputFormat.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
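The unescaping above mirrors what FileInputFormat.setInputPaths() does on the way in: each path is run through StringUtils.escapeString() before the paths are joined with commas, so a comma inside a path name survives the round trip. A sketch using the org.apache.hadoop.mapred (old API) flavor, with made-up paths:

JobConf conf = new JobConf();
FileInputFormat.setInputPaths(conf, new Path("/data/a,b"), new Path("/data/c"));
// "mapred.input.dir" should now hold "/data/a\,b,/data/c".
Path[] paths = FileInputFormat.getInputPaths(conf);  // back to {/data/a,b, /data/c}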
 
Example 5
Source File: MetadataIdParser.java    From datawave with Apache License 2.0
/**
 * Parse "..." {, "..."} into an array of string arguments (as Object[])
 * 
 * @param args a comma-separated list of double-quoted strings; commas inside a string must be escaped
 * @return Object[] an array of the unescaped string values
 * @throws IllegalArgumentException if any element is not a double-quoted string
 */
public static Object[] parseArgs(String args) throws IllegalArgumentException {
    List<String> argList = new ArrayList<>();
    String[] parts = StringUtils.split(args, '\\', ',');
    for (String part : parts) {
        part = part.trim();
        if (part.charAt(0) == '"' && part.charAt(part.length() - 1) == '"') {
            part = StringUtils.unEscapeString(part.substring(1, part.length() - 1));
            argList.add(part);
        } else {
            throw new IllegalArgumentException("Expected a list of strings separated by commas.  Commas within the strings must be escaped. " + part);
        }
    }
    return argList.toArray();
}
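For example (hypothetical input), a comma that belongs inside a quoted argument must be escaped with a backslash; split() keeps the escape character in each token, and unEscapeString() then removes it:

Object[] parsed = MetadataIdParser.parseArgs("\"one\", \"two\\, three\"");
// parsed[0] -> "one"
// parsed[1] -> "two, three"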
 
Example 6
Source File: FileInputFormat.java    From hadoop-gpu with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 7
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 8
Source File: FileInputFormat.java    From RDFS with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 9
Source File: FileInputFormat.java    From big-c with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get(INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 10
Source File: FileInputFormat.java    From big-c with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get(org.apache.hadoop.mapreduce.lib.input.
    FileInputFormat.INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 11
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get(INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 12
Source File: FileInputFormat.java    From hadoop with Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 * 
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get(org.apache.hadoop.mapreduce.lib.input.
    FileInputFormat.INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
 
Example 13
Source File: CountersStrings.java    From big-c with Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
                                    charsToEscape);
}
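Here charsToEscape is a field of CountersStrings listing the delimiters of the serialized counters format. A sketch of the symmetric escape/unescape pair, assuming the bracket set that format uses:

// Assumed delimiter set; the authoritative field is defined in CountersStrings.
char[] charsToEscape = {'{', '}', '(', ')', '[', ']'};
String escaped = StringUtils.escapeString("FileSystemCounters(BYTES_READ)[42]", StringUtils.ESCAPE_CHAR, charsToEscape);
String restored = StringUtils.unEscapeString(escaped, StringUtils.ESCAPE_CHAR, charsToEscape);
// restored.equals("FileSystemCounters(BYTES_READ)[42]") -> true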
 
Example 14
Source File: Counters.java    From RDFS with Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR, 
                                    charsToEscape);
}
 
Example 15
Source File: LindenJob.java    From linden with Apache License 2.0
@Override
public int run(String[] strings) throws Exception {
  Configuration conf = getConf();
  String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
  logger.info("input dir:" + dir);
  Path inputPath = new Path(StringUtils.unEscapeString(dir));
  Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
  String indexPath = conf.get(LindenJobConfig.INDEX_PATH);

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  if (fs.exists(new Path(indexPath))) {
    fs.delete(new Path(indexPath), true);
  }

  int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
  Shard[] shards = createShards(indexPath, numShards);

  Shard.setIndexShards(conf, shards);

  // Empty the trash.
  (new Trash(conf)).expunge();

  Job job = Job.getInstance(conf, "linden-hadoop-indexing");
  job.setJarByClass(LindenJob.class);
  job.setMapperClass(LindenMapper.class);
  job.setCombinerClass(LindenCombiner.class);
  job.setReducerClass(LindenReducer.class);
  job.setMapOutputKeyClass(Shard.class);
  job.setMapOutputValueClass(IntermediateForm.class);
  job.setOutputKeyClass(Shard.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(IndexUpdateOutputFormat.class);
  job.setReduceSpeculativeExecution(false);
  job.setNumReduceTasks(numShards);

  String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
  if (lindenSchemaFile == null) {
    throw new IOException("no schema file is found");
  }
  logger.info("Adding schema file: " + lindenSchemaFile);
  job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
  String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
  if (lindenPropertiesFile == null) {
    throw new IOException("no linden properties file is found");
  }
  logger.info("Adding linden properties file: " + lindenPropertiesFile);
  job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  Path[] inputs = FileInputFormat.getInputPaths(job);
  StringBuilder buffer = new StringBuilder(inputs[0].toString());
  for (int i = 1; i < inputs.length; i++) {
    buffer.append(",");
    buffer.append(inputs[i].toString());
  }
  logger.info("mapreduce.input.dir = " + buffer.toString());
  logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
  logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
  logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
  logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
  logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
  logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

  job.waitForCompletion(true);
  if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed");
  }
  return 0;
}
 
Example 16
Source File: CountersStrings.java    From hadoop with Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
                                    charsToEscape);
}
 
Example 17
Source File: Counters.java    From hadoop-gpu with Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR, 
                                    charsToEscape);
}