Java Code Examples for org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles()

The following examples show how to use org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles(). They are drawn from open-source projects; the source file and project for each example are noted above its code.
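All of the examples retrieve files that the job driver registered with the distributed cache earlier. For context, here is a minimal, hypothetical driver-side sketch (the file path and class name are assumptions for illustration, not taken from any of the projects listed below):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;

public class CacheDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Register an HDFS file with the distributed cache. The framework copies it to
        // every task node, where DistributedCache.getLocalCacheFiles(conf) can find the
        // localized copy during setup()/configure().
        DistributedCache.addCacheFile(new URI("/data/lookup/emp_dep.txt"), conf);

        // Optionally expose the cached file as a symlink in the task working directory.
        DistributedCache.createSymlink(conf);

        // ... configure mapper/reducer, input and output paths, and submit the job
        //     using this Configuration ...
    }
}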
Example 1
Source File: MapJoin.java    From BigData-In-Practice with Apache License 2.0
@Override
protected void setup(Mapper<LongWritable, Text, NullWritable, Emp_Dep>.Context context) throws IOException, InterruptedException {
    // Preprocessing: load the file to be joined from the distributed cache
    Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    // Only one file was cached here, so take the first path and open a BufferedReader on it
    BufferedReader reader = new BufferedReader(new FileReader(paths[0].toString()));

    String str = null;
    try {
        // Read the cached file line by line
        while ((str = reader.readLine()) != null) {
            // Split each cached record into fields
            String[] splits = str.split("\t");
            // Store the useful fields in the joinData map, keyed by the join key
            joinData.put(Integer.parseInt(splits[0]), splits[1]);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        reader.close();
    }

}
 
Example 2
Source File: CrossProductOperation.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf,code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf,code);
        if (conf.get("mrql.zero") != null) {
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf,code);
        } else result = null;
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        throw new Error("Cannot setup the crossProduct: "+e);
    }
}
 
Example 3
Source File: AvroDistributedCacheFileReader.java    From ml-ease with Apache License 2.0
@Override
protected List<Path> getPaths(String filePath) throws IOException
{
  Path[] localFiles = DistributedCache.getLocalCacheFiles(getConf());
  List<Path> paths = new ArrayList<Path>();
  
  for (Path file: localFiles)
  {
    if (!file.toString().contains(filePath))
    {
      continue;
    }
    
    paths.add(file);
  }
    
  return paths;
}
 
Example 4
Source File: L2.java    From spork with Apache License 2.0
public void configure(JobConf conf) {
    try {
        Path[] paths = DistributedCache.getLocalCacheFiles(conf);
        if (paths == null || paths.length < 1) {
            throw new RuntimeException("DistributedCache no work.");
        }

        // Open the small table
        BufferedReader reader =
            new BufferedReader(new InputStreamReader(new
            FileInputStream(paths[0].toString())));
        String line;
        hash = new HashSet<String>(500);
        while ((line = reader.readLine()) != null) {
            if (line.length() < 1) continue;
            String[] fields = line.split("");
            hash.add(fields[0]);
        }
        reader.close();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
 
Example 5
Source File: ReplicatedUserJoin.java    From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException,
		InterruptedException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	// Read all files in the DistributedCache
	for (Path p : files) {
		BufferedReader rdr = new BufferedReader(new InputStreamReader(
				new GZIPInputStream(new FileInputStream(new File(
						p.toString())))));
		String line = null;
		// For each record in the user file
		while ((line = rdr.readLine()) != null) {
			// Get the user ID for this record
			Map<String, String> parsed = MRDPUtils
					.transformXmlToMap(line);
			String userId = parsed.get("Id");
			// Map the user ID to the record
			userIdToInfo.put(userId, line);
		}
		rdr.close();
	}
	// Get the join type from the configuration
	joinType = context.getConfiguration().get("join.type");
}
 
Example 6
Source File: MapFeatures.java    From hadoop-book with Apache License 2.0
@Override
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");

    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println("Caught exception getting cached files: "
                    + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}
 
Example 7
Source File: BloomJoin.java    From hiped2 with Apache License 2.0
@Override
protected void setup(
    Context context)
    throws IOException, InterruptedException {

  Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
  filter = BloomFilterDumper.fromFile(
      new File(files[0].toString()));

  System.out.println("Filter = " + filter);
}
 
Example 8
Source File: DistributedCacheHelper.java    From datafu with Apache License 2.0
/**
 * Deserializes an object from a path in HDFS.
 * 
 * @param conf Hadoop configuration
 * @param path Path to deserialize from
 * @return Deserialized object
 * @throws IOException IOException
 */
public static Object readObject(Configuration conf, org.apache.hadoop.fs.Path path) throws IOException
{
  String localPath = null;
  Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(conf);
  for (Path localCacheFile : localCacheFiles)
  {
    if (localCacheFile.getName().endsWith(path.getName()))
    {
      localPath = localCacheFile.getName();
      break;
    }
  }
  if (localPath == null)
  {
    throw new RuntimeException("Could not find " + path + " in local cache");
  }
  FileInputStream inputStream = new FileInputStream(new File(localPath));
  ObjectInputStream objStream = new ObjectInputStream(inputStream);
  
  try
  {
    try {
      return objStream.readObject();
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  finally
  {
    objStream.close();
    inputStream.close();
  }
}
 
Example 9
Source File: BasicJobChaining.java    From hadoop-map-reduce-patterns with Apache License 2.0
protected void setup(Context context) throws IOException, InterruptedException {
	average = getAveragePostsPerUser(context.getConfiguration());
	mos = new MultipleOutputs<Text, Text>(context);

	try {
		Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());

		if (files == null || files.length == 0) {
			throw new RuntimeException("User information is not set in DistributedCache");
		}

		// Read all files in the DistributedCache
		for (Path p : files) {
			BufferedReader rdr = new BufferedReader(new InputStreamReader(
					new GZIPInputStream(new FileInputStream(new File(p.toString())))));

			String line;
			// For each record in the user file
			while ((line = rdr.readLine()) != null) {

				// Get the user ID and reputation
				Map<String, String> parsed = MRDPUtils.transformXmlToMap(line);
				String userId = parsed.get("Id");
				String reputation = parsed.get("Reputation");

				if (userId != null && reputation != null) {
					// Map the user ID to the reputation
					userIdToReputation.put(userId, reputation);
				}
			}
			rdr.close();
		}

	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}
 
Example 10
Source File: ReduceSideJoinBloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	DataInputStream strm = new DataInputStream(new FileInputStream(
			new File(files[0].toString())));
	bfilter.readFields(strm);
}
 
Example 11
Source File: BloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public void setup(Context context) throws IOException,
		InterruptedException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	System.out.println("Reading Bloom filter from: " + files[0]);

	DataInputStream stream = new DataInputStream(new FileInputStream(
			files[0].toString()));
	filter.readFields(stream);
	stream.close();
}
 
Example 12
Source File: JobLibLoader.java    From SpyGlass with Apache License 2.0
public static Path[] getFileFromCache(String libPathStr,
		Configuration config) {
	Path[] localFiles = null;
	try {
		logger.info("Local Cache => " + DistributedCache.getLocalCacheFiles(config));
		logger.info("Hadoop Cache => "+ DistributedCache.getCacheFiles(config));
		if (DistributedCache.getLocalCacheFiles(config) != null) {
			localFiles = DistributedCache.getLocalCacheFiles(config);
		}
		logger.info("LocalFiles => " + localFiles);
	} catch (Exception e) {
		e.printStackTrace();
	}
	return localFiles;
}
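As a side note (an illustration, not part of the SpyGlass source): getCacheFiles() returns the original URIs that the driver registered, while getLocalCacheFiles() returns the paths of the localized copies on the task node, which is why JobLibLoader logs both. A minimal sketch contrasting the two views:

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class CacheViewsSketch {
    public static void logCacheViews(Configuration config) throws IOException {
        // Original URIs as registered by the driver, e.g. hdfs://... paths.
        URI[] cacheUris = DistributedCache.getCacheFiles(config);
        // Localized copies on the task node's local filesystem.
        Path[] localCopies = DistributedCache.getLocalCacheFiles(config);

        System.out.println("Hadoop cache => " + Arrays.toString(cacheUris));
        System.out.println("Local cache  => " + Arrays.toString(localCopies));
    }
}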
 
Example 13
Source File: FileCache.java    From Cubert with Apache License 2.0
public static void initialize(Configuration conf) throws IOException
{
    FileCache.conf = conf;
    cachedFiles = DistributedCache.getLocalCacheFiles(conf);
}
 
Example 14
Source File: AccumuloMrGeoRangePartitioner.java    From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "Cutpoints file generated by code")
private synchronized TileIdWritable[] getCutPoints() throws IOException
{
  if (cutPointArray == null)
  {
    String cutFileName = conf.get(CUTFILE_KEY);
    Path[] cf = DistributedCache.getLocalCacheFiles(conf);

    if (cf != null)
    {
      for (Path path : cf)
      {
        if (path.toUri().getPath().endsWith(cutFileName.substring(cutFileName.lastIndexOf('/'))))
        {
          TreeSet<Text> cutPoints = new TreeSet<Text>();
          try (Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString()))))
          {
            while (in.hasNextLine())
            {
              cutPoints.add(new Text(Base64Utils.decodeToString(in.nextLine())));
            }
          }
          catch (ClassNotFoundException e)
          {
            throw new IOException("Error decoding cutpoints", e);
          }
          cutPointArray = cutPoints.toArray(new Text[cutPoints.size()]);
          break;
        }
      }
    }
    if (cutPointArray == null)
    {
      throw new FileNotFoundException(cutFileName + " not found in distributed cache");
    }
  }
  tileIdPointArray = new TileIdWritable[cutPointArray.length];
  for (int x = 0; x < cutPointArray.length; x++)
  {
    byte[] b = cutPointArray[x].getBytes();
    ByteBuffer buffer = ByteBuffer.wrap(b);
    long k = buffer.getLong();
    tileIdPointArray[x] = new TileIdWritable(k);
  }

  return tileIdPointArray;
}
 
Example 15
Source File: GroupedKeyRangePartitioner.java    From accumulo-recipes with Apache License 2.0
private synchronized Text[] getCutPoints() throws IOException {
    if (cutPointArray == null) {

        Path[] cf = DistributedCache.getLocalCacheFiles(conf);
        if (cf != null) {
            Map<String, String> curFilesAndGroups = getCurFilesAndGroups();
            SortedMap<String, SortedSet<String>> cutPointMap = new TreeMap<String, SortedSet<String>>();
            for (Path path : cf) {
                String group = null;
                for (Map.Entry<String, String> groupSplits : curFilesAndGroups.entrySet()) {
                    if (path.toString().endsWith(groupSplits.getKey()))
                        group = groupSplits.getValue();
                }


                if (group != null) {
                    Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString())));

                    try {
                        while (in.hasNextLine()) {
                            String split = new String(Base64.decodeBase64(in.nextLine().getBytes()));

                            SortedSet<String> splits = cutPointMap.get(group);
                            if (splits == null) {
                                splits = new TreeSet<String>();
                                cutPointMap.put(group, splits);
                            }
                            // Record the decoded split under its group; without this the
                            // per-group cut-point sets would stay empty.
                            splits.add(split);
                        }

                        SortedSet<Text> treeSet = new TreeSet<Text>();
                        for (Map.Entry<String, SortedSet<String>> entry : cutPointMap.entrySet()) {
                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + NULL_BYTE));

                            for (String string : entry.getValue())
                                treeSet.add(new Text(entry.getKey() + NULL_BYTE + string));

                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + END_BYTE));
                        }

                        cutPointArray = treeSet.toArray(new Text[]{});
                    } finally {
                        in.close();
                    }

                    break;
                } else {
                    throw new FileNotFoundException("A file was not found in distribution cache files: " + path.toString());
                }
            }
        }
    }
    return cutPointArray;
}