Java Code Examples for org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles()

The following examples show how to use org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles(). They are drawn from open-source projects; the source file and project for each example are noted above its code.
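All of the examples retrieve files that the job driver registered with the distributed cache earlier. For context, here is a minimal, hypothetical driver-side sketch (the file path and class name are assumptions for illustration, not taken from any of the projects listed below):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;

public class CacheDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Register an HDFS file with the distributed cache. The framework copies it to
        // every task node, where DistributedCache.getLocalCacheFiles(conf) can find the
        // localized copy during setup()/configure().
        DistributedCache.addCacheFile(new URI("/data/lookup/emp_dep.txt"), conf);

        // Optionally expose the cached file as a symlink in the task working directory.
        DistributedCache.createSymlink(conf);

        // ... configure mapper/reducer, input and output paths, and submit the job
        //     using this Configuration ...
    }
}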
Example 1
Source File: MapJoin.java    From BigData-In-Practice with Apache License 2.0
@Override
protected void setup(Mapper<LongWritable, Text, NullWritable, Emp_Dep>.Context context) throws IOException, InterruptedException {
    // Preprocessing: load the file to be joined from the distributed cache
    Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    // Only one file was cached here, so take the first path and open a BufferedReader on it
    BufferedReader reader = new BufferedReader(new FileReader(paths[0].toString()));

    String str = null;
    try {
        // Read the cached file line by line
        while ((str = reader.readLine()) != null) {
            // Split each cached record into fields
            String[] splits = str.split("\t");
            // Store the useful fields in the joinData map, keyed by the join key
            joinData.put(Integer.parseInt(splits[0]), splits[1]);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        reader.close();
    }

}
 
Example 2
Source File: CrossProductOperation.java    From incubator-retired-mrql with Apache License 2.0
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf,code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf,code);
        if (conf.get("mrql.zero") != null) {
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf,code);
        } else result = null;
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        throw new Error("Cannot setup the crossProduct: "+e);
    }
}
 
Example 3
Source File: AvroDistributedCacheFileReader.java    From ml-ease with Apache License 2.0
@Override
protected List<Path> getPaths(String filePath) throws IOException
{
  Path[] localFiles = DistributedCache.getLocalCacheFiles(getConf());
  List<Path> paths = new ArrayList<Path>();
  
  for (Path file: localFiles)
  {
    if (!file.toString().contains(filePath))
    {
      continue;
    }
    
    paths.add(file);
  }
    
  return paths;
}
 
Example 4
Source File: L2.java    From spork with Apache License 2.0
public void configure(JobConf conf) {
    try {
        Path[] paths = DistributedCache.getLocalCacheFiles(conf);
        if (paths == null || paths.length < 1) {
            throw new RuntimeException("DistributedCache no work.");
        }

        // Open the small table
        BufferedReader reader =
            new BufferedReader(new InputStreamReader(new
            FileInputStream(paths[0].toString())));
        String line;
        hash = new HashSet<String>(500);
        while ((line = reader.readLine()) != null) {
            if (line.length() < 1) continue;
            String[] fields = line.split("");
            hash.add(fields[0]);
        }
        reader.close();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
 
Example 5
Source File: ReplicatedUserJoin.java    From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException,
		InterruptedException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	// Read all files in the DistributedCache
	for (Path p : files) {
		BufferedReader rdr = new BufferedReader(new InputStreamReader(
				new GZIPInputStream(new FileInputStream(new File(
						p.toString())))));
		String line = null;
		// For each record in the user file
		while ((line = rdr.readLine()) != null) {
			// Get the user ID for this record
			Map<String, String> parsed = MRDPUtils
					.transformXmlToMap(line);
			String userId = parsed.get("Id");
			// Map the user ID to the record
			userIdToInfo.put(userId, line);
		}
		rdr.close();
	}
	// Get the join type from the configuration
	joinType = context.getConfiguration().get("join.type");
}
 
Example 6
Source File: MapFeatures.java    From hadoop-book with Apache License 2.0
@Override
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");

    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println("Caught exception getting cached files: "
                    + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}
 
Example 7
Source File: BloomJoin.java    From hiped2 with Apache License 2.0
@Override
protected void setup(
    Context context)
    throws IOException, InterruptedException {

  Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
  filter = BloomFilterDumper.fromFile(
      new File(files[0].toString()));

  System.out.println("Filter = " + filter);
}
 
Example 8
Source File: DistributedCacheHelper.java    From datafu with Apache License 2.0
/**
 * Deserializes an object from a path in HDFS.
 * 
 * @param conf Hadoop configuration
 * @param path Path to deserialize from
 * @return Deserialized object
 * @throws IOException IOException
 */
public static Object readObject(Configuration conf, org.apache.hadoop.fs.Path path) throws IOException
{
  String localPath = null;
  Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(conf);
  for (Path localCacheFile : localCacheFiles)
  {
    if (localCacheFile.getName().endsWith(path.getName()))
    {
      localPath = localCacheFile.getName();
      break;
    }
  }
  if (localPath == null)
  {
    throw new RuntimeException("Could not find " + path + " in local cache");
  }
  FileInputStream inputStream = new FileInputStream(new File(localPath));
  ObjectInputStream objStream = new ObjectInputStream(inputStream);
  
  try
  {
    try {
      return objStream.readObject();
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
  }
  finally
  {
    objStream.close();
    inputStream.close();
  }
}
 
Example 9
Source File: BasicJobChaining.java    From hadoop-map-reduce-patterns with Apache License 2.0
protected void setup(Context context) throws IOException, InterruptedException {
	average = getAveragePostsPerUser(context.getConfiguration());
	mos = new MultipleOutputs<Text, Text>(context);

	try {
		Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());

		if (files == null || files.length == 0) {
			throw new RuntimeException("User information is not set in DistributedCache");
		}

		// Read all files in the DistributedCache
		for (Path p : files) {
			BufferedReader rdr = new BufferedReader(new InputStreamReader(
					new GZIPInputStream(new FileInputStream(new File(p.toString())))));

			String line;
			// For each record in the user file
			while ((line = rdr.readLine()) != null) {

				// Get the user ID and reputation
				Map<String, String> parsed = MRDPUtils.transformXmlToMap(line);
				String userId = parsed.get("Id");
				String reputation = parsed.get("Reputation");

				if (userId != null && reputation != null) {
					// Map the user ID to the reputation
					userIdToReputation.put(userId, reputation);
				}
			}
			rdr.close();
		}

	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}
 
Example 10
Source File: ReduceSideJoinBloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	DataInputStream strm = new DataInputStream(new FileInputStream(
			new File(files[0].toString())));
	bfilter.readFields(strm);
}
 
Example 11
Source File: BloomFilter.java    From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public void setup(Context context) throws IOException,
		InterruptedException {
	Path[] files = DistributedCache.getLocalCacheFiles(context
			.getConfiguration());
	System.out.println("Reading Bloom filter from: " + files[0]);

	DataInputStream stream = new DataInputStream(new FileInputStream(
			files[0].toString()));
	filter.readFields(stream);
	stream.close();
}
 
Example 12
Source File: JobLibLoader.java    From SpyGlass with Apache License 2.0
public static Path[] getFileFromCache(String libPathStr,
		Configuration config) {
	Path[] localFiles = null;
	try {
		logger.info("Local Cache => " + DistributedCache.getLocalCacheFiles(config));
		logger.info("Hadoop Cache => "+ DistributedCache.getCacheFiles(config));
		if (DistributedCache.getLocalCacheFiles(config) != null) {
			localFiles = DistributedCache.getLocalCacheFiles(config);
		}
		logger.info("LocalFiles => " + localFiles);
	} catch (Exception e) {
		e.printStackTrace();
	}
	return localFiles;
}
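As a side note (an illustration, not part of the SpyGlass source): getCacheFiles() returns the original URIs that the driver registered, while getLocalCacheFiles() returns the paths of the localized copies on the task node, which is why JobLibLoader logs both. A minimal sketch contrasting the two views:

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class CacheViewsSketch {
    public static void logCacheViews(Configuration config) throws IOException {
        // Original URIs as registered by the driver, e.g. hdfs://... paths.
        URI[] cacheUris = DistributedCache.getCacheFiles(config);
        // Localized copies on the task node's local filesystem.
        Path[] localCopies = DistributedCache.getLocalCacheFiles(config);

        System.out.println("Hadoop cache => " + Arrays.toString(cacheUris));
        System.out.println("Local cache  => " + Arrays.toString(localCopies));
    }
}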
 
Example 13
Source File: FileCache.java    From Cubert with Apache License 2.0
public static void initialize(Configuration conf) throws IOException
{
    FileCache.conf = conf;
    cachedFiles = DistributedCache.getLocalCacheFiles(conf);
}
 
Example 14
Source File: AccumuloMrGeoRangePartitioner.java    From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "Cutpoints file generated by code")
private synchronized TileIdWritable[] getCutPoints() throws IOException
{
  if (cutPointArray == null)
  {
    String cutFileName = conf.get(CUTFILE_KEY);
    Path[] cf = DistributedCache.getLocalCacheFiles(conf);

    if (cf != null)
    {
      for (Path path : cf)
      {
        if (path.toUri().getPath().endsWith(cutFileName.substring(cutFileName.lastIndexOf('/'))))
        {
          TreeSet<Text> cutPoints = new TreeSet<Text>();
          try (Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString()))))
          {
            while (in.hasNextLine())
            {
              cutPoints.add(new Text(Base64Utils.decodeToString(in.nextLine())));
            }
          }
          catch (ClassNotFoundException e)
          {
            throw new IOException("Error decoding cutpoints", e);
          }
          cutPointArray = cutPoints.toArray(new Text[cutPoints.size()]);
          break;
        }
      }
    }
    if (cutPointArray == null)
    {
      throw new FileNotFoundException(cutFileName + " not found in distributed cache");
    }
  }
  tileIdPointArray = new TileIdWritable[cutPointArray.length];
  for (int x = 0; x < cutPointArray.length; x++)
  {
    byte[] b = cutPointArray[x].getBytes();
    ByteBuffer buffer = ByteBuffer.wrap(b);
    long k = buffer.getLong();
    tileIdPointArray[x] = new TileIdWritable(k);
  }

  return tileIdPointArray;
}
 
Example 15
Source File: GroupedKeyRangePartitioner.java    From accumulo-recipes with Apache License 2.0
private synchronized Text[] getCutPoints() throws IOException {
    if (cutPointArray == null) {

        Path[] cf = DistributedCache.getLocalCacheFiles(conf);
        if (cf != null) {
            Map<String, String> curFilesAndGroups = getCurFilesAndGroups();
            SortedMap<String, SortedSet<String>> cutPointMap = new TreeMap<String, SortedSet<String>>();
            for (Path path : cf) {
                String group = null;
                for (Map.Entry<String, String> groupSplits : curFilesAndGroups.entrySet()) {
                    if (path.toString().endsWith(groupSplits.getKey()))
                        group = groupSplits.getValue();
                }


                if (group != null) {
                    Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString())));

                    try {
                        while (in.hasNextLine()) {
                            String split = new String(Base64.decodeBase64(in.nextLine().getBytes()));

                            SortedSet<String> splits = cutPointMap.get(group);
                            if (splits == null) {
                                splits = new TreeSet<String>();
                                cutPointMap.put(group, splits);
                            }
                            // Record the decoded split under its group; without this the
                            // per-group cut-point sets would stay empty.
                            splits.add(split);
                        }

                        SortedSet<Text> treeSet = new TreeSet<Text>();
                        for (Map.Entry<String, SortedSet<String>> entry : cutPointMap.entrySet()) {
                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + NULL_BYTE));

                            for (String string : entry.getValue())
                                treeSet.add(new Text(entry.getKey() + NULL_BYTE + string));

                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + END_BYTE));
                        }

                        cutPointArray = treeSet.toArray(new Text[]{});
                    } finally {
                        in.close();
                    }

                    break;
                } else {
                    throw new FileNotFoundException("A file was not found in distribution cache files: " + path.toString());
                }
            }
        }
    }
    return cutPointArray;
}