Java Code Examples for org.apache.hadoop.fs.FileUtil#copyMerge()

The following examples show how to use org.apache.hadoop.fs.FileUtil#copyMerge(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out related API usage in the sidebar.
Example 1
Source File: HDFSTool.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Merges the files found under {@code originalDir} into the single file {@code newFile}
 * by delegating to {@code FileUtil.copyMerge}.
 *
 * @param originalDir path of the directory whose files are merged
 * @param newFile path of the merged output file
 * @throws IOException if the two paths do not share the same file scheme, or if the
 *         underlying file system calls fail
 */
public static void mergeIntoSingleFile(String originalDir, String newFile) throws IOException {
	final Path sourceDir = new Path(originalDir);
	final Path targetFile = new Path(newFile);

	// A single file system handle is used for both ends, so both paths must share a scheme.
	final boolean sameScheme = IOUtilFunctions.isSameFileScheme(sourceDir, targetFile);
	if( !sameScheme ) {
		throw new IOException("Cannot merge files into different target file system.");
	}

	final FileSystem fileSystem = IOUtilFunctions.getFileSystem(sourceDir);
	// 'true' asks copyMerge to delete the source directory; 'null' appends no separator string.
	FileUtil.copyMerge(fileSystem, sourceDir, fileSystem, targetFile, true,
		ConfigurationManager.getCachedJobConf(), null);
}
 
Example 2
Source File: HDFSTool.java    From systemds with Apache License 2.0 5 votes vote down vote up
/**
 * Combines every file below {@code originalDir} into one output file, {@code newFile},
 * using {@code FileUtil.copyMerge} on a single shared file system.
 *
 * @param originalDir directory holding the files to combine
 * @param newFile destination file receiving the combined content
 * @throws IOException when source and destination use different file schemes, or when
 *         a file system operation fails
 */
public static void mergeIntoSingleFile(String originalDir, String newFile) throws IOException {
	Path src = new Path(originalDir);
	Path dst = new Path(newFile);

	if( IOUtilFunctions.isSameFileScheme(src, dst) ) {
		// Same scheme: one FileSystem instance serves as both source and destination.
		FileSystem sharedFs = IOUtilFunctions.getFileSystem(src);
		FileUtil.copyMerge(
			sharedFs, src,   // source file system and directory
			sharedFs, dst,   // destination file system and file
			true,            // delete the source after merging
			ConfigurationManager.getCachedJobConf(),
			null);           // no string appended between merged files
		return;
	}

	throw new IOException("Cannot merge files into different target file system.");
}
 
Example 3
Source File: GetMergeFiles.java    From data-polygamy with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Downloads relationship results from the default Hadoop file system to the local
 * file system, merging each result directory into a single local file.
 *
 * <p>Expected arguments:
 * <ol>
 *   <li>{@code args[0]} - directory to scan, relative to the file system home directory</li>
 *   <li>{@code args[1]} - local target directory for result directories whose name
 *       contains "events"</li>
 *   <li>{@code args[2]} - local target directory for all other result directories</li>
 *   <li>{@code args[3]} - local comma-separated metadata file; the first field of each
 *       line is taken as a dataset identifier</li>
 * </ol>
 *
 * <p>Only relationship directories whose two dataset identifiers both appear in the
 * metadata file are copied. Source directories are left in place.
 *
 * @param args command-line arguments as described above
 * @throws IllegalArgumentException if fewer than four arguments are supplied
 * @throws IOException if reading the metadata file or any file system operation fails
 * @throws URISyntaxException declared for interface compatibility with existing callers
 */
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException {
    // Fail early with a usage hint instead of an opaque ArrayIndexOutOfBoundsException.
    if (args.length < 4) {
        throw new IllegalArgumentException(
                "Expected 4 arguments: <fromDirectory> <toEventsDirectory> "
                + "<toOutliersDirectory> <metadataFile>, but got " + args.length);
    }

    String fromDirectory = args[0];
    String toEventsDirectory = args[1];
    String toOutliersDirectory = args[2];
    String metadataFile = args[3];
    
    // Detecting datasets.
    
    HashSet<String> datasets = new HashSet<String>();
    
    // try-with-resources guarantees the reader is closed even if readLine() throws.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(metadataFile))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            String[] parts = line.split(",");
            datasets.add(parts[0]);
        }
    }
    
    // Downloading relationships.
    
    // Directory names look like "<ds1>-<ds2>", where each dataset id is "xxxx-xxxx".
    String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})";
    Pattern relationshipPattern = Pattern.compile(relationshipPatternStr);
    
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) {
        if (!status.isDirectory()) {
            continue;
        }
        Path file = status.getPath();
        
        Matcher m = relationshipPattern.matcher(file.getName());
        if (!m.find()) {
            continue;
        }
        
        // Skip relationships that involve a dataset not listed in the metadata file.
        String ds1 = m.group(1);
        String ds2 = m.group(2);
        if (!datasets.contains(ds1) || !datasets.contains(ds2)) {
            continue;
        }
        
        for (FileStatus statusDir : fs.listStatus(file)) {
            if (!statusDir.isDirectory()) {
                continue;
            }
            
            Path fromPath = statusDir.getPath();
            // "events" directories go to the events target; everything else to outliers.
            String targetDirectory = fromPath.getName().contains("events")
                    ? toEventsDirectory
                    : toOutliersDirectory;
            Path toPath = new Path(targetDirectory + "/"
                    + fromPath.getParent().getName() + "-" + fromPath.getName());
            
            System.out.println("Copying:");
            System.out.println("  From: " + fromPath.toString());
            System.out.println("  To: " + toPath.toString());
            
            FileUtil.copyMerge(
                    fs, // HDFS File System
                    fromPath, // HDFS path
                    localFS, // Local File System
                    toPath, // Local Path
                    false, // Do not delete HDFS path
                    conf, // Configuration
                    null); // No string appended between merged files
        }
    }
}