/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2019 by Hitachi Vantara : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyShort;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.commons.vfs2.AllFileSelector;
import org.apache.commons.vfs2.FileObject;
import org.apache.commons.vfs2.FileSelector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleFileException;
import org.pentaho.di.core.vfs.KettleVFS;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.hadoop.shim.common.fs.PathProxy;

/**
 * Tests for {@link DistributedCacheUtilImpl}.
 */
public class DistributedCacheUtilImplTest {

  private static String PLUGIN_BASE = null;

  @BeforeClass
  public static void setup() throws Exception {
    PLUGIN_BASE = System.getProperty( Const.PLUGIN_BASE_FOLDERS_PROP );
    // Fake out the "plugins" directory to point at the project's root directory
    System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, KettleVFS.getFileObject( "." ).getURL().toURI().getPath() );
  }

  @AfterClass
  public static void teardown() {
    if ( PLUGIN_BASE != null ) {
      System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, PLUGIN_BASE );
    }
  }

  @Ignore
  @Test( expected = NullPointerException.class )
  public void instantiation() {
    new DistributedCacheUtilImpl();
  }

  @Test
  public void deleteDirectory() throws Exception {
    FileObject test = KettleVFS.getFileObject( "bin/test/deleteDirectoryTest" );
    test.createFolder();

    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();
    ch.deleteDirectory( test );
    try {
      assertFalse( test.exists() );
    } finally {
      // Delete the directory with java.io.File if it wasn't removed
      File f = new File( "bin/test/deleteDirectoryTest" );
      if ( f.exists() && !f.delete() ) {
        throw new IOException( "unable to delete test directory: " + f.getAbsolutePath() );
      }
    }
  }

  @Test
  public void extract_invalid_archive() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    try {
      ch.extract( KettleVFS.getFileObject( "bogus" ), null );
      fail( "expected exception" );
    } catch ( IllegalArgumentException ex ) {
      assertTrue( ex.getMessage().startsWith( "archive does not exist" ) );
    }
  }

  @Test
  public void extract_destination_exists() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    FileObject archive =
      KettleVFS.getFileObject( getClass().getResource( "/pentaho-mapreduce-sample.jar" ).toURI().getPath() );

    try {
      ch.extract( archive, KettleVFS.getFileObject( "." ) );
    } catch ( IllegalArgumentException ex ) {
      assertTrue( ex.getMessage(), "destination already exists".equals( ex.getMessage() ) );
    }
  }
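  /**
   * Verifies that extractToTemp() unpacks an archive into a fresh temporary directory. The sample jar contains
   * 3 files and 5 directories; findFiles() also returns the root folder itself, so 9 entries are expected in total.
   */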
  @Test
  public void extractToTemp() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    FileObject archive =
      KettleVFS.getFileObject( getClass().getResource( "/pentaho-mapreduce-sample.jar" ).toURI().getPath() );
    FileObject extracted = ch.extractToTemp( archive );

    assertNotNull( extracted );
    assertTrue( extracted.exists() );
    try {
      // There should be 3 files and 5 directories inside the root folder (which is the 9th entry)
      assertEquals( 9, extracted.findFiles( new AllFileSelector() ).length );
    } finally {
      // clean up after ourselves
      ch.deleteDirectory( extracted );
    }
  }

  @Test
  public void extractToTempZipEntriesMixed() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    File dest = File.createTempFile( "entriesMixed", ".zip" );
    ZipOutputStream outputStream = new ZipOutputStream( new FileOutputStream( dest ) );
    ZipEntry e = new ZipEntry( "zipEntriesMixed" + "/" + "someFile.txt" );
    outputStream.putNextEntry( e );
    byte[] data = "someOutString".getBytes();
    outputStream.write( data, 0, data.length );
    outputStream.closeEntry();
    e = new ZipEntry( "zipEntriesMixed" + "/" );
    outputStream.putNextEntry( e );
    outputStream.closeEntry();
    outputStream.close();

    FileObject archive = KettleVFS.getFileObject( dest.getAbsolutePath() );
    FileObject extracted = null;
    try {
      extracted = ch.extractToTemp( archive );
    } catch ( IOException | KettleFileException e1 ) {
      e1.printStackTrace();
      fail( "Exception not expected in this case" );
    }

    assertNotNull( extracted );
    assertTrue( extracted.exists() );
    try {
      // The archive holds one file inside one directory; findFiles() also counts the root folder, so 3 entries total
      assertEquals( 3, extracted.findFiles( new AllFileSelector() ).length );
    } finally {
      // clean up after ourselves
      ch.deleteDirectory( extracted );
      dest.delete();
    }
  }

  @Test
  public void extractToTemp_missing_archive() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    try {
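  /**
   * Verifies that findFiles() can locate files by extension under a VFS folder. The test folder built by
   * DistributedCacheTestUtil.createTestFolderWithContent() is expected to hold 4 jars and 15 entries overall.
   */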
      ch.extractToTemp( null );
      fail( "Expected exception" );
    } catch ( NullPointerException ex ) {
      assertEquals( "archive is required", ex.getMessage() );
    }
  }

  @Test
  public void findFiles_vfs() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    FileObject testFolder = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
      // Simply test we can find the jar files in our test folder
      List<String> jars = ch.findFiles( testFolder, "jar" );
      assertEquals( 4, jars.size() );

      // Look for all files and folders
      List<String> all = ch.findFiles( testFolder, null );
      assertEquals( 15, all.size() );
    } finally {
      testFolder.delete( new AllFileSelector() );
    }
  }

  @Test
  public void findFiles_vfs_hdfs() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    URL url = new URL( "http://localhost:8020/path/to/file" );
    Configuration conf = mock( Configuration.class );
    FileSystem fs = mock( FileSystem.class );
    FileObject source = mock( FileObject.class );
    Path dest = mock( Path.class );
    FileObject hdfsDest = mock( FileObject.class );
    Path root = mock( Path.class );

    FileObject[] fileObjects = new FileObject[ 12 ];
    for ( int i = 0; i < fileObjects.length; i++ ) {
      URL fileUrl = new URL( "http://localhost:8020/path/to/file/" + i );
      FileObject fileObject = mock( FileObject.class );
      fileObjects[ i ] = fileObject;
      doReturn( fileUrl ).when( fileObject ).getURL();
    }

    doReturn( url ).when( source ).getURL();
    doReturn( conf ).when( fs ).getConf();
    doReturn( 0 ).when( conf ).getInt( any( String.class ), anyInt() );
    doReturn( true ).when( source ).exists();
    doReturn( fileObjects ).when( hdfsDest ).findFiles( any( FileSelector.class ) );
    doReturn( true ).when( fs ).delete( root, true );
    doReturn( fileObjects.length ).when( source ).delete( any( AllFileSelector.class ) );
    doNothing().when( fs ).copyFromLocalFile( any( Path.class ), any( Path.class ) );
    doNothing().when( fs ).setPermission( any( Path.class ), any( FsPermission.class ) );
    doReturn( true ).when( fs ).setReplication( any( Path.class ), anyShort() );

    try {
      try {
        ch.stageForCache( source, fs, dest, true );

        List<String> files = ch.findFiles( hdfsDest, null );
        assertEquals( 12, files.size() );
      } finally {
        fs.delete( root, true );
      }
    } finally {
      source.delete( new AllFileSelector() );
    }
  }

  @Test
  public void stageForCache_missing_source() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    Path dest = new Path( "bin/test/bogus-destination" );
    FileObject bogusSource = KettleVFS.getFileObject( "bogus" );
    try {
      ch.stageForCache( bogusSource, fs, dest, true );
      fail( "expected exception when source does not exist" );
    } catch ( KettleFileException ex ) {
      assertEquals(
        BaseMessages.getString( DistributedCacheUtilImpl.class, "DistributedCacheUtil.SourceDoesNotExist",
          bogusSource ),
        ex.getMessage().trim() );
    }
  }

  @Test
  public void stageForCache_destination_no_overwrite() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );

    FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
    try {
      Path root = new Path( "bin/test/stageForCache_destination_exists" );
      Path dest = new Path( root, "dest" );

      fs.mkdirs( dest );
      assertTrue( fs.exists( dest ) );
      assertTrue( fs.getFileStatus( dest ).isDir() );
      try {
        ch.stageForCache( source, fs, dest, false );
      } catch ( KettleFileException ex ) {
        assertTrue( ex.getMessage(), ex.getMessage().contains( "Destination exists" ) );
      } finally {
        fs.delete( root, true );
      }
    } finally {
      source.delete( new AllFileSelector() );
    }
  }
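  /**
   * Verifies that each cached path is registered under both mapred.cache.files and mapred.job.classpath.files.
   * The mapred.create.symlink check is only asserted for shims that opt in via a system property.
   */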
  @Test
  public void addCachedFilesToClasspath() throws IOException {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();
    Configuration conf = new Configuration();

    List<Path> files = Arrays.asList( new Path( "a" ), new Path( "b" ), new Path( "c" ) );

    ch.addCachedFilesToClasspath( files, conf );

    // This check is not needed for each and every shim
    if ( "true".equals( System.getProperty( "org.pentaho.hadoop.shims.check.symlink", "false" ) ) ) {
      assertEquals( "yes", conf.get( "mapred.create.symlink" ) );
    }

    for ( Path file : files ) {
      assertTrue( conf.get( "mapred.cache.files" ).contains( file.toString() ) );
      assertTrue( conf.get( "mapred.job.classpath.files" ).contains( file.toString() ) );
    }
  }

  @Test
  public void installKettleEnvironment_missing_arguments() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

    try {
      ch.installKettleEnvironment( null, (org.pentaho.hadoop.shim.api.internal.fs.FileSystem) null, null, null, null,
        "" );
      fail( "Expected exception on missing archive" );
    } catch ( NullPointerException ex ) {
      assertEquals( "pmrArchive is required", ex.getMessage() );
    }

    try {
      ch.installKettleEnvironment( KettleVFS.getFileObject( "." ),
        (org.pentaho.hadoop.shim.api.internal.fs.FileSystem) null, null, null, null, "" );
      fail( "Expected exception on missing destination" );
    } catch ( NullPointerException ex ) {
      assertEquals( "destination is required", ex.getMessage() );
    }

    try {
      ch.installKettleEnvironment( KettleVFS.getFileObject( "." ),
        (org.pentaho.hadoop.shim.api.internal.fs.FileSystem) null, new PathProxy( "." ), null, null, "" );
      fail( "Expected exception on missing big data plugin folder" );
    } catch ( NullPointerException ex ) {
      assertEquals( "big data plugin required", ex.getMessage() );
    }
  }

  @Test( expected = IllegalArgumentException.class )
  public void stagePluginsForCache_no_folders() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();
    ch.stagePluginsForCache( DistributedCacheTestUtil.getLocalFileSystem( new Configuration() ),
      new Path( "bin/test/plugins-installation-dir" ), null );
  }

  @Test( expected = KettleFileException.class )
  public void stagePluginsForCache_invalid_folder() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();
    ch.stagePluginsForCache( DistributedCacheTestUtil.getLocalFileSystem( new Configuration() ),
      new Path( "bin/test/plugins-installation-dir" ), "bin/bogus-plugin-name" );
  }
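  /**
   * Verifies that findPluginFolder() resolves plugin directories relative to the plugin base folders property,
   * which is temporarily pointed at the project's root directory for the duration of this test.
   */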
  @Test
  public void findPluginFolder() throws Exception {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl();

    // Fake out the "plugins" directory to point at the project's root directory
    String originalValue = System.getProperty( Const.PLUGIN_BASE_FOLDERS_PROP );
    System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, KettleVFS.getFileObject( "." ).getURL().toURI().getPath() );

    assertTrue( "Should have found plugin dir: bin/", util.findPluginFolder( "bin" ).length > 0 );
    assertTrue( "Should be able to find nested plugin dir: bin/test/", util.findPluginFolder( "bin/test" ).length > 0 );

    assertTrue( "Should not have found plugin dir: org/", util.findPluginFolder( "org" ).length == 0 );

    System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, originalValue );
  }

  @Test
  public void addFilesToClassPath() throws IOException {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl();
    Path p1 = new Path( "/testing1" );
    Path p2 = new Path( "/testing2" );
    Configuration conf = new Configuration();
    util.addFileToClassPath( p1, conf );
    util.addFileToClassPath( p2, conf );
    assertEquals( "/testing1,/testing2", conf.get( "mapred.job.classpath.files" ) );
  }

  @Test
  public void addFilesToClassPath_custom_path_separator() throws IOException {
    DistributedCacheUtilImpl util = new DistributedCacheUtilImpl();
    Path p1 = new Path( "/testing1" );
    Path p2 = new Path( "/testing2" );
    Configuration conf = new Configuration();
    String originalValue = System.getProperty( "hadoop.cluster.path.separator", ":" );
    System.setProperty( "hadoop.cluster.path.separator", "J" );

    util.addFileToClassPath( p1, conf );
    util.addFileToClassPath( p2, conf );
    assertEquals( "/testing1J/testing2", conf.get( "mapred.job.classpath.files" ) );
    System.setProperty( "hadoop.cluster.path.separator", originalValue );
  }
}