org.kitesdk.data.DatasetNotFoundException Java Examples

The following examples show how to use org.kitesdk.data.DatasetNotFoundException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HBaseMetadataProvider.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the descriptor of an existing dataset.
 *
 * Only the default namespace is accepted; any other namespace is rejected
 * by the argument check before the lookup happens.
 *
 * @param namespace the requested namespace, must equal DEFAULT_NAMESPACE
 * @param name the dataset name, must not be null
 * @return a descriptor built from the raw entity schema held by the
 *         schema manager
 * @throws DatasetNotFoundException if {@code exists(namespace, name)} is false
 */
@Override
public DatasetDescriptor load(String namespace, String name) {
  Preconditions.checkArgument(DEFAULT_NAMESPACE.equals(namespace),
      "Non-default namespaces are not supported");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  if (exists(namespace, name)) {
    // the dataset name is mapped to a table name and an entity name
    final String table = getTableName(name);
    final String entity = getEntityName(name);
    return new DatasetDescriptor.Builder()
        .schemaLiteral(
            schemaManager.getEntitySchema(table, entity).getRawSchema())
        .build();
  }

  throw new DatasetNotFoundException("No such dataset: " + name);
}
 
Example #2
Source File: HiveAbstractMetadataProvider.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Decides which Hive namespace should be used for a table: the requested
 * one if {@code namespace.name} exists, otherwise the default namespace if
 * {@code default.name} exists and its location is acceptable.
 *
 * @param namespace the requested namespace
 * @param name the table name
 * @param location location the default table must match, or null to compare
 *                 against the location computed by
 *                 {@code expectedLocation(namespace, name)}
 * @return {@code namespace} if namespace.name exists; the default namespace
 *         if default.name exists and qualifies; {@code null} otherwise
 */
protected String resolveNamespace(String namespace, String name,
                                  @Nullable URI location) {
  if (getMetaStoreUtil().exists(namespace, name)) {
    return namespace;
  }

  try {
    DatasetDescriptor defaultTable = HiveUtils.descriptorForTable(
        conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name));

    // with no explicit location, compare against the computed one
    URI wanted = (location != null)
        ? location
        : expectedLocation(namespace, name);

    if (wanted == null
        || pathsEquivalent(wanted, defaultTable.getLocation())) {
      // table in the default db has the location that would have been used
      return URIBuilder.NAMESPACE_DEFAULT;
    }

    // a default table exists but lives somewhere else
    return null;

  } catch (DatasetNotFoundException notInDefault) {
    // nothing usable in the default namespace either
    return null;
  }
}
 
Example #3
Source File: FileSystemMetadataProvider.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * This method provides backward-compatibility for finding metadata.
 * <p>
 * This handles the case where an existing program is opening a
 * DatasetRepository by URI. For example, the DatasetSink and maven plugin do
 * this. In that case, the repository URI will directly contain a directory
 * named for the dataset with .metadata in it. This checks for the updated
 * scheme and falls back to the old scheme if the namespace is "default".
 *
 * @param namespace the requested namespace.
 * @param name the dataset name.
 * @return a Path to the correct metadata directory
 * @throws DatasetNotFoundException if neither location has metadata
 */
private Path find(String namespace, String name) {
  Path expectedPath = pathForMetadata(namespace, name);
  if (DEFAULT_NAMESPACE.equals(namespace)) {
    // when using the default namespace, the namespace may not be in the path
    try {
      checkExists(rootFileSystem, expectedPath);
      return expectedPath;
    } catch (DatasetNotFoundException e) {
      try {
        Path backwardCompatiblePath = new Path(rootDirectory, new Path(
            name.replace('.', Path.SEPARATOR_CHAR), METADATA_DIRECTORY));
        checkExists(rootFileSystem, backwardCompatiblePath);
        return backwardCompatiblePath;
      } catch (DatasetNotFoundException _) {
        throw e; // throw the original
      }
    }

  } else {
    // no need to check other locations
    checkExists(rootFileSystem, expectedPath);
    return expectedPath;
  }
}
 
Example #4
Source File: TestManagedExternalHandling.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises load, update and delete of a managed table through the
 * {@code external} repository and checks the results against the
 * {@code managed} repository.
 */
@Test
public void testExternalWithManaged() {
  final String ns = "default";
  final String table = "managed";

  HiveAbstractMetadataProvider provider = new HiveManagedMetadataProvider(
      new HiveConf());
  Assert.assertTrue(provider.isManaged(ns, table));

  // load through the external repository
  Dataset<GenericData.Record> viaExternal = external.load(ns, table);
  Assert.assertNotNull("Should open managed dataset with external", viaExternal);
  Assert.assertEquals("Should match managed dataset",
      managed.load(ns, table).getDescriptor(), viaExternal.getDescriptor());

  // update through the external repository
  DatasetDescriptor.Builder changes =
      new DatasetDescriptor.Builder(viaExternal.getDescriptor());
  DatasetDescriptor updatedDescriptor = changes
      .property("kite.writer.cache-size", "34")
      .schemaLiteral("\"string\"")
      .build();

  Dataset<GenericData.Record> afterUpdate =
      external.update(ns, table, updatedDescriptor);
  Assert.assertNotNull("Should update managed dataset with external",
      afterUpdate);
  Assert.assertEquals("Should see changes in managed dataset",
      managed.load(ns, table).getDescriptor(), afterUpdate.getDescriptor());

  // delete through the external repository, then confirm it is gone
  Assert.assertTrue("Should delete managed tables with external",
      external.delete(ns, table));

  final Runnable loadDeleted = new Runnable() {
    @Override
    public void run() {
      managed.load(ns, table);
    }
  };
  TestHelpers.assertThrows("Should delete managed table correctly",
      DatasetNotFoundException.class, loadDeleted);
}
 
Example #5
Source File: TestHBaseDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading an HBase dataset URI that names no existing dataset must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingDataset() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Dataset<Object> loaded = Datasets
          .<Object, Dataset<Object>>load("dataset:hbase:" + zk + "/nosuchdataset", Object.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: no such dataset",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #6
Source File: TestHBaseDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a dataset URI with the storage scheme "unknown" must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingRepository() {
  final Runnable loadUnknownScheme = new Runnable() {
    @Override
    public void run() {
      Dataset<Object> loaded = Datasets
          .<Object, Dataset<Object>>load("dataset:unknown:" + zk + "/test", Object.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: unknown storage scheme",
      DatasetNotFoundException.class, loadUnknownScheme);
}
 
Example #7
Source File: HiveAbstractMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the descriptor for a Hive table.
 *
 * @param namespace the requested namespace
 * @param name the table name
 * @return the descriptor built from the table found in the resolved namespace
 * @throws DatasetNotFoundException if {@code resolveNamespace} returns null
 */
@Override
public DatasetDescriptor load(String namespace, String name) {
  Compatibility.checkDatasetName(namespace, name);

  final String actualNamespace = resolveNamespace(namespace, name);
  if (actualNamespace == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  // the table is read from the resolved namespace, not the requested one
  return HiveUtils.descriptorForTable(
      conf, getMetaStoreUtil().getTable(actualNamespace, name));
}
 
Example #8
Source File: HiveAbstractMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the Hive table behind a dataset to match a new descriptor.
 *
 * The descriptor's schema is written through a SchemaManager rooted at
 * SCHEMA_DIRECTORY under the table's storage location, and the table is
 * altered using a copy of the descriptor that points at the written schema.
 *
 * @param namespace the requested namespace
 * @param name the table name
 * @param descriptor the descriptor to apply
 * @return the {@code descriptor} argument that was passed in
 * @throws DatasetNotFoundException if {@code resolveNamespace} returns null
 * @throws DatasetIOException if writing the schema fails with an IOException
 */
@Override
public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) {
  Compatibility.checkDatasetName(namespace, name);
  Compatibility.checkDescriptor(descriptor);

  final String actualNamespace = resolveNamespace(namespace, name);
  if (actualNamespace == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  Table hiveTable = getMetaStoreUtil().getTable(actualNamespace, name);

  Path tableLocation = new Path(hiveTable.getSd().getLocation());
  SchemaManager schemas =
      SchemaManager.create(conf, new Path(tableLocation, SCHEMA_DIRECTORY));

  DatasetDescriptor withSchemaUri;
  try {
    URI writtenSchema = schemas.writeSchema(descriptor.getSchema());
    withSchemaUri = new DatasetDescriptor.Builder(descriptor)
        .schemaUri(writtenSchema)
        .build();
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create schema", e);
  }

  HiveUtils.updateTableSchema(hiveTable, withSchemaUri);
  getMetaStoreUtil().alterTable(hiveTable);

  // NOTE(review): the caller's descriptor is returned, not the copy that
  // carries the schema URI -- confirm this is intentional
  return descriptor;
}
 
Example #9
Source File: HiveAbstractDatasetRepository.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Deletes a dataset.
 *
 * @param namespace the dataset namespace
 * @param name the dataset name
 * @return the result of the underlying delete, or {@code false} when a
 *         DatasetNotFoundException is raised along the way
 */
@Override
public boolean delete(String namespace, String name) {
  try {
    // managed: delete via the metadata provider only, which avoids calling
    // fsRepository.delete (that call deletes the data path)
    return isManaged(namespace, name)
        ? getMetadataProvider().delete(namespace, name)
        : super.delete(namespace, name);
  } catch (DatasetNotFoundException e) {
    return false;
  }
}
 
Example #10
Source File: HiveAbstractDatasetRepository.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Moves a dataset to the trash.
 *
 * @param namespace the dataset namespace
 * @param name the dataset name
 * @return the result of the underlying operation, or {@code false} when a
 *         DatasetNotFoundException is raised along the way
 */
@Override
public boolean moveToTrash(String namespace, String name) {
  try {
    // managed: avoids calling fsRepository.delete, which deletes the data
    // path; managed tables by default go to trash if it is enabled, so a
    // plain metadata delete is used
    return isManaged(namespace, name)
        ? getMetadataProvider().delete(namespace, name)
        : super.moveToTrash(namespace, name);
  } catch (DatasetNotFoundException e) {
    return false;
  }
}
 
Example #11
Source File: TestManagedExternalHandling.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises load, update and delete of an external table through the
 * {@code managed} repository and checks the results against the
 * {@code external} repository.
 */
@Test
public void testManagedWithExternal() {
  final String ns = "ns";
  final String table = "external";

  HiveAbstractMetadataProvider provider = new HiveManagedMetadataProvider(
      new HiveConf());
  Assert.assertTrue(provider.isExternal(ns, table));

  // load through the managed repository
  Dataset<GenericData.Record> viaManaged = managed.load(ns, table);
  Assert.assertNotNull("Should open external dataset with managed", viaManaged);
  Assert.assertEquals("Should match external dataset",
      external.load(ns, table).getDescriptor(), viaManaged.getDescriptor());

  // update through the managed repository
  DatasetDescriptor.Builder changes =
      new DatasetDescriptor.Builder(viaManaged.getDescriptor());
  DatasetDescriptor updatedDescriptor = changes
      .property("kite.writer.cache-size", "34")
      .schemaLiteral("\"string\"")
      .build();

  Dataset<GenericData.Record> afterUpdate =
      managed.update(ns, table, updatedDescriptor);
  Assert.assertNotNull("Should update external dataset with managed",
      afterUpdate);
  Assert.assertEquals("Should see changes in external dataset",
      external.load(ns, table).getDescriptor(), afterUpdate.getDescriptor());

  // delete through the managed repository, then confirm it is gone
  Assert.assertTrue("Should delete external tables with managed",
      managed.delete(ns, table));

  final Runnable loadDeleted = new Runnable() {
    @Override
    public void run() {
      external.load(ns, table);
    }
  };
  TestHelpers.assertThrows("Should delete external table correctly",
      DatasetNotFoundException.class, loadDeleted);
}
 
Example #12
Source File: MemoryMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the stored descriptor of an existing dataset.
 *
 * @param namespace the dataset namespace, must not be null
 * @param name the dataset name, must not be null
 * @param descriptor the new descriptor, must not be null
 * @return the {@code descriptor} that was stored
 * @throws DatasetNotFoundException if {@code exists(namespace, name)} is false
 */
@Override
public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Name cannot be null");
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

  if (exists(namespace, name)) {
    // overwrite the entry held under this namespace
    descriptors.get(namespace).put(name, descriptor);
    return descriptor;
  }

  throw new DatasetNotFoundException("Missing dataset:" + name);
}
 
Example #13
Source File: TestExternalBackwardCompatibility.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Expects the URI ending in "test" to load and the URI ending in "test2"
 * to raise DatasetNotFoundException.
 */
@Test
public void testLoadChecksDefaultNamespace() {
  Assert.assertNotNull("Should find dataset by checking default db",
      Datasets.load("dataset:hive:/tmp/datasets/test"));

  final Runnable loadAbsent = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:hive:/tmp/datasets/test2");
    }
  };
  TestHelpers.assertThrows("Should not load dataset (there isn't one)",
      DatasetNotFoundException.class, loadAbsent);
}
 
Example #14
Source File: TestHiveDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a Hive dataset URI whose namespace and dataset do not exist must
 * raise DatasetNotFoundException.
 */
@Test
public void testMissingNamespace() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:hive:/tmp/data/nosuchnamespace/nosuchdataset?" + hdfsQueryArgs, Object.class);
    }
  };

  TestHelpers.assertThrows("Should not find namespace: no such namespace",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #15
Source File: TestHiveDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a Hive dataset URI that names no existing dataset must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingDataset() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:hive:/tmp/data/default/nosuchdataset?" + hdfsQueryArgs, Object.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: no such dataset",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #16
Source File: TestHiveDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading "dataset:hive:/test" must raise DatasetNotFoundException; the
 * assertion message states that the URI should not match a pattern.
 */
@Test
public void testExternalNotEnoughPathComponents() {
  final Runnable loadShortPath = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:hive:/test", Object.class);
    }
  };

  TestHelpers.assertThrows("Should not match URI pattern",
      DatasetNotFoundException.class, loadShortPath);
}
 
Example #17
Source File: TestHiveDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a dataset URI with the storage scheme "unknown" must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingRepository() {
  final Runnable loadUnknownScheme = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:unknown://" + hdfsAuth + "/tmp/data/test", Object.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: unknown storage scheme",
      DatasetNotFoundException.class, loadUnknownScheme);
}
 
Example #18
Source File: TestHiveDatasetURIsWithDefaultConfiguration.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a Hive dataset URI that names no existing dataset must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingDataset() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:hive:/tmp/data/ns/nosuchdataset");
    }
  };

  TestHelpers.assertThrows("Should not find dataset: no such dataset",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #19
Source File: TestHiveDatasetURIsWithDefaultConfiguration.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a dataset URI with the storage scheme "unknown" must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingRepository() {
  final Runnable loadUnknownScheme = new Runnable() {
    @Override
    public void run() {
      Datasets.load("dataset:unknown:/tmp/data/ns/test");
    }
  };

  TestHelpers.assertThrows("Should not find dataset: unknown storage scheme",
      DatasetNotFoundException.class, loadUnknownScheme);
}
 
Example #20
Source File: TestLocalDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a file dataset URI under a namespace that does not exist must
 * raise DatasetNotFoundException.
 */
@Test
public void testMissingNamespace() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Dataset<Record> loaded = Datasets.<Record, Dataset<Record>>
          load("dataset:file:/tmp/data/nosuchnamespace/test", Record.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: no such namespace",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #21
Source File: TestCSVImportCommand.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Running the command with a target that is not a dataset must raise
 * DatasetNotFoundException, and the only console interaction must be the
 * trace message containing the repository URI.
 */
@Test
public void testMissingDataset() throws Exception {
  command.targets = Lists.newArrayList(sample, "notadataset");
  // parameterized Callable instead of the raw type, to avoid a raw-type warning
  TestHelpers.assertThrows("Should complain about missing dataset",
      DatasetNotFoundException.class, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
          command.run();
          return null;
        }
      }
  );
  verify(console).trace(contains("repo:file:target/data"));
  verifyNoMoreInteractions(console);
}
 
Example #22
Source File: TestShowRecordsCommand.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Running the command against a name that is not a dataset must raise
 * DatasetNotFoundException, and the only console interaction must be the
 * trace message containing the repository URI.
 */
@Test
public void testMissingDataset() throws Exception {
  command.datasets = Lists.newArrayList("notadataset");
  // parameterized Callable instead of the raw type, to avoid a raw-type warning
  TestHelpers.assertThrows("Should complain about missing dataset",
      DatasetNotFoundException.class, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
          command.run();
          return null;
        }
      }
  );
  verify(console).trace(contains("repo:file:target/data"));
  verifyNoMoreInteractions(console);
}
 
Example #23
Source File: FileSystemMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
private boolean deleteWithTrash(String namespace, String name, boolean useTrash){
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Deleting remove metadata name: {}", name);

  Path metadataDirectory;
  try {
    metadataDirectory = find(namespace, name);
  } catch (DatasetNotFoundException _) {
    return false;
  }

  try {
    if (rootFileSystem.exists(metadataDirectory)) {
      if(useTrash){
        if (Trash.moveToAppropriateTrash(rootFileSystem, metadataDirectory, conf)) {
          return true;
        } else {
          throw new IOException("Failed to trash metadata directory:"
                  + metadataDirectory);
        }
      }else {
        if (rootFileSystem.delete(metadataDirectory, true)) {
          return true;
        } else {
          throw new IOException("Failed to delete metadata directory:"
                  + metadataDirectory);
        }
      }
    } else {
      return false;
    }
  } catch (IOException e) {
    throw new DatasetIOException(
            "Unable to find or remove metadata directory:" + metadataDirectory +
                    " for dataset:" + name, e);
  }
}
 
Example #24
Source File: FileSystemMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether metadata can be found for a dataset.
 *
 * @param namespace the dataset namespace, must not be null
 * @param name the dataset name, must not be null
 * @return {@code true} if {@code find} locates the metadata, {@code false}
 *         if it raises DatasetNotFoundException
 */
@Override
public boolean exists(String namespace, String name) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  try {
    // only the outcome matters; the returned path is not needed
    find(namespace, name);
  } catch (DatasetNotFoundException e) {
    return false;
  }
  return true;
}
 
Example #25
Source File: FileSystemMetadataProvider.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Precondition-style static check that a descriptor location exists.
 *
 * @param fs        the FileSystem to query
 * @param location  the Path expected to exist
 * @throws org.kitesdk.data.DatasetNotFoundException if {@code location} does not exist
 * @throws org.kitesdk.data.DatasetIOException  if the existence check throws an IOException
 */
private static void checkExists(FileSystem fs, Path location) {
  boolean present;
  try {
    present = fs.exists(location);
  } catch (IOException ex) {
    throw new DatasetIOException(
        "Cannot access descriptor location: " + location, ex);
  }

  if (!present) {
    throw new DatasetNotFoundException(
        "Descriptor location does not exist: " + location);
  }
}
 
Example #26
Source File: Registration.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the repository that handles a dataset URI by trying each pattern
 * registered in DATASET_PATTERNS.
 *
 * @param datasetUri the URI to match against the registered patterns
 * @return the repository built from the first pattern that matches, paired
 *         with the match produced by that pattern
 * @throws DatasetNotFoundException if no pattern matches; the message lists
 *         the known patterns for the URI's scheme, or suggests checking the
 *         classpath when no pattern shares that scheme
 */
public static Pair<DatasetRepository, Map<String, String>>
    lookupDatasetUri(URI datasetUri) {
  String scheme = datasetUri.getScheme();
  List<String> sameScheme = Lists.newArrayList();

  for (URIPattern candidate : DATASET_PATTERNS.keySet()) {
    Map<String, String> options = candidate.getMatch(datasetUri);
    if (options != null) {
      return Pair.of(
          DATASET_PATTERNS.get(candidate).getFromOptions(options), options);
    }
    // remember near misses so the error can list them
    if (candidate.getScheme() != null && candidate.getScheme().equals(scheme)) {
      sameScheme.add(candidate.getPatternString());
    }
  }

  StringBuilder message = new StringBuilder();
  message.append("Unknown dataset URI pattern: dataset:").append(datasetUri);
  if (sameScheme.isEmpty()) {
    // no known patterns for the scheme, maybe jars are missing
    message.append("\nCheck that JARs for ").append(scheme)
        .append(" datasets are on the classpath");
  } else {
    // show the known patterns in case it's a simple error
    message.append("\nKnown patterns for ").append(scheme)
        .append(":\n  dataset:")
        .append(Joiner.on("\n  dataset:").join(sameScheme));
  }

  throw new DatasetNotFoundException(message.toString());
}
 
Example #27
Source File: TestLocalDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a file dataset URI that names no existing dataset must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingDataset() {
  final Runnable loadMissing = new Runnable() {
    @Override
    public void run() {
      Dataset<Record> loaded = Datasets.<Record, Dataset<Record>>
          load("dataset:file:/tmp/data/ns/nosuchdataset", Record.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: no such dataset",
      DatasetNotFoundException.class, loadMissing);
}
 
Example #28
Source File: HdfsOdpsImportJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the job's input format according to the input file type.
 *
 * For an HCatalog job the HCat export input format is configured and the
 * method returns immediately. For Avro and Parquet input the generic-record
 * export input format is configured; Parquet additionally reads from the
 * Kite dataset at the qualified input path. In every non-HCat case the
 * input path is finally added to the job.
 *
 * @param job the job being configured
 * @param tableName the table name, passed through to the helpers
 * @param tableClassName passed through to the superclass implementation
 * @param splitByCol passed through to the superclass implementation
 * @throws ClassNotFoundException declared by the overridden method
 * @throws IOException if file system access fails
 */
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
    String splitByCol) throws ClassNotFoundException, IOException {
  fileType = getInputFileType();

  super.configureInputFormat(job, tableName, tableClassName, splitByCol);

  if (isHCatJob) {
    SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(),
        tableName, job.getConfiguration());
    return;
  } else if (fileType == FileType.AVRO_DATA_FILE) {
    LOG.debug("Configuring for Avro export");
    configureGenericRecordExportInputFormat(job, tableName);
  } else if (fileType == FileType.PARQUET_FILE) {
    LOG.debug("Configuring for Parquet export");
    configureGenericRecordExportInputFormat(job, tableName);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    String uri = "dataset:" + fs.makeQualified(getInputPath());
    try {
      DatasetKeyInputFormat.configure(job).readFrom(uri);
    } catch (DatasetNotFoundException e) {
      // No dataset at the URI: create one from a descriptor obtained from
      // the Parquet file and read from that. Handling this in the catch
      // removes the flag variable and redundant null/instanceof check.
      LOG.warn(e.getMessage(), e);
      LOG.warn("Trying to get data schema from parquet file directly");
      DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri);
      Dataset<GenericRecord> dataset =
          Datasets.create(uri, descriptor, GenericRecord.class);
      DatasetKeyInputFormat.configure(job).readFrom(dataset);
    }
  }

  FileInputFormat.addInputPath(job, getInputPath());
}
 
Example #29
Source File: TestLocalDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading "dataset:file:/test" must raise DatasetNotFoundException; the
 * assertion message states that the URI should not match a pattern.
 */
@Test
public void testNotEnoughPathComponents() {
  final Runnable loadShortPath = new Runnable() {
    @Override
    public void run() {
      Dataset<Record> loaded = Datasets.<Record, Dataset<Record>>
          load("dataset:file:/test", Record.class);
    }
  };

  TestHelpers.assertThrows("Should not match URI pattern",
      DatasetNotFoundException.class, loadShortPath);
}
 
Example #30
Source File: TestLocalDatasetURIs.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Loading a dataset URI with the storage scheme "unknown" must raise
 * DatasetNotFoundException.
 */
@Test
public void testMissingRepository() {
  final Runnable loadUnknownScheme = new Runnable() {
    @Override
    public void run() {
      Dataset<Record> loaded = Datasets.<Record, Dataset<Record>>
          load("dataset:unknown:/tmp/data/test", Record.class);
    }
  };

  TestHelpers.assertThrows("Should not find dataset: unknown storage scheme",
      DatasetNotFoundException.class, loadUnknownScheme);
}