org.apache.accumulo.core.client.ClientConfiguration Java Examples

The following examples show how to use org.apache.accumulo.core.client.ClientConfiguration. Each example is drawn from an open source project; the source file, project, and license are noted above the code.
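Before the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: build a ClientConfiguration fluently, wrap it in a ZooKeeperInstance, and obtain a Connector. The instance name, ZooKeeper hosts, username, and password below are placeholder values, not taken from any of the projects that follow.

import org.apache.accumulo.core.client.ClientConfiguration;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.ZooKeeperInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;

public class ClientConfigurationSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder connection settings; substitute the values for your cluster.
        String instanceName = "myInstance";
        String zookeepers = "localhost:2181";

        // Fluent construction, as used by most of the examples below.
        ClientConfiguration clientConfig = ClientConfiguration.loadDefault()
                .withInstance(instanceName)
                .withZkHosts(zookeepers);

        // The configuration is typically handed to a ZooKeeperInstance (or to the
        // MapReduce input/output formats via setZooKeeperInstance) to get a Connector.
        ZooKeeperInstance instance = new ZooKeeperInstance(clientConfig);
        Connector connector = instance.getConnector("user", new PasswordToken("password"));
        System.out.println("Connected as " + connector.whoami());
    }
}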
Example #1
Source File: IngestMetricsSummaryLoader.java    From datawave with Apache License 2.0
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    
    Configuration conf = context.getConfiguration();
    String user = conf.get(MetricsConfig.USER);
    String password = conf.get(MetricsConfig.PASS);
    String instance = conf.get(MetricsConfig.INSTANCE);
    String zookeepers = conf.get(MetricsConfig.ZOOKEEPERS);
    
    useHourlyPrecision = HourlyPrecisionHelper.checkForHourlyPrecisionOption(context.getConfiguration(), log);
    
    try {
        ZooKeeperInstance inst = new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
        Connector con = inst.getConnector(user, new PasswordToken(password));
        ingestScanner = con.createScanner(conf.get(MetricsConfig.INGEST_TABLE, MetricsConfig.DEFAULT_INGEST_TABLE), Authorizations.EMPTY);
    } catch (TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
}
 
Example #2
Source File: TabletMetadataConsole.java    From timely with Apache License 2.0
public static void main(String[] args) throws Exception {
    try (ConfigurableApplicationContext ctx = new SpringApplicationBuilder(SpringBootstrap.class)
            .bannerMode(Banner.Mode.OFF).web(WebApplicationType.NONE).run(args)) {
        Configuration conf = ctx.getBean(Configuration.class);
        HashMap<String, String> apacheConf = new HashMap<>();
        Accumulo accumuloConf = conf.getAccumulo();
        apacheConf.put("instance.name", accumuloConf.getInstanceName());
        apacheConf.put("instance.zookeeper.host", accumuloConf.getZookeepers());
        ClientConfiguration aconf = ClientConfiguration.fromMap(apacheConf);
        Instance instance = new ZooKeeperInstance(aconf);
        Connector con = instance.getConnector(accumuloConf.getUsername(),
                new PasswordToken(accumuloConf.getPassword()));

        TabletMetadataQuery query = new TabletMetadataQuery(con, conf.getMetricsTable());
        TabletMetadataView view = query.run();

        System.out.println(view.toText(TimeUnit.DAYS));
    }
}
 
Example #3
Source File: GetMetricTableSplitPoints.java    From timely with Apache License 2.0
public static void main(String[] args) throws Exception {

    try (ConfigurableApplicationContext ctx = new SpringApplicationBuilder(SpringBootstrap.class)
            .bannerMode(Mode.OFF).web(WebApplicationType.NONE).run(args)) {
        Configuration conf = ctx.getBean(Configuration.class);

        final Map<String, String> properties = new HashMap<>();
        Accumulo accumuloConf = conf.getAccumulo();
        properties.put("instance.name", accumuloConf.getInstanceName());
        properties.put("instance.zookeeper.host", accumuloConf.getZookeepers());
        final ClientConfiguration aconf = ClientConfiguration.fromMap(properties);
        final Instance instance = new ZooKeeperInstance(aconf);
        Connector con = instance.getConnector(accumuloConf.getUsername(),
                new PasswordToken(accumuloConf.getPassword()));
        Scanner s = con.createScanner(conf.getMetaTable(),
                con.securityOperations().getUserAuthorizations(con.whoami()));
        try {
            s.setRange(new Range(Meta.METRIC_PREFIX, true, Meta.TAG_PREFIX, false));
            for (Entry<Key, Value> e : s) {
                System.out.println(e.getKey().getRow().toString().substring(Meta.METRIC_PREFIX.length()));
            }
        } finally {
            s.close();
        }
    }
}
 
Example #4
Source File: DataStore.java    From qonduit with Apache License 2.0
public DataStore(Configuration conf) throws QonduitException {

    try {
        final HashMap<String, String> apacheConf = new HashMap<>();
        Configuration.Accumulo accumuloConf = conf.getAccumulo();
        apacheConf.put("instance.name", accumuloConf.getInstanceName());
        apacheConf.put("instance.zookeeper.host", accumuloConf.getZookeepers());
        final ClientConfiguration aconf = ClientConfiguration.fromMap(apacheConf);
        final Instance instance = new ZooKeeperInstance(aconf);
        connector = instance.getConnector(accumuloConf.getUsername(),
                new PasswordToken(accumuloConf.getPassword()));
    } catch (Exception e) {
        throw new QonduitException(HttpResponseStatus.INTERNAL_SERVER_ERROR.code(), "Error creating DataStoreImpl",
                e.getMessage(), e);
    }
}
 
Example #5
Source File: CBMutationOutputFormatterTest.java    From datawave with Apache License 2.0
@Test
public void testSetZooKeeperInstance() {
    
    CBMutationOutputFormatterTest.logger.info("testSetZooKeeperInstance called...");
    
    try {
        
        Job job = createMockJob();
        String instanceName = "localhost";
        String zooKeepers = "zookeeper";
        
        CBMutationOutputFormatter.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zooKeepers));
        
        Assert.assertTrue("CBMutationOutputFormatter#setZooKeeperInstance failed to set 'Type'",
                        CBMutationOutputFormatterTest.mockedConfiguration.containsValue("ZooKeeperInstance"));
        Assert.assertTrue("CBMutationOutputFormatter#setZooKeeperInstance failed to set 'Type'",
                        this.wasPropertySet(AccumuloOutputFormat.class.getSimpleName(), "ZooKeeperInstance"));
        
    } finally {
        
        CBMutationOutputFormatterTest.logger.info("testSetZooKeeperInstance completed.");
    }
}
 
Example #6
Source File: AbstractAccumuloMRTool.java    From rya with Apache License 2.0
/**
 * Sets up Accumulo input for a job: the job receives
 * ({@link org.apache.accumulo.core.data.Key},
 * {@link org.apache.accumulo.core.data.Value}) pairs from the table
 * specified by the configuration (using
 * {@link MRUtils#TABLE_PREFIX_PROPERTY} and
 * {@link MRUtils#TABLE_LAYOUT_PROP}).
 * @param   job     MapReduce Job to configure
 * @throws  AccumuloSecurityException if connecting to Accumulo with the
 *          given username and password fails.
 */
protected void setupAccumuloInput(Job job) throws AccumuloSecurityException {
    // set up accumulo input
    if (!hdfsInput) {
        job.setInputFormatClass(AccumuloInputFormat.class);
    } else {
        job.setInputFormatClass(AccumuloHDFSFileInputFormat.class);
    }
    AccumuloInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(rdfTableLayout, tablePrefix);
    AccumuloInputFormat.setInputTableName(job, tableName);
    AccumuloInputFormat.setScanAuthorizations(job, authorizations);
    if (mock) {
        AccumuloInputFormat.setMockInstance(job, instance);
    } else {
        ClientConfiguration clientConfig = ClientConfiguration.loadDefault()
                .withInstance(instance).withZkHosts(zk);
        AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);
    }
    if (ttl != null) {
        IteratorSetting setting = new IteratorSetting(1, "fi", AgeOffFilter.class.getName());
        AgeOffFilter.setTTL(setting, Long.valueOf(ttl));
        AccumuloInputFormat.addIterator(job, setting);
    }
}
 
Example #7
Source File: AbstractAccumuloMRTool.java    From rya with Apache License 2.0
/**
 * Sets up Rya output for a job: allows the job to write
 * {@link RyaStatementWritable} data, which will in turn be input into the
 * configured Rya instance. To perform secondary indexing, use the
 * configuration variables in {@link ConfigUtils}.
 * @param   job Job to configure
 * @throws  AccumuloSecurityException if connecting to Accumulo with the
 *          given username and password fails
 */
protected void setupRyaOutput(Job job) throws AccumuloSecurityException {
    job.setOutputFormatClass(RyaOutputFormat.class);
    job.setOutputValueClass(RyaStatementWritable.class);
    // Specify default visibility of output rows, if given
    RyaOutputFormat.setDefaultVisibility(job, conf.get(MRUtils.AC_CV_PROP));
    // Specify named graph, if given
    RyaOutputFormat.setDefaultContext(job, conf.get(MRUtils.NAMED_GRAPH_PROP));
    // Set the output prefix
    RyaOutputFormat.setTablePrefix(job, tablePrefix);
    // Determine which indexers to use based on the config
    RyaOutputFormat.setFreeTextEnabled(job,  ConfigUtils.getUseFreeText(conf));
    RyaOutputFormat.setTemporalEnabled(job,  ConfigUtils.getUseTemporal(conf));
    RyaOutputFormat.setEntityEnabled(job,  ConfigUtils.getUseEntity(conf));
    // Configure the Accumulo connection
    AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, tablePrefix + RdfCloudTripleStoreConstants.TBL_SPO_SUFFIX);
    if (mock) {
        RyaOutputFormat.setMockInstance(job, instance);
    } else {
        ClientConfiguration clientConfig = ClientConfiguration.loadDefault()
                .withInstance(instance).withZkHosts(zk);
        AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
    }
}
 
Example #8
Source File: MergeTool.java    From rya with Apache License 2.0
@Override
protected void setupAccumuloInput(final Job job) throws AccumuloSecurityException {
    // set up accumulo input
    if (!hdfsInput) {
        job.setInputFormatClass(AccumuloInputFormat.class);
    } else {
        job.setInputFormatClass(AccumuloHDFSFileInputFormat.class);
    }
    AbstractInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    InputFormatBase.setInputTableName(job, RdfCloudTripleStoreUtils.layoutPrefixToTable(rdfTableLayout, tablePrefix));
    AbstractInputFormat.setScanAuthorizations(job, authorizations);
    if (!mock) {
        AbstractInputFormat.setZooKeeperInstance(job, new ClientConfiguration().withInstance(instance).withZkHosts(zk));
    } else {
        AbstractInputFormat.setMockInstance(job, instance);
    }
    if (ttl != null) {
        final IteratorSetting setting = new IteratorSetting(1, "fi", AgeOffFilter.class);
        AgeOffFilter.setTTL(setting, Long.valueOf(ttl));
        InputFormatBase.addIterator(job, setting);
    }
    for (final IteratorSetting iteratorSetting : AccumuloRyaUtils.COMMON_REG_EX_FILTER_SETTINGS) {
        InputFormatBase.addIterator(job, iteratorSetting);
    }
}
 
Example #9
Source File: AbstractAccumuloMRTool.java    From rya with Apache License 2.0
/**
 * Sets up Accumulo output for a job: allows the job to write (String,
 * Mutation) pairs, where the Mutation will be written to the table named by
 * the String.
 * @param   job Job to configure
 * @param   outputTable Default table to send output to
 * @throws  AccumuloSecurityException if connecting to Accumulo with the
 *          given username and password fails
 */
protected void setupAccumuloOutput(Job job, String outputTable) throws AccumuloSecurityException {
    AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, outputTable);
    if (mock) {
        AccumuloOutputFormat.setMockInstance(job, instance);
    } else {
        ClientConfiguration clientConfig = ClientConfiguration.loadDefault()
                .withInstance(instance).withZkHosts(zk);
        AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
    }
    job.setOutputFormatClass(AccumuloOutputFormat.class);
}
 
Example #10
Source File: CopyTool.java    From rya with Apache License 2.0
/**
 * Set up job to use AccumuloMultiTableInput format, using the tables/ranges given by a ruleset.
 * @param job The Job to configure
 * @param rules The ruleset mapping a query to the appropriate tables and ranges
 */
protected void setupMultiTableInputFormat(final Job job, final AccumuloQueryRuleset rules) throws AccumuloSecurityException {
    AbstractInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    AbstractInputFormat.setScanAuthorizations(job, authorizations);
    if (!mock) {
        AbstractInputFormat.setZooKeeperInstance(job, new ClientConfiguration().withInstance(instance).withZkHosts(zk));
    } else {
        AbstractInputFormat.setMockInstance(job, instance);
    }
    final Map<String, InputTableConfig> configs = rules.getInputConfigs();
    // Add any relevant iterator settings
    final List<IteratorSetting> additionalSettings = new LinkedList<>(AccumuloRyaUtils.COMMON_REG_EX_FILTER_SETTINGS);
    if (ttl != null) {
        final IteratorSetting ttlSetting = new IteratorSetting(1, "fi", AgeOffFilter.class);
        AgeOffFilter.setTTL(ttlSetting, Long.valueOf(ttl));
        additionalSettings.add(ttlSetting);
    }
    if (startTime != null) {
        final IteratorSetting startTimeSetting = getStartTimeSetting(startTime);
        additionalSettings.add(startTimeSetting);
    }
    for (final Map.Entry<String, InputTableConfig> entry : configs.entrySet()) {
        final List<IteratorSetting> iterators = entry.getValue().getIterators();
        iterators.addAll(additionalSettings);
        entry.getValue().setIterators(iterators);
    }
    // Set the input format
    AccumuloMultiTableInputFormat.setInputTableConfigs(job, configs);
    job.setInputFormatClass(AccumuloMultiTableInputFormat.class);
}
 
Example #11
Source File: MultiRFileOutputFormatter.java    From datawave with Apache License 2.0
protected void setTableIdsAndConfigs() throws IOException {
    ZooKeeperInstance instance = new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(conf.get(INSTANCE_NAME))
                    .withZkHosts(conf.get(ZOOKEEPERS)));
    Connector connector = null;
    tableConfigs = new HashMap<>();
    Iterable<String> localityGroupTables = Splitter.on(",").split(conf.get(CONFIGURE_LOCALITY_GROUPS, ""));
    try {
        connector = instance.getConnector(conf.get(USERNAME), new PasswordToken(Base64.decodeBase64(conf.get(PASSWORD))));
        
        tableIds = connector.tableOperations().tableIdMap();
        Set<String> compressionTableBlackList = getCompressionTableBlackList(conf);
        String compressionType = getCompressionType(conf);
        for (String tableName : tableIds.keySet()) {
            ConfigurationCopy tableConfig = new ConfigurationCopy(connector.tableOperations().getProperties(tableName));
            tableConfig.set(Property.TABLE_FILE_COMPRESSION_TYPE.getKey(), (compressionTableBlackList.contains(tableName) ? Compression.COMPRESSION_NONE
                            : compressionType));
            if (Iterables.contains(localityGroupTables, tableName)) {
                Map<String,Set<Text>> localityGroups = connector.tableOperations().getLocalityGroups(tableName);
                // pull the locality groups for this table.
                Map<Text,String> cftlg = Maps.newHashMap();
                Map<String,Set<ByteSequence>> lgtcf = Maps.newHashMap();
                for (Entry<String,Set<Text>> locs : localityGroups.entrySet()) {
                    lgtcf.put(locs.getKey(), new HashSet<>());
                    for (Text loc : locs.getValue()) {
                        cftlg.put(loc, locs.getKey());
                        lgtcf.get(locs.getKey()).add(new ArrayByteSequence(loc.getBytes()));
                    }
                }
                columnFamilyToLocalityGroup.put(tableName, cftlg);
                localityGroupToColumnFamilies.put(tableName, lgtcf);
            }
            tableConfigs.put(tableName, tableConfig);
            
        }
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException e) {
        throw new IOException("Unable to get configuration.  Please call MultiRFileOutput.setAccumuloConfiguration with the proper credentials", e);
    }
}
 
Example #12
Source File: AccumuloConfiguration.java    From cognition with Apache License 2.0
public AccumuloConfiguration(Instance instance, String accumuloUser, String accumuloPassword,
                             boolean isMock) throws AccumuloSecurityException, IOException {
  // NOTE: new Job(new Configuration()) does not work in the Scala shell due to the toString method's implementation;
  // to get it to work in Scala, override the toString method.
  
  //initialize fields, these are needed for lazy initialization of connector
  this.zkInstance = instance;
  this.accumuloUser = accumuloUser;
  this.accumuloPassword = accumuloPassword;
  
  this.job = new Job(new Configuration());
  AbstractInputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));
  AccumuloOutputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));

  AbstractInputFormat.setScanAuthorizations(job, new Authorizations());

  if (isMock) {
    AbstractInputFormat.setMockInstance(job, instance.getInstanceName());
    AccumuloOutputFormat.setMockInstance(job, instance.getInstanceName());
  } else {

    this.clientConfig = new ClientConfiguration();
    this.clientConfig.withInstance(instance.getInstanceName());
    this.clientConfig.withZkHosts(instance.getZooKeepers());

    AbstractInputFormat.setZooKeeperInstance(job, clientConfig);
    AccumuloOutputFormat.setZooKeeperInstance(job, this.clientConfig);
  }
}
 
Example #13
Source File: AccumuloGraphConfiguration.java    From vertexium with Apache License 2.0
@SuppressWarnings("unchecked")
public ClientConfiguration getClientConfiguration() {
    ClientConfiguration config = ClientConfiguration.create()
        .withInstance(this.getAccumuloInstanceName())
        .withZkHosts(this.getZookeeperServers());
    for (Map.Entry<String, String> entry : getClientConfigurationProperties().entrySet()) {
        config.setProperty(entry.getKey(), entry.getValue());
    }
    return config;
}
 
Example #14
Source File: PathCalculator.java    From OSTMap with Apache License 2.0
/**
 * makes accumulo input accessible by flink DataSet api
 * @param env
 * @return
 * @throws IOException
 * @throws AccumuloSecurityException
 */
// TODO make private after testing
public DataSet<Tuple2<Key,Value>> getDataFromAccumulo(ExecutionEnvironment env) throws IOException, AccumuloSecurityException {
    job = Job.getInstance(new Configuration(), "pathCalculationJob");
    AccumuloInputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("standard"));
    ClientConfiguration clientConfig = new ClientConfiguration();
    clientConfig.withInstance(accumuloInstanceName);
    clientConfig.withZkHosts(accumuloZookeeper);
    AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);
    AccumuloInputFormat.setInputTableName(job, inTable);
    return env.createHadoopInput(new AccumuloInputFormat(),Key.class,Value.class, job);
}
 
Example #15
Source File: MetricsDailySummaryReducer.java    From datawave with Apache License 2.0
public static void configureJob(Job job, int numDays, String instance, String zookeepers, String userName, String password, String outputTable)
                throws AccumuloSecurityException {
    job.setNumReduceTasks(Math.min(numDays, 100)); // Cap the number of reducers at 100, just in case we have a large day range (shouldn't really happen
                                                   // though)
    job.setReducerClass(MetricsDailySummaryReducer.class);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
    AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(password));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, outputTable);
}
 
Example #16
Source File: Calculator.java    From OSTMap with Apache License 2.0
/**
 * makes accumulo input accessible by flink DataSet api
 * @param env
 * @return
 * @throws IOException
 * @throws AccumuloSecurityException
 */
// TODO make private after testing
public DataSet<Tuple2<Key,Value>> getDataFromAccumulo(ExecutionEnvironment env) throws IOException, AccumuloSecurityException {
    job = Job.getInstance(new Configuration(), "areaCalculationJob");
    AccumuloInputFormat.setConnectorInfo(job, accumuloUser, new PasswordToken(accumuloPassword));
    AccumuloInputFormat.setScanAuthorizations(job, new Authorizations("standard"));
    ClientConfiguration clientConfig = new ClientConfiguration();
    clientConfig.withInstance(accumuloInstanceName);
    clientConfig.withZkHosts(accumuloZookeeper);
    AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);
    AccumuloInputFormat.setInputTableName(job, inTable);
    return env.createHadoopInput(new AccumuloInputFormat(),Key.class,Value.class, job);
}
 
Example #17
Source File: GraphXGraphGenerator.java    From rya with Apache License 2.0
public RDD<Tuple2<Object, Edge>> getEdgeRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException{
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);

    Job job = Job.getInstance(conf, sc.appName());

    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);

    RyaInputFormat.setTableLayout(job, TABLE_LAYOUT.SPO);
    RyaInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    RyaInputFormat.setZooKeeperInstance(job, clientConfig);
    RyaInputFormat.setScanAuthorizations(job, authorizations);
    String tableName = RdfCloudTripleStoreUtils.layoutPrefixToTable(TABLE_LAYOUT.SPO, tablePrefix);
    InputFormatBase.setInputTableName(job, tableName);
    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXEdgeInputFormat.class, Object.class, Edge.class);
}
 
Example #18
Source File: GraphXGraphGenerator.java    From rya with Apache License 2.0
public RDD<Tuple2<Object, RyaTypeWritable>> getVertexRDD(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException{
    // Load configuration parameters
    zk = MRUtils.getACZK(conf);
    instance = MRUtils.getACInstance(conf);
    userName = MRUtils.getACUserName(conf);
    pwd = MRUtils.getACPwd(conf);
    mock = MRUtils.getACMock(conf, false);
    tablePrefix = MRUtils.getTablePrefix(conf);
    // Set authorizations if specified
    String authString = conf.get(MRUtils.AC_AUTH_PROP);
    if (authString != null && !authString.isEmpty()) {
        authorizations = new Authorizations(authString.split(","));
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authString); // for consistency
    }
    else {
        authorizations = AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    }
    // Set table prefix to the default if not set
    if (tablePrefix == null) {
        tablePrefix = RdfCloudTripleStoreConstants.TBL_PRFX_DEF;
        MRUtils.setTablePrefix(conf, tablePrefix);
    }
    // Check for required configuration parameters
    Preconditions.checkNotNull(instance, "Accumulo instance name [" + MRUtils.AC_INSTANCE_PROP + "] not set.");
    Preconditions.checkNotNull(userName, "Accumulo username [" + MRUtils.AC_USERNAME_PROP + "] not set.");
    Preconditions.checkNotNull(pwd, "Accumulo password [" + MRUtils.AC_PWD_PROP + "] not set.");
    Preconditions.checkNotNull(tablePrefix, "Table prefix [" + MRUtils.TABLE_PREFIX_PROPERTY + "] not set.");
    RdfCloudTripleStoreConstants.prefixTables(tablePrefix);
    // If connecting to real accumulo, set additional parameters and require zookeepers
    if (!mock) conf.set(ConfigUtils.CLOUDBASE_ZOOKEEPERS, zk); // for consistency
    // Ensure consistency between alternative configuration properties
    conf.set(ConfigUtils.CLOUDBASE_INSTANCE, instance);
    conf.set(ConfigUtils.CLOUDBASE_USER, userName);
    conf.set(ConfigUtils.CLOUDBASE_PASSWORD, pwd);
    conf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    conf.set(RdfCloudTripleStoreConfiguration.CONF_TBL_PREFIX, tablePrefix);

    Job job = Job.getInstance(conf, sc.appName());

    ClientConfiguration clientConfig = new ClientConfiguration().with(ClientProperty.INSTANCE_NAME, instance).with(ClientProperty.INSTANCE_ZK_HOST, zk);

    GraphXInputFormat.setInputTableName(job, EntityCentricIndex.getTableName(conf));
    GraphXInputFormat.setConnectorInfo(job, userName, new PasswordToken(pwd));
    GraphXInputFormat.setZooKeeperInstance(job, clientConfig);
    GraphXInputFormat.setScanAuthorizations(job, authorizations);

    return sc.newAPIHadoopRDD(job.getConfiguration(), GraphXInputFormat.class, Object.class, RyaTypeWritable.class);
}
 
Example #19
Source File: OSMConversionRunner.java    From geowave with Apache License 2.0
@Override
public int run(final String[] args) throws Exception {

  final Configuration conf = getConf();
  final AccumuloRequiredOptions accumuloOptions =
      (AccumuloRequiredOptions) inputStoreOptions.getFactoryOptions();

  // job settings

  final Job job = Job.getInstance(conf, ingestOptions.getJobName() + "NodeConversion");
  job.setJarByClass(OSMConversionRunner.class);

  job.getConfiguration().set("osm_mapping", ingestOptions.getMappingContents());
  job.getConfiguration().set("arguments", ingestOptions.serializeToString());

  if (ingestOptions.getVisibilityOptions().getVisibility() != null) {
    job.getConfiguration().set(
        AbstractMapReduceIngest.GLOBAL_VISIBILITY_KEY,
        ingestOptions.getVisibilityOptions().getVisibility());
  }

  // input format

  AbstractInputFormat.setConnectorInfo(
      job,
      accumuloOptions.getUser(),
      new PasswordToken(accumuloOptions.getPassword()));
  InputFormatBase.setInputTableName(job, ingestOptions.getQualifiedTableName());
  AbstractInputFormat.setZooKeeperInstance(
      job,
      new ClientConfiguration().withInstance(accumuloOptions.getInstance()).withZkHosts(
          accumuloOptions.getZookeeper()));
  AbstractInputFormat.setScanAuthorizations(
      job,
      new Authorizations(ingestOptions.getVisibilityOptions().getVisibility()));

  final IteratorSetting is = new IteratorSetting(50, "WholeRow", WholeRowIterator.class);
  InputFormatBase.addIterator(job, is);
  job.setInputFormatClass(AccumuloInputFormat.class);
  final Range r = new Range();
  // final ArrayList<Pair<Text, Text>> columns = new ArrayList<>();
  InputFormatBase.setRanges(job, Arrays.asList(r));

  // output format
  GeoWaveOutputFormat.setStoreOptions(job.getConfiguration(), inputStoreOptions);
  final AccumuloOptions options = new AccumuloOptions();
  final AdapterStore as =
      new AdapterStoreImpl(
          new AccumuloOperations(
              accumuloOptions.getZookeeper(),
              accumuloOptions.getInstance(),
              accumuloOptions.getUser(),
              accumuloOptions.getPassword(),
              accumuloOptions.getGeoWaveNamespace(),
              options),
          options);
  for (final FeatureDataAdapter fda : FeatureDefinitionSet.featureAdapters.values()) {
    as.addAdapter(fda);
    GeoWaveOutputFormat.addDataAdapter(job.getConfiguration(), fda);
  }

  final Index primaryIndex =
      new SpatialDimensionalityTypeProvider().createIndex(new SpatialOptions());
  GeoWaveOutputFormat.addIndex(job.getConfiguration(), primaryIndex);
  job.getConfiguration().set(AbstractMapReduceIngest.INDEX_NAMES_KEY, primaryIndex.getName());

  job.setOutputFormatClass(GeoWaveOutputFormat.class);
  job.setMapOutputKeyClass(GeoWaveOutputKey.class);
  job.setMapOutputValueClass(SimpleFeature.class);

  // mapper

  job.setMapperClass(OSMConversionMapper.class);

  // reducer
  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : -1;
}
 
Example #20
Source File: OSMRunner.java    From geowave with Apache License 2.0
@Override
public int run(final String[] args) throws Exception {

  final Configuration conf = getConf();
  conf.set("tableName", ingestOptions.getQualifiedTableName());
  conf.set("osmVisibility", ingestOptions.getVisibilityOptions().getVisibility());

  // job settings
  final Job job = Job.getInstance(conf, ingestOptions.getJobName());
  job.setJarByClass(OSMRunner.class);

  switch (ingestOptions.getMapperType()) {
    case "NODE": {
      configureSchema(AvroNode.getClassSchema());
      inputAvroFile = ingestOptions.getNodesBasePath();
      job.setMapperClass(OSMNodeMapper.class);
      break;
    }
    case "WAY": {
      configureSchema(AvroWay.getClassSchema());
      inputAvroFile = ingestOptions.getWaysBasePath();
      job.setMapperClass(OSMWayMapper.class);
      break;
    }
    case "RELATION": {
      configureSchema(AvroRelation.getClassSchema());
      inputAvroFile = ingestOptions.getRelationsBasePath();
      job.setMapperClass(OSMRelationMapper.class);
      break;
    }
    default:
      break;
  }
  if ((avroSchema == null) || (inputAvroFile == null)) {
    throw new MissingArgumentException(
        "argument for mapper type must be one of: NODE, WAY, or RELATION");
  }

  enableLocalityGroups(ingestOptions);

  // input format
  job.setInputFormatClass(AvroKeyInputFormat.class);
  FileInputFormat.setInputPaths(job, inputAvroFile);
  AvroJob.setInputKeySchema(job, avroSchema);

  // mapper

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Mutation.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  AccumuloOutputFormat.setConnectorInfo(
      job,
      accumuloOptions.getUser(),
      new PasswordToken(accumuloOptions.getPassword()));
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, ingestOptions.getQualifiedTableName());
  AccumuloOutputFormat.setZooKeeperInstance(
      job,
      new ClientConfiguration().withInstance(accumuloOptions.getInstance()).withZkHosts(
          accumuloOptions.getZookeeper()));

  // reducer
  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : -1;
}
 
Example #21
Source File: AccumuloConnectionPoolFactory.java    From datawave with Apache License 2.0
public AccumuloConnectionPoolFactory(String username, String password, String zookeepers, String instanceName) {
    this.username = username;
    this.password = password;
    this.instance = new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
}
 
Example #22
Source File: AccumuloHelper.java    From datawave with Apache License 2.0
public ClientConfiguration getZookeeperConfig() {
    return ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zooKeepers);
}
 
Example #23
Source File: MetadataHelperUpdateHdfsListener.java    From datawave with Apache License 2.0
private void maybeUpdateTypeMetadataInHdfs(final SharedCacheCoordinator watcher, String triStateName, String metadataTableName) throws Exception {
    
    boolean locked = false;
    InterProcessMutex lock = (InterProcessMutex) watcher.getMutex("lock");
    try {
        locked = lock.acquire(this.lockWaitTime, TimeUnit.MILLISECONDS);
        if (!locked)
            log.debug("table:" + metadataTableName + " Unable to acquire lock to update " + metadataTableName
                            + ". Another webserver is updating the typeMetadata.");
        else
            log.debug("table:" + metadataTableName + " Obtained lock on updateTypeMetadata for " + metadataTableName);
    } catch (Exception e) {
        log.warn("table:" + metadataTableName + " Got Exception trying to acquire lock to update " + metadataTableName + ".", e);
    }
    
    try {
        if (locked) {
            try {
                log.debug("table:" + metadataTableName + " checkTriState(" + triStateName + ", " + SharedTriState.STATE.NEEDS_UPDATE);
                if (watcher.checkTriState(triStateName, SharedTriState.STATE.NEEDS_UPDATE)) {
                    if (log.isDebugEnabled()) {
                        log.debug("table:" + metadataTableName + " " + this + " STATE is NEEDS_UPDATE. Will write the TypeMetadata map to hdfs");
                    }
                    watcher.setTriState(triStateName, SharedTriState.STATE.UPDATING);
                    if (log.isDebugEnabled()) {
                        log.debug("table:" + metadataTableName + " " + this + " setTriState to UPDATING");
                    }
                    // get a connection for my MetadataHelper, and get the TypeMetadata map
                    ZooKeeperInstance instance = new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(this.instance)
                                    .withZkHosts(this.zookeepers));
                    Connector connector = instance.getConnector(this.username, new PasswordToken(this.password));
                    TypeMetadataHelper typeMetadataHelper = this.typeMetadataHelperFactory.createTypeMetadataHelper(connector, metadataTableName,
                                    allMetadataAuths, false);
                    typeMetadataWriter.writeTypeMetadataMap(typeMetadataHelper.getTypeMetadataMap(this.allMetadataAuths), metadataTableName);
                    if (log.isDebugEnabled()) {
                        log.debug("table:" + metadataTableName + " " + this + " set the sharedTriState needsUpdate to UPDATED for " + metadataTableName);
                    }
                    watcher.setTriState(triStateName, SharedTriState.STATE.UPDATED);
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("table:"
                                        + metadataTableName
                                        + " "
                                        + this
                                        + "  STATE is not NEEDS_UPDATE! Someone else may be writing or has already written the TypeMetadata map, just release the lock");
                    }
                }
            } catch (Exception ex) {
                log.warn("table:" + metadataTableName + " Unable to write TypeMetadataMap for " + metadataTableName, ex);
                watcher.setTriState(triStateName, SharedTriState.STATE.NEEDS_UPDATE);
                if (log.isDebugEnabled()) {
                    log.debug("After exception, set the SharedTriState STATE to NEEDS_UPDATE");
                }
                
            }
        }
    } finally {
        if (locked) {
            lock.release();
            if (log.isTraceEnabled())
                log.trace("table:" + metadataTableName + " " + this + " released the lock for " + metadataTableName);
            
        }
    }
}
 
Example #24
Source File: AccumuloRecordWriter.java    From datawave with Apache License 2.0
protected static Instance getInstance(Configuration conf) {
    if (conf.getBoolean(MOCK, false)) {
        return new InMemoryInstance(conf.get(INSTANCE_NAME));
    }
    return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(conf.get(INSTANCE_NAME)).withZkHosts(conf.get(ZOOKEEPERS)));
}
 
Example #25
Source File: Connections.java    From datawave with Apache License 2.0
public static Connector warehouseConnection(Configuration c) throws AccumuloException, AccumuloSecurityException {
    final String whZk = c.get(MetricsConfig.WAREHOUSE_ZOOKEEPERS), whInst = c.get(MetricsConfig.WAREHOUSE_INSTANCE), whUser = c
                    .get(MetricsConfig.WAREHOUSE_USERNAME), whPass = c.get(MetricsConfig.WAREHOUSE_PASSWORD);
    return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(whInst).withZkHosts(whZk)).getConnector(whUser, new PasswordToken(whPass));
}
 
Example #26
Source File: Connections.java    From datawave with Apache License 2.0
public static Connector metricsConnection(Configuration c) throws AccumuloException, AccumuloSecurityException {
    final String mtxZk = c.get(MetricsConfig.ZOOKEEPERS), mtxInst = c.get(MetricsConfig.INSTANCE), mtxUser = c.get(MetricsConfig.USER), mtxPass = c
                    .get(MetricsConfig.PASS);
    return new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(mtxInst).withZkHosts(mtxZk)).getConnector(mtxUser, new PasswordToken(
                    mtxPass));
}
 
Example #27
Source File: UpgradeCounterValues.java    From datawave with Apache License 2.0
protected void run(String[] args) throws ParseException, AccumuloSecurityException, AccumuloException, TableNotFoundException, IOException {
    parseConfig(args);
    
    ZooKeeperInstance instance = new ZooKeeperInstance(ClientConfiguration.loadDefault().withInstance(instanceName).withZkHosts(zookeepers));
    Connector connector = instance.getConnector(username, new PasswordToken(password));
    Authorizations auths = connector.securityOperations().getUserAuthorizations(connector.whoami());
    
    try (BatchWriter writer = connector.createBatchWriter(tableName, new BatchWriterConfig().setMaxWriteThreads(bwThreads).setMaxMemory(bwMemory)
                    .setMaxLatency(60, TimeUnit.SECONDS));
                    BatchScanner scanner = connector.createBatchScanner(tableName, auths, bsThreads)) {
        scanner.setRanges(ranges);
        
        for (Entry<Key,Value> entry : scanner) {
            Key key = entry.getKey();
            
            ByteArrayDataInput in = ByteStreams.newDataInput(entry.getValue().get());
            Counters counters = new Counters();
            try {
                counters.readFields(in);
            } catch (IOException e) {
                // The IO exception means the counters are in the wrong format. We *assume* that they are in
                // the old (CDH3) format, and de-serialize according to that, and re-write the key with the new value.
                in = ByteStreams.newDataInput(entry.getValue().get());
                int numGroups = in.readInt();
                while (numGroups-- > 0) {
                    String groupName = Text.readString(in);
                    String groupDisplayName = Text.readString(in);
                    CounterGroup group = counters.addGroup(groupName, groupDisplayName);
                    
                    int groupSize = WritableUtils.readVInt(in);
                    for (int i = 0; i < groupSize; i++) {
                        String counterName = Text.readString(in);
                        String counterDisplayName = counterName;
                        if (in.readBoolean())
                            counterDisplayName = Text.readString(in);
                        long value = WritableUtils.readVLong(in);
                        group.addCounter(counterName, counterDisplayName, value);
                    }
                }
                
                ByteArrayDataOutput out = ByteStreams.newDataOutput();
                counters.write(out);
                Mutation m = new Mutation(key.getRow());
                m.put(key.getColumnFamily(), key.getColumnQualifier(), key.getColumnVisibilityParsed(), key.getTimestamp() + 1,
                                new Value(out.toByteArray()));
                writer.addMutation(m);
            }
        }
        
    }
}
 
Example #28
Source File: MetricsIngester.java    From datawave with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    _configure(args);
    
    final Configuration conf = getConf();
    String type = conf.get(MetricsConfig.TYPE);
    
    /*
     * if the type is "errors", we want to process all of the errors from the metrics files first and then run the regular ingest metrics process
     */
    // MetricsServer.setServerConf(conf);
    // MetricsServer.initInstance();
    if ("errors".equals(type)) {
        try {
            launchErrorsJob(Job.getInstance(conf), conf);
        } catch (Exception e) {
            log.info("Failed to launch errors job", e);
        }
        type = "ingest";
        conf.set(MetricsConfig.TYPE, type);
    }
    
    /* Type logic so I can differentiate between loader and ingest metrics jobs */
    Class<? extends Mapper<?,?,?,?>> mapperClass;
    String outTable;
    
    Path inputDirectoryPath = new Path(conf.get(MetricsConfig.INPUT_DIRECTORY));
    FileSystem fs = FileSystem.get(inputDirectoryPath.toUri(), conf);
    FileStatus[] fstats = fs.listStatus(inputDirectoryPath);
    Path[] files = FileUtil.stat2Paths(fstats);
    Path[] fileBuffer = new Path[MAX_FILES];
    for (int i = 0; i < files.length;) {
        Job job = Job.getInstance(getConf());
        job.setJarByClass(this.getClass());
        
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        
        if ("ingest".equalsIgnoreCase(type)) {
            mapperClass = IngestMetricsMapper.class;
            outTable = conf.get(MetricsConfig.INGEST_TABLE, MetricsConfig.DEFAULT_INGEST_TABLE);
            job.setInputFormatClass(SequenceFileInputFormat.class);
        } else if ("loader".equalsIgnoreCase(type)) {
            mapperClass = LoaderMetricsMapper.class;
            outTable = conf.get(MetricsConfig.LOADER_TABLE, MetricsConfig.DEFAULT_LOADER_TABLE);
            job.setInputFormatClass(SequenceFileInputFormat.class);
        } else if ("flagmaker".equalsIgnoreCase(type)) {
            mapperClass = FlagMakerMetricsMapper.class;
            outTable = conf.get(MetricsConfig.FLAGMAKER_TABLE, MetricsConfig.DEFAULT_FLAGMAKER_TABLE);
            job.setInputFormatClass(SequenceFileInputFormat.class);
        } else {
            log.error(type + " is not a valid job type. Please use <ingest|loader|flagmaker>.");
            return -1;
        }
        
        job.setJobName("MetricsIngester-" + type);
        
        if (files.length - i > MAX_FILES) {
            System.arraycopy(files, i, fileBuffer, 0, MAX_FILES);
            i += MAX_FILES;
        } else {
            fileBuffer = new Path[files.length - i];
            System.arraycopy(files, i, fileBuffer, 0, fileBuffer.length);
            i += files.length - i;
        }
        
        SequenceFileInputFormat.setInputPaths(job, fileBuffer);
        
        job.setMapperClass(mapperClass);
        
        job.setNumReduceTasks(0);
        
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        AccumuloOutputFormat.setConnectorInfo(job, conf.get(MetricsConfig.USER), new PasswordToken(conf.get(MetricsConfig.PASS, "").getBytes()));
        AccumuloOutputFormat.setCreateTables(job, createTables);
        AccumuloOutputFormat.setDefaultTableName(job, outTable);
        log.info("zookeepers = " + conf.get(MetricsConfig.ZOOKEEPERS));
        log.info("instance = " + conf.get(MetricsConfig.INSTANCE));
        log.info("clientConfuguration = "
                        + ClientConfiguration.loadDefault().withInstance(conf.get(MetricsConfig.INSTANCE)).withZkHosts(conf.get(MetricsConfig.ZOOKEEPERS)));
        AccumuloOutputFormat.setZooKeeperInstance(job,
                        ClientConfiguration.loadDefault().withInstance(conf.get(MetricsConfig.INSTANCE)).withZkHosts(conf.get(MetricsConfig.ZOOKEEPERS)));
        AccumuloOutputFormat.setBatchWriterOptions(job, new BatchWriterConfig().setMaxLatency(25, TimeUnit.MILLISECONDS));
        
        job.submit();
        
        job.waitForCompletion(true);
        
        if (job.isSuccessful()) {
            for (Path p : fileBuffer) {
                fs.delete(p, true);
            }
        }
    }
    
    return 0;
}
 
Example #29
Source File: QueryMetricsSummaryLoader.java    From datawave with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = JobSetupUtil.configure(args, getConf(), log);
    
    JobSetupUtil.printConfig(getConf(), log);
    
    Job job = Job.getInstance(conf);
    Configuration jconf = job.getConfiguration();
    job.setJarByClass(this.getClass());
    
    boolean useHourlyPrecision = Boolean.valueOf(jconf.get(MetricsConfig.USE_HOURLY_PRECISION, MetricsConfig.DEFAULT_USE_HOURLY_PRECISION));
    
    if (useHourlyPrecision) {
        job.setJobName("QueryMetricsSummaries (hourly)");
    } else {
        job.setJobName("QueryMetricsSummaries");
    }
    
    try {
        Connections.initTables(conf);
    } catch (AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
    
    String inputTable = jconf.get(MetricsConfig.QUERY_METRICS_EVENT_TABLE, MetricsConfig.DEFAULT_QUERY_METRICS_EVENT_TABLE);
    String outputTable = HourlyPrecisionHelper.getOutputTable(jconf, useHourlyPrecision);
    
    String userName = jconf.get(MetricsConfig.WAREHOUSE_USERNAME);
    String password = jconf.get(MetricsConfig.WAREHOUSE_PASSWORD);
    String instance = jconf.get(MetricsConfig.WAREHOUSE_INSTANCE);
    String zookeepers = jconf.get(MetricsConfig.WAREHOUSE_ZOOKEEPERS, "localhost");
    Connector con = Connections.warehouseConnection(jconf);
    Authorizations auths = con.securityOperations().getUserAuthorizations(con.whoami());
    Collection<Range> dayRanges = JobSetupUtil.computeShardedDayRange(jconf, log);
    Range timeRange = JobSetupUtil.computeTimeRange(jconf, log);
    long delta = Long.parseLong(timeRange.getEndKey().getRow().toString()) - Long.parseLong(timeRange.getStartKey().getRow().toString());
    int numDays = (int) Math.max(1, delta / TimeUnit.DAYS.toMillis(1));
    
    job.setMapperClass(QueryMetricsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setInputFormatClass(AccumuloInputFormat.class);
    
    AccumuloInputFormat.setConnectorInfo(job, userName, new PasswordToken(password));
    AccumuloInputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
    AccumuloInputFormat.setRanges(job, dayRanges);
    AccumuloInputFormat.setAutoAdjustRanges(job, false);
    AccumuloInputFormat.setInputTableName(job, inputTable);
    AccumuloInputFormat.setScanAuthorizations(job, auths);
    
    IteratorSetting regex = new IteratorSetting(50, RegExFilter.class);
    regex.addOption(RegExFilter.COLF_REGEX, QUERY_METRICS_REGEX);
    AccumuloInputFormat.addIterator(job, regex);
    
    // Ensure all data for a day goes to the same reducer so that we aggregate it correctly before sending to Accumulo
    RowPartitioner.configureJob(job);
    
    // Configure the reducer and output format to write out our metrics
    MetricsDailySummaryReducer.configureJob(job, numDays, jconf.get(MetricsConfig.INSTANCE), jconf.get(MetricsConfig.ZOOKEEPERS),
                    jconf.get(MetricsConfig.USER), jconf.get(MetricsConfig.PASS), outputTable);
    
    job.submit();
    JobSetupUtil.changeJobPriority(job, log);
    
    job.waitForCompletion(true);
    
    return 0;
}
 
Example #30
Source File: FileByteSummaryLoader.java    From datawave with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Configuration conf = JobSetupUtil.configure(args, getConf(), log);
    
    JobSetupUtil.printConfig(getConf(), log);
    
    Job job = new Job(conf);
    Configuration jconf = job.getConfiguration();
    job.setJarByClass(this.getClass());
    job.setJobName("FileByteMetricsSummaries");
    
    try {
        Connections.initTables(conf);
    } catch (AccumuloException | AccumuloSecurityException e) {
        throw new IOException(e);
    }
    
    String inputTable = jconf.get(MetricsConfig.RAW_FILE_INDEX_TABLE, MetricsConfig.DEFAULT_RAW_FILE_INDEX_TABLE);
    String outputTable = jconf.get(MetricsConfig.METRICS_SUMMARY_TABLE, MetricsConfig.DEFAULT_METRICS_SUMMARY_TABLE);
    String userName = jconf.get(MetricsConfig.USER);
    String password = jconf.get(MetricsConfig.PASS);
    String instance = jconf.get(MetricsConfig.INSTANCE);
    String zookeepers = jconf.get(MetricsConfig.ZOOKEEPERS, "localhost");
    Range dayRange = JobSetupUtil.computeTimeRange(jconf, log);
    long delta = Long.parseLong(dayRange.getEndKey().getRow().toString()) - Long.parseLong(dayRange.getStartKey().getRow().toString());
    int numDays = (int) Math.max(1, delta / TimeUnit.DAYS.toMillis(1));
    
    defaultVisibility = jconf.get(MetricsConfig.DEFAULT_VISIBILITY, defaultVisibility);
    
    dayRange = JobSetupUtil.formatReverseSlashedTimeRange(dayRange, log); // convert millisecond epoch timestamp to /YYYY/MM/DD
    
    job.setMapperClass(FileByteMetricsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setInputFormatClass(AccumuloInputFormat.class);
    AccumuloInputFormat.setConnectorInfo(job, userName, new PasswordToken(password));
    AccumuloInputFormat.setInputTableName(job, inputTable);
    AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
    AccumuloInputFormat.setZooKeeperInstance(job, ClientConfiguration.loadDefault().withInstance(instance.trim()).withZkHosts(zookeepers.trim()));
    AccumuloInputFormat.setRanges(job, Collections.singletonList(dayRange));
    // Ensure all data for a day goes to the same reducer so that we aggregate it correctly before sending to Accumulo
    RowPartitioner.configureJob(job);
    
    // Configure the reducer and output format to write out our metrics
    MetricsDailySummaryReducer.configureJob(job, numDays, jconf.get(MetricsConfig.INSTANCE), jconf.get(MetricsConfig.ZOOKEEPERS), userName, password,
                    outputTable);
    
    job.submit();
    JobSetupUtil.changeJobPriority(job, log);
    
    job.waitForCompletion(true);
    
    return 0;
}