org.elasticsearch.hadoop.mr.LinkedMapWritable Java Examples

The following examples show how to use org.elasticsearch.hadoop.mr.LinkedMapWritable, elasticsearch-hadoop's Map&lt;Writable, Writable&gt; implementation that preserves insertion order. The examples are taken from open-source projects; the source file and project are noted above each one.
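As a quick orientation before the examples, here is a minimal sketch of the class's basic contract. It is illustrative only and not taken from any of the projects below.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.elasticsearch.hadoop.mr.LinkedMapWritable;

import java.util.Map;

public class LinkedMapWritableSketch {
    public static void main(String[] args) {
        LinkedMapWritable doc = new LinkedMapWritable();
        // Keys and values must be Writables; entries iterate in insertion order,
        // unlike org.apache.hadoop.io.MapWritable, whose iteration order is undefined.
        doc.put(new Text("id"), new Text("1"));
        doc.put(new Text("title"), new Text("Divina Commedia"));
        for (Map.Entry<Writable, Writable> entry : doc.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }
}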
Example #1
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
/**
 * Reads data from the Elasticsearch instance and verifies that the data is read successfully.
 */
@Test
public void testHifIOWithElastic() throws SecurityException {
  // The expected hashcode was computed once when the data was inserted and is hardcoded here.
  final long expectedRowCount = 1000L;
  String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
  Configuration conf = getConfiguration(options);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PCollection<Long> count = esData.apply(Count.globally());
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #2
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
private String convertMapWRowToString(LinkedMapWritable mapw) {
  String rowValue = "";
  rowValue = addFieldValuesToRow(rowValue, mapw, "User_Name");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Code");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Txn_ID");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Item_ID");
  rowValue = addFieldValuesToRow(rowValue, mapw, "last_updated");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Price");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Title");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Description");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Age");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Name");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Price");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Availability");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Batch_Num");
  rowValue = addFieldValuesToRow(rowValue, mapw, "Last_Ordered");
  rowValue = addFieldValuesToRow(rowValue, mapw, "City");
  return rowValue;
}
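The addFieldValuesToRow helper is not part of this excerpt. A plausible minimal sketch (the name and behavior are assumptions, not the actual Beam test code) appends the named field's value, when present, to the accumulated row string:

private String addFieldValuesToRow(String row, LinkedMapWritable mapw, String field) {
  // Hypothetical helper: append the field's value, if present, to the row string.
  Writable value = mapw.get(new Text(field));
  if (value != null) {
    row += value.toString();
  }
  return row;
}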
 
Example #3
Source File: AbstractMROldApiSearchTest.java    From elasticsearch-hadoop with Apache License 2.0
private JobConf createJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    conf.setOutputValueClass(mapType);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.set(ConfigurationOptions.ES_QUERY, query);
    conf.setNumReduceTasks(0);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    new QueryTestParams(tempFolder).provisionQueries(conf);
    FileInputFormat.setInputPaths(conf, new Path(MRSuite.testData.sampleArtistsDatUri()));

    HdpBootstrap.addProperties(conf, TestSettings.TESTING_PROPS, false);
    return conf;
}
 
Example #4
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
/**
 * Returns the Hadoop configuration for reading data from Elasticsearch. The configuration must
 * set the InputFormat class, the key class, and the value class. The mandatory fields for
 * EsInputFormat are es.resource, es.nodes, es.port, es.internal.es.version, and
 * es.nodes.wan.only. Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
  Configuration conf = new Configuration();
  conf.set(ConfigurationOptions.ES_NODES, options.getElasticServerIp());
  conf.set(ConfigurationOptions.ES_PORT, options.getElasticServerPort().toString());
  conf.set(ConfigurationOptions.ES_NODES_WAN_ONLY, TRUE);
  // Set username and password if Elasticsearch is configured with security.
  conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, options.getElasticUserName());
  conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, options.getElasticPassword());
  conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
  conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
  conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
  conf.setClass(
      "mapreduce.job.inputformat.class",
      org.elasticsearch.hadoop.mr.EsInputFormat.class,
      InputFormat.class);
  conf.setClass("key.class", Text.class, Object.class);
  conf.setClass("value.class", LinkedMapWritable.class, Object.class);
  // Tune the max docs per partition, scroll size, and batch size in bytes to reduce
  // the test time on large data sets.
  conf.set("es.input.max.docs.per.partition", "50000");
  conf.set("es.scroll.size", "400");
  conf.set("es.batch.size.bytes", "8mb");
  return conf;
}
 
Example #5
Source File: AbstractExtraMRTests.java    From elasticsearch-hadoop with Apache License 2.0
private JobConf createReadJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    conf.setOutputValueClass(mapType);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumReduceTasks(0);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(random.nextBoolean()));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, "true");

    FileInputFormat.setInputPaths(conf, new Path(MRSuite.testData.gibberishDat(conf)));
    return conf;
}
 
Example #6
Source File: AbstractMRNewApiSearchTest.java    From elasticsearch-hadoop with Apache License 2.0
private Configuration createConf() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);
    Job job = new Job(conf);
    job.setInputFormatClass(EsInputFormat.class);
    job.setOutputFormatClass(PrintStreamOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);

    job.setOutputValueClass(mapType);
    conf.set(ConfigurationOptions.ES_QUERY, query);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    new QueryTestParams(tempFolder).provisionQueries(conf);
    job.setNumReduceTasks(0);
    //PrintStreamOutputFormat.stream(conf, Stream.OUT);

    Configuration cfg = job.getConfiguration();
    HdpBootstrap.addProperties(cfg, TestSettings.TESTING_PROPS, false);
    return cfg;
}
 
Example #7
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
/**
 * Reads data from the embedded Elasticsearch instance and verifies that the data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
  // The expected hashcode was computed once when the data was inserted and is hardcoded here.
  String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
  Configuration conf = getConfiguration();
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #8
Source File: UtilESTest.java    From deep-spark with Apache License 2.0
private LinkedMapWritable createJsonTest() {
    LinkedMapWritable json = new LinkedMapWritable();

    LinkedMapWritable metadata = new LinkedMapWritable();
    metadata.put(new Text("author"), new Text(AUTHOR));
    metadata.put(new Text("title"), new Text(TITLE));
    metadata.put(new Text("source"), new Text(SOURCE));

    LinkedMapWritable cantoI = new LinkedMapWritable();

    cantoI.put(new Text("canto"), new Text(CANTO_I));
    cantoI.put(new Text("text"), new Text(TEXT_I));

    LinkedMapWritable cantoII = new LinkedMapWritable();
    cantoII.put(new Text("canto"), new Text(CANTO_II));
    cantoII.put(new Text("text"), new Text(TEXT_II));

    LinkedMapWritable[] writableArray = new LinkedMapWritable[] { cantoI, cantoII };

    ArrayWritable cantosList = new ArrayWritable(LinkedMapWritable.class, writableArray);

    json.put(new Text("metadata"), metadata);
    json.put(new Text("cantos"), cantosList);

    return json;
}
 
Example #9
Source File: UtilES.java    From deep-spark with Apache License 2.0
/**
 * Converts a JSON document, represented as a LinkedMapWritable, to a Cells object.
 *
 * @param jsonObject the LinkedMapWritable instance to convert.
 * @param tableName  the name used to create the Cells container; may be null.
 * @return the Cells representation of the document.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws InvocationTargetException
 */
public static Cells getCellFromJson(LinkedMapWritable jsonObject, String tableName) throws IllegalAccessException,
        InstantiationException, InvocationTargetException, NoSuchMethodException {

    Cells cells = tableName != null ? new Cells(tableName) : new Cells();

    Set<Map.Entry<Writable, Writable>> entryJson = jsonObject.entrySet();

    for (Map.Entry<Writable, Writable> entry : entryJson) {

        if (LinkedMapWritable.class.isAssignableFrom(entry.getValue().getClass())) {
            Cells innerCells = getCellFromJson((LinkedMapWritable) entry.getValue(), null);
            cells.add(Cell.create(entry.getKey().toString(), innerCells));
        } else if (ArrayWritable.class.isAssignableFrom(entry.getValue().getClass())) {
            Writable[] writables = ((ArrayWritable) entry.getValue()).get();
            List innerCells = new ArrayList<>();
            for (int i = 0; i < writables.length; i++) {
                if (writables[i] instanceof LinkedMapWritable) {
                    innerCells.add(getCellFromJson((LinkedMapWritable) writables[i], null));
                } else {
                    // Convert the array element itself rather than the enclosing ArrayWritable.
                    innerCells.add(getObjectFromWritable(writables[i]));
                }
            }
            cells.add(Cell.create(entry.getKey().toString(), innerCells));
        } else {
            cells.add(Cell.create(entry.getKey().toString(), getObjectFromWritable(entry.getValue())));
        }
    }
    return cells;
}
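getCellFromJson delegates scalar values to a getObjectFromWritable helper that these excerpts do not include. A minimal sketch under the assumption that it unwraps the common Hadoop Writable types (the actual deep-spark implementation may differ):

private static Object getObjectFromWritable(Writable writable) {
    // Hypothetical helper: unwrap common Writable types into plain Java objects.
    if (writable instanceof NullWritable) {
        return null;
    } else if (writable instanceof Text) {
        return writable.toString();
    } else if (writable instanceof IntWritable) {
        return ((IntWritable) writable).get();
    } else if (writable instanceof LongWritable) {
        return ((LongWritable) writable).get();
    } else if (writable instanceof DoubleWritable) {
        return ((DoubleWritable) writable).get();
    } else if (writable instanceof BooleanWritable) {
        return ((BooleanWritable) writable).get();
    }
    // Fall back to the string form for types not handled explicitly.
    return writable.toString();
}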
 
Example #10
Source File: LoadToES.java    From elasticsearch-hadoop with Apache License 2.0
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    LinkedMapWritable record = new LinkedMapWritable();
    String line = value.toString();
    Iterator<Text> fieldNameIter = fieldNames.iterator();
    for (StringTokenizer tokenizer = new StringTokenizer(line, "\t"); tokenizer.hasMoreTokens(); ) {
        if (fieldNameIter.hasNext()) {
            Text fieldName = fieldNameIter.next();
            String field = tokenizer.nextToken();
            record.put(fieldName, new Text(field));
        }
    }
    context.write(NullWritable.get(), record);
}
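The fieldNames collection that the mapper iterates is not initialized in this excerpt. Presumably it is populated in setup() from the CONF_FIELD_NAMES key validated in Example #11; a minimal sketch under that assumption:

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // Assumed: CONF_FIELD_NAMES holds a comma-separated list of field names,
    // which Configuration.getStrings splits; Example #11 guarantees the key is set.
    fieldNames = new ArrayList<Text>();
    for (String name : context.getConfiguration().getStrings(CONF_FIELD_NAMES)) {
        fieldNames.add(new Text(name));
    }
}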
 
Example #11
Source File: LoadToES.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    if (getConf().get(CONF_FIELD_NAMES, null) == null) {
        throw new IllegalArgumentException("Must include configuration '" + CONF_FIELD_NAMES + "'");
    }

    Job job = Job.getInstance(getConf(), "LoadToES");
    // DO NOT SET JAR BY CLASS HERE
    //
    // job.setJarByClass(getClass());

    EsMapReduceUtil.initCredentials(job);

    TextInputFormat.addInputPath(job, new Path(args[0]));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);

    job.setMapperClass(MapperImpl.class);
    // Secure Hadoop CANNOT perform shuffle phases without native libraries
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LinkedMapWritable.class);

    if (!job.waitForCompletion(true)) {
        return 1;
    }
    return 0;
}
 
Example #12
Source File: WritableTypeToJsonTest.java    From elasticsearch-hadoop with Apache License 2.0
@Test
public void testMap() {
    LinkedMapWritable map = new LinkedMapWritable();
    map.put(new Text("key"), new IntWritable(1));
    map.put(new BooleanWritable(Boolean.TRUE), new ArrayWritable(new String[] { "one", "two" }));
    writableTypeToJson(map);
}
 
Example #13
Source File: AbstractMROldApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);


    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" }
    });
}
 
Example #14
Source File: AbstractMRNewApiSaveTest.java    From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);

    Job job = new Job(conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setMapOutputValueClass(LinkedMapWritable.class);
    job.setMapperClass(TabMapper.class);
    job.setNumReduceTasks(0);


    Job standard = new Job(job.getConfiguration());
    File fl = MRSuite.testData.sampleArtistsDatFile();
    long splitSize = fl.length() / 3;
    TextInputFormat.setMaxInputSplitSize(standard, splitSize);
    TextInputFormat.setMinInputSplitSize(standard, 50);

    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    TextInputFormat.addInputPath(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    Job json = new Job(job.getConfiguration());
    json.setMapperClass(Mapper.class);
    json.setMapOutputValueClass(Text.class);
    json.getConfiguration().set(ConfigurationOptions.ES_INPUT_JSON, "true");
    TextInputFormat.addInputPath(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] {
            { standard, "" },
            { json, "json-" } });
}
 
Example #15
Source File: AbstractExtraMRTests.java    From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);


    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.gibberishDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.gibberishJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
 
Example #16
Source File: HiveSerializationEventConverterTest.java    From elasticsearch-hadoop with Apache License 2.0
@Test
public void generateEventHiveRecord() throws Exception {
    Map<Writable, Writable> map = new LinkedMapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure = new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{object={one=1, two=2, three=3}, " +
            "inspector=org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector@"));
    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);
    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);
    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);
    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
 
Example #17
Source File: UtilES.java    From deep-spark with Apache License 2.0
/**
 * Converts an entity class carrying Deep's annotations to its JSON representation, a
 * LinkedMapWritable.
 *
 * @param t   an instance of an object of type T to convert.
 * @param <T> the type of the object to convert.
 * @return the provided object converted to a LinkedMapWritable.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws InvocationTargetException
 */
public static <T> LinkedMapWritable getLinkedMapWritableFromObject(T t)
        throws IllegalAccessException, InstantiationException, InvocationTargetException {
    Field[] fields = AnnotationUtils.filterDeepFields(t.getClass());

    LinkedMapWritable linkedMapWritable = new LinkedMapWritable();

    for (Field field : fields) {
        Method method = Utils.findGetter(field.getName(), t.getClass());
        Object object = method.invoke(t);
        if (object != null) {
            if (Collection.class.isAssignableFrom(field.getType())) {
                Collection c = (Collection) object;
                Iterator iterator = c.iterator();
                List<LinkedMapWritable> innerJsonList = new ArrayList<>();

                while (iterator.hasNext()) {
                    innerJsonList.add(getLinkedMapWritableFromObject((IDeepType) iterator.next()));
                }
                // Store the converted sub-documents as an ArrayWritable so the field
                // round-trips through getObjectFromJson.
                linkedMapWritable.put(new Text(AnnotationUtils.deepFieldName(field)), new ArrayWritable(
                        LinkedMapWritable.class, innerJsonList.toArray(new LinkedMapWritable[innerJsonList.size()])));
            } else if (IDeepType.class.isAssignableFrom(field.getType())) {
                linkedMapWritable.put(new Text(AnnotationUtils.deepFieldName(field)),
                        getLinkedMapWritableFromObject((IDeepType) object));
            } else {
                linkedMapWritable
                        .put(new Text(AnnotationUtils.deepFieldName(field)), getWritableFromObject(object));
            }
        }
    }

    return linkedMapWritable;
}
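The getWritableFromObject helper used in the final branch is likewise absent from these excerpts. A minimal hypothetical sketch, mirroring getObjectFromWritable in the opposite direction:

private static Writable getWritableFromObject(Object object) {
    // Hypothetical helper: wrap plain Java values in their Writable counterparts.
    if (object instanceof Integer) {
        return new IntWritable((Integer) object);
    } else if (object instanceof Long) {
        return new LongWritable((Long) object);
    } else if (object instanceof Double) {
        return new DoubleWritable((Double) object);
    } else if (object instanceof Boolean) {
        return new BooleanWritable((Boolean) object);
    }
    return new Text(String.valueOf(object));
}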
 
Example #18
Source File: UtilES.java    From deep-spark with Apache License 2.0
private static <T> Object subDocumentListCase(Type type, ArrayWritable arrayWritable)
        throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException {
    ParameterizedType listType = (ParameterizedType) type;

    Class<?> listClass = (Class<?>) listType.getActualTypeArguments()[0];

    List list = new ArrayList();
    Writable[] writables = arrayWritable.get();

    for (int i = 0; i < writables.length; i++) {
        list.add(getObjectFromJson(listClass, (LinkedMapWritable) writables[i]));
    }

    return list;
}
 
Example #19
Source File: ESCellExtractor.java    From deep-spark with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public Cells transformElement(Tuple2<Object, LinkedMapWritable> tuple,
                              DeepJobConfig<Cells, ? extends DeepJobConfig> config) {

    try {
        return UtilES.getCellFromJson(tuple._2(), deepJobConfig.getNameSpace());
    } catch (Exception e) {
        LOG.error("Cannot convert JSON: ", e);
        throw new DeepTransformException("Could not transform from Json to Cell " + e.getMessage());
    }
}
 
Example #20
Source File: ESEntityExtractor.java    From deep-spark with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public T transformElement(Tuple2<Object, LinkedMapWritable> tuple,
                          DeepJobConfig<T, ? extends DeepJobConfig> config) {

    try {
        return (T) UtilES.getObjectFromJson(config.getEntityClass(), tuple._2());
    } catch (Exception e) {
        LOG.error("Cannot convert JSON: ", e);
        throw new DeepTransformException("Could not transform from Json to Entity " + e.getMessage());
    }

}
 
Example #21
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
/**
 * Reads data from the Elasticsearch instance based on a query and verifies that the data is
 * read successfully.
 */
@Test
public void testHifIOWithElasticQuery() {
  String expectedHashCode = "d7a7e4e42c2ca7b83ef7c1ad1ebce000";
  Long expectedRecordsCount = 1L;
  Configuration conf = getConfiguration(options);
  String query =
      "{"
          + "  \"query\": {"
          + "  \"match\" : {"
          + "    \"Title\" : {"
          + "      \"query\" : \"Title9\","
          + "      \"type\" : \"boolean\""
          + "    }"
          + "  }"
          + "  }"
          + "}";
  conf.set(ConfigurationOptions.ES_QUERY, query);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo(expectedRecordsCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #22
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
/**
 * Sets the Elasticsearch configuration parameters in the Hadoop configuration object. The
 * configuration must set the InputFormat class, the key class, and the value class. The
 * mandatory fields for EsInputFormat are es.resource, es.nodes, es.port, and es.internal.es.version.
 * Please refer to <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for more details.
 */
private Configuration getConfiguration() {
  Configuration conf = new Configuration();
  conf.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
  conf.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
  conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
  conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
  conf.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
  conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
  conf.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
  conf.setClass("key.class", Text.class, Object.class);
  conf.setClass("value.class", LinkedMapWritable.class, Object.class);
  return conf;
}
 
Example #23
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
/**
 * Reads data from the embedded Elasticsearch instance based on a query and verifies that the
 * data is read successfully.
 */
@Test
public void testHifIOWithElasticQuery() {
  long expectedRowCount = 1L;
  String expectedHashCode = "cfbf3e5c993d44e57535a114e25f782d";
  Configuration conf = getConfiguration();
  String fieldValue = ELASTIC_TYPE_ID_PREFIX + "2";
  String query =
      "{"
          + "  \"query\": {"
          + "  \"match\" : {"
          + "    \"id\" : {"
          + "      \"query\" : \""
          + fieldValue
          + "\","
          + "      \"type\" : \"boolean\""
          + "    }"
          + "  }"
          + "  }"
          + "}";
  conf.set(ConfigurationOptions.ES_QUERY, query);
  PCollection<KV<Text, LinkedMapWritable>> esData =
      pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
  PCollection<Long> count = esData.apply(Count.globally());
  // Verify that the count of objects fetched using HIFInputFormat IO is correct.
  PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
  PCollection<LinkedMapWritable> values = esData.apply(Values.create());
  PCollection<String> textValues = values.apply(transformFunc);
  // Verify the output values using checksum comparison.
  PCollection<String> consolidatedHashcode =
      textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
  PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
  pipeline.run().waitUntilFinish();
}
 
Example #24
Source File: UtilES.java    From deep-spark with Apache License 2.0
/**
 * Converts a JSON document, represented as a LinkedMapWritable, to an entity class carrying
 * Deep's annotations.
 *
 * @param classEntity the entity class to instantiate.
 * @param jsonObject  the LinkedMapWritable instance to convert.
 * @param <T>         the return type.
 * @return the provided JSON document converted to an instance of T.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws java.lang.reflect.InvocationTargetException
 */
public static <T> T getObjectFromJson(Class<T> classEntity, LinkedMapWritable jsonObject)
        throws IllegalAccessException, InstantiationException, InvocationTargetException, NoSuchMethodException {
    T t = classEntity.newInstance();

    Field[] fields = AnnotationUtils.filterDeepFields(classEntity);

    Object insert;

    for (Field field : fields) {
        Method method = Utils.findSetter(field.getName(), classEntity, field.getType());

        Class<?> classField = field.getType();
        String key = AnnotationUtils.deepFieldName(field);
        Text text = new org.apache.hadoop.io.Text(key);
        Writable currentJson = jsonObject.get(text);
        if (currentJson != null) {

            if (Iterable.class.isAssignableFrom(classField)) {
                Type type = field.getGenericType();
                insert = subDocumentListCase(type, (ArrayWritable) currentJson);
                method.invoke(t, (insert));

            } else if (IDeepType.class.isAssignableFrom(classField)) {
                insert = getObjectFromJson(classField, (LinkedMapWritable) currentJson);
                method.invoke(t, (insert));
            } else {
                insert = currentJson;
                try {
                    method.invoke(t, getObjectFromWritable((Writable) insert));
                } catch (Exception e) {
                    LOG.error("impossible to convert field " + t + " :" + field + " error: " + e.getMessage());
                    method.invoke(t, Utils.castNumberType(getObjectFromWritable((Writable) insert), t.getClass()));
                }

            }

        }
    }

    return t;
}
 
Example #25
Source File: HadoopFormatIOElasticIT.java    From beam with Apache License 2.0
@Override
public String apply(LinkedMapWritable mapw) {
  String rowValue = "";
  rowValue = convertMapWRowToString(mapw);
  return rowValue;
}
 
Example #26
Source File: HadoopFormatIOElasticTest.java    From beam with Apache License 2.0
@Override
public String apply(LinkedMapWritable mapw) {
  return mapw.get(new Text("id")) + "|" + mapw.get(new Text("scientist"));
}
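The apply methods in Examples #25 and #26 read like the bodies of the transformFunc referenced by the pipeline tests above. A minimal sketch of how such a transform might be declared, assuming Beam's MapElements over a SimpleFunction:

// Assumed declaration of transformFunc; only its apply() body appears in the excerpts.
private static final MapElements<LinkedMapWritable, String> transformFunc =
    MapElements.via(
        new SimpleFunction<LinkedMapWritable, String>() {
          @Override
          public String apply(LinkedMapWritable mapw) {
            return mapw.get(new Text("id")) + "|" + mapw.get(new Text("scientist"));
          }
        });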
 
Example #27
Source File: UtilESTest.java    From deep-spark with Apache License 2.0
@Test
public void testGetObjectFromBson()
        throws UnknownHostException, NoSuchFieldException, IllegalAccessException, InvocationTargetException,
        InstantiationException, NoSuchMethodException {

    LinkedMapWritable json = createJsonTest();

    BookEntity bookEntity = UtilES.getObjectFromJson(BookEntity.class, json);

    MetadataEntity metadata = bookEntity.getMetadataEntity();

    assertEquals(metadata.getAuthor(), AUTHOR);

    assertEquals(metadata.getTitle(), TITLE);

    assertEquals(metadata.getSource(), SOURCE);

    List<CantoEntity> cantoEntityList = bookEntity.getCantoEntities();

    assertEquals(cantoEntityList.get(0).getNumber(), CANTO_I);

    assertEquals(cantoEntityList.get(0).getText(), TEXT_I);

    assertEquals(cantoEntityList.get(1).getNumber(), CANTO_II);

    assertEquals(cantoEntityList.get(1).getText(), TEXT_II);

}
 
Example #28
Source File: UtilESTest.java    From deep-spark with Apache License 2.0
@Test
public void testGetCellFromJson()
        throws UnknownHostException, NoSuchFieldException, IllegalAccessException, InvocationTargetException,
        InstantiationException, NoSuchMethodException {

    LinkedMapWritable bson = createJsonTest();

    Cells cells = UtilES.getCellFromJson(bson, "book");

    Map<Writable, Writable> mapMetadata = (Map<Writable, Writable>) bson.get(new Text("metadata"));

    assertEquals(mapMetadata.get(new Text("author")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("author").getCellValue());
    assertEquals(mapMetadata.get(new Text("title")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("title").getCellValue());
    assertEquals(mapMetadata.get(new Text("source")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("source").getCellValue());

    // Check the list object

    List<Cells> list = (List<Cells>) cells.getCellByName("cantos").getCellValue();

    LinkedMapWritable[] mapCantos = (LinkedMapWritable[]) ((ArrayWritable) bson.get(new Text("cantos"))).get();

    assertEquals(mapCantos[0].get(new Text("canto")).toString(), list.get(0).getCellByName("canto").getCellValue());
    assertEquals(mapCantos[0].get(new Text("text")).toString(), list.get(0).getCellByName("text").getCellValue());

    assertEquals(mapCantos[1].get(new Text("canto")).toString(), list.get(1).getCellByName("canto").getCellValue());
    assertEquals(mapCantos[1].get(new Text("text")).toString(), list.get(1).getCellByName("text").getCellValue());

}