org.elasticsearch.hadoop.mr.LinkedMapWritable Java Examples
The following examples show how to use org.elasticsearch.hadoop.mr.LinkedMapWritable.
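LinkedMapWritable is the connector's Writable map that preserves insertion order (unlike MapWritable, whose iteration order follows the backing hash map), which is why elasticsearch-hadoop uses it to keep Elasticsearch document fields in their original order. Before the examples, a minimal sketch of building and reading one; the class name LinkedMapWritableSketch is hypothetical, while the calls themselves are the public Hadoop and elasticsearch-hadoop APIs:

import java.util.Map;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.elasticsearch.hadoop.mr.LinkedMapWritable;

public class LinkedMapWritableSketch {
    public static void main(String[] args) {
        // LinkedMapWritable implements Map<Writable, Writable>.
        LinkedMapWritable doc = new LinkedMapWritable();
        doc.put(new Text("user"), new Text("kimchy"));
        doc.put(new Text("age"), new IntWritable(42));

        // Entries come back in insertion order: "user" first, then "age".
        for (Map.Entry<Writable, Writable> entry : doc.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }
}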
Example #1
Source File: HadoopFormatIOElasticIT.java, from beam (Apache License 2.0)

/**
 * This test reads data from the Elasticsearch instance and verifies whether data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() throws SecurityException {
    // Expected hashcode is evaluated during insertion time one time and hardcoded here.
    final long expectedRowCount = 1000L;
    String expectedHashCode = "42e254c8689050ed0a617ff5e80ea392";
    Configuration conf = getConfiguration(options);
    PCollection<KV<Text, LinkedMapWritable>> esData =
        pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
    // Verify that the count of objects fetched using HIFInputFormat IO is correct.
    PCollection<Long> count = esData.apply(Count.globally());
    PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
    PCollection<LinkedMapWritable> values = esData.apply(Values.create());
    PCollection<String> textValues = values.apply(transformFunc);
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode =
        textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    pipeline.run().waitUntilFinish();
}
Example #2
Source File: HadoopFormatIOElasticIT.java, from beam (Apache License 2.0)

private String convertMapWRowToString(LinkedMapWritable mapw) {
    String rowValue = "";
    rowValue = addFieldValuesToRow(rowValue, mapw, "User_Name");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Code");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Txn_ID");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Item_ID");
    rowValue = addFieldValuesToRow(rowValue, mapw, "last_updated");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Price");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Title");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Description");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Age");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Name");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Item_Price");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Availability");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Batch_Num");
    rowValue = addFieldValuesToRow(rowValue, mapw, "Last_Ordered");
    rowValue = addFieldValuesToRow(rowValue, mapw, "City");
    return rowValue;
}
Example #3
Source File: AbstractMROldApiSearchTest.java, from elasticsearch-hadoop (Apache License 2.0)

private JobConf createJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);

    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    conf.setOutputValueClass(mapType);

    HadoopCfgUtils.setGenericOptions(conf);
    conf.set(ConfigurationOptions.ES_QUERY, query);
    conf.setNumReduceTasks(0);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    new QueryTestParams(tempFolder).provisionQueries(conf);
    FileInputFormat.setInputPaths(conf, new Path(MRSuite.testData.sampleArtistsDatUri()));

    HdpBootstrap.addProperties(conf, TestSettings.TESTING_PROPS, false);
    return conf;
}
Example #4
Source File: HadoopFormatIOElasticIT.java, from beam (Apache License 2.0)

/**
 * Returns the Hadoop configuration for reading data from Elasticsearch. The configuration
 * object should have the InputFormat class, key class and value class set. Mandatory fields
 * for EsInputFormat are es.resource, es.nodes, es.port, es.internal.es.version and
 * es.nodes.wan.only. Please refer to the
 * <a href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html">
 * Elasticsearch Configuration</a> for more details.
 */
private static Configuration getConfiguration(HadoopFormatIOTestOptions options) {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, options.getElasticServerIp());
    conf.set(ConfigurationOptions.ES_PORT, options.getElasticServerPort().toString());
    conf.set(ConfigurationOptions.ES_NODES_WAN_ONLY, TRUE);
    // Set username and password if Elasticsearch is configured with security.
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_USER, options.getElasticUserName());
    conf.set(ConfigurationOptions.ES_NET_HTTP_AUTH_PASS, options.getElasticPassword());
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass(
        "mapreduce.job.inputformat.class",
        org.elasticsearch.hadoop.mr.EsInputFormat.class,
        InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    // Optimizations added to change the max docs per partition, scroll size and batch size of
    // bytes to improve the test time for large data.
    conf.set("es.input.max.docs.per.partition", "50000");
    conf.set("es.scroll.size", "400");
    conf.set("es.batch.size.bytes", "8mb");
    return conf;
}
Example #5
Source File: AbstractExtraMRTests.java, from elasticsearch-hadoop (Apache License 2.0)

private JobConf createReadJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();

    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);

    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    // Randomly exercise either MapWritable or LinkedMapWritable as the output value class.
    conf.setOutputValueClass(mapType);

    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumReduceTasks(0);

    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(random.nextBoolean()));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, "true");

    FileInputFormat.setInputPaths(conf, new Path(MRSuite.testData.gibberishDat(conf)));
    return conf;
}
Example #6
Source File: AbstractMRNewApiSearchTest.java, from elasticsearch-hadoop (Apache License 2.0)

private Configuration createConf() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);

    Job job = new Job(conf);
    job.setInputFormatClass(EsInputFormat.class);
    job.setOutputFormatClass(PrintStreamOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    job.setOutputValueClass(mapType);

    conf.set(ConfigurationOptions.ES_QUERY, query);
    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    new QueryTestParams(tempFolder).provisionQueries(conf);
    job.setNumReduceTasks(0);
    //PrintStreamOutputFormat.stream(conf, Stream.OUT);

    Configuration cfg = job.getConfiguration();
    HdpBootstrap.addProperties(cfg, TestSettings.TESTING_PROPS, false);
    return cfg;
}
Example #7
Source File: HadoopFormatIOElasticTest.java, from beam (Apache License 2.0)

/**
 * Test to read data from embedded Elasticsearch instance and verify whether data is read
 * successfully.
 */
@Test
public void testHifIOWithElastic() {
    // Expected hashcode is evaluated during insertion time one time and hardcoded here.
    String expectedHashCode = "a62a85f5f081e3840baf1028d4d6c6bc";
    Configuration conf = getConfiguration();
    PCollection<KV<Text, LinkedMapWritable>> esData =
        pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
    PCollection<Long> count = esData.apply(Count.globally());
    // Verify that the count of objects fetched using HIFInputFormat IO is correct.
    PAssert.thatSingleton(count).isEqualTo((long) TEST_DATA_ROW_COUNT);
    PCollection<LinkedMapWritable> values = esData.apply(Values.create());
    PCollection<String> textValues = values.apply(transformFunc);
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode =
        textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    pipeline.run().waitUntilFinish();
}
Example #8
Source File: UtilESTest.java, from deep-spark (Apache License 2.0)

private LinkedMapWritable createJsonTest() {
    LinkedMapWritable json = new LinkedMapWritable();

    LinkedMapWritable metadata = new LinkedMapWritable();
    metadata.put(new Text("author"), new Text(AUTHOR));
    metadata.put(new Text("title"), new Text(TITLE));
    metadata.put(new Text("source"), new Text(SOURCE));

    LinkedMapWritable cantoI = new LinkedMapWritable();
    cantoI.put(new Text("canto"), new Text(CANTO_I));
    cantoI.put(new Text("text"), new Text(TEXT_I));

    LinkedMapWritable cantoII = new LinkedMapWritable();
    cantoII.put(new Text("canto"), new Text(CANTO_II));
    cantoII.put(new Text("text"), new Text(TEXT_II));

    LinkedMapWritable[] writableArray = new LinkedMapWritable[] { cantoI, cantoII };
    ArrayWritable cantosList = new ArrayWritable(LinkedMapWritable.class, writableArray);

    json.put(new Text("metadata"), metadata);
    json.put(new Text("cantos"), cantosList);
    return json;
}
Example #9
Source File: UtilES.java, from deep-spark (Apache License 2.0)

/**
 * Converts a JSON object (as a LinkedMapWritable) to the Cells class.
 *
 * @param jsonObject the JSON object to convert.
 * @param tableName  the name of the table the cells belong to (may be null).
 * @return the resulting Cells.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws InvocationTargetException
 */
public static Cells getCellFromJson(LinkedMapWritable jsonObject, String tableName)
        throws IllegalAccessException, InstantiationException, InvocationTargetException,
        NoSuchMethodException {

    Cells cells = tableName != null ? new Cells(tableName) : new Cells();

    Set<Map.Entry<Writable, Writable>> entryJson = jsonObject.entrySet();
    for (Map.Entry<Writable, Writable> entry : entryJson) {
        if (LinkedMapWritable.class.isAssignableFrom(entry.getValue().getClass())) {
            Cells innerCells = getCellFromJson((LinkedMapWritable) entry.getValue(), null);
            cells.add(Cell.create(entry.getKey().toString(), innerCells));
        } else if (ArrayWritable.class.isAssignableFrom(entry.getValue().getClass())) {
            Writable[] writables = ((ArrayWritable) entry.getValue()).get();
            List innerCell = new ArrayList<>();
            for (int i = 0; i < writables.length; i++) {
                if (writables[i] instanceof LinkedMapWritable) {
                    innerCell.add(getCellFromJson((LinkedMapWritable) writables[i], null));
                } else {
                    // Convert the array element itself.
                    innerCell.add(getObjectFromWritable(writables[i]));
                }
            }
            cells.add(Cell.create(entry.getKey().toString(), innerCell));
        } else {
            cells.add(Cell.create(entry.getKey().toString(), getObjectFromWritable(entry.getValue())));
        }
    }
    return cells;
}
Example #10
Source File: LoadToES.java, from elasticsearch-hadoop (Apache License 2.0)

@Override
protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    LinkedMapWritable record = new LinkedMapWritable();

    String line = value.toString();
    Iterator<Text> fieldNameIter = fieldNames.iterator();
    for (StringTokenizer tokenizer = new StringTokenizer(line, "\t"); tokenizer.hasMoreTokens(); ) {
        if (fieldNameIter.hasNext()) {
            Text fieldName = fieldNameIter.next();
            String field = tokenizer.nextToken();
            record.put(fieldName, new Text(field));
        }
    }

    context.write(NullWritable.get(), record);
}
Example #11
Source File: LoadToES.java, from elasticsearch-hadoop (Apache License 2.0)

@Override
public int run(String[] args) throws Exception {
    if (getConf().get(CONF_FIELD_NAMES, null) == null) {
        throw new IllegalArgumentException("Must include configuration '" + CONF_FIELD_NAMES + "'");
    }
    Job job = Job.getInstance(getConf(), "LoadToES");
    // DO NOT SET JAR BY CLASS HERE
    // job.setJarByClass(getClass());
    EsMapReduceUtil.initCredentials(job);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setMapperClass(MapperImpl.class);
    // Secure Hadoop CANNOT perform shuffle phases without native libraries
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LinkedMapWritable.class);
    if (!job.waitForCompletion(true)) {
        return 1;
    }
    return 0;
}
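The run method above is the Tool side of a MapReduce driver. A short sketch of how such a job is plausibly launched (the main method below is an assumption, not part of the example's source), using ToolRunner so that generic -D options such as the field-names property can be supplied on the command line; assumes org.apache.hadoop.util.ToolRunner and org.apache.hadoop.conf.Configuration are imported:

// Hypothetical launcher for the LoadToES Tool shown above.
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // e.g. hadoop jar load-to-es.jar LoadToES -D<CONF_FIELD_NAMES>=name,age,city /input/path
    int exitCode = ToolRunner.run(conf, new LoadToES(), args);
    System.exit(exitCode);
}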
Example #12
Source File: WritableTypeToJsonTest.java, from elasticsearch-hadoop (Apache License 2.0)

@Test
public void testMap() {
    LinkedMapWritable map = new LinkedMapWritable();
    map.put(new Text("key"), new IntWritable(1));
    map.put(new BooleanWritable(Boolean.TRUE), new ArrayWritable(new String[] { "one", "two" }));
    writableTypeToJson(map);
}
Example #13
Source File: AbstractMROldApiSaveTest.java, from elasticsearch-hadoop (Apache License 2.0)

@Parameters
public static Collection<Object[]> configs() throws Exception {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example #14
Source File: AbstractMRNewApiSaveTest.java, from elasticsearch-hadoop (Apache License 2.0)

@Parameters
public static Collection<Object[]> configs() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);

    Job job = new Job(conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(EsOutputFormat.class);
    job.setMapOutputValueClass(LinkedMapWritable.class);
    job.setMapperClass(TabMapper.class);
    job.setNumReduceTasks(0);

    Job standard = new Job(job.getConfiguration());
    File fl = MRSuite.testData.sampleArtistsDatFile();
    long splitSize = fl.length() / 3;
    TextInputFormat.setMaxInputSplitSize(standard, splitSize);
    TextInputFormat.setMinInputSplitSize(standard, 50);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    TextInputFormat.addInputPath(standard, new Path(MRSuite.testData.sampleArtistsDat(conf)));

    Job json = new Job(job.getConfiguration());
    json.setMapperClass(Mapper.class);
    json.setMapOutputValueClass(Text.class);
    json.getConfiguration().set(ConfigurationOptions.ES_INPUT_JSON, "true");
    TextInputFormat.addInputPath(json, new Path(MRSuite.testData.sampleArtistsJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example #15
Source File: AbstractExtraMRTests.java, from elasticsearch-hadoop (Apache License 2.0)

@Parameters
public static Collection<Object[]> configs() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(SplittableTextInputFormat.class);
    conf.setOutputFormat(EsOutputFormat.class);
    conf.setReducerClass(IdentityReducer.class);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumMapTasks(2);
    conf.setInt("actual.splits", 2);
    conf.setNumReduceTasks(0);

    JobConf standard = new JobConf(conf);
    standard.setMapperClass(TabMapper.class);
    standard.setMapOutputValueClass(LinkedMapWritable.class);
    standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
    FileInputFormat.setInputPaths(standard, new Path(MRSuite.testData.gibberishDat(conf)));

    JobConf json = new JobConf(conf);
    json.setMapperClass(IdentityMapper.class);
    json.setMapOutputValueClass(Text.class);
    json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
    FileInputFormat.setInputPaths(json, new Path(MRSuite.testData.gibberishJson(conf)));

    return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example #16
Source File: HiveSerializationEventConverterTest.java, from elasticsearch-hadoop (Apache License 2.0)

@Test
public void generateEventHiveRecord() throws Exception {
    Map<Writable, Writable> map = new LinkedMapWritable();
    map.put(new Text("one"), new IntWritable(1));
    map.put(new Text("two"), new IntWritable(2));
    map.put(new Text("three"), new IntWritable(3));

    HiveType tuple = new HiveType(map, TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
            TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo)));

    SerializationEventConverter eventConverter = new SerializationEventConverter();

    SerializationFailure iaeFailure =
            new SerializationFailure(new IllegalArgumentException("garbage"), tuple, new ArrayList<String>());

    String rawEvent = eventConverter.getRawEvent(iaeFailure);
    assertThat(rawEvent, startsWith("HiveType{object={one=1, two=2, three=3}, "
            + "inspector=org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector@"));

    String timestamp = eventConverter.getTimestamp(iaeFailure);
    assertTrue(StringUtils.hasText(timestamp));
    assertTrue(DateUtils.parseDate(timestamp).getTime().getTime() > 1L);

    String exceptionType = eventConverter.renderExceptionType(iaeFailure);
    assertEquals("illegal_argument_exception", exceptionType);

    String exceptionMessage = eventConverter.renderExceptionMessage(iaeFailure);
    assertEquals("garbage", exceptionMessage);

    String eventMessage = eventConverter.renderEventMessage(iaeFailure);
    assertEquals("Could not construct bulk entry from record", eventMessage);
}
Example #17
Source File: UtilES.java, from deep-spark (Apache License 2.0)

/**
 * Converts an entity class with deep's annotations to a JSON-style LinkedMapWritable.
 *
 * @param t   an instance of an object of type T to convert.
 * @param <T> the type of the object to convert.
 * @return the provided object converted to a LinkedMapWritable.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws InvocationTargetException
 */
public static <T> LinkedMapWritable getLinkedMapWritableFromObject(T t)
        throws IllegalAccessException, InstantiationException, InvocationTargetException {
    Field[] fields = AnnotationUtils.filterDeepFields(t.getClass());

    LinkedMapWritable linkedMapWritable = new LinkedMapWritable();

    for (Field field : fields) {
        Method method = Utils.findGetter(field.getName(), t.getClass());
        Object object = method.invoke(t);
        if (object != null) {
            if (Collection.class.isAssignableFrom(field.getType())) {
                Collection c = (Collection) object;
                Iterator iterator = c.iterator();
                List<LinkedMapWritable> innerJsonList = new ArrayList<>();

                while (iterator.hasNext()) {
                    innerJsonList.add(getLinkedMapWritableFromObject((IDeepType) iterator.next()));
                }
                // linkedMapWritable.put(new Text(AnnotationUtils.deepFieldName(field)),
                //         new LinkedMapWritable[innerJsonList.size()]);
            } else if (IDeepType.class.isAssignableFrom(field.getType())) {
                linkedMapWritable.put(new Text(AnnotationUtils.deepFieldName(field)),
                        getLinkedMapWritableFromObject((IDeepType) object));
            } else {
                linkedMapWritable.put(new Text(AnnotationUtils.deepFieldName(field)),
                        getWritableFromObject(object));
            }
        }
    }

    return linkedMapWritable;
}
Example #18
Source File: UtilES.java, from deep-spark (Apache License 2.0)

private static <T> Object subDocumentListCase(Type type, ArrayWritable arrayWritable)
        throws IllegalAccessException, InstantiationException, InvocationTargetException,
        NoSuchMethodException {
    ParameterizedType listType = (ParameterizedType) type;
    Class<?> listClass = (Class<?>) listType.getActualTypeArguments()[0];

    List list = new ArrayList();

    Writable[] writables = arrayWritable.get();
    for (int i = 0; i < writables.length; i++) {
        list.add(getObjectFromJson(listClass, (LinkedMapWritable) writables[i]));
    }
    return list;
}
Example #19
Source File: ESCellExtractor.java, from deep-spark (Apache License 2.0)

/**
 * {@inheritDoc}
 */
@Override
public Cells transformElement(Tuple2<Object, LinkedMapWritable> tuple,
        DeepJobConfig<Cells, ? extends DeepJobConfig> config) {
    try {
        return UtilES.getCellFromJson(tuple._2(), deepJobConfig.getNameSpace());
    } catch (Exception e) {
        LOG.error("Cannot convert JSON: ", e);
        throw new DeepTransformException("Could not transform from Json to Cell " + e.getMessage());
    }
}
Example #20
Source File: ESEntityExtractor.java, from deep-spark (Apache License 2.0)

/**
 * {@inheritDoc}
 */
@Override
public T transformElement(Tuple2<Object, LinkedMapWritable> tuple,
        DeepJobConfig<T, ? extends DeepJobConfig> config) {
    try {
        return (T) UtilES.getObjectFromJson(config.getEntityClass(), tuple._2());
    } catch (Exception e) {
        LOG.error("Cannot convert JSON: ", e);
        throw new DeepTransformException("Could not transform from Json to Entity " + e.getMessage());
    }
}
Example #21
Source File: HadoopFormatIOElasticIT.java, from beam (Apache License 2.0)

/**
 * This test reads data from the Elasticsearch instance based on a query and verifies if data is
 * read successfully.
 */
@Test
public void testHifIOWithElasticQuery() {
    String expectedHashCode = "d7a7e4e42c2ca7b83ef7c1ad1ebce000";
    Long expectedRecordsCount = 1L;
    Configuration conf = getConfiguration(options);
    String query =
        "{"
            + " \"query\": {"
            + " \"match\" : {"
            + " \"Title\" : {"
            + " \"query\" : \"Title9\","
            + " \"type\" : \"boolean\""
            + " }"
            + " }"
            + " }"
            + "}";
    conf.set(ConfigurationOptions.ES_QUERY, query);
    PCollection<KV<Text, LinkedMapWritable>> esData =
        pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
    PCollection<Long> count = esData.apply(Count.globally());
    // Verify that the count of objects fetched using HIFInputFormat IO is correct.
    PAssert.thatSingleton(count).isEqualTo(expectedRecordsCount);
    PCollection<LinkedMapWritable> values = esData.apply(Values.create());
    PCollection<String> textValues = values.apply(transformFunc);
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode =
        textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    pipeline.run().waitUntilFinish();
}
Example #22
Source File: HadoopFormatIOElasticTest.java, from beam (Apache License 2.0)

/**
 * Set the Elasticsearch configuration parameters in the Hadoop configuration object. The
 * configuration object should have the InputFormat class, key class and value class set.
 * Mandatory fields for EsInputFormat are es.resource, es.nodes, es.port and
 * es.internal.es.version. Please refer to the
 * <a href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html">
 * Elasticsearch Configuration</a> for more details.
 */
private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
    conf.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
    conf.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    conf.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    conf.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
    conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);
    conf.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
    conf.setClass("key.class", Text.class, Object.class);
    conf.setClass("value.class", LinkedMapWritable.class, Object.class);
    return conf;
}
Example #23
Source File: HadoopFormatIOElasticTest.java, from beam (Apache License 2.0)

/**
 * Test to read data from embedded Elasticsearch instance based on query and verify whether data
 * is read successfully.
 */
@Test
public void testHifIOWithElasticQuery() {
    long expectedRowCount = 1L;
    String expectedHashCode = "cfbf3e5c993d44e57535a114e25f782d";
    Configuration conf = getConfiguration();
    String fieldValue = ELASTIC_TYPE_ID_PREFIX + "2";
    String query =
        "{"
            + " \"query\": {"
            + " \"match\" : {"
            + " \"id\" : {"
            + " \"query\" : \"" + fieldValue + "\","
            + " \"type\" : \"boolean\""
            + " }"
            + " }"
            + " }"
            + "}";
    conf.set(ConfigurationOptions.ES_QUERY, query);
    PCollection<KV<Text, LinkedMapWritable>> esData =
        pipeline.apply(HadoopFormatIO.<Text, LinkedMapWritable>read().withConfiguration(conf));
    PCollection<Long> count = esData.apply(Count.globally());
    // Verify that the count of objects fetched using HIFInputFormat IO is correct.
    PAssert.thatSingleton(count).isEqualTo(expectedRowCount);
    PCollection<LinkedMapWritable> values = esData.apply(Values.create());
    PCollection<String> textValues = values.apply(transformFunc);
    // Verify the output values using checksum comparison.
    PCollection<String> consolidatedHashcode =
        textValues.apply(Combine.globally(new HashingFn()).withoutDefaults());
    PAssert.that(consolidatedHashcode).containsInAnyOrder(expectedHashCode);
    pipeline.run().waitUntilFinish();
}
Example #24
Source File: UtilES.java, from deep-spark (Apache License 2.0)

/**
 * Converts a JSON object (as a LinkedMapWritable) to an entity class with deep's annotations.
 *
 * @param classEntity the entity class.
 * @param jsonObject  the instance of the JSON object to convert.
 * @param <T>         return type.
 * @return the provided JSON object converted to an instance of T.
 * @throws IllegalAccessException
 * @throws InstantiationException
 * @throws java.lang.reflect.InvocationTargetException
 */
public static <T> T getObjectFromJson(Class<T> classEntity, LinkedMapWritable jsonObject)
        throws IllegalAccessException, InstantiationException, InvocationTargetException,
        NoSuchMethodException {
    T t = classEntity.newInstance();

    Field[] fields = AnnotationUtils.filterDeepFields(classEntity);

    Object insert;

    for (Field field : fields) {
        Method method = Utils.findSetter(field.getName(), classEntity, field.getType());

        Class<?> classField = field.getType();

        String key = AnnotationUtils.deepFieldName(field);
        Text text = new org.apache.hadoop.io.Text(key);
        Writable currentJson = jsonObject.get(text);
        if (currentJson != null) {
            if (Iterable.class.isAssignableFrom(classField)) {
                Type type = field.getGenericType();
                insert = subDocumentListCase(type, (ArrayWritable) currentJson);
                method.invoke(t, (insert));
            } else if (IDeepType.class.isAssignableFrom(classField)) {
                insert = getObjectFromJson(classField, (LinkedMapWritable) currentJson);
                method.invoke(t, (insert));
            } else {
                insert = currentJson;
                try {
                    method.invoke(t, getObjectFromWritable((Writable) insert));
                } catch (Exception e) {
                    LOG.error("impossible to convert field " + t + " :" + field + " error: " + e.getMessage());
                    method.invoke(t, Utils.castNumberType(getObjectFromWritable((Writable) insert), t.getClass()));
                }
            }
        }
    }

    return t;
}
Example #25
Source File: HadoopFormatIOElasticIT.java, from beam (Apache License 2.0)

@Override
public String apply(LinkedMapWritable mapw) {
    return convertMapWRowToString(mapw);
}
Example #26
Source File: HadoopFormatIOElasticTest.java, from beam (Apache License 2.0)

@Override
public String apply(LinkedMapWritable mapw) {
    return mapw.get(new Text("id")) + "|" + mapw.get(new Text("scientist"));
}
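The apply overrides in the last two examples are bodies of Beam SimpleFunctions. A sketch of how such a transformFunc is plausibly declared and wired in; the MapElements wrapping is an assumption inferred from the values.apply(transformFunc) calls in the tests above, not lifted from the source:

// Hypothetical declaration; the SimpleFunction body is Example #26 verbatim.
// Assumes org.apache.beam.sdk.transforms.MapElements and SimpleFunction are imported.
private static final MapElements<LinkedMapWritable, String> transformFunc =
    MapElements.via(
        new SimpleFunction<LinkedMapWritable, String>() {
            @Override
            public String apply(LinkedMapWritable mapw) {
                return mapw.get(new Text("id")) + "|" + mapw.get(new Text("scientist"));
            }
        });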
Example #27
Source File: UtilESTest.java, from deep-spark (Apache License 2.0)

@Test
public void testGetObjectFromBson() throws UnknownHostException, NoSuchFieldException,
        IllegalAccessException, InvocationTargetException, InstantiationException, NoSuchMethodException {
    LinkedMapWritable json = createJsonTest();

    BookEntity bookEntity = UtilES.getObjectFromJson(BookEntity.class, json);

    MetadataEntity metadata = bookEntity.getMetadataEntity();
    assertEquals(metadata.getAuthor(), AUTHOR);
    assertEquals(metadata.getTitle(), TITLE);
    assertEquals(metadata.getSource(), SOURCE);

    List<CantoEntity> cantoEntityList = bookEntity.getCantoEntities();
    assertEquals(cantoEntityList.get(0).getNumber(), CANTO_I);
    assertEquals(cantoEntityList.get(0).getText(), TEXT_I);
    assertEquals(cantoEntityList.get(1).getNumber(), CANTO_II);
    assertEquals(cantoEntityList.get(1).getText(), TEXT_II);
}
Example #28
Source File: UtilESTest.java, from deep-spark (Apache License 2.0)

@Test
public void testGetCellFromJson() throws UnknownHostException, NoSuchFieldException,
        IllegalAccessException, InvocationTargetException, InstantiationException, NoSuchMethodException {
    LinkedMapWritable bson = createJsonTest();

    Cells cells = UtilES.getCellFromJson(bson, "book");

    Map<Writable, Writable> mapMetadata = (Map<Writable, Writable>) bson.get(new Text("metadata"));
    assertEquals(mapMetadata.get(new Text("author")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("author").getCellValue());
    assertEquals(mapMetadata.get(new Text("title")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("title").getCellValue());
    assertEquals(mapMetadata.get(new Text("source")).toString(),
            ((Cells) cells.getCellByName("metadata").getCellValue()).getCellByName("source").getCellValue());

    // Check the list object.
    List<Cells> list = (List<Cells>) cells.getCellByName("cantos").getCellValue();
    LinkedMapWritable[] mapCantos = (LinkedMapWritable[]) ((ArrayWritable) bson.get(new Text("cantos"))).get();
    assertEquals(mapCantos[0].get(new Text("canto")).toString(), list.get(0).getCellByName("canto").getCellValue());
    assertEquals(mapCantos[0].get(new Text("text")).toString(), list.get(0).getCellByName("text").getCellValue());
    assertEquals(mapCantos[1].get(new Text("canto")).toString(), list.get(1).getCellByName("canto").getCellValue());
    assertEquals(mapCantos[1].get(new Text("text")).toString(), list.get(1).getCellByName("text").getCellValue());
}