package tutorial.storm.trident.example.elasticsearch;

import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import storm.trident.state.State;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

/**
 * A Trident {@link State} that indexes tweets into ElasticSearch and
 * searches the stored tweets by keyword.
 *
 * @author Davide Palmisano ([email protected])
 */
public class ElasticSearchState implements State {

    /** Name of the index every tweet is written to and searched in. */
    private static final String INDEX = "hackaton";

    /** Document type used for tweets inside {@link #INDEX}. */
    private static final String TYPE = "tweets";

    /** Document field holding the tweet id. */
    private static final String FIELD_ID = "id";

    /** Document field holding the tweet text. */
    private static final String FIELD_TEXT = "text";

    private final Client client;

    /**
     * Creates a state that talks to ElasticSearch through the given client.
     *
     * @param client the ElasticSearch client used for all index and search calls
     */
    public ElasticSearchState(Client client) {
        this.client = client;
    }

    /**
     * Called by Trident when it starts operating on a batch. Transaction ids
     * are not used by this state, so this is a no-op.
     *
     * @param txid the id of the transaction being started (ignored)
     */
    @Override
    public void beginCommit(Long txid) {
    }

    /**
     * Called by Trident when a batch is done. Transaction ids are not used by
     * this state, so this is a no-op.
     *
     * @param txid the id of the transaction being committed (ignored)
     */
    @Override
    public void commit(Long txid) {
    }

    /**
     * Indexes a batch of tweets with a single ElasticSearch bulk request.
     * The two lists are parallel: {@code tweets.get(i)} is the text of the
     * tweet whose id is {@code tweetIds.get(i)}. Texts beyond the number of
     * ids are ignored.
     *
     * <p>Indexing is best-effort: a tweet whose JSON document cannot be built
     * is reported on {@code System.err} and skipped. Nothing is sent when
     * there is nothing to index.
     *
     * @param tweetIds ids of the tweets to index; {@code null} or empty means no-op
     * @param tweets   texts of the tweets, one per id
     * @throws IllegalArgumentException if {@code tweets} is {@code null} or
     *                                  holds fewer entries than {@code tweetIds}
     */
    public void bulkIndex(List<Long> tweetIds, List<String> tweets) {
        if (tweetIds == null || tweetIds.isEmpty()) {
            return;
        }
        if (tweets == null || tweets.size() < tweetIds.size()) {
            // Fail up-front with a clear message instead of an
            // IndexOutOfBoundsException half-way through the batch.
            throw new IllegalArgumentException(
                    "expected one text per tweet id, got [" + tweetIds.size() + "] ids and ["
                            + (tweets == null ? "no" : String.valueOf(tweets.size())) + "] texts");
        }

        BulkRequestBuilder requestBuilder = client.prepareBulk();
        int queued = 0;
        for (int i = 0; i < tweetIds.size(); i++) {
            XContentBuilder builder;
            try {
                builder = jsonBuilder()
                        .startObject()
                        .field(FIELD_TEXT, tweets.get(i))
                        .field(FIELD_ID, tweetIds.get(i))
                        .endObject();
            } catch (IOException e) {
                // Best-effort: drop this tweet, but leave a trace of what was lost.
                System.err.println("skipping tweet [" + tweetIds.get(i)
                        + "], could not build its document: " + e);
                continue;
            }
            // prepareIndex already sets index and type, no need to set them again.
            IndexRequestBuilder request = client.prepareIndex(INDEX, TYPE).setSource(builder);
            requestBuilder.add(request);
            queued++;
        }

        if (queued == 0) {
            // A bulk request without any action fails ElasticSearch's request
            // validation, so do not execute it.
            return;
        }

        BulkResponse bulkResponse = requestBuilder.execute().actionGet();
        int items = bulkResponse.getItems().length;
        System.err.println("indexed [" + items + "] items, with failures? ["
                + bulkResponse.hasFailures() + "]");
    }

    /**
     * Searches the stored tweets whose text matches the given keyword and
     * returns their ids.
     *
     * <p>The search is best-effort: if the query fails, the failure is
     * reported on {@code System.err} and an empty set is returned.
     *
     * @param keyword the keyword to look for in the tweet text
     * @return the ids of the matching tweets as strings; empty when nothing
     *         matches, the keyword is {@code null}, or the query fails
     */
    public Set<String> search(String keyword) {
        Set<String> result = new HashSet<String>();
        if (keyword == null) {
            return result;
        }

        SearchResponse response;
        try {
            response = client.prepareSearch()
                    .setIndices(INDEX)
                    .setTypes(TYPE)
                    .addFields(FIELD_ID, FIELD_TEXT)
                    .setQuery(QueryBuilders.fieldQuery(FIELD_TEXT, keyword))
                    .execute()
                    .actionGet();
        } catch (RuntimeException e) {
            // Only runtime failures are swallowed; JVM Errors still propagate.
            System.err.println("search for [" + keyword + "] failed: " + e);
            return result;
        }

        for (SearchHit hit : response.getHits()) {
            if (hit.field(FIELD_ID) == null) {
                // Hit without a stored id field: nothing to report for it.
                continue;
            }
            // Read the value as a plain Object: forcing it to Long would throw a
            // ClassCastException if it came back as another numeric type.
            Object id = hit.field(FIELD_ID).getValue();
            if (id != null) {
                result.add(String.valueOf(id));
            }
        }
        return result;
    }
}