java source code of ElasticsearchClient

/**
 *  ElasticsearchClient
 *  Copyright 18.02.2016 by Michael Peter Christen, @0rb1t3r
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.grid.io.index;

import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.lucene.search.Explanation;
import org.elasticsearch.action.DocWriteResponse;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsAction;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsNodes;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsRequest;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsRequestBuilder;
import org.elasticsearch.action.admin.cluster.stats.ClusterStatsResponse;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetItemResponse;
import org.elasticsearch.action.get.MultiGetRequestBuilder;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.NoNodeAvailableException;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;

import net.yacy.grid.mcp.Data;

/**
 * To get data out of the elasticsearch index which is written with this client, try:
 * http://localhost:9200/web/_search?q=*:*
 * http://localhost:9200/crawler/_search?q=*:*
 *
 */
public class ElasticsearchClient {

    // keep-alive time requested for the scroll context which is opened by deleteByQuery
    private static final TimeValue scrollKeepAlive = TimeValue.timeValueSeconds(60);
    // the following three values control the throttling at the end of a bulk write
    private static long throttling_time_threshold = 2000L; // update time high limit (milliseconds a bulk write may take)
    private static long throttling_ops_threshold = 1000L; // messages per second low limit
    private static double throttling_factor = 1.0d; // factor applied on update duration if both thresholds are passed

    private String[] addresses; // host:port addresses of the cluster nodes, as given to the constructor
    private String clusterName; // cluster name put into the client settings; may be null
    private Client elasticsearchClient; // the current client; replaced by connect() and set to null by close()

    /**
     * create a elasticsearch transport client (remote elasticsearch)
     * @param addresses an array of host:port addresses
     * @param clusterName
     */
    public ElasticsearchClient(final String[] addresses, final String clusterName) {
        Data.logger.info("ElasticsearchClient initiated client, address: " + addresses[0] + ", clusterName: " + clusterName);
        this.addresses = addresses;
        this.clusterName = clusterName;
        connect();
    }

    /**
     * Creates a new transport client, registers all configured host:port addresses with it and
     * makes it the current client. A previously used client is closed in a separate thread.
     * Address entries without a port are skipped; entries with an unknown host or an invalid
     * port are logged and skipped, so that one bad entry does not prevent the use of the others.
     */
    private void connect() {
        // create default settings and add cluster name
        final Settings.Builder settings = Settings.builder()
                .put("cluster.routing.allocation.enable", "all")
                .put("cluster.routing.allocation.allow_rebalance", "always");
        if (this.clusterName != null) settings.put("cluster.name", this.clusterName);

        // create a client; the creation is repeated every ten seconds until it succeeds
        System.setProperty("es.set.netty.runtime.available.processors", "false"); // patch which prevents io.netty.util.NettyRuntime$AvailableProcessorsHolder.setAvailableProcessors from failing
        TransportClient newClient = null;
        while (true) try {
            newClient = new PreBuiltTransportClient(settings.build());
            break;
        } catch (Exception e) {
            Data.logger.warn("failed to create an elastic client, retrying...", e);
            // an interruption only shortens the pause; the loop keeps trying
            try { Thread.sleep(10000); } catch (InterruptedException e1) {}
        }

        for (String address: this.addresses) {
            final String a = address.trim();
            final int p = a.indexOf(':');
            if (p < 0) continue; // no port given: the entry cannot be used
            try {
                final InetAddress host = InetAddress.getByName(a.substring(0, p));
                final int port = Integer.parseInt(a.substring(p + 1));
                newClient.addTransportAddress(new TransportAddress(host, port));
            } catch (UnknownHostException | IllegalArgumentException e) {
                // IllegalArgumentException covers a non-numeric port (NumberFormatException) and a port out of range
                Data.logger.warn("ElasticsearchClient cannot use address '" + a + "'", e);
            }
        }

        // replace old client with new client
        final Client oldClient = this.elasticsearchClient;
        this.elasticsearchClient = newClient; // just switch out without closing the old one first
        // because closing may cause blocking, we close this concurrently
        if (oldClient != null) new Thread() {
            @Override
            public void run() {
                this.setName("temporary client close job " + clusterName);
                try {
                    oldClient.close();
                } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                    // the old client is discarded anyway, a failed close is not reported
                }
            }
        }.start();
    }

    /**
     * Requests the cluster statistics from the connected cluster.
     * @return the node statistics part of the cluster stats response
     */
    @SuppressWarnings("unused")
    private ClusterStatsNodes getClusterStatsNodes() {
        final ClusterStatsRequest request =
            new ClusterStatsRequestBuilder(elasticsearchClient.admin().cluster(), ClusterStatsAction.INSTANCE).request();
        final ClusterStatsResponse response = elasticsearchClient.admin().cluster().clusterStats(request).actionGet();
        return response.getNodesStats();
    }

    // remembers a positive result of clusterReady() so that the cluster is not asked again
    private boolean clusterReadyCache = false;

    /**
     * Checks if the cluster health status is anything but RED.
     * Once the check was positive, the result is cached and the cluster is not asked again.
     * @return true if the cluster status is (or once was) not RED
     */
    @SuppressWarnings("unused")
    private boolean clusterReady() {
        if (!clusterReadyCache) {
            final ClusterHealthStatus current = elasticsearchClient.admin().cluster().prepareHealth().get().getStatus();
            clusterReadyCache = current != ClusterHealthStatus.RED;
        }
        return clusterReadyCache;
    }

    /**
     * Waits until the cluster has reached the given health status.
     * The health request is repeated as long as it times out and the given time is not exceeded.
     * @param maxtimemillis the time in milliseconds after which a timed-out health request ends the waiting
     * @param status the health status to wait for
     * @return true if a health request returned without time-out, false if the waiting time was exceeded
     */
    @SuppressWarnings("unused")
    private boolean wait_ready(long maxtimemillis, ClusterHealthStatus status) {
        final long begin = System.currentTimeMillis();
        while (true) {
            final ClusterHealthResponse health = elasticsearchClient.admin().cluster().prepareHealth().setWaitForStatus(status).execute().actionGet();
            if (!health.isTimedOut()) return true;
            if (System.currentTimeMillis() - begin > maxtimemillis) return false;
        }
    }

    /**
     * Refresh an index: the refresh request is sent to the cluster and the method waits for the response.
     * (The request was formerly only constructed but never executed, so the call had no effect.)
     * @param indexName the name of the index to be refreshed
     */
    public void refresh(String indexName) {
        this.elasticsearchClient.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();
    }

    /**
     * Raise the limit of the total number of fields in the mapping of an index to 10000.
     * The update-settings request is sent to the cluster; formerly the request was only
     * constructed and never executed. A cluster which cannot be reached is logged and does not
     * cause an exception; other failures (such as a rejected setting) are passed to the caller.
     * @param indexName the name of the index whose settings are updated
     */
    public void settings(String indexName) {
        final Settings.Builder settingsBuilder = Settings.builder()
                .put("index.mapping.total_fields.limit", 10000);
        final UpdateSettingsRequest request = new UpdateSettingsRequest(indexName);
        request.settings(settingsBuilder);
        try {
            this.elasticsearchClient.admin().indices().updateSettings(request).actionGet();
        } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
            Data.logger.warn("ElasticsearchClient failed to update settings of index " + indexName, e);
        }
    }
    
    /**
     * Create a new index unless it exists already. This method must be called to ensure that an
     * elasticsearch index is available and can be used.
     * @param indexName the name of the index
     * @param shards the number of shards of a newly created index
     * @param replicas the number of replicas of a newly created index
     * @throws NoNodeAvailableException in case that no elasticsearch server can be contacted
     * @throws IllegalStateException in case that no elasticsearch server can be contacted
     */
    public void createIndexIfNotExists(String indexName, final int shards, final int replicas) {
        final boolean present = this.elasticsearchClient.admin().indices().prepareExists(indexName).execute().actionGet().isExists();
        if (present) return; // nothing to do, the index is there already

        final Settings.Builder indexSettings = Settings.builder()
                .put("number_of_shards", shards)
                .put("number_of_replicas", replicas);
        this.elasticsearchClient.admin().indices()
            .prepareCreate(indexName)
            .setSettings(indexSettings)
            .execute()
            .actionGet();
    }

    /**
     * Put a mapping for the type "_default_" into an index.
     * If the cluster cannot be reached, the failure is logged and no exception is thrown.
     * @param indexName the name of the index
     * @param mapping the mapping as json text
     */
    public void setMapping(String indexName, String mapping) {
        try {
            this.elasticsearchClient.admin().indices()
                .preparePutMapping(indexName)
                .setSource(mapping, XContentType.JSON)
                .setType("_default_")
                .execute()
                .actionGet();
        } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
            Data.logger.warn("", e);
        }
    }

    /**
     * Close the connection to the remote elasticsearch cluster. This should only be called when
     * the application is terminated.
     * Please avoid to open and close the ElasticsearchClient for the same cluster and index more than once.
     * The client reference is set to null after closing, therefore a second call does nothing.
     * This is needed because the finalize method calls this method as well.
     */
    public void close() {
        if (this.elasticsearchClient == null) return; // already closed
        this.elasticsearchClient.close();
        this.elasticsearchClient = null;
    }

    /**
     * A finalize method is added to ensure that close() is always called.
     * It overrides Object.finalize, which is marked with the Override annotation so that the
     * compiler verifies the signature.
     */
    @Override
    public void finalize() {
        this.close(); // will not cause harm if this is the second call to close()
    }

    /**
     * Retrieve a statistic object from the connected elasticsearch cluster.
     *
     * @return the node statistics from the cluster stats of the connected cluster
     */
    @SuppressWarnings("unused")
    private ClusterStatsNodes getStats() {
        final ClusterStatsRequest statsRequest =
            new ClusterStatsRequestBuilder(elasticsearchClient.admin().cluster(), ClusterStatsAction.INSTANCE).request();
        final ClusterStatsResponse statsResponse = elasticsearchClient.admin().cluster().clusterStats(statsRequest).actionGet();
        return statsResponse.getNodesStats();
    }

    /**
     * Get the number of documents in the search index.
     *
     * @param indexName the name of the index
     * @return the count of all documents in the index
     */
    private long count(String indexName) {
        // a match-all query wrapped into a constant score query selects every document
        final QueryBuilder everything = QueryBuilders.constantScoreQuery(QueryBuilders.matchAllQuery());
        return count(everything, indexName);
    }

    /**
     * Get the number of documents in the search index for a given search query.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the count succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     *
     * @param q
     *            the query
     * @param indexName
     *            the name of the index
     * @return the count of all documents in the index which matches with the query
     */
    public long count(final QueryBuilder q, final String indexName) {
        while (true) {
            try {
                return countInternal(q, indexName);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient count failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Count the documents which match a query with a single search request (no retry).
     * The search asks for zero hits, only the total hit count is used.
     * @param q the query
     * @param indexName the name of the index
     * @return the total number of hits
     */
    public long countInternal(final QueryBuilder q, final String indexName) {
        final SearchHits hits = elasticsearchClient
                .prepareSearch(indexName)
                .setQuery(q)
                .setSize(0)
                .execute()
                .actionGet()
                .getHits();
        return hits.getTotalHits();
    }

    /**
     * Check if a document with the given id exists.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the check succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     * @param indexName the name of the index
     * @param id the unique identifier of a document
     * @return true if the document exists, false otherwise
     */
    public boolean exist(String indexName, final String id) {
        while (true) {
            try {
                return existInternal(indexName, id);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient exist failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Check the existence of a document with a single get request (no retry).
     * The source of the document is not fetched.
     * @param indexName the name of the index
     * @param id the unique identifier of a document
     * @return true if the document exists, false otherwise
     */
    public boolean existInternal(String indexName, final String id) {
        return elasticsearchClient
                .prepareGet(indexName, null, id)
                .setFetchSource(false)
                .execute()
                .actionGet()
                .isExists();
    }

    /**
     * Check for a collection of ids which of them exist as document in the index.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the check succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     * @param indexName the name of the index
     * @param ids the document ids to be checked
     * @return the subset of the given ids for which a document exists
     */
    public Set<String> existBulk(String indexName, final Collection<String> ids) {
        while (true) {
            try {
                return existBulkInternal(indexName, ids);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient existBulk failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Check the existence of documents with a single multi-get request (no retry).
     * @param indexName the name of the index
     * @param ids the document ids to be checked; null or empty gives an empty result without a request
     * @return a new set with those ids for which a document exists
     */
    private Set<String> existBulkInternal(String indexName, final Collection<String> ids) {
        final Set<String> existing = new HashSet<>();
        if (ids == null || ids.isEmpty()) return existing;
        final MultiGetResponse multiGetItemResponses = elasticsearchClient.prepareMultiGet()
                .add(indexName, null, ids)
                .get();
        for (MultiGetItemResponse itemResponse : multiGetItemResponses) {
            final GetResponse response = itemResponse.getResponse();
            // an item which failed has no response object; such an id is treated as not existing
            // (readMapBulkInternal guards against the same null response)
            if (response != null && response.isExists()) {
                existing.add(response.getId());
            }
        }
        return existing;
    }

    /**
     * Get the type name of a document or null if the document does not exist.
     * This does the same existence check as exist(), but it is able to return the
     * type name in case that the document was found. No retry is done here.
     * @param indexName
     *            the name of the index
     * @param id
     *            the unique identifier of a document
     * @return the type name of the document if it exists, null otherwise
     */
    @SuppressWarnings("unused")
    private String getType(String indexName, final String id) {
        final GetResponse found = elasticsearchClient.prepareGet(indexName, null, id).execute().actionGet();
        if (found.isExists()) {
            return found.getType();
        }
        return null;
    }

    /**
     * Delete a document for a given id.
     * ATTENTION: deleted documents cannot be re-inserted again if version number
     * checking is used and the new document does not comply to the version number
     * rule. The information which document was deleted persists for one minute and
     * then inserting documents with the same version number as before is possible.
     * To modify this behavior, change the configuration setting index.gc_deletes
     *
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the deletion succeeds. If the thread is interrupted while waiting, the interrupt
     * flag is restored and the last failure is thrown instead of retrying forever.
     *
     * @param indexName
     *            the name of the index
     * @param typeName
     *            the type name of the document
     * @param id
     *            the unique identifier of a document
     * @return true if the document existed and was deleted, false otherwise
     */
    public boolean delete(String indexName, String typeName, final String id) {
        while (true) {
            try {
                return deleteInternal(indexName, typeName, id);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient delete failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Delete a document with a single delete request (no retry).
     * @param indexName the name of the index
     * @param typeName the type name of the document
     * @param id the unique identifier of a document
     * @return true if the result of the request is DELETED, false otherwise
     */
    private boolean deleteInternal(String indexName, String typeName, final String id) {
        final DocWriteResponse.Result outcome = elasticsearchClient.prepareDelete(indexName, typeName, id).get().getResult();
        return outcome == DocWriteResponse.Result.DELETED;
    }

    /**
     * Delete documents using a query. Check what would be deleted first with a normal search query!
     * Elasticsearch once provided a native prepareDeleteByQuery method, but this was removed
     * in later versions. Instead, there is a plugin which iterates over search results,
     * see https://www.elastic.co/guide/en/elasticsearch/plugins/current/plugins-delete-by-query.html
     * We simulate the same behaviour here without the need of that plugin.
     *
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the deletion succeeds. If the thread is interrupted while waiting, the interrupt
     * flag is restored and the last failure is thrown instead of retrying forever.
     *
     * @param indexName the name of the index
     * @param q the query which selects the documents to be deleted
     * @return delete document count
     */
    public int deleteByQuery(String indexName, final QueryBuilder q) {
        while (true) {
            try {
                return deleteByQueryInternal(indexName, q);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient deleteByQuery failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Collect the ids of all documents which match the query with a scroll search and delete
     * them with one bulk request afterwards (no retry).
     * The scroll context is cleared explicitly as soon as the ids are collected, it would
     * otherwise stay open on the cluster until the scroll keep-alive time has passed.
     * @param indexName the name of the index
     * @param q the query which selects the documents to be deleted
     * @return the result of deleteBulk for the collected ids
     */
    private int deleteByQueryInternal(String indexName, final QueryBuilder q) {
        final Map<String, String> ids = new TreeMap<>(); // document id -> document type
        SearchResponse response = elasticsearchClient.prepareSearch(indexName)
            .setSearchType(SearchType.QUERY_THEN_FETCH)
            .setScroll(scrollKeepAlive)
            .setQuery(q)
            .setSize(100)
            .execute().actionGet();
        String scrollId = response.getScrollId();
        try {
            while (true) {
                final SearchHit[] hits = response.getHits().getHits();
                // termination
                if (hits.length == 0) break;
                // accumulate the ids here, don't delete them right now to prevent an interference of the delete with the
                // scroll
                for (SearchHit hit : hits) {
                    ids.put(hit.getId(), hit.getType());
                }
                // scroll
                response = elasticsearchClient.prepareSearchScroll(scrollId).setScroll(scrollKeepAlive).execute().actionGet();
                scrollId = response.getScrollId();
            }
        } finally {
            // release the scroll context; a failure here must not hide the result or a previous exception
            if (scrollId != null) try {
                elasticsearchClient.prepareClearScroll().addScrollId(scrollId).execute().actionGet();
            } catch (RuntimeException e) {
                Data.logger.warn("ElasticsearchClient deleteByQuery failed to clear the scroll context", e);
            }
        }
        return deleteBulk(indexName, ids);
    }

    /**
     * Delete a list of documents for a given set of ids with one bulk request.
     * ATTENTION: deleted documents cannot be re-inserted for a while if version number
     * checking is used; see the configuration setting index.gc_deletes.
     * The returned number is taken from the bulk response: only items whose result is DELETED
     * are counted, items which failed or whose document was not found are not counted.
     *
     * @param indexName
     *            the name of the index
     * @param ids
     *            a map from the unique identifier of a document to the document type
     * @return the number of deleted documents
     */
    private int deleteBulk(String indexName, Map<String, String> ids) {
        // bulk-delete the ids
        if (ids == null || ids.isEmpty()) return 0;
        final BulkRequestBuilder bulkRequest = elasticsearchClient.prepareBulk();
        for (Map.Entry<String, String> id : ids.entrySet()) {
            bulkRequest.add(new DeleteRequest().id(id.getKey()).index(indexName).type(id.getValue()));
        }
        final BulkResponse bulkResponse = bulkRequest.execute().actionGet();
        int deleted = 0;
        for (BulkItemResponse item : bulkResponse.getItems()) {
            final DocWriteResponse response = item.getResponse(); // null if the item failed
            if (response != null && response.getResult() == DocWriteResponse.Result.DELETED) deleted++;
        }
        return deleted;
    }

    /**
     * Read a document from the search index for a given id.
     * This is the cheapest document retrieval from the '_source' field because
     * elasticsearch does not do any json transformation or parsing. We
     * get simply the text from the '_source' field. This might be useful to
     * make a dump from the index content. No retry is done here.
     *
     * @param indexName
     *            the name of the index
     * @param id
     *            the unique identifier of a document
     * @return the document as source text
     */
    @SuppressWarnings("unused")
    private byte[] readSource(String indexName, final String id) {
        return elasticsearchClient
                .prepareGet(indexName, null, id)
                .execute()
                .actionGet()
                .getSourceAsBytes();
    }

    /**
     * Read a json document from the search index for a given id.
     * Elasticsearch reads the '_source' field and parses the content as json.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the read succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     *
     * @param indexName
     *            the name of the index
     * @param id
     *            the unique identifier of a document
     * @return the document as json, matched on a Map<String, Object> object instance;
     *         null if the document does not exist
     */
    public Map<String, Object> readMap(final String indexName, final String id) {
        while (true) {
            try {
                return readMapInternal(indexName, id);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient readMap failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Read a json document with a single get request (no retry).
     * @param indexName the name of the index
     * @param id the unique identifier of a document
     * @return the document as computed by getMap from the get response
     */
    private Map<String, Object> readMapInternal(final String indexName, final String id) {
        return getMap(elasticsearchClient.prepareGet(indexName, null, id).execute().actionGet());
    }

    /**
     * Read several json documents from the search index with one request.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the read succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     * @param indexName the name of the index
     * @param ids the unique identifiers of the documents
     * @return a map from the document id to the document
     */
    public Map<String, Map<String, Object>> readMapBulk(final String indexName, final Collection<String> ids) {
        while (true) {
            try {
                return readMapBulkInternal(indexName, ids);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient readMapBulk failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Read several json documents with a single multi-get request (no retry).
     * All ids are added to the request first and the request is executed exactly once;
     * formerly the growing request was additionally executed after every single id.
     * @param indexName the name of the index
     * @param ids the unique identifiers of the documents; null or empty gives an empty result without a request
     * @return a map from the document id to the document as computed by getMap;
     *         items without a response object are left out
     */
    private Map<String, Map<String, Object>> readMapBulkInternal(final String indexName, final Collection<String> ids) {
        final Map<String, Map<String, Object>> bulkresponse = new HashMap<>();
        if (ids == null || ids.isEmpty()) return bulkresponse;
        final MultiGetResponse response = elasticsearchClient.prepareMultiGet()
                .add(indexName, null, ids)
                .execute().actionGet();
        for (MultiGetItemResponse r: response.getResponses()) {
            final GetResponse gr = r.getResponse();
            if (gr != null) {
                bulkresponse.put(r.getId(), getMap(gr));
            }
        }
        return bulkresponse;
    }

    /**
     * Extract the document from a get response.
     * The fields "id" and "type" are added from the response if the document does not have them.
     * @param response the response of a get request
     * @return the source of the document as map, or null if the document does not exist or has no source
     */
    protected static Map<String, Object> getMap(GetResponse response) {
        if (!response.isExists()) return null;
        final Map<String, Object> source = response.getSourceAsMap();
        if (source == null) return null;
        if (!source.containsKey("id")) source.put("id", response.getId());
        if (!source.containsKey("type")) source.put("type", response.getType());
        return source;
    }

    /**
     * Write a json document into the search index. The id must be calculated by the calling environment.
     * This id should be unique for the json. The best way to calculate this id is, to use an existing
     * field from the jsonMap which contains a unique identifier for the jsonMap.
     *
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the write succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     *
     * @param indexName the name of the index
     * @param typeName the type of the index
     * @param id the unique identifier of a document
     * @param jsonMap the json document to be indexed in elasticsearch
     * @return true if the document with given id did not exist before, false if it existed and was overwritten
     */
    public boolean writeMap(String indexName, String typeName, String id, final Map<String, Object> jsonMap) {
        while (true) {
            try {
                return writeMapInternal(indexName, typeName, id, jsonMap);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient writeMap failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Internal method used for a re-try after NoNodeAvailableException | IllegalStateException:
     * writes one document with an update request which inserts the document if it does not exist.
     * An entry "_version" is taken out of the jsonMap for the time of the write and is put back
     * afterwards, also if the write fails, so that the map of the caller is unchanged on a re-try.
     * The version value is not used for the write and may therefore be of any type.
     * @param indexName the name of the index
     * @param typeName the type of the index
     * @param id the unique identifier of a document
     * @param jsonMap the json document to be indexed in elasticsearch
     * @return true if the document was created, false if it was updated
     */
    private boolean writeMapInternal(String indexName, String typeName, String id, final Map<String, Object> jsonMap) {
        final long start = System.currentTimeMillis();
        // get the version number out of the json, if any is given, so that it is not written as a document field
        final Object version = jsonMap.remove("_version");
        // put this to the index
        final UpdateResponse r;
        try {
            r = elasticsearchClient
                .prepareUpdate(indexName, typeName, id)
                .setDoc(jsonMap)
                .setUpsert(jsonMap)
                .execute()
                .actionGet();
        } finally {
            if (version != null) jsonMap.put("_version", version); // to prevent side effects
        }
        // documentation about the versioning is available at
        // https://www.elastic.co/blog/elasticsearch-versioning-support
        // TODO: error handling
        final boolean created = r != null && r.status() == RestStatus.CREATED; // true means created, false means updated
        final long duration = Math.max(1, System.currentTimeMillis() - start);
        Data.logger.info("ElasticsearchClient write entry to index " + indexName + ": " + (created ? "created":"updated") + ", " + duration + " ms");
        return created;
    }

    /**
     * Bulk message write.
     * If the cluster cannot be reached, the method waits one second, reconnects and tries again
     * until the write succeeds. If the thread is interrupted while waiting, the interrupt flag
     * is restored and the last failure is thrown instead of retrying forever.
     * @param indexName
     *            the name of the index
     * @param jsonMapList
     *            a list of entries (id, type and json document) to be indexed
     * @return a result object with the ids of the created documents and the error messages.
     *            In the errors, the key is the id of the document, the value is an error string.
     *            The method was only successful if the errors are empty.
     */
    public BulkWriteResult writeMapBulk(final String indexName, final List<BulkEntry> jsonMapList) {
        while (true) {
            try {
                return writeMapBulkInternal(indexName, jsonMapList);
            } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                Data.logger.info("ElasticsearchClient writeMapBulk failed with " + e.getMessage() + ", retrying to connect node...");
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException interrupted) {
                    // the thread was asked to stop: keep the flag and give up with the last failure
                    Thread.currentThread().interrupt();
                    throw e;
                }
                connect();
            }
        }
    }

    /**
     * Write a list of entries with a single bulk request (no retry).
     * Entries without an id are skipped. If nothing is left to write, an empty result is
     * returned without sending a request, because a bulk request without actions is not valid.
     * After the write, the thread is paused if the write took longer than the time threshold
     * and less objects per second than the ops threshold were created.
     * @param indexName the name of the index
     * @param jsonMapList a list of entries (id, type and json document) to be indexed
     * @return a result object with the ids of the created documents and the error messages per document id
     */
    private BulkWriteResult writeMapBulkInternal(final String indexName, final List<BulkEntry> jsonMapList) {
        final BulkWriteResult result = new BulkWriteResult();
        if (jsonMapList == null || jsonMapList.isEmpty()) return result;
        final long start = System.currentTimeMillis();
        final BulkRequestBuilder bulkRequest = elasticsearchClient.prepareBulk();
        for (BulkEntry be: jsonMapList) {
            if (be.id == null) continue;
            bulkRequest.add(
                    elasticsearchClient.prepareIndex(indexName, be.type, be.id).setSource(be.jsonMap)
                        .setCreate(false) // enforces OpType.INDEX
                        .setVersionType(VersionType.INTERNAL));
        }
        if (bulkRequest.numberOfActions() == 0) return result; // all entries were skipped
        final BulkResponse bulkResponse = bulkRequest.get();
        for (BulkItemResponse r: bulkResponse.getItems()) {
            final String id = r.getId();
            final DocWriteResponse response = r.getResponse();
            if (response == null) {
                final String err = r.getFailureMessage();
                if (err != null) {
                    result.errors.put(id, err);
                }
            } else {
                if (response.getResult() == DocWriteResponse.Result.CREATED) result.created.add(id);
            }
        }
        final long duration = Math.max(1, System.currentTimeMillis() - start);
        long regulator = 0;
        final int created = result.created.size();
        final long ops = created * 1000L / duration; // long arithmetic, the product must not overflow
        if (duration > throttling_time_threshold && ops < throttling_ops_threshold) {
            regulator = (long) (throttling_factor * duration);
            try {
                Thread.sleep(regulator);
            } catch (InterruptedException e) {
                // the write is complete, only the pause was cut short; keep the interruption visible to the caller
                Thread.currentThread().interrupt();
            }
        }
        Data.logger.info("ElasticsearchClient write bulk to index " + indexName + ": " + jsonMapList.size() + " entries, " + result.created.size() + " created, " + result.errors.size() + " errors, " + duration + " ms" + (regulator == 0 ? "" : ", throttled with " + regulator + " ms") + ", " + ops + " objects/second");
        return result;
    }

    /**
     * The result of a bulk write: the ids of the documents which were created
     * and the error messages of the documents which could not be written.
     * Both collections keep the order of insertion.
     */
    public static class BulkWriteResult {
        // document id -> error message
        private Map<String, String> errors = new LinkedHashMap<>();
        // ids of the documents which were created
        private Set<String> created = new LinkedHashSet<>();

        /** Creates a result without errors and without created documents. */
        public BulkWriteResult() {
        }

        /** @return the error messages, the key is the id of the document */
        public Map<String, String> getErrors() {
            return errors;
        }

        /** @return the ids of the created documents */
        public Set<String> getCreated() {
            return created;
        }
    }

    // ISO date-time formatter with the time zone set to UTC; BulkEntry prints the current time with it
    private final static DateTimeFormatter utcFormatter = ISODateTimeFormat.dateTime().withZoneUTC();

    /**
     * One document of a bulk write: the id, the type name and the json payload.
     */
    public static class BulkEntry {
        private String id;
        private String type;
        private Map<String, Object> jsonMap;

        /**
         * initialize entry for bulk writes
         * @param id the id of the entry
         * @param type the type name
         * @param timestamp_fieldname the name of the timestamp field, null for unused. If a name is given here
         *        and the payload does not have this field yet, the field is filled with the current time.
         *        The time is written into the given jsonMap object itself.
         * @param jsonMap the payload object
         */
        public BulkEntry(final String id, final String type, final String timestamp_fieldname, final Map<String, Object> jsonMap) {
            this.jsonMap = jsonMap;
            this.type = type;
            this.id = id;
            if (timestamp_fieldname == null) return; // no timestamp wanted
            if (this.jsonMap.containsKey(timestamp_fieldname)) return; // keep a given timestamp
            final String now = utcFormatter.print(System.currentTimeMillis());
            this.jsonMap.put(timestamp_fieldname, now);
        }
    }

    /**
     * Runs a search and retries until it succeeds.
     * A failed search is retried up to ten times with a pause of 100 ms; if all of these attempts
     * fail, the method waits one second, calls connect() and starts over. There is no upper limit
     * for the number of rounds. Only the exception types caught below trigger a retry, every other
     * exception of the search is passed to the caller.
     * If the calling thread is interrupted while it waits for a retry, the interrupt flag is
     * restored and the method gives up instead of looping on.
     * @param indexName the name of the search index
     * @param queryBuilder a query for the search
     * @param postFilter a filter that does not affect aggregations, may be null
     * @param sort the sort order which is applied to the request
     * @param hb the highlighter, may be null
     * @param timezoneOffset passed on to the Query constructor
     * @param from the offset of the first result
     * @param resultCount the number of wanted results
     * @param aggregationLimit the maximum count of facet entities, not search results
     * @param explain if true, an explanation is requested for every hit
     * @param aggregationFields the aggregation fields. If no aggregation is wanted, pass no (zero) field(s)
     * @return the evaluated search
     * @throws IllegalStateException if the thread was interrupted while waiting for a retry; the cause is the last search failure
     */
    public Query query(final String indexName, final QueryBuilder queryBuilder, final QueryBuilder postFilter, final Sort sort, final HighlightBuilder hb, int timezoneOffset, int from, int resultCount, int aggregationLimit, boolean explain, WebMapping... aggregationFields) {
        Exception ee = null;
        while (true) {
            for (int t = 0; t < 10; t++) {
                try {
                    return new Query(indexName, queryBuilder, postFilter, sort, hb, timezoneOffset, from, resultCount, aggregationLimit, explain, aggregationFields);
                } catch (NoNodeAvailableException | IllegalStateException | ClusterBlockException | SearchPhaseExecutionException e) {
                    ee = e;
                    Data.logger.info("ElasticsearchClient query failed with " + e.getMessage() + ", retrying attempt " + t + " ...");
                    try {
                        Thread.sleep(100);
                    } catch (InterruptedException interrupted) {
                        // an interrupt is a request to stop: keep the flag and leave the retry loop
                        Thread.currentThread().interrupt();
                        throw new IllegalStateException("ElasticsearchClient query interrupted while waiting for a retry", e);
                    }
                }
            }
            // the loop above only ends after ten failures, so ee is set here
            Data.logger.info("ElasticsearchClient query failed with " + ee.getMessage() + ", retrying to connect node...");
            try {
                Thread.sleep(1000);
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
                throw new IllegalStateException("ElasticsearchClient query interrupted while waiting for a reconnect", ee);
            }
            connect();
        }
    }

    /**
     * A search which is executed in the constructor; the public fields hold the evaluated response.
     * The lists results, explanations and highlights have one entry per returned hit, in the same order.
     */
    public class Query {
        /** the source documents of the returned hits; "id" and "type" are added from the hit if the source has no such key */
        public List<Map<String, Object>> results;
        /** the explanation of each hit as string, or an empty string if no explanation was requested */
        public List<String> explanations;
        /** the highlight fields of each hit */
        public List<Map<String, HighlightField>> highlights;
        /** the total number of hits as reported by the search, capped at Integer.MAX_VALUE; can be larger than results.size() */
        public int hitCount;
        /** for each aggregation field name the list of (term, document count) entries */
        public Map<String, List<Map.Entry<String, Long>>> aggregations;

        /**
         * Searches using a elasticsearch query.
         * @param indexName the name of the search index
         * @param queryBuilder a query for the search
         * @param postFilter a filter that does not affect aggregations, may be null
         * @param sort the sort order which is applied to the request
         * @param hb the highlighter, may be null
         * @param timezoneOffset not evaluated by this constructor
         * @param from - the offset of the first result within all hits
         * @param resultCount - the number of messages in the result; can be zero if only aggregations are wanted
         * @param aggregationLimit - the maximum count of facet entities, not search results
         * @param explain - if true, an explanation is requested and stored for every hit
         * @param aggregationFields - names of the aggregation fields. If no aggregation is wanted, pass no (zero) field(s)
         */
        private Query(final String indexName, final QueryBuilder queryBuilder, final QueryBuilder postFilter, final Sort sort, final HighlightBuilder hb, int timezoneOffset, int from, int resultCount, int aggregationLimit, boolean explain, WebMapping... aggregationFields) {
            // prepare request
            SearchRequestBuilder request = elasticsearchClient.prepareSearch(indexName);
            request
                    .setExplain(explain)
                    .setQuery(queryBuilder)
                    .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) // DFS_QUERY_THEN_FETCH is slower but provides stability of search results
                    .setFrom(from)
                    .setSize(resultCount);
            if (hb != null) request.highlighter(hb);
            if (postFilter != null) request.setPostFilter(postFilter);
            request.clearRescorers();
            for (WebMapping field: aggregationFields) {
                request.addAggregation(AggregationBuilders.terms(field.getMapping().name()).field(field.getMapping().name()).minDocCount(1).size(aggregationLimit));
            }
            // apply sort
            request = sort.sort(request);
            // get response
            SearchResponse response = request.execute().actionGet();
            SearchHits searchHits = response.getHits();
            // the total is a long; cap it so that the int field can never become negative
            this.hitCount = (int) Math.min(Integer.MAX_VALUE, searchHits.getTotalHits());

            // evaluate search result
            SearchHit[] hits = searchHits.getHits();
            // size the lists by the returned hits; the total hit count can be many times larger
            this.results = new ArrayList<Map<String, Object>>(hits.length);
            this.explanations = new ArrayList<String>(hits.length);
            this.highlights = new ArrayList<Map<String, HighlightField>>(hits.length);
            for (SearchHit hit: hits) {
                Map<String, Object> map = hit.getSourceAsMap();
                if (!map.containsKey("id")) map.put("id", hit.getId());
                if (!map.containsKey("type")) map.put("type", hit.getType());
                this.results.add(map);
                this.highlights.add(hit.getHighlightFields());
                if (explain) {
                    Explanation explanation = hit.getExplanation();
                    this.explanations.add(explanation.toString());
                } else {
                    // keep the list aligned with the results
                    this.explanations.add("");
                }
            }

            // evaluate aggregation
            // collect results: fields
            this.aggregations = new HashMap<>();
            for (WebMapping field: aggregationFields) {
                Terms fieldCounts = response.getAggregations().get(field.getMapping().name());
                List<? extends Bucket> buckets = fieldCounts.getBuckets();
                // aggregate double-tokens (matching lowercase): sum up the counts of keys which differ only in case
                Map<String, Long> checkMap = new HashMap<>();
                for (Bucket bucket: buckets) {
                    String key = bucket.getKeyAsString().trim();
                    if (key.length() > 0) {
                        String k = key.toLowerCase();
                        Long v = checkMap.get(k);
                        checkMap.put(k, v == null ? bucket.getDocCount() : v + bucket.getDocCount());
                    }
                }
                // second pass in bucket order: the first spelling of a key gets the summed count,
                // later spellings find nothing in the map any more and are dropped
                ArrayList<Map.Entry<String, Long>> list = new ArrayList<>(buckets.size());
                for (Bucket bucket: buckets) {
                    String key = bucket.getKeyAsString().trim();
                    if (key.length() > 0) {
                        Long v = checkMap.remove(key.toLowerCase());
                        if (v == null) continue;
                        list.add(new AbstractMap.SimpleEntry<String, Long>(key, v));
                    }
                }
                this.aggregations.put(field.getMapping().name(), list);
            }
        }
    }

    /**
     * Searches for documents where a field matches a term and all given constraints match as well.
     * Every condition is a constant-score term query inside one boolean "must" conjunction.
     * The constraint values are converted to lower case, the field value is used as given.
     * @param indexName the name of the search index
     * @param fieldName the name of the field which must match
     * @param fieldValue the term which the field must have
     * @param constraints further field name / value pairs which must match
     * @param latest not evaluated by this method
     * @return the source maps of the returned hits
     * @throws IOException declared, but not thrown by the statements of this method
     */
    @SuppressWarnings("unused")
    private List<Map<String, Object>> queryWithConstraints(final String indexName, final String fieldName, final String fieldValue, final Map<String, String> constraints, boolean latest) throws IOException {
        final SearchRequestBuilder searchRequest = this.elasticsearchClient.prepareSearch(indexName);
        searchRequest.setSearchType(SearchType.QUERY_THEN_FETCH);
        searchRequest.setFrom(0);

        // build the conjunction: first the main field, then one clause per constraint
        final BoolQueryBuilder conjunction = QueryBuilders.boolQuery();
        final QueryBuilder fieldTerm = QueryBuilders.termQuery(fieldName, fieldValue);
        conjunction.must(QueryBuilders.constantScoreQuery(QueryBuilders.constantScoreQuery(fieldTerm)));
        for (final Map.Entry<String, String> constraint : constraints.entrySet()) {
            final String constraintField = constraint.getKey();
            final String constraintTerm = constraint.getValue().toLowerCase();
            conjunction.must(QueryBuilders.constantScoreQuery(QueryBuilders.termQuery(constraintField, constraintTerm)));
        }
        searchRequest.setQuery(conjunction);

        // run the search
        final SearchResponse searchResponse = searchRequest.execute().actionGet();

        // hand back the source of every hit
        final SearchHit[] found = searchResponse.getHits().getHits();
        final ArrayList<Map<String, Object>> documents = new ArrayList<Map<String, Object>>();
        for (int i = 0; i < found.length; i++) {
            documents.add(found[i].getSourceAsMap());
        }
        return documents;
    }

    /**
     * Manual check against a node at localhost:9300: creates the index "test" if it does not
     * exist, prints its document count and uploads the mapping from conf/mappings/web.json.
     * The client is closed in any case, also if one of the index operations fails.
     * @param args not used
     */
    public static void main(String[] args) {
        ElasticsearchClient client = new ElasticsearchClient(new String[]{"localhost:9300"}, "");
        try {
            // check access
            client.createIndexIfNotExists("test", 1, 0);
            System.out.println(client.count("test"));
            // upload a schema
            try {
                // decode explicitly as UTF-8 instead of relying on the platform default charset
                byte[] raw = Files.readAllBytes(Paths.get("conf/mappings/web.json"));
                String mapping = new String(raw, java.nio.charset.StandardCharsets.UTF_8);
                client.setMapping("test", mapping);
            } catch (IOException e) {
                Data.logger.warn("", e);
            }
        } finally {
            client.close();
        }
    }
}