/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Indexes documents that each contain one of two random terms, then walks the
 * postings of each term with a random mix of {@code nextDoc()} and
 * {@code advance()} calls, checking every returned doc ID against a bitset
 * that records which term each document was given.
 */
@SuppressCodecs({ "SimpleText", "Direct" })
public class TestLongPostings extends LuceneTestCase {

  /** Name of the single field every test document is indexed under. */
  private static final String FIELD = "field";

  /** Every document repeats its term between 1 and this many times. */
  private static final int MAX_TERM_REPEATS = 4;

  /** What to verify, beyond the doc ID, on a document the enum is positioned on. */
  private enum Check {
    /** Only doc IDs are verified. */
    NONE,
    /** The term frequency must lie in {@code [1, MAX_TERM_REPEATS]}. */
    FREQS,
    /** Frequency as for {@link #FREQS}, plus positions {@code 0..freq-1} and payload access. */
    POSITIONS
  }

  /** The two random terms, and which of them each document holds. */
  private static final class Corpus {
    final int numDocs;
    final String s1;
    final String s2;
    /** Bit {@code d} is set when document {@code d} holds {@link #s1}; otherwise it holds {@link #s2}. */
    final FixedBitSet isS1;

    Corpus(int numDocs, String s1, String s2, FixedBitSet isS1) {
      this.numDocs = numDocs;
      this.s1 = s1;
      this.s2 = s2;
      this.isS1 = isS1;
    }

    /** Returns {@code s1} when {@code doS1} is true, else {@code s2}. */
    String term(boolean doS1) {
      return doS1 ? s1 : s2;
    }

    /** Returns the term that document {@code docID} holds. */
    String termOf(int docID) {
      return term(isS1.get(docID));
    }

    /**
     * Returns the first doc ID {@code >= from} holding the selected term, or
     * {@link DocIdSetIterator#NO_MORE_DOCS} when there is none.
     */
    int firstMatch(boolean doS1, int from) {
      for (int docID = from; docID < numDocs; docID++) {
        if (isS1.get(docID) == doS1) {
          return docID;
        }
      }
      return DocIdSetIterator.NO_MORE_DOCS;
    }
  }

  /**
   * Produces a realistic unicode random string that survives MockAnalyzer
   * unchanged, i.e. analysis yields exactly one token equal to the input.
   *
   * @param other a string the result must differ from, or {@code null} for no restriction
   * @return the random term
   * @throws IOException if the token stream fails
   */
  private String getRandomTerm(String other) throws IOException {
    // The analyzer is Closeable; release it once a term has been found.
    try (Analyzer a = new MockAnalyzer(random())) {
      while (true) {
        final String s = TestUtil.randomRealisticUnicodeString(random());
        if (s.equals(other)) {
          continue;
        }
        try (TokenStream ts = a.tokenStream("foo", s)) {
          final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
          ts.reset();

          int count = 0;
          boolean changed = false;

          while (ts.incrementToken()) {
            final BytesRef termBytes = termAtt.getBytesRef();
            if (count == 0 && !termBytes.utf8ToString().equals(s)) {
              // The value was changed during analysis.  Keep iterating so the
              // tokenStream is exhausted.
              changed = true;
            }
            count++;
          }

          ts.end();
          // Did we iterate just once and the value was unchanged?
          if (!changed && count == 1) {
            return s;
          }
        }
      }
    }
  }

  /**
   * Indexes with {@code newTextField} and verifies doc IDs, frequencies,
   * positions and payload access while iterating randomly.
   */
  public void testLongPostings() throws Exception {
    try (Directory dir = newTestDirectory()) {
      final Corpus corpus = newCorpus();

      try (IndexReader r = buildIndex(dir, corpus, null)) {
        checkTermsIndexed(r, corpus);

        final int num = atLeast(1000);
        for (int iter = 0; iter < num; iter++) {
          final boolean doS1 = random().nextBoolean();

          if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1);
          }

          final PostingsEnum postings =
              MultiTerms.getTermPostingsEnum(r, FIELD, new BytesRef(corpus.term(doS1)));
          assertNotNull(postings);

          checkRandomIteration(postings, corpus, doS1, Check.POSITIONS);
        }
      }
    }
  }

  // a weaker form of testLongPostings, that doesn't check positions
  public void testLongPostingsNoPositions() throws Exception {
    doTestLongPostingsNoPositions(IndexOptions.DOCS);
    doTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
  }

  /**
   * Runs the random-iteration check against a field indexed with the given
   * options. Frequencies are verified unless {@code options} is
   * {@link IndexOptions#DOCS}; positions are never verified.
   *
   * @param options index options applied to the test field
   */
  public void doTestLongPostingsNoPositions(IndexOptions options) throws Exception {
    final FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(options);

    final boolean hasFreqs = options != IndexOptions.DOCS;
    final int flags = hasFreqs ? PostingsEnum.FREQS : PostingsEnum.NONE;
    final Check check = hasFreqs ? Check.FREQS : Check.NONE;

    try (Directory dir = newTestDirectory()) {
      final Corpus corpus = newCorpus();

      try (IndexReader r = buildIndex(dir, corpus, ft)) {
        checkTermsIndexed(r, corpus);

        final int num = atLeast(1000);
        for (int iter = 0; iter < num; iter++) {
          final boolean doS1 = random().nextBoolean();
          final String term = corpus.term(doS1);

          if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
          }

          final PostingsEnum docs =
              TestUtil.docs(random(), r, FIELD, new BytesRef(term), null, flags);
          assertNotNull(docs);

          checkRandomIteration(docs, corpus, doS1, check);
        }
      }
    }
  }

  /**
   * Opens a filesystem directory in a fresh temp dir. The dir name is salted
   * with a value drawn from the test's own random source so that we own the
   * randomness (ie same seed will point to same dir).
   */
  private Directory newTestDirectory() throws Exception {
    return newFSDirectory(createTempDir("longpostings" + "." + random().nextLong()));
  }

  /** Picks the document count, the two distinct terms and the per-document term assignment. */
  private Corpus newCorpus() throws IOException {
    final int numDocs = atLeast(1000);

    if (VERBOSE) {
      System.out.println("TEST: NUM_DOCS=" + numDocs);
    }

    final String s1 = getRandomTerm(null);
    final String s2 = getRandomTerm(s1);

    if (VERBOSE) {
      System.out.println("\nTEST: s1=" + s1 + " s2=" + s2);
    }

    final FixedBitSet isS1 = new FixedBitSet(numDocs);
    for (int docID = 0; docID < numDocs; docID++) {
      if (random().nextBoolean()) {
        isS1.set(docID);
      }
    }
    return new Corpus(numDocs, s1, s2, isS1);
  }

  /**
   * Indexes one document per corpus entry, each containing its term 1 to
   * {@link #MAX_TERM_REPEATS} times, and returns a reader over the result.
   * The writer is closed before this method returns.
   *
   * @param fieldType type to index the field with, or {@code null} to create
   *        the field through {@code newTextField(..., Field.Store.NO)}
   */
  private IndexReader buildIndex(Directory dir, Corpus corpus, FieldType fieldType) throws IOException {
    final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        .setMergePolicy(newLogMergePolicy());
    // Flush by RAM usage only: the RAM buffer must be configured before the
    // doc-count trigger is switched off.
    iwc.setRAMBufferSizeMB(16.0 + 16.0 * random().nextDouble());
    iwc.setMaxBufferedDocs(-1);

    try (RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc)) {
      for (int docID = 0; docID < corpus.numDocs; docID++) {
        final Document doc = new Document();
        final String s = corpus.termOf(docID);
        final Field f = fieldType == null
            ? newTextField(FIELD, s, Field.Store.NO)
            : newField(FIELD, s, fieldType);
        final int count = TestUtil.nextInt(random(), 1, MAX_TERM_REPEATS);
        for (int ct = 0; ct < count; ct++) {
          doc.add(f);
        }
        riw.addDocument(doc);
      }
      return riw.getReader();
    }
  }

  /** Asserts that the reader holds every document and that both terms occur at least once. */
  private void checkTermsIndexed(IndexReader r, Corpus corpus) throws IOException {
    assertEquals(corpus.numDocs, r.numDocs());
    assertTrue(r.docFreq(new Term(FIELD, corpus.s1)) > 0);
    assertTrue(r.docFreq(new Term(FIELD, corpus.s2)) > 0);
  }

  /**
   * Drives {@code postings} to exhaustion with a random mix of
   * {@code nextDoc()} (one time in three) and {@code advance()} calls,
   * asserting each returned doc ID. On roughly one document in six the
   * extra verification selected by {@code check} is performed as well.
   *
   * @param postings a fresh, unpositioned enum for the selected term
   * @param doS1 true when {@code postings} belongs to {@code corpus.s1}
   * @param check how much to verify on a visited document
   */
  private void checkRandomIteration(PostingsEnum postings, Corpus corpus, boolean doS1, Check check)
      throws IOException {
    int docID = -1;
    while (docID < DocIdSetIterator.NO_MORE_DOCS) {
      final int expected;
      if (random().nextInt(3) == 0) {
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do next()");
        }
        expected = corpus.firstMatch(doS1, docID + 1);
        docID = postings.nextDoc();
      } else {
        final int targetDocID = randomAdvanceTarget(docID, corpus.numDocs);
        if (VERBOSE) {
          System.out.println("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
        }
        expected = corpus.firstMatch(doS1, targetDocID);
        docID = postings.advance(targetDocID);
      }

      if (VERBOSE) {
        System.out.println("  got docID=" + docID);
      }
      assertEquals(expected, docID);
      if (docID == DocIdSetIterator.NO_MORE_DOCS) {
        break;
      }

      // Draw unconditionally so the random sequence does not depend on the
      // check mode.
      final boolean verify = random().nextInt(6) == 3;
      if (verify && check != Check.NONE) {
        final int freq = postings.freq();
        assertTrue("got invalid freq=" + freq, freq >= 1 && freq <= MAX_TERM_REPEATS);
        if (check == Check.POSITIONS) {
          if (VERBOSE) {
            System.out.println("    check positions");
          }
          checkPositions(postings, freq);
        }
      }
    }
  }

  /**
   * Picks an {@code advance()} target: anywhere in {@code [0, numDocs]} for an
   * unpositioned enum, otherwise strictly beyond {@code docID} and at most
   * {@code numDocs}.
   */
  private int randomAdvanceTarget(int docID, int numDocs) {
    if (docID == -1) {
      return random().nextInt(numDocs + 1);
    }
    return docID + TestUtil.nextInt(random(), 1, numDocs - docID);
  }

  /**
   * Asserts that the current document's positions are {@code 0..freq-1}, in
   * order, randomly fetching the payload once or twice per position.
   */
  private void checkPositions(PostingsEnum postings, int freq) throws IOException {
    for (int pos = 0; pos < freq; pos++) {
      assertEquals(pos, postings.nextPosition());
      if (random().nextBoolean()) {
        postings.getPayload();
        if (random().nextBoolean()) {
          postings.getPayload(); // get it again
        }
      }
    }
  }
}