/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nutch.crawl;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.logging.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.nutch.util.NutchConfiguration;

import junit.framework.TestCase;

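/**
 * Unit test for {@link LinkDbMerger}. Two small LinkDbs are created whose key
 * sets overlap on a single URL; after merging, the inlink lists of the shared
 * URL must be combined, while all other entries must pass through unchanged.
 */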
public class TestLinkDbMerger extends TestCase {
  private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class.getName());
  
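  // Fixture data: each key is a target URL, each value the set of URLs
  // linking to it. init1 holds url10/url11, init2 holds url20/url21.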
  String url10 = "http://example.com/foo";
  String[] urls10 = new String[] {
          "http://example.com/100",
          "http://example.com/101"
        };

  String url11 = "http://example.com/";
  String[] urls11 = new String[] {
          "http://example.com/110",
          "http://example.com/111"
        };
  
  String url20 = "http://example.com/";
  String[] urls20 = new String[] {
          "http://foo.com/200",
          "http://foo.com/201"
        };
  String url21 = "http://example.com/bar";
  String[] urls21 = new String[] {
          "http://foo.com/210",
          "http://foo.com/211"
        };
  
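  // Expected inlinks after the merge: url11 and url20 are the same URL
  // ("http://example.com/"), so its merged inlink list contains the union of
  // urls11 and urls20; the other entries are unchanged.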
  String[] urls10_expected = urls10;
  String[] urls11_expected = new String[] {
          urls11[0],
          urls11[1],
          urls20[0],
          urls20[1]
  };
  String[] urls20_expected = urls11_expected;
  String[] urls21_expected = urls21;
  
  TreeMap<String, String[]> init1 = new TreeMap<String, String[]>();
  TreeMap<String, String[]> init2 = new TreeMap<String, String[]>();
  HashMap<String, String[]> expected = new HashMap<String, String[]>();
  Configuration conf;
  Path testDir;
  FileSystem fs;
  LinkDbReader reader;
  
  public void setUp() throws Exception {
    init1.put(url10, urls10);
    init1.put(url11, urls11);
    init2.put(url20, urls20);
    init2.put(url21, urls21);
    expected.put(url10, urls10_expected);
    expected.put(url11, urls11_expected);
    expected.put(url20, urls20_expected);
    expected.put(url21, urls21_expected);
    conf = NutchConfiguration.create();
    fs = FileSystem.get(conf);
    testDir = new Path("build/test/test-linkdb-" +
            new java.util.Random().nextInt());
    fs.mkdirs(testDir);
  }
  
  public void tearDown() {
    try {
      if (fs.exists(testDir))
        fs.delete(testDir, true);
    } catch (Exception e) { /* ignore cleanup failures */ }
    try {
      if (reader != null) reader.close();
    } catch (Exception e) { /* ignore cleanup failures */ }
  }

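  /**
   * Merges two single-part LinkDbs into one output LinkDb and verifies that
   * every expected inlink is present under its target URL.
   */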
  public void testMerge() throws Exception {
    Path linkdb1 = new Path(testDir, "linkdb1");
    Path linkdb2 = new Path(testDir, "linkdb2");
    Path output = new Path(testDir, "output");
    createLinkDb(conf, fs, linkdb1, init1);
    createLinkDb(conf, fs, linkdb2, init2);
    LinkDbMerger merger = new LinkDbMerger(conf);
    LOG.fine("* merging linkdbs to " + output);
    merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false);
    LOG.fine("* reading linkdb: " + output);
    reader = new LinkDbReader(conf, output);
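    // check that each expected URL is present and carries (at least) the
    // expected inlinks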
    for (String url : expected.keySet()) {
      LOG.fine("url=" + url);
      String[] vals = expected.get(url);
      Inlinks inlinks = reader.getInlinks(new Text(url));
      // the merged db must contain inlinks for every expected URL
      assertNotNull(inlinks);
      ArrayList<String> links = new ArrayList<String>();
      Iterator it2 = inlinks.iterator();
      while (it2.hasNext()) {
        Inlink in = (Inlink)it2.next();
        links.add(in.getFromUrl());
      }
      for (int i = 0; i < vals.length; i++) {
        LOG.fine(" -> " + vals[i]);
        assertTrue(links.contains(vals[i]));
      }
    }
    reader.close();
    fs.delete(testDir, true);
  }
  
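  /**
   * Writes a single-part LinkDb under linkdb/{@link LinkDb#CURRENT_NAME},
   * with one {@link Inlinks} entry per key of the init map.
   */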
  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb,
      TreeMap<String, String[]> init) throws Exception {
    LOG.fine("* creating linkdb: " + linkdb);
    Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
    MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class);
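    // MapFile requires keys to be appended in ascending order; iterating the
    // TreeMap key set yields them already sorted.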
    for (String key : init.keySet()) {
      Inlinks inlinks = new Inlinks();
      String[] vals = init.get(key);
      for (int i = 0; i < vals.length; i++) {
        Inlink in = new Inlink(vals[i], vals[i]);
        inlinks.add(in);
      }
      writer.append(new Text(key), inlinks);
    }
    writer.close();
  }
}