Java Code Examples for org.apache.pig.pigunit.PigTest#runScript()

The following examples show how to use org.apache.pig.pigunit.PigTest#runScript(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: ChaoShenEntropyTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the entropy script with {@code type=chaosh} and {@code base=log} over an
 * input file that holds a single value, then compares alias {@code data_out}
 * with the reference value obtained from R.
 */
@Test
public void singleElemInputBagChaoShenEntropoyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log");

  // One line of input only.
  writeLinesToFile("input", "98.94791");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * > count=c(1)
   * > library(entropy)
   * > entropy(count,count/sum(count),c("CS"),c("log"))
   * [1] 0
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(0.0);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 2
Source File: QuantileTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the integers 100000 down to 0 (one per line, descending) into the
 * quantile script and checks the single output tuple for the seven requested
 * quantiles.
 */
@Test
public void quantile3Test() throws Exception {
  PigTest pigTest = createPigTestFromString(
      quantileTest,
      "QUANTILES='0.0013','0.0228','0.1587','0.5','0.8413','0.9772','0.9987'");

  // Build the input in descending order: 100000, 99999, ..., 0.
  List<String> lines = new ArrayList<String>();
  int value = 100000;
  while (value >= 0)
  {
    lines.add(String.valueOf(value));
    value--;
  }

  writeLinesToFile("input", lines.toArray(new String[lines.size()]));

  pigTest.runScript();

  List<Tuple> rows = getLinesForAlias(pigTest, "data_out", true);

  assertEquals(rows.size(),1);
  assertEquals(rows.get(0).toString(), "(130.0,2280.0,15870.0,50000.0,84130.0,97720.0,99870.0)");
}
 
Example 3
Source File: MacroTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Writes 31 tab-separated (key, value) lines to the input file, runs the count
 * script and expects alias {@code cnt} to contain the single tuple {@code (31)}.
 */
@Test
public void countTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(countTest);

  // Input lines, grouped by key (A1 .. A10).
  String[] inputLines = {
      "A1\t1", "A1\t4", "A1\t4", "A1\t4",
      "A2\t4", "A2\t4",
      "A3\t3", "A3\t1", "A3\t77",
      "A4\t3", "A4\t3", "A4\t59", "A4\t29",
      "A5\t4",
      "A6\t3", "A6\t55", "A6\t1",
      "A7\t39", "A7\t27", "A7\t85",
      "A8\t4", "A8\t45",
      "A9\t92", "A9\t42", "A9\t1", "A9\t0",
      "A10\t7", "A10\t23", "A10\t1", "A10\t41", "A10\t52"
  };
  writeLinesToFile("input", inputLines);

  pigTest.runScript();

  assertOutput(pigTest, "cnt", "(31)");
}
 
Example 4
Source File: MarkovPairTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the default Markov-pair script over one bag of six integers and checks
 * that alias {@code data_out} holds the five pairs of consecutive elements.
 */
@Test
public void markovPairDefaultTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(
      markovPairDefault,
      "schema=(data: bag {t: tuple(val:int)})");

  writeLinesToFile("input", "{(10),(20),(30),(40),(50),(60)}");

  pigTest.runScript();

  // Each element paired with its successor, in input order.
  String[] expectedTuples = {
      "({((10),(20)),((20),(30)),((30),(40)),((40),(50)),((50),(60))})"
  };

  assertTuplesMatch(expectedTuples, pigTest.getAlias("data_out"));
}
 
Example 5
Source File: BagTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the bag-split script with {@code MAX=5} over a single bag of twelve
 * tuples and expects alias {@code data3} to contain three bags of sizes
 * 5, 5 and 2, in input order.
 */
@Test
public void bagSplitTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(bagSplitTest, "MAX=5");

  String inputBag =
      "{(1,11),(2,22),(3,33),(4,44),(5,55),(6,66),(7,77),(8,88),(9,99),(10,1010),(11,1111),(12,1212)}";
  writeLinesToFile("input", inputBag);

  pigTest.runScript();

  String firstChunk = "({(1,11),(2,22),(3,33),(4,44),(5,55)})";
  String secondChunk = "({(6,66),(7,77),(8,88),(9,99),(10,1010)})";
  String remainder = "({(11,1111),(12,1212)})";
  assertOutput(pigTest, "data3", firstChunk, secondChunk, remainder);
}
 
Example 6
Source File: CondEntropyTests.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Negative test: runs the {@code noOrderCondEntropy} script over ten
 * tab-separated (value, label) lines and expects either running the script or
 * reading alias {@code data_out} to throw.
 *
 * NOTE(review): the script name suggests the failure is caused by the input
 * not being ordered before the entropy computation — confirm against the
 * script definition.
 */
@Test
public void noOrderEmpiricalCondEntropoyTest() throws Exception
{
  PigTest test = createPigTestFromString(noOrderCondEntropy); 
  
  writeLinesToFile("input",
                   "98.94791	click",
                   "38.61010	view",
                   "97.10575	view",
                   "62.28313	click",
                   "38.83960	click",
                   "32.05370	view",
                   "96.10962	view",
                   "28.72388	click",
                   "96.65888	view",
                   "20.41135	click");

  try {
       test.runScript();
       // Only the side effect matters here; the returned rows are not inspected.
       this.getLinesForAlias(test, "data_out");
       fail( "Testcase should fail");
  } catch (Exception expected) {
       // Intentionally ignored: an exception is the expected outcome.
       // Assuming fail() is the TestNG/JUnit one, it raises an AssertionError
       // (an Error, not an Exception), so it is not swallowed by this handler.
  }
}
 
Example 7
Source File: CondEntropyTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the conditional-entropy script over ten tab-separated (value, label)
 * lines that contain repeated values and compares alias {@code data_out} with
 * the reference value obtained from R.
 */
@Test
public void dupValEmpiricalCondEntropoyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(condEntropy);

  writeLinesToFile("input",
      "98.94791	click",
      "38.61010	click",
      "97.10575	view",
      "62.28313	view",
      "38.61010	view",
      "32.05370	view",
      "96.10962	click",
      "38.61010	click",
      "96.10962	view",
      "20.41135	click");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * library(infotheo)
   * X=c("98.94791","38.61010","97.10575","62.28313","38.61010","32.05370","96.10962","38.61010","96.10962","20.41135")
   * Y=c("click","click","view","view","view","view","click","click","view","click")
   * condentropy(Y,X)
   * [1] 0.3295837
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(0.3295837);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 8
Source File: EntropyTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the entropy script over ten distinct values and compares alias
 * {@code data_out} with the reference value obtained from R.
 */
@Test
public void uniqValEmpiricalEntropoyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy);

  // Ten values, all different.
  writeLinesToFile("input",
      "98.94791", "38.61010", "97.10575", "62.28313", "38.83960",
      "32.05370", "96.10962", "28.72388", "96.65888", "20.41135");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * > v=c(98.94791,38.61010,97.10575,62.28313,38.83960,32.05370,96.10962,28.72388,96.65888,20.41135)
   * > table(v)
   * v
   * 20.41135 28.72388  32.0537  38.6101  38.8396 62.28313 96.10962 96.65888 97.10575 98.94791
   * 1        1        1        1        1        1        1        1        1        1
   * > count=c(1,1,1,1,1,1,1,1,1,1)
   * > library(entropy)
   * > entropy(count)
   * [1] 2.302585
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(2.302585);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 9
Source File: EmpiricalCountEntropyTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the entropy script over ten distinct values and compares alias
 * {@code data_out} with the reference value obtained from R.
 */
@Test
public void uniqValEntropyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy);

  // Ten values, all different.
  writeLinesToFile("input",
      "98.94791", "38.61010", "97.10575", "62.28313", "38.83960",
      "32.05370", "96.10962", "28.72388", "96.65888", "20.41135");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * > v=c(98.94791,38.61010,97.10575,62.28313,38.83960,32.05370,96.10962,28.72388,96.65888,20.41135)
   * > table(v)
   * v
   * 20.41135 28.72388  32.0537  38.6101  38.8396 62.28313 96.10962 96.65888 97.10575 98.94791
   * 1        1        1        1        1        1        1        1        1        1
   * > count=c(1,1,1,1,1,1,1,1,1,1)
   * > library(entropy)
   * > entropy(count)
   * [1] 2.302585
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(2.302585);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 10
Source File: BagTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the distinct-by script over a bag whose tuples contain a chararray, a
 * map and a nested bag, and checks the tuples kept in alias {@code data2}.
 */
@Test
public void distinctByMultiComplexFieldTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(distinctByMultiComplexFieldTest);

  String inputLine =
      "({(a-b,[b#1],{(a-b,0),(a-b,1)}),(a-c,[b#1],{(a-b,0),(a-b,1)}),(a-d,[b#0],{(a-b,1),(a-b,2)})})";
  writeLinesToFile("input", inputLine);

  pigTest.runScript();

  // The (a-c, ...) tuple is expected to be dropped; the other two remain.
  String expectedLine =
      "({(a-b,[b#1],{(a-b,0),(a-b,1)}),(a-d,[b#0],{(a-b,1),(a-b,2)})})";
  assertOutput(pigTest, "data2", expectedLine);
}
 
Example 11
Source File: WeightedReservoirSamplingTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Negative test: runs the {@code invalidWeightFieldSchemaTest} script and
 * expects it to fail with an exception whose message reports that the weight
 * field has type chararray instead of a numeric type.
 */
@Test
public void invalidWeightFieldSchemaTest() throws Exception
{
  PigTest test = createPigTestFromString(invalidWeightFieldSchemaTest);

  writeLinesToFile("input",
              "a\t100","b\t1","c\t5","d\t2");
  try {
       test.runScript();
       // Only the side effect matters here; the returned rows are not inspected.
       this.getLinesForAlias(test, "sampled");
       Assert.fail( "Testcase should fail");
  } catch (Exception ex) {
       // Guard against a null message so a mismatch shows up as an assertion
       // failure rather than a NullPointerException.
       String message = ex.getMessage();
       Assert.assertTrue(message != null
           && message.contains("Expect the type of the weight field of the input tuple to be of ([int, long, float, double]), but instead found (chararray), weight field: 0"));
  }
}
 
Example 12
Source File: BagTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the distinct-by-delimiter script over a bag of two tuples and expects
 * both tuples to be present, unchanged, in alias {@code data2}.
 */
@Test
public void distinctByDelimTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(distinctByDelimTest);

  String inputLine = "({(a-b,c),(a-b,d)})";
  writeLinesToFile("input", inputLine);

  pigTest.runScript();

  String expectedLine = "({(a-b,c),(a-b,d)})";
  assertOutput(pigTest, "data2", expectedLine);
}
 
Example 13
Source File: LSHPigTest.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Generates {@code n} vectors from a fixed-seed random source, writes them to
 * the input file via {@code getSparseLines}, runs the sparse-vector script and
 * checks that every tuple of alias {@code PTS} converts back to the matching
 * original vector (same dimension, entries within 1e-5).
 */
@Test
public void testSparseVectors() throws IOException, ParseException
{
  // Fixed seed keeps the generated vectors reproducible between runs.
  RandomGenerator generator = new JDKRandomGenerator();
  generator.setSeed(0);
  RandomData randomData = new RandomDataImpl(generator);

  int n = 20;
  List<RealVector> vectors = LSHTest.getVectors(randomData, 1000, n);

  PigTest pigTest = createPigTestFromString(sparseVectorTest);
  writeLinesToFile("input", getSparseLines(vectors));
  pigTest.runScript();

  List<Tuple> neighbors = getLinesForAlias(pigTest, "PTS");
  Assert.assertEquals(neighbors.size(), n);

  // Output rows are compared positionally with the generated vectors.
  for (int idx = 0; idx < neighbors.size(); idx++)
  {
    Tuple row = neighbors.get(idx);
    Assert.assertTrue(row.get(0) instanceof DataBag);
    Assert.assertEquals(row.size(), 1);

    RealVector interpreted = DataTypeUtil.INSTANCE.convert(row, 3);
    RealVector original = vectors.get(idx);
    Assert.assertEquals(original.getDimension(), interpreted.getDimension());

    for (int i = 0; i < interpreted.getDimension(); ++i)
    {
      double difference = original.getEntry(i) - interpreted.getEntry(i);
      Assert.assertTrue(Math.abs(difference) < 1e-5);
    }
  }
}
 
Example 14
Source File: EmpiricalCountEntropyTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the entropy script over ten copies of the same value and compares alias
 * {@code data_out} with the reference value obtained from R.
 */
@Test
public void singleValEntropyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy);

  // The same value, ten times.
  writeLinesToFile("input",
      "98.94791", "98.94791", "98.94791", "98.94791", "98.94791",
      "98.94791", "98.94791", "98.94791", "98.94791", "98.94791");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * > v=c(98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791,98.94791)
   * > table(v)
   * v
   * 98.94791
   * 10
   * > count=(10)
   * > library(entropy)
   * > entropy(count)
   * [1] 0
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(0.0);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 15
Source File: ZipBagsTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Negative test: running the {@code duplicateAliasTest} script over one line
 * holding two tab-separated bags must raise a {@code FrontendException}.
 */
@Test(expectedExceptions = FrontendException.class)
public void duplicateAliasTest() throws Exception
{
    PigTest pigTest = createPigTestFromString(duplicateAliasTest);

    // Two bags of three tuples each, separated by a tab.
    String inputLine = "{(1,2),(3,4),(5,6)}\t{(7,8),(9,10),(11,12)}";
    writeLinesToFile("input", inputLine);

    pigTest.runScript();
}
 
Example 16
Source File: BagTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the distinct-by script over two input lines and checks the
 * de-duplicated bags produced in alias {@code data2}.
 */
@Test
public void distinctByTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(distinctByTest);

  String firstInput = "({(Z,1,0),(A,1,0),(A,1,0),(B,2,0),(B,22,1),(C,3,0),(D,4,0),(E,5,0)})";
  String secondInput = "({(A,10,2),(M,50,3),(A,34,49), (A,24,42), (Z,49,22),(B,1,1)},(B,2,2))";
  writeLinesToFile("input", firstInput, secondInput);

  pigTest.runScript();

  // Expected bags keep one tuple per leading letter, in input order.
  String firstExpected = "({(Z,1,0),(A,1,0),(B,2,0),(C,3,0),(D,4,0),(E,5,0)})";
  String secondExpected = "({(A,10,2),(M,50,3),(Z,49,22),(B,1,1)})";
  assertOutput(pigTest, "data2", firstExpected, secondExpected);
}
 
Example 17
Source File: ChaoShenEntropyTests.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the entropy script with {@code type=chaosh} and {@code base=log} over an
 * input file written with no lines and verifies alias {@code data_out} against
 * an empty list of expected values.
 */
@Test
public void emptyInputBagChaoShenEntropoyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log");

  // No lines are passed: the input file is written without content lines.
  writeLinesToFile("input");

  pigTest.runScript();

  /*
   * Reference session in R:
   *
   * > v=c()
   * > table(v)
   * < table of extent 0 >
   * > count=c()
   * > library(entropy)
   * > entropy(count,count/sum(count),c("CS"),c("log"))
   * [1] 0
   *
   * NOTE(review): R prints 0 here, yet no expected value is added below, so
   * the comparison runs against an empty list — confirm this is intended.
   */
  List<Double> expected = new ArrayList<Double>();

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 18
Source File: SamplingTests.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the sample-by-key script over tab-separated (key1, key2, value) lines
 * and checks the exact set of tuples expected in alias {@code sampled}.
 */
@Test
public void sampleByKeyMultipleKeyTest() throws Exception
{
  PigTest pigTest = createPigTestFromString(sampleByKeyMultipleKeyTest);

  // Input lines, grouped by the (key1, key2) pair they belong to.
  String[] inputLines = {
      "A1\tB1\t1", "A1\tB1\t4",
      "A1\tB3\t4",
      "A1\tB4\t4",
      "A2\tB1\t4",
      "A2\tB2\t4",
      "A3\tB1\t3", "A3\tB1\t1",
      "A3\tB3\t77",
      "A4\tB1\t3",
      "A4\tB2\t3",
      "A4\tB3\t59",
      "A4\tB4\t29",
      "A5\tB1\t4",
      "A6\tB2\t3", "A6\tB2\t55",
      "A6\tB3\t1",
      "A7\tB1\t39",
      "A7\tB2\t27",
      "A7\tB3\t85",
      "A8\tB1\t4",
      "A8\tB2\t45",
      "A9\tB3\t92", "A9\tB3\t0",
      "A9\tB6\t42", "A9\tB5\t1",
      "A10\tB1\t7",
      "A10\tB2\t23", "A10\tB2\t1", "A10\tB2\t31",
      "A10\tB6\t41",
      "A10\tB7\t52"
  };
  writeLinesToFile("input", inputLines);

  pigTest.runScript();

  // For every selected (key1, key2) pair, all of its rows are expected.
  assertOutput(pigTest, "sampled",
      "(A1,B1,1)", "(A1,B1,4)",
      "(A1,B4,4)",
      "(A2,B1,4)",
      "(A2,B2,4)",
      "(A3,B1,3)", "(A3,B1,1)",
      "(A4,B4,29)",
      "(A5,B1,4)",
      "(A6,B3,1)",
      "(A7,B1,39)",
      "(A8,B1,4)",
      "(A9,B3,92)", "(A9,B3,0)",
      "(A10,B2,23)", "(A10,B2,1)", "(A10,B2,31)");
}
 
Example 19
Source File: ChaoShenEntropyTests.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the entropy script with {@code type=chaosh} and {@code base=log2} over
 * ten values that contain duplicates and compares alias {@code data_out} with
 * the reference value obtained from R.
 */
@Test
public void dupValChaoShenEntropoyLog2Test() throws Exception
{
  PigTest pigTest = createPigTestFromString(entropy, "type=chaosh", "base=log2");

  // 38.61010 appears three times and 96.10962 twice; the rest are unique.
  writeLinesToFile("input",
      "98.94791", "38.61010", "97.10575", "62.28313", "38.61010",
      "32.05370", "96.10962", "38.61010", "96.10962", "20.41135");

  pigTest.runScript();

  /*
   * Reference value, computed in R:
   *
   * > v=c(98.94791,38.61010,97.10575,62.28313,38.61010,32.05370,96.10962,38.61010,96.10962,20.41135)
   * > table(v)
   * v
   * 20.41135  32.0537  38.6101 62.28313 96.10962 97.10575 98.94791
   *        1        1        3        1        2        1        1
   * > count=c(1,1,3,1,2,1,1)
   * > freqs=count/sum(count)
   * > library(entropy)
   * > entropy(count,count/sum(count),c("CS"),c("log2"))
   * [1] 3.713915
   */
  List<Double> expected = new ArrayList<Double>();
  expected.add(3.713915);

  verifyEqualEntropyOutput(expected, getLinesForAlias(pigTest, "data_out"), 5);
}
 
Example 20
Source File: WilsonBinConfTests.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the Wilson binomial confidence-interval script with {@code alpha=0.05}
 * over tab-separated pairs of integers and checks the (lower, upper) bounds
 * read from alias {@code data_out}, formatted to five decimals, row by row.
 *
 * The number of output rows is asserted first, so a missing or truncated
 * output fails the test instead of skipping the per-row checks.
 */
@Test
public void wilsonTest() throws Exception
{
  PigTest test = createPigTestFromString(wilsonBinConf,
                               "alpha=0.05"); // alpha is 0.05 for 95% confidence
  
  writeLinesToFile("input",
                   "1\t1",
                   "1\t2",
                   "50\t100",
                   "500\t1000",
                   "999\t1000",
                   "1000\t1000",
                   "998\t1000");
      
  test.runScript();
  
  /* Add expected values, computed using R:
   * 
   * e.g.
   * 
   * library(Hmisc)
   * 
   * binconf(50,100)
   * binconf(500,1000)
   * 
   */
  List<String> expectedOutput = new ArrayList<String>();
  expectedOutput.add("0.05129,1.00000");
  expectedOutput.add("0.02565,0.97435");
  expectedOutput.add("0.40383,0.59617");
  expectedOutput.add("0.46907,0.53093");
  expectedOutput.add("0.99436,0.99995");
  expectedOutput.add("0.99617,1.00000");
  expectedOutput.add("0.99274,0.99945");
  
  List<Tuple> output = this.getLinesForAlias(test, "data_out");

  // Every expectation must be matched by exactly one output row.
  assertEquals(output.size(), expectedOutput.size());

  for (int i = 0; i < expectedOutput.size(); i++)
  {
    Tuple t = output.get(i);
    Double lower = (Double)t.get(0);
    Double upper = (Double)t.get(1);
    // Locale.ROOT keeps '.' as the decimal separator whatever the default locale is.
    assertEquals(String.format(java.util.Locale.ROOT, "%.5f,%.5f", lower, upper), expectedOutput.get(i));
  }
}