Java Code Examples for org.apache.pig.PigServer#registerCode()

The following examples show how to use org.apache.pig.PigServer#registerCode(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestUDFGroovy.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a non-static Groovy method can be registered and called as a Pig UDF.
 *
 * <p>The generated script declares a class whose constructor sets a multiplier of 42
 * and whose {@code mul} method multiplies its argument by it. The script is registered
 * under the {@code groovyudfs} namespace and applied to a single tuple holding 1, so
 * the stored result is expected to be 42.
 *
 * @throws Exception if writing the script, registering it, or running the queries fails
 */
@Test
public void testEvalFunc_NonStatic() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.builtin.OutputSchema;",
      "class GroovyUDF {",
      "  private final long multiplicator;",
      "  public GroovyUDF() {",
      "    this.multiplicator = 42L;",
      "  }",
      "  @OutputSchema('x:long')",
      "  long mul(long x) {",
      "    return x*this.multiplicator;",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo1",
      tuple(1)
      );

  pigServer.registerQuery("A = LOAD 'foo1' USING mock.Storage();");
  pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.mul($0);");
  pigServer.registerQuery("STORE B INTO 'bar1' USING mock.Storage();");

  List<Tuple> out = data.get("bar1");
  assertEquals(tuple(42L), out.get(0));
}
 
Example 2
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a Jython script stored on the cluster file system can be registered
 * through its absolute path and that its UDF is callable from a query.
 *
 * <p>A local temporary script is written, copied to the cluster file system as
 * {@code script.py}, and registered under the {@code pig} namespace. The input file
 * has two rows, so exactly two {@code "Hello, World"} tuples are expected.
 *
 * @throws Throwable if creating the script, copying it, or running the queries fails
 */
@Test
public void testRegisterRemoteScript() throws Throwable {
    String scriptName = "script.py";
    File scriptFile = File.createTempFile("tmp", "");
    // The local copy is only needed for the upload; do not leave it behind.
    scriptFile.deleteOnExit();
    PrintWriter pw = new PrintWriter(new FileWriter(scriptFile));
    try {
        pw.println("@outputSchema(\"word:chararray\")\ndef helloworld():\n    return 'Hello, World'");
    } finally {
        // Release the file handle even if writing the script fails.
        pw.close();
    }

    FileSystem fs = cluster.getFileSystem();
    fs.copyFromLocalFile(new Path(scriptFile.getAbsolutePath()), new Path(scriptName));

    // find the absolute path for the directory so that it does not
    // depend on configuration
    String absPath = fs.getFileStatus(new Path(scriptName)).getPath().toString();

    Util.createInputFile(cluster, "testRegisterRemoteScript_input", new String[]{"1", "2"});
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerCode(absPath, "jython", "pig");
    pig.registerQuery("a = load 'testRegisterRemoteScript_input';");
    pig.registerQuery("b = foreach a generate pig.helloworld($0);");
    Iterator<Tuple> iter = pig.openIterator("b");

    // First input row.
    assertTrue(iter.hasNext());
    Tuple t = iter.next();
    assertTrue(t.size() > 0);
    assertEquals("Hello, World", t.get(0));

    // Second input row.
    assertTrue(iter.hasNext());
    t = iter.next();
    assertTrue(t.size() > 0);
    assertEquals("Hello, World", t.get(0));

    // No output beyond the two input rows.
    assertFalse(iter.hasNext());
}
 
Example 3
Source File: TestUDFGroovy.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a static Groovy method can be registered and called as a Pig UDF.
 *
 * <p>The generated script declares a static {@code square} method, registered under
 * the {@code groovyudfs} namespace. It is applied to the tuples 1 through 4 and the
 * stored results are expected to be 1, 4, 9 and 16, in input order.
 *
 * @throws Exception if writing the script, registering it, or running the queries fails
 */
@Test
public void testEvalFunc_Static() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.builtin.OutputSchema;",
      "class GroovyUDF {",
      "  @OutputSchema('x:long')",
      "  static long square(long x) {",
      "    return x*x;",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo0",
      tuple(1),
      tuple(2),
      tuple(3),
      tuple(4)
      );

  pigServer.registerQuery("A = LOAD 'foo0' USING mock.Storage();");
  pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.square($0);");
  pigServer.registerQuery("STORE B INTO 'bar0' USING mock.Storage();");

  List<Tuple> out = data.get("bar0");
  assertEquals(tuple(1L), out.get(0));
  assertEquals(tuple(4L), out.get(1));
  assertEquals(tuple(9L), out.get(2));
  assertEquals(tuple(16L), out.get(3));
}
 
Example 4
Source File: TestUDFGroovy.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a Groovy UDF assembled from {@code @AlgebraicInitial},
 * {@code @AlgebraicIntermed} and {@code @AlgebraicFinal} methods can be registered and
 * called as the single function {@code sumalg}.
 *
 * <p>The tuples 1 through 4 are grouped with {@code GROUP ... ALL} and summed by
 * {@code groovyudfs.sumalg}; the stored result is expected to be 10.
 *
 * @throws Exception if writing the script, registering it, or running the queries fails
 */
@Test
public void testAlgebraicEvalFunc() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.scripting.groovy.AlgebraicInitial;",
      "import org.apache.pig.scripting.groovy.AlgebraicIntermed;",
      "import org.apache.pig.scripting.groovy.AlgebraicFinal;",
      "class GroovyUDFs {",
      "  @AlgebraicFinal('sumalg')",
      "  public static long algFinal(Tuple t) {",
      "    long x = 0;",
      "    for (Object o: t[1]) {",
      "      x = x + o;",
      "    }",
      "    return x;",
      "  }",
      "  @AlgebraicInitial('sumalg')",
      "  public static Tuple algInitial(Tuple t) {",
      "    long x = 0;",
      "    for (Object o: t[1]) {",
      "      x = x + o[0];",
      "    }",
      "    return [x];",
      "  }",
      "  @AlgebraicIntermed('sumalg')",
      "  public static Tuple algIntermed(Tuple t) {",
      "    long x = 0;",
      "    for (Object o: t[1]) {",
      "      x = x + o;",
      "    }",
      "    return [x];",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo2",
      tuple(1),
      tuple(2),
      tuple(3),
      tuple(4)
      );

  pigServer.registerQuery("A = LOAD 'foo2' USING mock.Storage();");
  pigServer.registerQuery("B = GROUP A ALL;");
  pigServer.registerQuery("C = FOREACH B GENERATE groovyudfs.sumalg(A);");
  pigServer.registerQuery("STORE C INTO 'bar2' USING mock.Storage();");

  List<Tuple> out = data.get("bar2");
  assertEquals(tuple(10L), out.get(0));
}
 
Example 5
Source File: TestUDFGroovy.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a Groovy UDF assembled from {@code @AccumulatorAccumulate},
 * {@code @AccumulatorGetValue} and {@code @AccumulatorCleanup} methods can be
 * registered and called as the single function {@code sumacc}.
 *
 * <p>The tuples 1 through 4 are grouped with {@code GROUP ... ALL} and
 * {@code groovyudfs.sumacc} is invoked twice in the same {@code FOREACH}; both
 * columns of the stored result are expected to be 10.
 *
 * @throws Exception if writing the script, registering it, or running the queries fails
 */
@Test
public void testAccumulatorEvalFunc() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.builtin.OutputSchema;",
      "import org.apache.pig.scripting.groovy.AccumulatorAccumulate;",
      "import org.apache.pig.scripting.groovy.AccumulatorGetValue;",
      "import org.apache.pig.scripting.groovy.AccumulatorCleanup;",
      "class GroovyUDFs {",
      "  private int sum = 0;",
      "  @AccumulatorAccumulate('sumacc')",
      "  public void accuAccumulate(Tuple t) {",
      "    for (Object o: t[1]) {",
      "      sum += o[0]",
      "    }",
      "  }",
      "  @AccumulatorGetValue('sumacc')",
      "  @OutputSchema('sum: long')",
      "  public long accuGetValue() {",
      "    return this.sum;",
      "  }",
      "  @AccumulatorCleanup('sumacc')",
      "  public void accuCleanup() {",
      "    this.sum = 0L;",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo3",
      tuple(1),
      tuple(2),
      tuple(3),
      tuple(4)
      );

  pigServer.registerQuery("A = LOAD 'foo3' USING mock.Storage();");
  pigServer.registerQuery("B = GROUP A ALL;");
  pigServer.registerQuery("C = FOREACH B GENERATE groovyudfs.sumacc(A) AS sum1,groovyudfs.sumacc(A) AS sum2;");
  pigServer.registerQuery("STORE C INTO 'bar3' USING mock.Storage();");

  List<Tuple> out = data.get("bar3");
  assertEquals(tuple(10L,10L), out.get(0));
}
 
Example 6
Source File: TestUDFGroovy.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that a Groovy UDF annotated with {@code @OutputSchemaFunction} can be
 * registered and applied to int, long, float and double columns.
 *
 * <p>The script's {@code squareSchema} function returns its input unchanged, and the
 * untyped {@code square} method multiplies its argument by itself. Two rows are
 * squared column by column; the int and long columns are expected to keep their type
 * while the float and double columns are both expected to come back as doubles.
 *
 * @throws Exception if writing the script, registering it, or running the queries fails
 */
@Test
public void testOutputSchemaFunction() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.scripting.groovy.OutputSchemaFunction;",
      "class GroovyUDFs {",
      "  @OutputSchemaFunction('squareSchema')",
      "  public static square(x) {",
      "    return x * x;",
      "  }",
      "  public static squareSchema(input) {",
      "    return input;",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo4",
      tuple(1,1L,1.0F,1.0D),
      tuple(2,2L,2.0F,2.0D)
      );

  pigServer.registerQuery("A = LOAD 'foo4' USING mock.Storage() AS (i: int, l: long, f: float, d: double);");
  pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.square(i),groovyudfs.square(l),groovyudfs.square(f),groovyudfs.square(d);");
  pigServer.registerQuery("STORE B INTO 'bar4' USING mock.Storage();");

  List<Tuple> out = data.get("bar4");
  // Multiplying two floats leads to a double in Groovy, this is reflected here.
  assertEquals(tuple(1,1L,1.0D,1.0D), out.get(0));
  assertEquals(tuple(4,4L,4.0D,4.0D), out.get(1));
}