Java Code Examples for org.apache.pig.data.Tuple#getMemorySize()

The following examples show how to use org.apache.pig.data.Tuple#getMemorySize(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: GetMemNumRows.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Reports the in-memory size of the input tuple, together with a row count
 * when the tuple carries the special number-of-rows marker.
 *
 * @param in input tuple; may be null
 * @return null when {@code in} is null; otherwise a two-field tuple whose
 *         field 0 is the memory size of {@code in} and whose field 1 is the
 *         row count (0 unless {@code in} is a specially marked tuple)
 * @throws IOException if a tuple operation invoked here raises it
 */
public Tuple exec(Tuple in) throws IOException {
    if (in == null) {
        return null;
    }

    final long memSize = in.getMemorySize();
    final int fieldCount = in.size();

    // A specially marked tuple holds the marker in its second-to-last field
    // and the number of rows in its last field.
    long rowCount = 0;
    if (fieldCount >= 2) {
        Object possibleMarker = in.get(fieldCount - 2);
        if (PoissonSampleLoader.NUMROWS_TUPLE_MARKER.equals(possibleMarker)) {
            rowCount = (Long) in.get(fieldCount - 1);
        }
    }

    // Assemble the (memory size, row count) pair.
    Tuple result = factory.newTuple(2);
    result.set(0, memSize);
    result.set(1, rowCount);
    return result;
}
 
Example 2
Source File: PoissonSampleLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Folds the memory size of the newly sampled tuple into the running average
 * tuple size, then recomputes skipInterval from that average.
 *
 * @param t - the tuple that was just sampled
 */
private void updateSkipInterval(Tuple t) {
    avgTupleMemSz = (avgTupleMemSz * numRowsSampled + t.getMemorySize())
            / (numRowsSampled + 1);
    skipInterval = memToSkipPerSample / avgTupleMemSz;

    // For the first five samples the interval is shrunk further. If the
    // earliest tuples are much smaller than the rest, an unscaled interval
    // would yield very few samples; over-sampling a little is acceptable.
    if (numRowsSampled < 5) {
        skipInterval /= (10 - numRowsSampled);
    }
    numRowsSampled++;
}
 
Example 3
Source File: ExecutableManager.java    From spork with Apache License 2.0 5 votes vote down vote up
@Override
public void run() {
    try {
        // Drain tuples produced by the executable, handing each one on to
        // the POStream queue and tallying its memory size.
        for (Tuple next = outputHandler.getNext();
                next != null;
                next = outputHandler.getNext()) {
            processOutput(next);
            outputBytes += next.getMemorySize();
        }
        // The binary has no more output: pass null on and close the handler.
        processOutput(null);
        outputHandler.close();
    } catch (Throwable t) {
        // Record the failure so it is visible outside this thread.
        outerrThreadsError = t;
        LOG.error("Caught Exception in OutputHandler of Streaming binary, " +
                "sending error signal to pipeline", t);
        // Push an error result to POStream, then kill the process.
        try {
            Result errorResult = new Result();
            String message = "Error reading output from Streaming binary:" +
                    "'" + command.toString() + "':" + t.getMessage();
            errorResult.result = message;
            errorResult.returnStatus = POStatus.STATUS_ERR;
            sendOutput(binaryOutputQueue, errorResult);
            killProcess(process);
        } catch (Exception e) {
            LOG.error("Error while trying to signal Error status to pipeline", e);
        }
    }
}
 
Example 4
Source File: POPoissonSample.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the running average tuple size with the memory size of the newly
 * sampled tuple t, and recalculates skipInterval from the new average.
 *
 * @param t - newly sampled tuple
 */
private void updateSkipInterval(Tuple t) {
    avgTupleMemSz = (avgTupleMemSz * numRowsSampled + t.getMemorySize())
            / (numRowsSampled + 1);
    skipInterval = memToSkipPerSample / avgTupleMemSz;

    // Skip fewer rows during the first few samples: if the first tuples are
    // much smaller than the rest, the unscaled interval would leave very few
    // rows sampled. Sampling a little extra is fine.
    final boolean amongFirstSamples = numRowsSampled < 5;
    if (amongFirstSamples) {
        skipInterval = skipInterval / (10 - numRowsSampled);
    }
    numRowsSampled++;
}
 
Example 5
Source File: POPartialAgg.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Estimates how many tuples may be buffered and derives the first- and
 * second-tier thresholds from that estimate.
 *
 * When map-side aggregation is not disabled, every tuple currently held in
 * rawInputMap is measured: its size is reported to a MemoryLimits instance
 * and added to a running total from which avgTupleSize is computed. The
 * cache limit returned by MemoryLimits is then split between the two tiers
 * according to sizeReduction. If no tuples are available to sample,
 * avgTupleSize is left unchanged instead of dividing by zero.
 *
 * In every case estimatedMemThresholds is set to true on return.
 */
private void estimateMemThresholds() {
    if (!mapAggDisabled()) {
        LOG.info("Getting mem limits; considering " + ALL_POPARTS.size()
                + " POPArtialAgg objects." + " with memory percentage "
                + percentUsage);
        MemoryLimits memLimits = new MemoryLimits(ALL_POPARTS.size(), percentUsage);
        // Accumulate in a long: the sum of many tuple sizes can exceed
        // Integer.MAX_VALUE even when each individual size fits in an int.
        long estTotalMem = 0;
        int estTuples = 0;
        // Only the buffered tuples matter here; the map keys are not used.
        for (List<Tuple> tuples : rawInputMap.values()) {
            for (Tuple t : tuples) {
                estTuples += 1;
                int mem = (int) t.getMemorySize();
                estTotalMem += mem;
                memLimits.addNewObjSize(mem);
            }
        }
        // With nothing sampled there is no average to compute; dividing
        // would throw ArithmeticException.
        if (estTuples > 0) {
            avgTupleSize = (int) (estTotalMem / estTuples);
        }
        long totalTuples = memLimits.getCacheLimit();
        LOG.info("Estimated total tuples to buffer, based on " + estTuples + " tuples that took up " + estTotalMem + " bytes: " + totalTuples);
        firstTierThreshold = (int) (0.5 + totalTuples * (1f - (1f / sizeReduction)));
        secondTierThreshold = (int) (0.5 + totalTuples *  (1f / sizeReduction));
        // The second tier should at least allow one tuple before it tries to aggregate.
        // This code retains the total number of tuples in the buffer while guaranteeing
        // the second tier has at least one tuple.
        if (secondTierThreshold == 0) {
            secondTierThreshold += 1;
            firstTierThreshold -= 1;
        }
        // Logged after the adjustment above so the reported values are the
        // ones actually in effect.
        LOG.info("Setting thresholds. Primary: " + firstTierThreshold + ". Secondary: " + secondTierThreshold);
    }
    estimatedMemThresholds = true;
}
 
Example 6
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
// See PIG-1443
public void testTupleSizeWithString() {
    // A tuple built from two String fields is expected to report 192.
    final String[] fields = {"1234567", "bar"};
    Tuple tuple = Util.createTuple(fields);
    assertEquals("tuple size", 192, tuple.getMemorySize());
}
 
Example 7
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleSizeWithByteArrays() {
    // A tuple holding two DataByteArray fields is expected to report 160.
    DataByteArray first = new DataByteArray("1234567");
    DataByteArray second = new DataByteArray("bar");

    Tuple tuple = mTupleFactory.newTuple();
    tuple.append(first);
    tuple.append(second);

    assertEquals("tuple size", 160, tuple.getMemorySize());
}
 
Example 8
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleSizeWithDoubles() {
    Tuple t = mTupleFactory.newTuple();
    t.append(new Double(0.1));
    t.append(new Double(2000.10001));
    long size = t.getMemorySize();
    assertEquals("tuple size", 120, size);
}
 
Example 9
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleSizeWithFloats() {
    Tuple t = mTupleFactory.newTuple();
    t.append(new Float(0.1F));
    t.append(new Float(2000.10001F));
    long size = t.getMemorySize();
    assertEquals("tuple size", 120, size);
}
 
Example 10
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleSizeWithLongs() {
    Tuple t = mTupleFactory.newTuple();
    t.append(new Long(100));
    t.append(new Long(2000));
    long size = t.getMemorySize();
    assertEquals("tuple size", 120, size);
}
 
Example 11
Source File: TestTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testTupleSizeWithBooleans() {
    Tuple t = mTupleFactory.newTuple();
    t.append(new Boolean(true));
    t.append(new Boolean(false));
    long size = t.getMemorySize();
    assertEquals("tuple size", 120, size);
}
 
Example 12
Source File: TestTuple.java    From spork with Apache License 2.0 4 votes vote down vote up
@Test
public void testEmptyTupleSize() {
    // A freshly created tuple with no fields is expected to report 88.
    Tuple emptyTuple = mTupleFactory.newTuple();
    assertEquals("tuple size", 88, emptyTuple.getMemorySize());
}