Java Code Examples for org.apache.hadoop.hdfs.inotify.EventBatch

The following examples show how to use org.apache.hadoop.hdfs.inotify.EventBatch. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: localization_nifi   Source File: GetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Polls the inotify event stream for the next batch, retrying when the poll fails with an
 * {@link IOException}.
 *
 * @param eventStream stream to poll
 * @param duration    how long a single poll may wait
 * @param timeUnit    unit of {@code duration}
 * @param retries     number of failed attempts that are tolerated; the failure of attempt
 *                    {@code retries + 1} is rethrown to the caller
 * @return whatever {@code eventStream.poll(duration, timeUnit)} returns on the first attempt that does not throw
 * @throws IOException if more than {@code retries} attempts failed
 */
private EventBatch getEventBatch(DFSInotifyEventInputStream eventStream, long duration, TimeUnit timeUnit, int retries) throws IOException, InterruptedException, MissingEventsException {
    // The inotify API recommends retrying a few times when poll throws an IOException;
    // see org.apache.hadoop.hdfs.DFSInotifyEventInputStream#poll for the details.
    for (int attempt = 1; ; attempt++) {
        try {
            return eventStream.poll(duration, timeUnit);
        } catch (IOException e) {
            if (attempt > retries) {
                getLogger().debug("Failed to poll for event batch. Reached max retry times.", e);
                throw e;
            }
            getLogger().debug("Attempt {} failed to poll for event batch. Retrying.", new Object[]{attempt});
        }
    }
}
 
Example 2
Source Project: localization_nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void onTriggerShouldProperlyHandleAnEmptyEventBatch() throws Exception {
    // Stub a batch that carries zero events yet still reports transaction id 100.
    final EventBatch emptyBatch = mock(EventBatch.class);
    when(emptyBatch.getEvents()).thenReturn(new Event[0]);
    when(emptyBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(emptyBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path");
    testRunner.setProperty(GetHDFSEvents.NUMBER_OF_RETRIES_FOR_POLL, "5");
    testRunner.run();

    // Nothing is routed to success, but the batch's transaction id must still be read and stored.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(0, routedToSuccess.size());
    verify(emptyBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 3
Source Project: localization_nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    testRunner.run();

    // With this watch pattern three flow files are expected on the success relationship.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, routedToSuccess.size());
    verify(stubbedBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 4
Source Project: localization_nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    testRunner.run();

    // Only one flow file is expected for the narrower "/some/path/create" watch pattern.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, routedToSuccess.size());
    verify(stubbedBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 5
Source Project: hadoop   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Blocks until the next batch of events is available in the stream and
 * returns it. Between unsuccessful polls the thread sleeps for a randomized,
 * growing interval.
 *
 * @return the next batch of events; never null
 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
 * @throws MissingEventsException see
 * {@link DFSInotifyEventInputStream#poll()}
 * @throws InterruptedException if the calling thread is interrupted
 */
public EventBatch take() throws IOException, InterruptedException,
    MissingEventsException {
  TraceScope scope = Trace.startSpan("inotifyTake", traceSampler);
  try {
    int waitFloorMs = INITIAL_WAIT_MS;
    EventBatch batch = poll();
    while (batch == null) {
      // Randomize the sleep within [waitFloorMs, 2 * waitFloorMs) so that
      // multiple clients do not stampede the NN at the same moment.
      int sleepTime = waitFloorMs + rng.nextInt(waitFloorMs);
      LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
      Thread.sleep(sleepTime);
      // The floor is capped at one minute, which bounds a single sleep to
      // just under two minutes.
      waitFloorMs = Math.min(60000, waitFloorMs * 2);
      batch = poll();
    }
    return batch;
  } finally {
    scope.close();
  }
}
 
Example 6
Source Project: big-c   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Blocks until the next batch of events is available in the stream and
 * returns it. Between unsuccessful polls the thread sleeps for a randomized,
 * growing interval.
 *
 * @return the next batch of events; never null
 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
 * @throws MissingEventsException see
 * {@link DFSInotifyEventInputStream#poll()}
 * @throws InterruptedException if the calling thread is interrupted
 */
public EventBatch take() throws IOException, InterruptedException,
    MissingEventsException {
  TraceScope scope = Trace.startSpan("inotifyTake", traceSampler);
  try {
    int waitFloorMs = INITIAL_WAIT_MS;
    EventBatch batch = poll();
    while (batch == null) {
      // Randomize the sleep within [waitFloorMs, 2 * waitFloorMs) so that
      // multiple clients do not stampede the NN at the same moment.
      int sleepTime = waitFloorMs + rng.nextInt(waitFloorMs);
      LOG.debug("take(): poll() returned null, sleeping for {} ms", sleepTime);
      Thread.sleep(sleepTime);
      // The floor is capped at one minute, which bounds a single sleep to
      // just under two minutes.
      waitFloorMs = Math.min(60000, waitFloorMs * 2);
      batch = poll();
    }
    return batch;
  } finally {
    scope.close();
  }
}
 
Example 7
Source Project: nifi   Source File: GetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Polls the inotify event stream for the next batch, retrying when the poll fails with an
 * {@link IOException}.
 *
 * @param eventStream stream to poll
 * @param duration    how long a single poll may wait
 * @param timeUnit    unit of {@code duration}
 * @param retries     number of failed attempts that are tolerated; the failure of attempt
 *                    {@code retries + 1} is rethrown to the caller
 * @return whatever {@code eventStream.poll(duration, timeUnit)} returns on the first attempt that does not throw
 * @throws IOException if more than {@code retries} attempts failed
 */
private EventBatch getEventBatch(DFSInotifyEventInputStream eventStream, long duration, TimeUnit timeUnit, int retries) throws IOException, InterruptedException, MissingEventsException {
    // The inotify API recommends retrying a few times when poll throws an IOException;
    // see org.apache.hadoop.hdfs.DFSInotifyEventInputStream#poll for the details.
    for (int attempt = 1; ; attempt++) {
        try {
            return eventStream.poll(duration, timeUnit);
        } catch (IOException e) {
            if (attempt > retries) {
                getLogger().debug("Failed to poll for event batch. Reached max retry times.", e);
                throw e;
            }
            getLogger().debug("Attempt {} failed to poll for event batch. Retrying.", new Object[]{attempt});
        }
    }
}
 
Example 8
Source Project: nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void onTriggerShouldProperlyHandleAnEmptyEventBatch() throws Exception {
    // Stub a batch that carries zero events yet still reports transaction id 100.
    final EventBatch emptyBatch = mock(EventBatch.class);
    when(emptyBatch.getEvents()).thenReturn(new Event[0]);
    when(emptyBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(emptyBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path");
    testRunner.setProperty(GetHDFSEvents.NUMBER_OF_RETRIES_FOR_POLL, "5");
    testRunner.run();

    // Nothing is routed to success, but the batch's transaction id must still be read and stored.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(0, routedToSuccess.size());
    verify(emptyBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 9
Source Project: nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    testRunner.run();

    // With this watch pattern three flow files are expected on the success relationship.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, routedToSuccess.size());
    verify(stubbedBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 10
Source Project: nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    testRunner.run();

    // Only one flow file is expected for the narrower "/some/path/create" watch pattern.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, routedToSuccess.size());
    verify(stubbedBatch).getTxid();

    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 11
Source Project: localization_nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void eventsProcessorShouldProperlyFilterEventTypes() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/.*)?");
    testRunner.setProperty(GetHDFSEvents.EVENT_TYPES, "create, metadata");
    testRunner.run();

    // Two flow files are expected, each tagged with one of the two requested event types.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(2, routedToSuccess.size());

    final List<String> allowedTypes = Arrays.asList("CREATE", "METADATA");
    for (MockFlowFile flowFile : routedToSuccess) {
        assertTrue(allowedTypes.contains(flowFile.getAttribute(EventAttributes.EVENT_TYPE)));
    }

    verify(stubbedBatch).getTxid();
    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 12
Source Project: localization_nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void makeSureExpressionLanguageIsWorkingProperlyWithinTheHdfsPathToWatch() throws Exception {
    // Three create events: a visible and a hidden file under .../1/2/3, plus a visible file under .../1/2/4.
    Event[] events = new Event[] {
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/4/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/.t.txt").build()
    };

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    // The watch path is written with ${literal(n)} expressions; hidden files are excluded.
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/${literal(1)}/${literal(2)}/${literal(3)}/.*.txt");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create");
    runner.setProperty(GetHDFSEvents.IGNORE_HIDDEN_FILES, "true");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());

    for (MockFlowFile f : successfulFlowFiles) {
        // assertEquals reports the actual value on failure and does not throw an NPE
        // when the attribute is missing, unlike assertTrue(eventType.equals(...)).
        assertEquals("CREATE", f.getAttribute(EventAttributes.EVENT_TYPE));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
 
Example 13
Source Project: hadoop   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next event batch in the stream, waiting at most the given
 * amount of time for one to arrive. If no batch shows up before the time
 * budget is spent, null is returned. The method may return later than the
 * requested time by up to the duration of one RPC to the NameNode.
 *
 * @param time number of units of the given TimeUnit to wait
 * @param tu the desired TimeUnit
 * @return the next batch, or null if the wait timed out
 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
 * @throws MissingEventsException
 * see {@link DFSInotifyEventInputStream#poll()}
 * @throws InterruptedException if the calling thread is interrupted
 */
public EventBatch poll(long time, TimeUnit tu) throws IOException,
    InterruptedException, MissingEventsException {
  TraceScope scope = Trace.startSpan("inotifyPollWithTimeout", traceSampler);
  try {
    final long startMs = Time.monotonicNow();
    final long budgetMs = TimeUnit.MILLISECONDS.convert(time, tu);
    long sleepMs = INITIAL_WAIT_MS;
    while (true) {
      EventBatch batch = poll();
      if (batch != null) {
        return batch;
      }
      long remainingMs = budgetMs - (Time.monotonicNow() - startMs);
      if (remainingMs <= 0) {
        LOG.debug("timed poll(): timed out");
        return null;
      }
      // Double the sleep each round, but never sleep past the remaining
      // budget.
      sleepMs = (remainingMs < sleepMs * 2) ? remainingMs : sleepMs * 2;
      LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
          sleepMs);
      Thread.sleep(sleepMs);
    }
  } finally {
    scope.close();
  }
}
 
Example 14
Source Project: hadoop   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates ten directories against the first NameNode, shuts it down, makes
 * the second NameNode active, and checks that the inotify stream still
 * delivers one CREATE event per directory, in order, followed by nothing.
 */
@Test(timeout = 120000)
public void testNNFailover() throws IOException, URISyntaxException,
    MissingEventsException {
  Configuration conf = new HdfsConfiguration();
  MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();

  try {
    cluster.getDfsCluster().waitActive();
    cluster.getDfsCluster().transitionToActive(0);
    DFSClient client = ((DistributedFileSystem) HATestUtil.configureFailoverFs
        (cluster.getDfsCluster(), conf)).dfs;
    DFSInotifyEventInputStream eis = client.getInotifyEventStream();
    for (int i = 0; i < 10; i++) {
      client.mkdirs("/dir" + i, null, false);
    }
    cluster.getDfsCluster().shutdownNameNode(0);
    cluster.getDfsCluster().transitionToActive(1);
    // we can read all of the edits logged by the old active from the new
    // active
    for (int i = 0; i < 10; i++) {
      EventBatch batch = waitForNextEvents(eis);
      Assert.assertEquals(1, batch.getEvents().length);
      Event event = batch.getEvents()[0];
      // assertEquals/assertNull report the offending value on failure,
      // which assertTrue on a boolean expression cannot do
      Assert.assertEquals(Event.EventType.CREATE, event.getEventType());
      Assert.assertEquals("/dir" + i, ((Event.CreateEvent) event).getPath());
    }
    Assert.assertNull(eis.poll());
  } finally {
    cluster.shutdown();
  }
}
 
Example 15
Source Project: hadoop   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Schedules a directory creation one second in the future and checks that a
 * timed poll with a five-second budget returns the matching CREATE event.
 */
@Test(timeout = 120000)
public void testReadEventsWithTimeout() throws IOException,
    InterruptedException, MissingEventsException {
  Configuration conf = new HdfsConfiguration();
  MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();
  ScheduledExecutorService ex = Executors
      .newSingleThreadScheduledExecutor();

  try {
    cluster.getDfsCluster().waitActive();
    cluster.getDfsCluster().transitionToActive(0);
    final DFSClient client = new DFSClient(cluster.getDfsCluster()
        .getNameNode(0).getNameNodeAddress(), conf);
    DFSInotifyEventInputStream eis = client.getInotifyEventStream();
    ex.schedule(new Runnable() {
      @Override
      public void run() {
        try {
          client.mkdirs("/dir", null, false);
        } catch (IOException e) {
          // test will fail
          LOG.error("Unable to create /dir", e);
        }
      }
    }, 1, TimeUnit.SECONDS);
    // a very generous wait period -- the edit will definitely have been
    // processed by the time this is up
    EventBatch batch = eis.poll(5, TimeUnit.SECONDS);
    Assert.assertNotNull(batch);
    Assert.assertEquals(1, batch.getEvents().length);
    Assert.assertEquals(Event.EventType.CREATE,
        batch.getEvents()[0].getEventType());
    Assert.assertEquals("/dir", ((Event.CreateEvent) batch.getEvents()[0]).getPath());
  } finally {
    // stop the scheduler thread (and cancel the task if it has not run yet)
    // so the executor does not outlive the test
    ex.shutdownNow();
    cluster.shutdown();
  }
}
 
Example 16
Source Project: big-c   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next event batch in the stream, waiting at most the given
 * amount of time for one to arrive. If no batch shows up before the time
 * budget is spent, null is returned. The method may return later than the
 * requested time by up to the duration of one RPC to the NameNode.
 *
 * @param time number of units of the given TimeUnit to wait
 * @param tu the desired TimeUnit
 * @return the next batch, or null if the wait timed out
 * @throws IOException see {@link DFSInotifyEventInputStream#poll()}
 * @throws MissingEventsException
 * see {@link DFSInotifyEventInputStream#poll()}
 * @throws InterruptedException if the calling thread is interrupted
 */
public EventBatch poll(long time, TimeUnit tu) throws IOException,
    InterruptedException, MissingEventsException {
  TraceScope scope = Trace.startSpan("inotifyPollWithTimeout", traceSampler);
  try {
    final long startMs = Time.monotonicNow();
    final long budgetMs = TimeUnit.MILLISECONDS.convert(time, tu);
    long sleepMs = INITIAL_WAIT_MS;
    while (true) {
      EventBatch batch = poll();
      if (batch != null) {
        return batch;
      }
      long remainingMs = budgetMs - (Time.monotonicNow() - startMs);
      if (remainingMs <= 0) {
        LOG.debug("timed poll(): timed out");
        return null;
      }
      // Double the sleep each round, but never sleep past the remaining
      // budget.
      sleepMs = (remainingMs < sleepMs * 2) ? remainingMs : sleepMs * 2;
      LOG.debug("timed poll(): poll() returned null, sleeping for {} ms",
          sleepMs);
      Thread.sleep(sleepMs);
    }
  } finally {
    scope.close();
  }
}
 
Example 17
Source Project: big-c   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates ten directories against the first NameNode, shuts it down, makes
 * the second NameNode active, and checks that the inotify stream still
 * delivers one CREATE event per directory, in order, followed by nothing.
 */
@Test(timeout = 120000)
public void testNNFailover() throws IOException, URISyntaxException,
    MissingEventsException {
  Configuration conf = new HdfsConfiguration();
  MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();

  try {
    cluster.getDfsCluster().waitActive();
    cluster.getDfsCluster().transitionToActive(0);
    DFSClient client = ((DistributedFileSystem) HATestUtil.configureFailoverFs
        (cluster.getDfsCluster(), conf)).dfs;
    DFSInotifyEventInputStream eis = client.getInotifyEventStream();
    for (int i = 0; i < 10; i++) {
      client.mkdirs("/dir" + i, null, false);
    }
    cluster.getDfsCluster().shutdownNameNode(0);
    cluster.getDfsCluster().transitionToActive(1);
    // we can read all of the edits logged by the old active from the new
    // active
    for (int i = 0; i < 10; i++) {
      EventBatch batch = waitForNextEvents(eis);
      Assert.assertEquals(1, batch.getEvents().length);
      Event event = batch.getEvents()[0];
      // assertEquals/assertNull report the offending value on failure,
      // which assertTrue on a boolean expression cannot do
      Assert.assertEquals(Event.EventType.CREATE, event.getEventType());
      Assert.assertEquals("/dir" + i, ((Event.CreateEvent) event).getPath());
    }
    Assert.assertNull(eis.poll());
  } finally {
    cluster.shutdown();
  }
}
 
Example 18
Source Project: big-c   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Schedules a directory creation one second in the future and checks that a
 * timed poll with a five-second budget returns the matching CREATE event.
 */
@Test(timeout = 120000)
public void testReadEventsWithTimeout() throws IOException,
    InterruptedException, MissingEventsException {
  Configuration conf = new HdfsConfiguration();
  MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();
  ScheduledExecutorService ex = Executors
      .newSingleThreadScheduledExecutor();

  try {
    cluster.getDfsCluster().waitActive();
    cluster.getDfsCluster().transitionToActive(0);
    final DFSClient client = new DFSClient(cluster.getDfsCluster()
        .getNameNode(0).getNameNodeAddress(), conf);
    DFSInotifyEventInputStream eis = client.getInotifyEventStream();
    ex.schedule(new Runnable() {
      @Override
      public void run() {
        try {
          client.mkdirs("/dir", null, false);
        } catch (IOException e) {
          // test will fail
          LOG.error("Unable to create /dir", e);
        }
      }
    }, 1, TimeUnit.SECONDS);
    // a very generous wait period -- the edit will definitely have been
    // processed by the time this is up
    EventBatch batch = eis.poll(5, TimeUnit.SECONDS);
    Assert.assertNotNull(batch);
    Assert.assertEquals(1, batch.getEvents().length);
    Assert.assertEquals(Event.EventType.CREATE,
        batch.getEvents()[0].getEventType());
    Assert.assertEquals("/dir", ((Event.CreateEvent) batch.getEvents()[0]).getPath());
  } finally {
    // stop the scheduler thread (and cancel the task if it has not run yet)
    // so the executor does not outlive the test
    ex.shutdownNow();
    cluster.shutdown();
  }
}
 
Example 19
Source Project: nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void eventsProcessorShouldProperlyFilterEventTypes() throws Exception {
    final Event[] fixtureEvents = getEvents();

    // A mocked batch wrapping the fixture events, reported under transaction id 100.
    final EventBatch stubbedBatch = mock(EventBatch.class);
    when(stubbedBatch.getEvents()).thenReturn(fixtureEvents);
    when(stubbedBatch.getTxid()).thenReturn(100L);

    // The mocked admin hands out the mocked stream, whose one-second poll yields the stubbed batch.
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(stubbedBatch);

    final GetHDFSEvents hdfsEventsProcessor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    final TestRunner testRunner = TestRunners.newTestRunner(hdfsEventsProcessor);
    testRunner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/.*)?");
    testRunner.setProperty(GetHDFSEvents.EVENT_TYPES, "create, metadata");
    testRunner.run();

    // Two flow files are expected, each tagged with one of the two requested event types.
    final List<MockFlowFile> routedToSuccess = testRunner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(2, routedToSuccess.size());

    final List<String> allowedTypes = Arrays.asList("CREATE", "METADATA");
    for (MockFlowFile flowFile : routedToSuccess) {
        assertTrue(allowedTypes.contains(flowFile.getAttribute(EventAttributes.EVENT_TYPE)));
    }

    verify(stubbedBatch).getTxid();
    final String storedTxId = testRunner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id");
    assertEquals("100", storedTxId);
}
 
Example 20
Source Project: nifi   Source File: TestGetHDFSEvents.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void makeSureExpressionLanguageIsWorkingProperlyWithinTheHdfsPathToWatch() throws Exception {
    // Three create events: a visible and a hidden file under .../1/2/3, plus a visible file under .../1/2/4.
    Event[] events = new Event[] {
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/4/t.txt").build(),
            new Event.CreateEvent.Builder().path("/some/path/1/2/3/.t.txt").build()
    };

    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);

    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);

    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);

    // The watch path is written with ${literal(n)} expressions; hidden files are excluded.
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/${literal(1)}/${literal(2)}/${literal(3)}/.*.txt");
    runner.setProperty(GetHDFSEvents.EVENT_TYPES, "create");
    runner.setProperty(GetHDFSEvents.IGNORE_HIDDEN_FILES, "true");
    runner.run();

    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());

    for (MockFlowFile f : successfulFlowFiles) {
        // assertEquals reports the actual value on failure and does not throw an NPE
        // when the attribute is missing, unlike assertTrue(eventType.equals(...)).
        assertEquals("CREATE", f.getAttribute(EventAttributes.EVENT_TYPE));
    }

    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
 
Example 21
Source Project: kafka-connect-fs   Source File: HdfsFileWatcherPolicy.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Loops forever: polls the HDFS inotify stream once per iteration and enqueues the paths of
 * CREATE, APPEND and CLOSE events that do not end in "._COPYING_", plus the destination path
 * of RENAME events whose source does end in "._COPYING_". The loop is only left by throwing
 * {@code IllegalWorkerStateException}.
 */
@Override
public void run() {
    while (true) {
        try {
            // NOTE(review): a new event stream is requested on every iteration — confirm this is intended.
            DFSInotifyEventInputStream eventStream = admin.getInotifyEventStream();

            boolean workingDirPresent = fs.getFileStatus(fs.getWorkingDirectory()) != null
                    && fs.exists(fs.getWorkingDirectory());
            if (!workingDirPresent) {
                continue;
            }

            EventBatch batch = eventStream.poll();
            if (batch == null) {
                // NOTE(review): retries immediately with no pause — confirm busy polling is acceptable.
                continue;
            }

            for (Event event : batch.getEvents()) {
                switch (event.getEventType()) {
                    case CREATE: {
                        String createdPath = ((Event.CreateEvent) event).getPath();
                        if (!createdPath.endsWith("._COPYING_")) {
                            enqueue(createdPath);
                        }
                        break;
                    }
                    case APPEND: {
                        String appendedPath = ((Event.AppendEvent) event).getPath();
                        if (!appendedPath.endsWith("._COPYING_")) {
                            enqueue(appendedPath);
                        }
                        break;
                    }
                    case RENAME: {
                        // Unlike the other cases, only renames away from a "._COPYING_" source are enqueued.
                        Event.RenameEvent rename = (Event.RenameEvent) event;
                        if (rename.getSrcPath().endsWith("._COPYING_")) {
                            enqueue(rename.getDstPath());
                        }
                        break;
                    }
                    case CLOSE: {
                        String closedPath = ((Event.CloseEvent) event).getPath();
                        if (!closedPath.endsWith("._COPYING_")) {
                            enqueue(closedPath);
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
        } catch (IOException ioe) {
            // With a positive retry interval an I/O failure just pauses the loop; otherwise it is fatal.
            if (retrySleepMs <= 0) {
                log.warn("Error watching path [{}]. Stopping it...", fs.getWorkingDirectory(), ioe);
                throw new IllegalWorkerStateException(ioe);
            }
            time.sleep(retrySleepMs);
        } catch (Exception e) {
            log.warn("Stopping watcher due to an unexpected exception when watching path [{}].",
                    fs.getWorkingDirectory(), e);
            throw new IllegalWorkerStateException(e);
        }
    }
}
 
Example 22
Source Project: hadoop   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the next batch of events in the stream, or null when no new
 * batch is currently available.
 *
 * @return the next batch, or null if there is nothing to return right now
 * @throws IOException because of network error or edit log
 * corruption. Also possible if JournalNodes are unresponsive in the
 * QJM setting (even one unresponsive JournalNode is enough in rare cases),
 * so catching this exception and retrying at least a few times is
 * recommended.
 * @throws MissingEventsException if we cannot return the next batch in the
 * stream because the data for the events (and possibly some subsequent
 * events) has been deleted (generally because this stream is a very large
 * number of transactions behind the current state of the NameNode). It is
 * safe to continue reading from the stream after this exception is thrown.
 * The next available batch of events will be returned.
 */
public EventBatch poll() throws IOException, MissingEventsException {
  TraceScope scope =
      Trace.startSpan("inotifyPoll", traceSampler);
  try {
    // until the NN has told us the latest committed txid there is nothing
    // to read from; fetch it and report "no batch" for this call
    if (lastReadTxid == -1) {
      LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
      lastReadTxid = namenode.getCurrentEditLogTxid();
      return null;
    }

    if (!it.hasNext()) {
      EventBatchList el = namenode.getEditsFromTxid(lastReadTxid + 1);
      if (el.getLastTxid() == -1) {
        LOG.debug("poll(): read no edits from the NN when requesting edits " +
          "after txid {}", lastReadTxid);
        return null;
      }
      // syncTxid is only updated when edits were actually read from the
      // NN -- otherwise it would look as if edits are generated faster than
      // we can read them, when really we are just temporarily unable to
      // read edits
      syncTxid = el.getSyncTxid();
      it = el.getBatches().iterator();
      long expectedFirstTxid = lastReadTxid + 1;
      // advance the read position before signalling a gap, so the stream
      // can keep being read after the exception
      lastReadTxid = el.getLastTxid();
      if (el.getFirstTxid() != expectedFirstTxid) {
        throw new MissingEventsException(expectedFirstTxid,
            el.getFirstTxid());
      }
    }

    // the iterator can still be empty here: el.getLastTxid != -1 but none of
    // the newly seen edit log ops actually got converted to events
    return it.hasNext() ? it.next() : null;
  } finally {
    scope.close();
  }
}
 
Example 23
Source Project: hadoop   Source File: NameNodeRpcServer.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads edit log ops starting at {@code txid}, translates them into inotify
 * event batches and returns them together with the first/last transaction
 * ids seen and the current sync txid.
 *
 * Reading stops once the configured per-RPC event limit is reached, the
 * sync txid is reached, or the selected streams are exhausted. If nothing
 * could be read, the first and last txid in the result are both -1.
 *
 * @param txid the first transaction id the caller wants events for
 * @return the translated batches plus txid bookkeeping
 * @throws IOException if the startup/operation/privilege checks or reading
 * the edit log fail
 */
@Override // ClientProtocol
public EventBatchList getEditsFromTxid(long txid) throws IOException {
  checkNNStartup();
  namesystem.checkOperation(OperationCategory.READ); // only active
  namesystem.checkSuperuserPrivilege();
  final int eventLimit = nn.conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_KEY,
      DFSConfigKeys.DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_DEFAULT);
  final FSEditLog editLog = namesystem.getFSImage().getEditLog();
  final long syncTxid = editLog.getSyncTxId();
  // If we haven't synced anything yet, we can only read finalized
  // segments since we can't reliably determine which txns in in-progress
  // segments have actually been committed (e.g. written to a quorum of JNs).
  // If we have synced txns, we can definitely read up to syncTxid since
  // syncTxid is only updated after a transaction is committed to all
  // journals. (In-progress segments written by old writers are already
  // discarded for us, so if we read any in-progress segments they are
  // guaranteed to have been written by this NameNode.)
  final boolean haveSynced = syncTxid > 0;

  final List<EventBatch> collected = Lists.newArrayList();
  long firstTxidSeen = -1;
  long lastTxidSeen = -1;

  if (haveSynced && txid > syncTxid) {
    // we can't read past syncTxid, so there's no point in going any further
    return new EventBatchList(collected, firstTxidSeen, lastTxidSeen,
        syncTxid);
  }

  Collection<EditLogInputStream> streams;
  try {
    streams = editLog.selectInputStreams(txid, 0, null, haveSynced);
  } catch (IllegalStateException e) {
    // can happen if we have transitioned out of active and haven't yet
    // transitioned to standby and are using QJM -- the edit log will be
    // closed and this exception will result
    LOG.info("NN is transitioning from active to standby and FSEditLog " +
    "is closed -- could not read edits");
    return new EventBatchList(collected, firstTxidSeen, lastTxidSeen,
        syncTxid);
  }

  int eventCount = 0;
  // our assumption in this code is the EditLogInputStreams are ordered by
  // starting txid
  scan:
  for (EditLogInputStream stream : streams) {
    try {
      for (FSEditLogOp op = readOp(stream); op != null;
          op = readOp(stream)) {
        final long opTxid = op.getTransactionId();
        // stop in the unlikely event that syncTxid is so out of date that
        // its segment has already been deleted, so the first txid we get is
        // greater than syncTxid
        if (haveSynced && opTxid > syncTxid) {
          break scan;
        }

        EventBatch translated = InotifyFSEditLogOpTranslator.translate(op);
        if (translated != null) {
          collected.add(translated);
          eventCount += translated.getEvents().length;
        }
        lastTxidSeen = Math.max(lastTxidSeen, opTxid);
        if (firstTxidSeen == -1) {
          firstTxidSeen = opTxid;
        }
        boolean reachedSync = haveSynced && opTxid == syncTxid;
        if (eventCount >= eventLimit || reachedSync) {
          // we're done
          break scan;
        }
      }
    } finally {
      // runs on normal exhaustion, on the labeled break and on exceptions
      stream.close();
    }
  }

  return new EventBatchList(collected, firstTxidSeen, lastTxidSeen, syncTxid);
}
 
Example 24
Source Project: hadoop   Source File: PBHelper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Converts a {@code GetEditsFromTxidResponseProto} into an
 * {@link EventBatchList}, decoding every event of every batch by its type.
 *
 * @param resp the protobuf response to decode
 * @return the decoded batches with the response's first, last and sync txids
 * @throws IOException if the response uses the old flat events list, if a
 * batch txid (other than -1) lies outside [firstTxid, lastTxid], or if an
 * event payload cannot be parsed
 * @throws RuntimeException if an event has an unexpected type
 */
public static EventBatchList convert(GetEditsFromTxidResponseProto resp) throws
  IOException {
  final InotifyProtos.EventsListProto list = resp.getEventsList();
  if (!list.getEventsList().isEmpty()) {
    throw new IOException("Can't handle old inotify server response.");
  }
  final long firstTxid = list.getFirstTxid();
  final long lastTxid = list.getLastTxid();

  final List<EventBatch> batches = Lists.newArrayList();
  for (InotifyProtos.EventBatchProto batchProto : list.getBatchList()) {
    final long txid = batchProto.getTxid();
    final boolean outOfRange = (txid < firstTxid) || (txid > lastTxid);
    // a txid of -1 is exempt from the range check
    if ((txid != -1) && outOfRange) {
      throw new IOException("Error converting TxidResponseProto: got a " +
          "transaction id " + txid + " that was outside the range of [" +
          firstTxid + ", " + lastTxid + "].");
    }

    final List<Event> events = Lists.newArrayList();
    for (InotifyProtos.EventProto p : batchProto.getEventsList()) {
      switch (p.getType()) {
        case EVENT_CLOSE: {
          InotifyProtos.CloseEventProto closeProto =
              InotifyProtos.CloseEventProto.parseFrom(p.getContents());
          events.add(new Event.CloseEvent(closeProto.getPath(),
              closeProto.getFileSize(), closeProto.getTimestamp()));
          break;
        }
        case EVENT_CREATE: {
          InotifyProtos.CreateEventProto createProto =
              InotifyProtos.CreateEventProto.parseFrom(p.getContents());
          // an empty symlink target in the proto is exposed as null
          events.add(new Event.CreateEvent.Builder()
              .iNodeType(createTypeConvert(createProto.getType()))
              .path(createProto.getPath())
              .ctime(createProto.getCtime())
              .ownerName(createProto.getOwnerName())
              .groupName(createProto.getGroupName())
              .perms(convert(createProto.getPerms()))
              .replication(createProto.getReplication())
              .symlinkTarget(createProto.getSymlinkTarget().isEmpty() ? null :
                  createProto.getSymlinkTarget())
              .defaultBlockSize(createProto.getDefaultBlockSize())
              .overwrite(createProto.getOverwrite()).build());
          break;
        }
        case EVENT_METADATA: {
          InotifyProtos.MetadataUpdateEventProto metaProto =
              InotifyProtos.MetadataUpdateEventProto.parseFrom(
                  p.getContents());
          // empty strings/lists and absent perms are exposed as null
          events.add(new Event.MetadataUpdateEvent.Builder()
              .path(metaProto.getPath())
              .metadataType(metadataUpdateTypeConvert(metaProto.getType()))
              .mtime(metaProto.getMtime())
              .atime(metaProto.getAtime())
              .replication(metaProto.getReplication())
              .ownerName(metaProto.getOwnerName().isEmpty() ? null :
                  metaProto.getOwnerName())
              .groupName(metaProto.getGroupName().isEmpty() ? null :
                  metaProto.getGroupName())
              .perms(metaProto.hasPerms() ?
                  convert(metaProto.getPerms()) : null)
              .acls(metaProto.getAclsList().isEmpty() ? null :
                  convertAclEntry(metaProto.getAclsList()))
              .xAttrs(metaProto.getXAttrsList().isEmpty() ? null :
                  convertXAttrs(metaProto.getXAttrsList()))
              .xAttrsRemoved(metaProto.getXAttrsRemoved())
              .build());
          break;
        }
        case EVENT_RENAME: {
          InotifyProtos.RenameEventProto renameProto =
              InotifyProtos.RenameEventProto.parseFrom(p.getContents());
          events.add(new Event.RenameEvent.Builder()
              .srcPath(renameProto.getSrcPath())
              .dstPath(renameProto.getDestPath())
              .timestamp(renameProto.getTimestamp())
              .build());
          break;
        }
        case EVENT_APPEND: {
          InotifyProtos.AppendEventProto appendProto =
              InotifyProtos.AppendEventProto.parseFrom(p.getContents());
          // a missing newBlock field is treated as false
          events.add(new Event.AppendEvent.Builder()
              .path(appendProto.getPath())
              .newBlock(appendProto.hasNewBlock() && appendProto.getNewBlock())
              .build());
          break;
        }
        case EVENT_UNLINK: {
          InotifyProtos.UnlinkEventProto unlinkProto =
              InotifyProtos.UnlinkEventProto.parseFrom(p.getContents());
          events.add(new Event.UnlinkEvent.Builder()
              .path(unlinkProto.getPath())
              .timestamp(unlinkProto.getTimestamp())
              .build());
          break;
        }
        default:
          throw new RuntimeException("Unexpected inotify event type: " +
              p.getType());
      }
    }
    batches.add(new EventBatch(txid, events.toArray(new Event[0])));
  }
  return new EventBatchList(batches, firstTxid, lastTxid, list.getSyncTxid());
}
 
Example 25
Source Project: hadoop   Source File: TestDFSUpgrade.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites every edits file in the NameNode storage directories with layout
 * version {@code CURRENT_LAYOUT_VERSION + 1}, starts a cluster on top of
 * them, and checks that the inotify stream opened at txid 0 still delivers
 * the CREATE event for "/TestUpgrade".
 *
 * @throws Exception on any setup, I/O or cluster failure
 */
@Test
public void testPreserveEditLogs() throws Exception {
  conf = new HdfsConfiguration();
  conf = UpgradeUtilities.initializeStorageStateConf(1, conf);
  String[] nameNodeDirs = conf.getStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
  conf.setBoolean(DFSConfigKeys.DFS_DATANODE_DUPLICATE_REPLICA_DELETION, false);

  log("Normal NameNode upgrade", 1);
  File[] created =
      UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
  for (final File createdDir : created) {
    List<String> fileNameList =
        IOUtils.listDirectory(createdDir, EditLogsFilter.INSTANCE);
    for (String fileName : fileNameList) {
      String tmpFileName = fileName + ".tmp";
      File existingFile = new File(createdDir, fileName);
      File tmpFile = new File(createdDir, tmpFileName);
      // Move the original aside, then copy its ops into a fresh file that
      // is written with a different layout version.
      Files.move(existingFile.toPath(), tmpFile.toPath());
      File newFile = new File(createdDir, fileName);
      Preconditions.checkState(newFile.createNewFile(),
          "Cannot create new edits log file in " + createdDir);
      // try-with-resources guarantees the reader is closed before the
      // temporary file is deleted (and on any failure while copying).
      try (EditLogFileInputStream in = new EditLogFileInputStream(tmpFile,
          HdfsConstants.INVALID_TXID, HdfsConstants.INVALID_TXID,
          false)) {
        EditLogFileOutputStream out = new EditLogFileOutputStream(conf, newFile,
            (int)tmpFile.length());
        out.create(NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION + 1);
        FSEditLogOp logOp = in.readOp();
        while (logOp != null) {
          out.write(logOp);
          logOp = in.readOp();
        }
        out.setReadyToFlush();
        out.flushAndSync(true);
        out.close();
      }
      Files.delete(tmpFile.toPath());
    }
  }

  cluster = createCluster();

  DFSInotifyEventInputStream ieis =
      cluster.getFileSystem().getInotifyEventStream(0);
  EventBatch batch = ieis.poll();
  // poll() may legitimately return null; fail with a message instead of an
  // NullPointerException on the next line.
  assertTrue("Should be able to get an event batch before the upgrade.",
      batch != null);
  Event[] events = batch.getEvents();
  assertTrue("Should be able to get transactions before the upgrade.",
      events.length > 0);
  // JUnit convention: expected value first, actual value second.
  assertEquals(Event.EventType.CREATE, events[0].getEventType());
  assertEquals("/TestUpgrade", ((CreateEvent) events[0]).getPath());
  cluster.shutdown();
  UpgradeUtilities.createEmptyDirs(nameNodeDirs);
}
 
Example 26
Source Project: hadoop   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Polls the stream repeatedly, without pausing, until it yields a
 * non-null batch.
 *
 * @param eis the inotify stream to poll
 * @return the first non-null batch returned by {@code eis.poll()}
 * @throws IOException if a poll fails
 * @throws MissingEventsException if a poll reports missing events
 */
private static EventBatch waitForNextEvents(DFSInotifyEventInputStream eis)
  throws IOException, MissingEventsException {
  EventBatch next;
  do {
    next = eis.poll();
  } while (next == null);
  return next;
}
 
Example 27
Source Project: hadoop   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Asserts that the batch's txid is strictly greater than the previous one.
 *
 * @param batch the batch whose txid is checked
 * @param prevTxid the txid of the previously seen batch
 * @return the txid of {@code batch}, for use as the next {@code prevTxid}
 */
private static long checkTxid(EventBatch batch, long prevTxid){
  final long newTxid = batch.getTxid();
  Assert.assertTrue("Previous txid " + prevTxid + " was not less than " +
      "new txid " + newTxid, prevTxid < newTxid);
  return newTxid;
}
 
Example 28
Source Project: hadoop   Source File: TestDFSInotifyEventInputStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates ten directories through NameNode 0, transitions NameNode 1 to
 * active and creates ten more through it, then checks that the inotify
 * stream opened on NameNode 0 yields exactly the first ten CREATE events
 * and nothing afterwards.
 */
@Test(timeout = 120000)
public void testTwoActiveNNs() throws IOException, MissingEventsException {
  Configuration conf = new HdfsConfiguration();
  MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();

  try {
    cluster.getDfsCluster().waitActive();
    cluster.getDfsCluster().transitionToActive(0);
    DFSClient oldActiveClient = new DFSClient(
        cluster.getDfsCluster().getNameNode(0).getNameNodeAddress(), conf);
    DFSClient newActiveClient = new DFSClient(
        cluster.getDfsCluster().getNameNode(1).getNameNodeAddress(), conf);
    DFSInotifyEventInputStream oldActiveStream =
        oldActiveClient.getInotifyEventStream();

    for (int dir = 0; dir < 10; dir++) {
      oldActiveClient.mkdirs("/dir" + dir, null, false);
    }

    cluster.getDfsCluster().transitionToActive(1);
    for (int dir = 10; dir < 20; dir++) {
      newActiveClient.mkdirs("/dir" + dir, null, false);
    }

    // make sure that the old active can't read any further than the edits
    // it logged itself (it has no idea whether the in-progress edits from
    // the other writer have actually been committed)
    for (int dir = 0; dir < 10; dir++) {
      EventBatch batch = waitForNextEvents(oldActiveStream);
      Assert.assertEquals(1, batch.getEvents().length);
      Event first = batch.getEvents()[0];
      Assert.assertTrue(first.getEventType() == Event.EventType.CREATE);
      String createdPath = ((Event.CreateEvent) first).getPath();
      Assert.assertTrue(createdPath.equals("/dir" + dir));
    }
    Assert.assertTrue(oldActiveStream.poll() == null);
  } finally {
    try {
      cluster.shutdown();
    } catch (ExitUtil.ExitException e) {
      // expected because the old active will be unable to flush the
      // end-of-segment op since it is fenced
    }
  }
}
 
Example 29
Source Project: big-c   Source File: DFSInotifyEventInputStream.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Fetches the next batch of events from the stream, or returns null when
 * no new batch is currently available.
 *
 * @return the next {@link EventBatch}, or null if none is available yet
 * @throws IOException because of network error or edit log corruption.
 * Also possible if JournalNodes are unresponsive in the QJM setting (even
 * one unresponsive JournalNode is enough in rare cases), so catching this
 * exception and retrying at least a few times is recommended.
 * @throws MissingEventsException if the next batch cannot be returned
 * because the data for the events (and possibly some subsequent events)
 * has been deleted (generally because this stream is a very large number
 * of transactions behind the current state of the NameNode). It is safe to
 * continue reading from the stream after this exception is thrown; the
 * next available batch of events will be returned.
 */
public EventBatch poll() throws IOException, MissingEventsException {
  TraceScope scope =
      Trace.startSpan("inotifyPoll", traceSampler);
  try {
    // The first call only learns the latest committed txid from the NN;
    // callers have to poll again to actually receive events.
    if (lastReadTxid == -1) {
      LOG.debug("poll(): lastReadTxid is -1, reading current txid from NN");
      lastReadTxid = namenode.getCurrentEditLogTxid();
      return null;
    }

    if (!it.hasNext()) {
      // Local buffer is drained: ask the NN for everything after the last
      // txid we have seen.
      EventBatchList el = namenode.getEditsFromTxid(lastReadTxid + 1);
      if (el.getLastTxid() == -1) {
        LOG.debug("poll(): read no edits from the NN when requesting edits " +
          "after txid {}", lastReadTxid);
        return null;
      }

      // syncTxid is only updated when some edits were actually read --
      // otherwise it would seem like edits are being generated faster than
      // we can read them when we are really just temporarily unable to
      // read edits.
      syncTxid = el.getSyncTxid();
      it = el.getBatches().iterator();
      long expectedFirstTxid = lastReadTxid + 1;
      // Advance the position before signalling the gap so that the stream
      // stays usable after the exception.
      lastReadTxid = el.getLastTxid();
      if (el.getFirstTxid() != expectedFirstTxid) {
        throw new MissingEventsException(expectedFirstTxid,
            el.getFirstTxid());
      }
    }

    // The iterator can still be empty here: el.getLastTxid() != -1 but none
    // of the newly seen edit log ops were converted to events.
    return it.hasNext() ? it.next() : null;
  } finally {
    scope.close();
  }
}
 
Example 30
Source Project: big-c   Source File: NameNodeRpcServer.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads edit log ops starting at {@code txid}, translates them into inotify
 * event batches and returns them together with the first/last transaction
 * ids seen and the current sync txid.
 *
 * Reading stops once the configured per-RPC event limit is reached, the
 * sync txid is reached, or the selected streams are exhausted. If nothing
 * could be read, the first and last txid in the result are both -1.
 *
 * @param txid the first transaction id the caller wants events for
 * @return the translated batches plus txid bookkeeping
 * @throws IOException if the startup/operation/privilege checks or reading
 * the edit log fail
 */
@Override // ClientProtocol
public EventBatchList getEditsFromTxid(long txid) throws IOException {
  checkNNStartup();
  namesystem.checkOperation(OperationCategory.READ); // only active
  namesystem.checkSuperuserPrivilege();
  final int eventLimit = nn.conf.getInt(
      DFSConfigKeys.DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_KEY,
      DFSConfigKeys.DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_DEFAULT);
  final FSEditLog editLog = namesystem.getFSImage().getEditLog();
  final long syncTxid = editLog.getSyncTxId();
  // If we haven't synced anything yet, we can only read finalized
  // segments since we can't reliably determine which txns in in-progress
  // segments have actually been committed (e.g. written to a quorum of JNs).
  // If we have synced txns, we can definitely read up to syncTxid since
  // syncTxid is only updated after a transaction is committed to all
  // journals. (In-progress segments written by old writers are already
  // discarded for us, so if we read any in-progress segments they are
  // guaranteed to have been written by this NameNode.)
  final boolean haveSynced = syncTxid > 0;

  final List<EventBatch> collected = Lists.newArrayList();
  long firstTxidSeen = -1;
  long lastTxidSeen = -1;

  if (haveSynced && txid > syncTxid) {
    // we can't read past syncTxid, so there's no point in going any further
    return new EventBatchList(collected, firstTxidSeen, lastTxidSeen,
        syncTxid);
  }

  Collection<EditLogInputStream> streams;
  try {
    streams = editLog.selectInputStreams(txid, 0, null, haveSynced);
  } catch (IllegalStateException e) {
    // can happen if we have transitioned out of active and haven't yet
    // transitioned to standby and are using QJM -- the edit log will be
    // closed and this exception will result
    LOG.info("NN is transitioning from active to standby and FSEditLog " +
    "is closed -- could not read edits");
    return new EventBatchList(collected, firstTxidSeen, lastTxidSeen,
        syncTxid);
  }

  int eventCount = 0;
  // our assumption in this code is the EditLogInputStreams are ordered by
  // starting txid
  scan:
  for (EditLogInputStream stream : streams) {
    try {
      for (FSEditLogOp op = readOp(stream); op != null;
          op = readOp(stream)) {
        final long opTxid = op.getTransactionId();
        // stop in the unlikely event that syncTxid is so out of date that
        // its segment has already been deleted, so the first txid we get is
        // greater than syncTxid
        if (haveSynced && opTxid > syncTxid) {
          break scan;
        }

        EventBatch translated = InotifyFSEditLogOpTranslator.translate(op);
        if (translated != null) {
          collected.add(translated);
          eventCount += translated.getEvents().length;
        }
        lastTxidSeen = Math.max(lastTxidSeen, opTxid);
        if (firstTxidSeen == -1) {
          firstTxidSeen = opTxid;
        }
        boolean reachedSync = haveSynced && opTxid == syncTxid;
        if (eventCount >= eventLimit || reachedSync) {
          // we're done
          break scan;
        }
      }
    } finally {
      // runs on normal exhaustion, on the labeled break and on exceptions
      stream.close();
    }
  }

  return new EventBatchList(collected, firstTxidSeen, lastTxidSeen, syncTxid);
}