Java Code Examples for org.apache.parquet.Strings#isNullOrEmpty()

The following examples show how to use org.apache.parquet.Strings#isNullOrEmpty(). Each example is taken from an open source project; the source file, project, and license are listed above the code.
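For context, Strings.isNullOrEmpty(value) returns true when the argument is null or the empty string and false otherwise; it is a small helper in parquet-mr that mirrors Guava's method of the same name. The sketch below is a minimal, self-contained illustration of the usual guard pattern (the class name and the property key are illustrative, not taken from any of the projects listed):

import org.apache.parquet.Strings;

public class IsNullOrEmptyDemo {
  public static void main(String[] args) {
    // Both null and the empty string count as "empty".
    System.out.println(Strings.isNullOrEmpty(null));    // true
    System.out.println(Strings.isNullOrEmpty(""));      // true
    System.out.println(Strings.isNullOrEmpty("a/b/c")); // false

    // Typical use: only act on an optional setting when it is actually present.
    String codec = System.getProperty("compression.codec"); // illustrative key
    if (!Strings.isNullOrEmpty(codec)) {
      System.out.println("Using compression codec: " + codec);
    }
  }
}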
Example 1
Source File: PathUtils.java    From Bats with Apache License 2.0
/**
 * Normalizes the given path, eliminating repeated forward slashes.
 *
 * @param path  the path to normalize
 * @return  normalized path
 */
public static final String normalize(final String path) {
  if (Strings.isNullOrEmpty(Preconditions.checkNotNull(path))) {
    return path;
  }

  final StringBuilder builder = new StringBuilder();
  char last = path.charAt(0);
  builder.append(last);
  for (int i=1; i<path.length(); i++) {
    char cur = path.charAt(i);
    if (last == '/' && cur == last) {
      continue;
    }
    builder.append(cur);
    last = cur;
  }
  return builder.toString();
}
 
Example 2
Source File: WholeFileTransformerProcessor.java    From datacollector with Apache License 2.0
@Override
protected List<ConfigIssue> init() {
  List<ConfigIssue> issues = super.init();

  if (Strings.isNullOrEmpty(jobConfig.tempDir)) {
    issues.add(getContext().createConfigIssue(
        Groups.JOB.name(),
        JobConfig.TEMPDIR,
        Errors.CONVERT_02
    ));
  }

  this.context = getContext();
  this.errorRecordHandler = new DefaultErrorRecordHandler(getContext());

  tempDirElEval = context.createELEval("tempDir");
  compressionElEval = context.createELEval("compressionCodec");
  rateLimitElEval = FileRefUtil.createElEvalForRateLimit(getContext());

  variables = context.createELVars();

  return issues;
}
 
Example 3
Source File: PathUtils.java    From Bats with Apache License 2.0
/**
 * Returns a normalized, combined path out of the given path segments.
 *
 * @param parts  path segments to combine
 * @return  normalized, combined path
 * @see #normalize(String)
 */
public static final String join(final String... parts) {
  final StringBuilder sb = new StringBuilder();
  for (final String part:parts) {
    Preconditions.checkNotNull(part, "parts cannot contain null");
    if (!Strings.isNullOrEmpty(part)) {
      sb.append(part).append("/");
    }
  }
  if (sb.length() > 0) {
    sb.deleteCharAt(sb.length() - 1);
  }
  final String path = sb.toString();
  return normalize(path);
}
 
Example 4
Source File: SparkExecutableLivy.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void onExecuteStart(ExecutableContext executableContext) {
    final Output output = getOutput();
    if (output.getExtra().containsKey(START_TIME)) {
        final String sparkJobID = output.getExtra().get(ExecutableConstants.SPARK_JOB_ID);
        if (sparkJobID == null) {
            getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            return;
        }
        try {
            String status = getAppState(sparkJobID);
            if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
                    || LivyStateEnum.error.name().equalsIgnoreCase(status)
                    || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
                //remove previous mr job info
                super.onExecuteStart(executableContext);
            } else {
                getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            }
        } catch (IOException e) {
            logger.warn("error get hadoop status");
            super.onExecuteStart(executableContext);
        }
    } else {
        super.onExecuteStart(executableContext);
    }
}
 
Example 5
Source File: SparkExecutableLivy.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected ExecuteResult onResumed(String appId, ExecutableManager mgr) throws ExecuteException {
    Map<String, String> info = new HashMap<>();
    try {
        logger.info("livy spark_job_id:" + appId + " resumed");
        info.put(ExecutableConstants.SPARK_JOB_ID, appId);

        while (!isPaused() && !isDiscarded()) {
            String status = getAppState(appId);

            if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
                    || LivyStateEnum.error.name().equalsIgnoreCase(status)
                    || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
                mgr.updateJobOutput(getId(), ExecutableState.ERROR, null, appId + " has failed");
                return new ExecuteResult(ExecuteResult.State.FAILED, appId + " has failed");
            }

            if (LivyStateEnum.success.name().equalsIgnoreCase(status)) {
                mgr.addJobInfo(getId(), info);
                return new ExecuteResult(ExecuteResult.State.SUCCEED, appId + " has finished");
            }

            Thread.sleep(5000);
        }

        killAppRetry(appId);

        if (isDiscarded()) {
            return new ExecuteResult(ExecuteResult.State.DISCARDED, appId + " is discarded");
        } else {
            return new ExecuteResult(ExecuteResult.State.STOPPED, appId + " is stopped");
        }

    } catch (Exception e) {
        logger.error("error run spark job:", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }

}
 
Example 6
Source File: SparkExecutableLivy.java    From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected int killAppRetry(String appId) throws IOException, InterruptedException {
    String status = getAppState(appId);
    if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
            || LivyStateEnum.error.name().equalsIgnoreCase(status)
            || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
        logger.warn(appId + "is final state, no need to kill");
        return 0;
    }

    killApp(appId);

    status = getAppState(appId);
    int retry = 0;
    while ((Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
            || LivyStateEnum.error.name().equalsIgnoreCase(status)
            || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) && retry < 5) {
        killApp(appId);

        Thread.sleep(1000);

        status = getAppState(appId);
        retry++;
    }

    if (Strings.isNullOrEmpty(status)) {
        logger.info(appId + " killed successfully");
        return 0;
    } else {
        logger.info(appId + " killed failed");
        return 1;
    }
}
 
Example 7
Source File: SparkExecutableLivy.java    From kylin with Apache License 2.0
@Override
protected void onExecuteStart(ExecutableContext executableContext) {
    final Output output = getOutput();
    if (output.getExtra().containsKey(START_TIME)) {
        final String sparkJobID = output.getExtra().get(ExecutableConstants.SPARK_JOB_ID);
        if (sparkJobID == null) {
            getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            return;
        }
        try {
            String status = getAppState(sparkJobID);
            if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
                    || LivyStateEnum.error.name().equalsIgnoreCase(status)
                    || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
                //remove previous mr job info
                super.onExecuteStart(executableContext);
            } else {
                getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
            }
        } catch (IOException e) {
            logger.warn("error get hadoop status");
            super.onExecuteStart(executableContext);
        }
    } else {
        super.onExecuteStart(executableContext);
    }
}
 
Example 8
Source File: SparkExecutableLivy.java    From kylin with Apache License 2.0
@Override
protected ExecuteResult onResumed(String appId, ExecutableManager mgr) throws ExecuteException {
    Map<String, String> info = new HashMap<>();
    try {
        logger.info("livy spark_job_id:" + appId + " resumed");
        info.put(ExecutableConstants.SPARK_JOB_ID, appId);

        while (!isPaused() && !isDiscarded()) {
            String status = getAppState(appId);

            if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
                    || LivyStateEnum.error.name().equalsIgnoreCase(status)
                    || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
                mgr.updateJobOutput(getId(), ExecutableState.ERROR, null, appId + " has failed");
                return new ExecuteResult(ExecuteResult.State.FAILED, appId + " has failed");
            }

            if (LivyStateEnum.success.name().equalsIgnoreCase(status)) {
                mgr.addJobInfo(getId(), info);
                return new ExecuteResult(ExecuteResult.State.SUCCEED, appId + " has finished");
            }

            Thread.sleep(5000);
        }

        killAppRetry(appId);

        if (isDiscarded()) {
            return new ExecuteResult(ExecuteResult.State.DISCARDED, appId + " is discarded");
        } else {
            return new ExecuteResult(ExecuteResult.State.STOPPED, appId + " is stopped");
        }

    } catch (Exception e) {
        logger.error("error run spark job:", e);
        return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
    }

}
 
Example 9
Source File: SparkExecutableLivy.java    From kylin with Apache License 2.0
@Override
protected int killAppRetry(String appId) throws IOException, InterruptedException {
    String status = getAppState(appId);
    if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
            || LivyStateEnum.error.name().equalsIgnoreCase(status)
            || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
        logger.warn(appId + "is final state, no need to kill");
        return 0;
    }

    killApp(appId);

    status = getAppState(appId);
    int retry = 0;
    while ((Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)
            || LivyStateEnum.error.name().equalsIgnoreCase(status)
            || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) && retry < 5) {
        killApp(appId);

        Thread.sleep(1000);

        status = getAppState(appId);
        retry++;
    }

    if (Strings.isNullOrEmpty(status)) {
        logger.info(appId + " killed successfully");
        return 0;
    } else {
        logger.info(appId + " killed failed");
        return 1;
    }
}
 
Example 10
Source File: ThriftReadSupport.java    From parquet-mr with Apache License 2.0
public static FieldProjectionFilter getFieldProjectionFilter(Configuration conf) {
  String deprecated = conf.get(THRIFT_COLUMN_FILTER_KEY);
  String strict = conf.get(STRICT_THRIFT_COLUMN_FILTER_KEY);

  if (Strings.isNullOrEmpty(deprecated) && Strings.isNullOrEmpty(strict)) {
    return null;
  }

  if (!Strings.isNullOrEmpty(deprecated) && !Strings.isNullOrEmpty(strict)) {
    throw new ThriftProjectionException(
        "You cannot provide both "
            + THRIFT_COLUMN_FILTER_KEY
            + " and "
            + STRICT_THRIFT_COLUMN_FILTER_KEY
            +"! "
            + THRIFT_COLUMN_FILTER_KEY
            + " is deprecated."
    );
  }

  if (!Strings.isNullOrEmpty(deprecated)) {
    LOG.warn("Using {} is deprecated. Please see the docs for {}!",
        THRIFT_COLUMN_FILTER_KEY, STRICT_THRIFT_COLUMN_FILTER_KEY);
    return new DeprecatedFieldProjectionFilter(deprecated);
  }

  return StrictFieldProjectionFilter.fromSemicolonDelimitedString(strict);
}
 
Example 11
Source File: HiveMetadataUtils.java    From dremio-oss with Apache License 2.0
public static HiveStorageCapabilities getHiveStorageCapabilities(final StorageDescriptor storageDescriptor) {
  final String location = storageDescriptor.getLocation();

  if (null != location) {
    final URI uri;
    try {
      uri = URI.create(location);
    } catch (IllegalArgumentException e) {
      // unknown table source, default to HDFS.
      return HiveStorageCapabilities.DEFAULT_HDFS;
    }

    final String scheme = uri.getScheme();
    if (!Strings.isNullOrEmpty(scheme)) {
      if (scheme.regionMatches(true, 0, "s3", 0, 2)) {
        /* AWS S3 does not support impersonation, last modified times or orc split file ids. */
        return HiveStorageCapabilities.newBuilder()
          .supportsImpersonation(false)
          .supportsLastModifiedTime(false)
          .supportsOrcSplitFileIds(false)
          .build();
      } else if (scheme.regionMatches(true, 0, "wasb", 0, 4) ||
        scheme.regionMatches(true, 0, "abfs", 0, 4) ||
        scheme.regionMatches(true, 0, "wasbs", 0, 5) ||
        scheme.regionMatches(true, 0, "abfss", 0, 5)) {
        /* DX-17365: Azure Storage does not support correct last modified times, Azure returns last modified times,
         *  however, the timestamps returned are incorrect. They reference the folder's create time rather
         *  than the folder content's last modified time. Please see Prototype.java for Azure storage fs uri schemes. */
        return HiveStorageCapabilities.newBuilder()
          .supportsImpersonation(true)
          .supportsLastModifiedTime(false)
          .supportsOrcSplitFileIds(true)
          .build();
      } else if (!scheme.regionMatches(true, 0, "hdfs", 0, 4)) {
        /* Most hive supported non-HDFS file systems allow for impersonation and last modified times, but
           not orc split file ids.  */
        return HiveStorageCapabilities.newBuilder()
          .supportsImpersonation(true)
          .supportsLastModifiedTime(true)
          .supportsOrcSplitFileIds(false)
          .build();
      }
    }
  }
  // Default to HDFS.
  return HiveStorageCapabilities.DEFAULT_HDFS;
}
 