Java Code Examples for org.apache.hadoop.io.Writable#toString()

The following examples show how to use org.apache.hadoop.io.Writable#toString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PGBulkloadExportMapper.java    From aliyun-maxcompute-data-collectors with Apache License 2.0 7 votes vote down vote up
/**
 * Writes the string form of {@code value} to {@code writer}.
 *
 * <p>A {@code Text} value is written followed by a line separator; a
 * {@code SqoopRecord} value is written as-is. Any other value type is ignored.
 *
 * <p>On any failure the statement {@code "DROP TABLE " + tmpTableName} is
 * passed to {@code doExecuteUpdate}, {@code cleanup(context)} is called, and
 * the original exception is rethrown wrapped in an {@link IOException}.
 *
 * @param key     the input key; not read by this method
 * @param value   the record whose {@code toString()} output is written
 * @param context the task context, handed to {@code cleanup} on failure
 * @throws IOException          wrapping whatever exception interrupted the write
 * @throws InterruptedException declared by the signature; not thrown directly here
 */
public void map(LongWritable key, Writable value, Context context)
  throws IOException, InterruptedException {
  try {
    final String line = value.toString();
    final boolean isText = value instanceof Text;
    if (isText || value instanceof SqoopRecord) {
      writer.write(line, 0, line.length());
      // Only Text values get an explicit line terminator appended.
      if (isText) {
        writer.newLine();
      }
    }
  } catch (Exception failure) {
    doExecuteUpdate("DROP TABLE " + tmpTableName);
    cleanup(context);
    throw new IOException(failure);
  }
}
 
Example 2
Source File: PatternMatcher.java    From jumbune with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Tests whether the string form of a value matches a regular expression in
 * its entirety.
 *
 * @param value
 *            - the value to test; may be null
 * @param regex
 *            - the regular expression, compiled on every call that reaches
 *            the matching step
 * @return true only when value is non-null, its string form is non-null and
 *         non-empty, and that whole string matches the expression; false
 *         otherwise (an empty string is never reported as a match)
 */
public static boolean match(Writable value, String regex) {
	final String text = (value == null) ? null : value.toString();
	if (text == null || text.isEmpty()) {
		return false;
	}
	// TODO: Remove Pattern.compile
	return Pattern.compile(regex).matcher(text).matches();
}
 
Example 3
Source File: DynamoDBSerDe.java    From emr-dynamodb-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Passes a {@code DynamoDBItemWritable} through unchanged and rejects
 * everything else.
 *
 * @param dataMap the record to deserialize
 * @return {@code dataMap} itself when it is a {@code DynamoDBItemWritable}
 * @throws SerDeException when {@code dataMap} is null or of any other type
 */
@Override
public Object deserialize(Writable dataMap) throws SerDeException {
  if (dataMap instanceof DynamoDBItemWritable) {
    return dataMap;
  }
  // instanceof is false for null, so guard before dereferencing: report the
  // problem through the declared exception instead of a NullPointerException.
  if (dataMap == null) {
    throw new SerDeException("Expected DynamoDBItemWritable data type, got null");
  }
  throw new SerDeException("Expected DynamoDBItemWritable data type, got "
      + dataMap.getClass().getName() + " data: " + dataMap.toString());
}
 
Example 4
Source File: PatternMatcher.java    From jumbune with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Tests whether the string form of a value matches a pre-compiled pattern in
 * its entirety.
 *
 * @param value
 *            - the value to test; may be null
 * @param pattern
 *            - the compiled pattern the whole string must match
 * @return true only when value is non-null, its string form is non-null and
 *         non-empty, and that whole string matches the pattern; false
 *         otherwise (an empty string is never reported as a match)
 */
public static boolean match(Writable value, Pattern pattern) {
	if (value != null) {
		final String candidate = value.toString();
		if (candidate != null && candidate.length() > 0) {
			return pattern.matcher(candidate).matches();
		}
	}
	return false;
}
 
Example 5
Source File: UtilES.java    From deep-spark with Apache License 2.0 5 votes vote down vote up
/**
 * Unwraps a {@code Writable} into the plain Java value it carries.
 *
 * <p>Supported wrappers: {@code NullWritable} (returned as the
 * {@code NullWritable} singleton itself), {@code BooleanWritable},
 * {@code Text} (as a {@code String}), {@code ByteWritable},
 * {@code IntWritable}, {@code LongWritable}, {@code BytesWritable} (as a
 * {@code byte[]}), {@code DoubleWritable} and {@code FloatWritable}.
 * Primitive results are boxed.
 *
 * @param writable the wrapper to unwrap; may be null
 * @return the wrapped value, or null when {@code writable} is null or of an
 *         unsupported type
 * @throws IllegalAccessException    declared by the signature; not thrown by this body
 * @throws InstantiationException    declared by the signature; not thrown by this body
 * @throws InvocationTargetException declared by the signature; not thrown by this body
 */
private static Object getObjectFromWritable(Writable writable)
        throws IllegalAccessException, InstantiationException, InvocationTargetException {
    if (writable instanceof NullWritable) {
        return NullWritable.get();
    }
    if (writable instanceof BooleanWritable) {
        return ((BooleanWritable) writable).get();
    }
    if (writable instanceof Text) {
        return writable.toString();
    }
    if (writable instanceof ByteWritable) {
        return ((ByteWritable) writable).get();
    }
    if (writable instanceof IntWritable) {
        return ((IntWritable) writable).get();
    }
    if (writable instanceof LongWritable) {
        return ((LongWritable) writable).get();
    }
    if (writable instanceof BytesWritable) {
        // getBytes() exposes the backing buffer, which may be longer than the
        // valid data; trim to getLength() so no stale padding bytes leak out.
        final BytesWritable bytes = (BytesWritable) writable;
        return java.util.Arrays.copyOf(bytes.getBytes(), bytes.getLength());
    }
    if (writable instanceof DoubleWritable) {
        return ((DoubleWritable) writable).get();
    }
    if (writable instanceof FloatWritable) {
        return ((FloatWritable) writable).get();
    }
    // Unsupported (or null) wrapper: nothing to unwrap.
    return null;
}
 
Example 6
Source File: SolrSerde.java    From hive-solr with MIT License 4 votes vote down vote up
/**
 * Converts a {@code MapWritable} record into the reusable {@code row} list,
 * producing exactly one entry per configured column, in column order.
 *
 * <p>For each name in {@code columnNames} the value is looked up in the input
 * map. STRING columns take the value's string form; LONG and INT columns are
 * parsed with {@link Long#parseLong(String)}. A column yields null when the
 * value is absent, is a {@code NullWritable}, cannot be parsed as a number,
 * or has a type that is not handled here.
 *
 * @param writable the input record; cast to {@code MapWritable} without a check
 * @return the shared {@code row} list, cleared and refilled on every call
 * @throws SerDeException declared by the signature; not thrown by this body
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    final MapWritable input = (MapWritable) writable;
    final Text lookupKey = new Text();
    row.clear();

    for (int i = 0; i < columnNames.size(); i++) {
        lookupKey.set(columnNames.get(i));
        final Writable value = input.get(lookupKey);

        Object obj = null;
        if (value != null && !NullWritable.get().equals(value)) {
            final TypeInfo typeInfo = columnTypes.get(i);
            if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) {
                final PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) typeInfo;
                switch (primitiveInfo.getPrimitiveCategory()) {
                    case STRING:
                        obj = value.toString();
                        break;
                    case LONG:
                    case INT:
                        // NOTE(review): INT columns also receive a Long here, as in
                        // the original - confirm the object inspector accepts that.
                        try {
                            obj = Long.parseLong(value.toString());
                        } catch (NumberFormatException e) {
                            // Best effort: an unparsable number becomes a null cell.
                            e.printStackTrace();
                        }
                        break;
                    default:
                        // Other primitive categories are not converted; cell stays null.
                        break;
                }
            }
        }

        // Always add an entry, even for a missing value: skipping it would
        // shift every later value into the wrong column position.
        row.add(obj);
    }

    return row;
}
 
Example 7
Source File: DynamoDBExportSerDe.java    From emr-dynamodb-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Parses a delimited {@code Text} record into a {@code DynamoDBItemWritable}.
 *
 * <p>The text is split into fields on {@code byteToString(1)}; each field is
 * split into a name/value pair on {@code byteToString(2)}, and the value part
 * is decoded with {@code HiveDynamoDBItemType.deserializeAttributeValue}.
 *
 * @param inputData the record to parse; may be null
 * @return the parsed item, or null when the input is null, its text is null
 *         or empty, or splitting it yields no fields
 * @throws SerDeException when the input is not a {@code Text}, when a field
 *         is empty, or when a field does not split into exactly two parts
 */
@Override
public Object deserialize(Writable inputData) throws SerDeException {
  if (inputData == null) {
    return null;
  }
  if (!(inputData instanceof Text)) {
    throw new SerDeException(getClass().toString() + ": expects Text object!");
  }

  final String data = inputData.toString();
  if (Strings.isNullOrEmpty(data)) {
    return null;
  }

  final String[] fields = data.split(byteToString(1));
  if (fields.length == 0) {
    return null;
  }

  final Map<String, AttributeValue> item = Maps.newHashMap();
  final String pairSeparator = byteToString(2);

  for (String field : fields) {
    if (Strings.isNullOrEmpty(field)) {
      throw new SerDeException("Empty fields in data: " + data);
    }

    // Kept as a List so the error message renders exactly as before.
    final List<String> values = Arrays.asList(field.split(pairSeparator));
    if (values.size() != 2) {
      throw new SerDeException("Invalid record with map value: " + values);
    }

    final String attributeName = values.get(0);
    final AttributeValue attributeValue =
        HiveDynamoDBItemType.deserializeAttributeValue(values.get(1));
    item.put(attributeName, attributeValue);
  }

  return new DynamoDBItemWritable(item);
}
 
Example 8
Source File: MoreIndexingFilter.java    From anthelion with Apache License 2.0 4 votes vote down vote up
/**
 * <p>
 * Resolves the document's MIME type and adds it to the "type" field of the
 * document, optionally followed by the parts returned by
 * {@code getParts}, so that searches can be confined by content type or by
 * one of its parts.
 * </p>
 * <p>
 * For example, if contentType is application/vnd.ms-powerpoint, search can be
 * done with one of the following qualifiers
 * type:application/vnd.ms-powerpoint type:application type:vnd.ms-powerpoint.
 * According to the original notes the query filter is implemented in
 * {@link TypeQueryFilter}.
 * </p>
 * <p>
 * The declared content type is read from the crawl datum metadata first and
 * from the parse metadata second. When neither provides one, the type is
 * derived from the URL alone.
 * </p>
 *
 * @param doc the document to add the "type" values to
 * @param data parse data consulted for the Content-Type when the datum has none
 * @param url the document URL, used to guess the type when none is declared
 * @param datum crawl datum whose metadata is checked first for the Content-Type
 * @return the same {@code doc}; unchanged when no MIME type could be resolved
 */
private NutchDocument addType(NutchDocument doc, ParseData data, String url,
    CrawlDatum datum) {
  final Writable datumContentType = datum.getMetaData().get(
      new Text(Response.CONTENT_TYPE));
  final String declaredType = (datumContentType != null)
      ? datumContentType.toString()
      : data.getMeta(Response.CONTENT_TYPE);

  String resolvedType;
  if (declaredType != null) {
    resolvedType = MIME.forName(MimeUtil.cleanMimeType(declaredType));
  } else {
    // Note by Jerome Charron on 20050415: content type was not solved by a
    // previous plugin, so guess it from the URL. Using the document content
    // as well would be better, but it is not available here.
    resolvedType = MIME.getMimeType(url);
  }

  // Nothing to index when the type could not be resolved.
  if (resolvedType == null) {
    return doc;
  }

  // Replace the type with its configured mapping, when mapping is enabled.
  if (mapMimes && mimeMap.containsKey(resolvedType)) {
    resolvedType = mimeMap.get(resolvedType);
  }

  doc.add("type", resolvedType);

  // Optionally index the individual parts of the content type as well.
  if (conf.getBoolean("moreIndexingFilter.indexMimeTypeParts", true)) {
    for (String part : getParts(resolvedType)) {
      doc.add("type", part);
    }
  }

  // Left for future improvement: indexing the MIME type parameters.

  return doc;
}
 
Example 9
Source File: MoreIndexingFilter.java    From nutch-htmlunit with Apache License 2.0 4 votes vote down vote up
/**
 * <p>
 * Determines the MIME type of the document and records it in the document's
 * "type" field, optionally together with the parts returned by
 * {@code getParts}, so that search results can be confined by content type
 * or by one of its parts.
 * </p>
 * <p>
 * For example, if contentType is application/vnd.ms-powerpoint, search can be
 * done with one of the following qualifiers
 * type:application/vnd.ms-powerpoint type:application type:vnd.ms-powerpoint.
 * According to the original notes the query filter is implemented in
 * {@link TypeQueryFilter}.
 * </p>
 * <p>
 * Lookup order for the declared content type: crawl datum metadata, then
 * parse metadata. If neither has one, {@code tika.detect(url)} is used.
 * </p>
 *
 * @param doc the document receiving the "type" values
 * @param data parse data used as the fallback source of the Content-Type
 * @param url the document URL, used for detection when no type is declared
 * @param datum crawl datum whose metadata is the first source of the Content-Type
 * @return the same {@code doc}; left untouched when no MIME type was found
 */
private NutchDocument addType(NutchDocument doc, ParseData data, String url,
    CrawlDatum datum) {
  final Text contentTypeKey = new Text(Response.CONTENT_TYPE);
  final Writable fromDatum = datum.getMetaData().get(contentTypeKey);

  String declared;
  if (fromDatum == null) {
    declared = data.getMeta(Response.CONTENT_TYPE);
  } else {
    declared = fromDatum.toString();
  }

  // Note by Jerome Charron on 20050415: when no previous plugin solved the
  // content type, try to find it from the URL. Using the document content
  // too would be better, but it is not available here.
  String effectiveType = (declared == null)
      ? tika.detect(url)
      : MIME.forName(MimeUtil.cleanMimeType(declared));

  // Give up quietly if the content type is still unknown.
  if (effectiveType == null) {
    return doc;
  }

  // Apply the configured MIME mapping, if enabled and present for this type.
  if (mapMimes) {
    if (mimeMap.containsKey(effectiveType)) {
      effectiveType = mimeMap.get(effectiveType);
    }
  }

  doc.add("type", effectiveType);

  // Optionally add each sub part of the content type as its own value.
  final boolean indexParts =
      conf.getBoolean("moreIndexingFilter.indexMimeTypeParts", true);
  if (indexParts) {
    final String[] parts = getParts(effectiveType);
    for (int i = 0; i < parts.length; i++) {
      doc.add("type", parts[i]);
    }
  }

  // Left for future improvement: indexing the MIME type parameters.

  return doc;
}