com.datastax.spark.connector.japi.CassandraRow Java Examples

The following examples show how to use com.datastax.spark.connector.japi.CassandraRow. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CassandraRowToSpan.java    From zipkin-dependencies with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the endpoint stored in the named column of the given row.
 *
 * <p>Outside of tests the column is read as a UDT value; in tests the raw object is read
 * instead. Both paths delegate to an overloaded {@code readEndpoint}.
 *
 * @param row the row to read from
 * @param name the column holding the endpoint
 * @return whatever the delegated {@code readEndpoint} overload returns
 */
private Endpoint readEndpoint(CassandraRow row, String name) {
  if (inTest) {
    // UDT type doesn't work in tests
    // Caused by: com.datastax.spark.connector.types.TypeConversionException: Cannot convert object zipkin2.storage.cassandra.Schema$EndpointUDT@67a3fdf8 of type class zipkin2.storage.cassandra.Schema$EndpointUDT to com.datastax.spark.connector.japi.UDTValue.
    return readEndpoint(row.getObject(name));
  }
  return readEndpoint(row.getUDTValue(name));
}
 
Example #2
Source File: CollabFilterCassandraDriver.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Trains and validates the collaborative-filtering implementation selected by {@code version},
 * prints its results report to standard output, and returns the RMSE.
 *
 * @param version suffix appended to {@code collabfilter.CollabFilterCassandra} to pick the class
 * @return the RMSE reported by the implementation's {@code validate}
 * @throws InstantiationException if the implementation class cannot be instantiated
 * @throws IllegalAccessException if its no-arg constructor is not accessible
 * @throws ClassNotFoundException if no class exists for the given version
 */
double trainAndValidate(int version) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
	// The implementation is resolved reflectively from the version number.
	final String implClassName = "collabfilter.CollabFilterCassandra" + version;
	final ICollabFilterCassandra cfc = (ICollabFilterCassandra) Class.forName(implClassName).newInstance();
	// The session is not referenced below; it is held open for the duration of the run and closed on exit.
	try (Session session = this.cassandraConnector.openSession()) {
		MatrixFactorizationModel trainedModel = cfc.train(this.sparkCtx, this.cassandraConnector);
		CassandraJavaRDD<CassandraRow> validationsCassRdd = javaFunctions(this.sparkCtx)
				.cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.VALIDATION_TABLE);
		JavaRDD<Rating> predictions = cfc.predict(trainedModel, validationsCassRdd);
		final double rmse = cfc.validate(predictions, validationsCassRdd);
		String report = cfc.resultsReport(predictions, validationsCassRdd, rmse);
		System.out.println(report);
		return rmse;
	}
}
 
Example #3
Source File: CollabFilterCassandra7.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the root-mean-squared error between predicted ratings and the actual ratings stored
 * in the validation table.
 *
 * <p>Predictions and validation rows are both keyed by (user, product) and joined, so only
 * pairs present on both sides contribute to the error.
 *
 * @param predictionJavaRdd predicted ratings
 * @param validationsCassRdd validation rows holding the actual ratings
 * @return the square root of the mean squared difference between actual and predicted ratings
 */
public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Key each prediction by (user, product) so it can be joined with the actual rating.
	JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
		@Override
		public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating pred) throws Exception {
			return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating());
		}
	}));
	JavaRDD<Rating> validationRatings = validationsCassRdd.map(new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
		@Override
		public Rating call(CassandraRow validation) throws Exception {
			// Read the rating as a double, as train() does; getInt would drop any fractional part
			// of the actual rating and distort the error.
			return new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getDouble(RatingDO.RATING_COL));
		}
	});
	// Join actual and predicted ratings on (user, product); keep only the (actual, predicted) pairs.
	JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(new org.apache.spark.api.java.function.Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
		@Override
		public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating validationRating) throws Exception {
			return new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating());
		}
	})).join(predictionsJavaPairs).values();

	double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(new org.apache.spark.api.java.function.Function<Tuple2<Double, Double>, Object>() {
		@Override
		public Object call(Tuple2<Double, Double> pair) throws Exception {
			Double err = pair._1() - pair._2();
			return (Object) (err * err);// No covariance! Need to cast
		}
	}).rdd()).mean();
	return Math.sqrt(meanSquaredError);
}
 
Example #4
Source File: CollabFilterCassandra7.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Predicts a rating for every (user, product) pair found in the validation rows.
 *
 * @param model the trained matrix-factorization model
 * @param validationsCassRdd rows supplying the user and product columns
 * @return the model's predictions as a Java RDD
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Projects each row down to the (user, product) pair the model expects.
	org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>> toUserProduct = new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
		@Override
		public Tuple2<Object, Object> call(CassandraRow validationRow) throws Exception {
			Object user = validationRow.getInt(RatingDO.USER_COL);
			Object product = validationRow.getInt(RatingDO.PRODUCT_COL);
			return new Tuple2<Object, Object>(user, product);
		}
	};
	RDD<Tuple2<Object, Object>> userProductPairs = JavaRDD.toRDD(validationsCassRdd.map(toUserProduct));
	return model.predict(userProductPairs).toJavaRDD();
}
 
Example #5
Source File: CollabFilterCassandra7.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Trains an ALS matrix-factorization model from the ratings table.
 *
 * @param sparkCtx the Spark context used to read the Cassandra table
 * @param cassandraConnector not referenced by this implementation
 * @return the model produced by {@code ALS.train} with {@code RANK}, {@code ITER} and {@code LAMBDA}
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
	// Converts a ratings-table row into an MLlib Rating.
	org.apache.spark.api.java.function.Function<CassandraRow, Rating> rowToRating = new org.apache.spark.api.java.function.Function<CassandraRow, Rating>() {
		@Override
		public Rating call(CassandraRow trainingRow) throws Exception {
			return new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL));
		}
	};
	CassandraJavaRDD<CassandraRow> ratingRows = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
	JavaRDD<Rating> ratings = ratingRows.map(rowToRating);
	return ALS.train(JavaRDD.toRDD(ratings), RANK, ITER, LAMBDA);
}
 
Example #6
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Renders one tab-separated line per prediction (user, product, predicted rating, actual rating
 * and an ERR/OK flag), sorted by user and then by product, joined with newlines.
 *
 * <p>A line is flagged {@code ERR} when the prediction differs from the actual rating by 1 or
 * more. Both RDDs are collected to the driver.
 *
 * @param predJavaRdd predicted ratings
 * @param validationsCassRdd validation rows holding the actual ratings
 * @return the formatted lines, without a header
 */
private String predictionString(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Index the actual ratings by (user, product). Collectors.toMap rejects duplicate keys.
	java.util.function.Function<CassandraRow, Tuple2<Integer, Integer>> keyMapper = validationRow -> new Tuple2<Integer, Integer>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
	java.util.function.Function<CassandraRow, Double> valueMapper = validationRow -> validationRow.getDouble(RatingDO.RATING_COL);
	java.util.Map<Tuple2<Integer, Integer>, Double> validationMap = validationsCassRdd.collect().stream().collect(Collectors.toMap(keyMapper, valueMapper));

	java.util.function.Function<Rating, String> stringMapper = prediction -> {
		// NOTE(review): unboxing throws if a prediction has no matching validation row; this
		// presumably cannot happen because predictions are derived from those rows -- confirm.
		double validationRating = validationMap.get(new Tuple2<Integer, Integer>(prediction.user(), prediction.product()));
		String errWarningString = Math.abs(validationRating - prediction.rating()) >= 1 ? "ERR" : "OK";
		return prediction.user() + "\t" + prediction.product() + "\t" + Util.round(prediction.rating()) + "\t\t" + Util.round(validationRating) + "\t" + errWarningString;
	};
	// Integer.compare instead of subtraction: a difference of two ints can overflow and flip the order.
	Stream<Rating> sortedPredictions = predJavaRdd.collect().stream().sorted((o1, o2) -> o1.user() == o2.user() ? Integer.compare(o1.product(), o2.product()) : Integer.compare(o1.user(), o2.user()));
	return sortedPredictions.map(stringMapper).collect(Collectors.joining("\n"));
}
 
Example #7
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the root-mean-squared error between predicted ratings and the actual ratings stored
 * in the validation table.
 *
 * <p>Predictions and validation rows are both keyed by (user, product) and joined, so only
 * pairs present on both sides contribute to the error.
 *
 * @param predictionJavaRdd predicted ratings
 * @param validationsCassRdd validation rows holding the actual ratings
 * @return the square root of the mean squared difference between actual and predicted ratings
 */
public double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	JavaPairRDD<Tuple2<Integer, Integer>, Double> predictionsJavaPairs = JavaPairRDD.fromJavaRDD(predictionJavaRdd.map(pred -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(pred.user(), pred.product()), pred.rating())));
	// Read the rating as a double, as train() and predictionString() do; getInt would drop any
	// fractional part of the actual rating and distort the error.
	JavaRDD<Rating> validationRatings = validationsCassRdd.map(validation -> new Rating(validation.getInt(RatingDO.USER_COL), validation.getInt(RatingDO.PRODUCT_COL), validation.getDouble(RatingDO.RATING_COL)));
	// Join actual and predicted ratings on (user, product); keep only the (actual, predicted) pairs.
	JavaRDD<Tuple2<Double, Double>> validationAndPredictions = JavaPairRDD.fromJavaRDD(validationRatings.map(validationRating -> new Tuple2<Tuple2<Integer, Integer>, Double>(new Tuple2<Integer, Integer>(validationRating.user(), validationRating.product()), validationRating.rating()))).join(predictionsJavaPairs).values();

	double meanSquaredError = JavaDoubleRDD.fromRDD(validationAndPredictions.map(pair -> {
		Double err = pair._1() - pair._2();
		return (Object) (err * err);// No covariance! Need to cast to Object
	}).rdd()).mean();
	return Math.sqrt(meanSquaredError);
}
 
Example #8
Source File: CassandraRowsToDependencyLinks.java    From zipkin-dependencies with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the thrift-encoded span of every row, collects the converted spans, and links them
 * into dependency links.
 *
 * <p>Returns an empty list as soon as a span without a parent has a timestamp that is zero or
 * lies outside {@code startTs}..{@code endTs}. Rows that raise a {@link RuntimeException} while
 * being decoded or converted are logged and skipped.
 *
 * @param rows the rows to decode; the local variable name suggests they share one trace ID
 * @return the dependency links for the collected spans, or an empty list if the trace is rejected
 */
@Override
public Iterable<DependencyLink> call(Iterable<CassandraRow> rows) {
  // Optional hook; runs before anything is logged below.
  if (logInitializer != null) logInitializer.run();
  V1ThriftSpanReader reader = V1ThriftSpanReader.create();
  V1SpanConverter converter = V1SpanConverter.create();
  List<Span> sameTraceId = new ArrayList<>();
  for (CassandraRow row : rows) {
    try {
      V1Span v1Span = reader.read(ReadBuffer.wrapUnsafe(row.getBytes("span")));
      // One v1 span may convert into several spans.
      for (Span span : converter.convert(v1Span)) {
        // check to see if the trace is within the interval
        if (span.parentId() == null) {
          long timestamp = span.timestampAsLong();
          if (timestamp == 0 || timestamp < startTs || timestamp > endTs) {
            // Rejects the whole input, discarding spans collected so far.
            return Collections.emptyList();
          }
        }
        sameTraceId.add(span);
      }
    } catch (RuntimeException e) {
      // Best effort: report the row that failed and continue with the next one.
      log.warn(
          String.format(
              "Unable to decode span from traces where trace_id=%s and ts=%s and span_name='%s'",
              row.getLong("trace_id"), row.getDate("ts").getTime(), row.getString("span_name")),
          e);
    }
  }
  return new DependencyLinker().putTrace(sameTraceId).link();
}
 
Example #9
Source File: CassandraRowsToDependencyLinks.java    From zipkin-dependencies with Apache License 2.0 5 votes vote down vote up
/**
 * Converts every row into a span, drops duplicates, and delegates to
 * {@code spansToDependencyLinks} to produce the dependency links.
 *
 * @param rows the rows to convert
 * @return the dependency links computed from the de-duplicated spans
 */
@Override
public Iterable<DependencyLink> call(Iterable<CassandraRow> rows) {
  if (logInitializer != null) {
    logInitializer.run();
  }
  // A linked hash set drops redundantly accepted spans while keeping encounter order.
  Set<Span> uniqueSpans = new LinkedHashSet<>();
  for (CassandraRow row : rows) {
    uniqueSpans.add(cassandraRowToSpan.call(row));
  }
  return spansToDependencyLinks.call(uniqueSpans);
}
 
Example #10
Source File: CassandraDependenciesJob.java    From zipkin-dependencies with Apache License 2.0 5 votes vote down vote up
/**
 * Groups span rows by trace ID and flat-maps each group into dependency links.
 *
 * <p>With {@code strictTraceId} the rows are spanned by their {@code trace_id} column and
 * linked directly from rows; otherwise each row is first converted to a span and the spans are
 * grouped by {@code Span::traceId}.
 *
 * @param spans the table scan over span rows
 * @param microsUpper passed to the link functions as their last time bound
 * @param microsLower passed to the link functions as their first time bound
 * @param inTest forwarded to the row-reading functions
 * @return dependency links keyed by trace ID
 */
JavaPairRDD<String, DependencyLink> flatMapToLinksByTraceId(
    CassandraTableScanJavaRDD<CassandraRow> spans,
    long microsUpper, long microsLower, boolean inTest
) {
  if (!strictTraceId) {
    // groupBy instead of spanBy because trace_id is mixed length
    return spans.map(new CassandraRowToSpan(inTest))
        .groupBy(Span::traceId)
        .flatMapValues(new SpansToDependencyLinks(logInitializer, microsLower, microsUpper));
  }
  return spans.spanBy(row -> row.getString("trace_id"), String.class)
      .flatMapValues(
          new CassandraRowsToDependencyLinks(logInitializer, microsLower, microsUpper, inTest));
}
 
Example #11
Source File: CassandraRowToSpan.java    From zipkin-dependencies with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link Span} from one row.
 *
 * <p>IDs, timestamp and the shared flag are always copied. Of the tags only {@code error} is
 * carried over. The kind and the local/remote endpoints are set only when present; a kind that
 * does not name a {@code Span.Kind} constant is logged at debug level and left unset.
 *
 * @param row the row to convert
 * @return the built span
 */
@Override public Span call(CassandraRow row) {
  String traceId = CassandraDependenciesJob.traceId(row);
  String spanId = row.getString("id");
  Span.Builder result = Span.newBuilder()
      .traceId(traceId)
      .parentId(row.getString("parent_id"))
      .id(spanId)
      .timestamp(row.getLong("ts"))
      .shared(row.getBoolean("shared"));

  // Only the "error" tag is copied to the span.
  Map<String, String> tags = row.getMap(
      "tags", TypeConverter.StringConverter$.MODULE$, TypeConverter.StringConverter$.MODULE$);
  String errorTag = tags.get("error");
  if (errorTag != null) {
    result.putTag("error", errorTag);
  }

  String kindName = row.getString("kind");
  if (kindName != null) {
    try {
      result.kind(Span.Kind.valueOf(kindName));
    } catch (IllegalArgumentException ignored) {
      // Unknown kind: leave it unset rather than failing the row.
      log.debug("couldn't parse kind {} in span {}/{}", kindName, traceId, spanId);
    }
  }

  Endpoint local = readEndpoint(row, "l_ep");
  if (local != null) {
    result.localEndpoint(local);
  }
  Endpoint remote = readEndpoint(row, "r_ep");
  if (remote != null) {
    result.remoteEndpoint(remote);
  }
  return result.build();
}
 
Example #12
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
/**
 * Trains an ALS matrix-factorization model from the ratings table.
 *
 * @param sparkCtx the Spark context used to read the Cassandra table
 * @param cassandraConnector not referenced by this implementation
 * @return the model produced by {@code ALS.train} with {@code RANK}, {@code ITER} and {@code LAMBDA}
 */
public MatrixFactorizationModel train(JavaSparkContext sparkCtx, CassandraConnector cassandraConnector) {
	CassandraJavaRDD<CassandraRow> ratingRows = javaFunctions(sparkCtx).cassandraTable(RatingDO.EMPLOYERRATINGS_KEYSPACE, RatingDO.RATINGS_TABLE);
	// Convert each row into an MLlib Rating of (user, product, rating).
	JavaRDD<Rating> ratings = ratingRows.map(trainingRow -> {
		return new Rating(trainingRow.getInt(RatingDO.USER_COL), trainingRow.getInt(RatingDO.PRODUCT_COL), trainingRow.getDouble(RatingDO.RATING_COL));
	});
	return ALS.train(JavaRDD.toRDD(ratings), RANK, ITER, LAMBDA);
}
 
Example #13
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
/**
 * Predicts a rating for every (user, product) pair found in the validation rows.
 *
 * @param model the trained matrix-factorization model
 * @param validationsCassRdd rows supplying the user and product columns
 * @return the model's predictions as a Java RDD
 */
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
	// Project each row down to the (user, product) pair the model expects.
	RDD<Tuple2<Object, Object>> userProductPairs = JavaRDD.toRDD(validationsCassRdd.map(validationRow -> {
		return new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL), validationRow.getInt(RatingDO.PRODUCT_COL));
	}));
	return model.predict(userProductPairs).toJavaRDD();
}
 
Example #14
Source File: CollabFilterCassandra8.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the textual report: a tab-separated header, the per-prediction lines from
 * {@code predictionString}, and a final RMSE line rounded via {@code Util.round(rmse, 2)}.
 *
 * @param predJavaRdd predicted ratings
 * @param validationsCassRdd validation rows holding the actual ratings
 * @param rmse the error value to print on the last line
 * @return the complete report
 */
public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
	String reportLines = predictionString(predJavaRdd, validationsCassRdd);
	return "User\tProduct\tPredicted\tActual\tError?\n" + reportLines + "\n" + "RMSE = " + Util.round(rmse, 2);
}
 
Example #15
Source File: CollabFilterCassandra7.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the textual report: a tab-separated header, the per-prediction lines from
 * {@code predictionString}, and a final RMSE line rounded via {@code Util.round(rmse, 2)}.
 *
 * @param predJavaRdd predicted ratings
 * @param validationsCassRdd validation rows holding the actual ratings
 * @param rmse the error value to print on the last line
 * @return the complete report
 */
@Override
public String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse) {
	String reportLines = predictionString(predJavaRdd, validationsCassRdd);
	return "User\tProduct\tPredicted\tActual\tError?\n" + reportLines + "\n" + "RMSE = " + Util.round(rmse, 2);
}
 
Example #16
Source File: ICollabFilterCassandra.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 votes vote down vote up
/**
 * Produces a textual report for the given predictions and validation rows.
 *
 * @param predJavaRdd predicted ratings
 * @param validationsCassRdd validation rows; presumably the source of the actual ratings -- confirm against implementations
 * @param rmse the error value to include in the report
 * @return the report text
 */
public abstract String resultsReport(JavaRDD<Rating> predJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd, double rmse); 
Example #17
Source File: ICollabFilterCassandra.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 votes vote down vote up
/**
 * Scores the predictions against the validation rows.
 *
 * @param predictionJavaRdd predicted ratings
 * @param validationsCassRdd validation rows to compare against
 * @return the error measure; the parameter naming in {@code resultsReport} suggests an RMSE -- confirm against implementations
 */
double validate(JavaRDD<Rating> predictionJavaRdd, CassandraJavaRDD<CassandraRow> validationsCassRdd); 
Example #18
Source File: ICollabFilterCassandra.java    From Spark-Cassandra-Collabfiltering with Apache License 2.0 votes vote down vote up
/**
 * Produces predicted ratings from the model for the given validation rows.
 *
 * @param model the trained matrix-factorization model
 * @param validationsCassRdd validation rows; presumably they supply the user/product pairs to predict -- confirm against implementations
 * @return the predicted ratings
 */
JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd);