org.apache.spark.ml.classification.MultilayerPerceptronClassifier Java Examples

The following examples show how to use org.apache.spark.ml.classification.MultilayerPerceptronClassifier. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DatasetClassifier.java    From mmtf-spark with Apache License 2.0 4 votes vote down vote up
/**
 * @param args args[0] path to parquet file, args[1] name of classification column
 * @throws IOException 
 * @throws StructureException 
 */
public static void main(String[] args) throws IOException {

	if (args.length != 2) {
		System.err.println("Usage: " + DatasetClassifier.class.getSimpleName() + " <parquet file> <classification column name>");
		System.exit(1);
	}

	// name of the class label
	String label = args[1];
	
	long start = System.nanoTime();

	SparkSession spark = SparkSession
			.builder()
			.master("local[*]")
			.appName(DatasetClassifier.class.getSimpleName())
			.getOrCreate();

	Dataset<Row> data = spark.read().parquet(args[0]).cache();
	
	int featureCount = 0;
	Object vector = data.first().getAs("features");
	if (vector instanceof DenseVector) {
	   featureCount = ((DenseVector)vector).numActives();
	} else if (vector instanceof SparseVector) {
	   featureCount = ((SparseVector)vector).numActives();
	}
	
	System.out.println("Feature count            : "  + featureCount);
	
	int classCount = (int)data.select(label).distinct().count();
	System.out.println("Class count              : " + classCount);

	System.out.println("Dataset size (unbalanced): " + data.count());
	data.groupBy(label).count().show(classCount);

	data = DatasetBalancer.downsample(data, label, 1);
	
	System.out.println("Dataset size (balanced)  : " + data.count());
	data.groupBy(label).count().show(classCount);

	double testFraction = 0.3;
	long seed = 123;

	SparkMultiClassClassifier mcc;
	Map<String, String> metrics;

	DecisionTreeClassifier dtc = new DecisionTreeClassifier();
	mcc = new SparkMultiClassClassifier(dtc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	RandomForestClassifier rfc = new RandomForestClassifier();
	mcc = new SparkMultiClassClassifier(rfc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	LogisticRegression lr = new LogisticRegression();
	mcc = new SparkMultiClassClassifier(lr, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	// specify layers for the neural network
	//    input layer: dimension of feature vector
	//    output layer: number of classes
	int[] layers = new int[] {featureCount, 10, classCount};
	MultilayerPerceptronClassifier mpc = new MultilayerPerceptronClassifier()
			.setLayers(layers)
			.setBlockSize(128)
			.setSeed(1234L)
			.setMaxIter(200);

	mcc = new SparkMultiClassClassifier(mpc, label, testFraction, seed);
	metrics = mcc.fit(data);
	System.out.println(metrics);

	long end = System.nanoTime();

	System.out.println((end-start)/1E9 + " sec");
}
 
Example #2
Source File: JavaMultilayerPerceptronClassifierExample.java    From SparkDemo with MIT License 4 votes vote down vote up
public static void main(String[] args) {
  SparkSession spark = SparkSession
    .builder()
    .appName("JavaMultilayerPerceptronClassifierExample")
    .getOrCreate();

  // $example on$
  // Load training data
  String path = "data/mllib/sample_multiclass_classification_data.txt";
  Dataset<Row> dataFrame = spark.read().format("libsvm").load(path);

  // Split the data into train and test
  Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
  Dataset<Row> train = splits[0];
  Dataset<Row> test = splits[1];

  // specify layers for the neural network:
  // input layer of size 4 (features), two intermediate of size 5 and 4
  // and output of size 3 (classes)
  int[] layers = new int[] {4, 5, 4, 3};

  // create the trainer and set its parameters
  MultilayerPerceptronClassifier trainer = new MultilayerPerceptronClassifier()
    .setLayers(layers)
    .setBlockSize(128)
    .setSeed(1234L)
    .setMaxIter(100);

  // train the model
  MultilayerPerceptronClassificationModel model = trainer.fit(train);

  // compute accuracy on the test set
  Dataset<Row> result = model.transform(test);
  Dataset<Row> predictionAndLabels = result.select("prediction", "label");
  MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
    .setMetricName("accuracy");

  System.out.println("Test set accuracy = " + evaluator.evaluate(predictionAndLabels));
  // $example off$

  spark.stop();
}
 
Example #3
Source File: TransitionClassifier.java    From vn.vitk with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Trains a transition classifier on the data frame.
 * @param jsc
 * @param graphs
 * @param featureFrame
 * @param classifierFileName
 * @param numHiddenUnits
 * @return a transition classifier.
 */
public Transformer trainMLP(JavaSparkContext jsc,
		List<DependencyGraph> graphs, FeatureFrame featureFrame,
		String classifierFileName, int numHiddenUnits) {
	// create a SQLContext
	this.sqlContext = new SQLContext(jsc);
	// extract a data frame from these graphs
	DataFrame dataset = toDataFrame(jsc, graphs, featureFrame);
	
	// create a processing pipeline and fit it to the data frame
	Pipeline pipeline = createPipeline();
	PipelineModel pipelineModel = pipeline.fit(dataset);
	DataFrame trainingData = pipelineModel.transform(dataset);
	
	// cache the training data for better performance
	trainingData.cache();
	
	if (verbose) {
		trainingData.show(false);
	}
	
	// compute the number of different labels, which is the maximum element 
	// in the 'label' column.
	trainingData.registerTempTable("dfTable");
	Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
	int numLabels = (int)row.getDouble(0);
	numLabels++;
	
	int vocabSize = ((CountVectorizerModel)(pipelineModel.stages()[1])).getVocabSize();
	
	// default is a two-layer MLP
	int[] layers = {vocabSize, numLabels};
	// if user specify a hidden layer, use a 3-layer MLP:
	if (numHiddenUnits > 0) {
		layers = new int[3];
		layers[0] = vocabSize;
		layers[1] = numHiddenUnits;
		layers[2] = numLabels;
	}
	MultilayerPerceptronClassifier classifier = new MultilayerPerceptronClassifier()
		.setLayers(layers)
		.setBlockSize(128)
		.setSeed(1234L)
		.setTol((Double)params.getOrDefault(params.getTolerance()))
		.setMaxIter((Integer)params.getOrDefault(params.getMaxIter()));
	MultilayerPerceptronClassificationModel model = classifier.fit(trainingData);
	
	// compute precision on the training data
	//
	DataFrame result = model.transform(trainingData);
	DataFrame predictionAndLabel = result.select("prediction", "label");
	MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
	if (verbose) {
		System.out.println("N = " + trainingData.count());
		System.out.println("D = " + vocabSize);
		System.out.println("K = " + numLabels);
		System.out.println("H = " + numHiddenUnits);
		System.out.println("training precision = " + evaluator.evaluate(predictionAndLabel));
	}
	
	// save the trained MLP to a file
	//
	String classifierPath = new Path(classifierFileName, "data").toString();
	jsc.parallelize(Arrays.asList(model), 1).saveAsObjectFile(classifierPath);
	// save the pipeline model to sub-directory "pipelineModel"
	// 
	try {
		String pipelinePath = new Path(classifierFileName, "pipelineModel").toString(); 
		pipelineModel.write().overwrite().save(pipelinePath);
	} catch (IOException e) {
		e.printStackTrace();
	}
	return model;
}