Java Code Examples for weka.core.Instances#add()

The following examples show how to use weka.core.Instances#add(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataSetUtilsTest.java    From AILibs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds a one-instance dataset with 32 * 32 * 3 + 1 numeric attributes, all set to 1,
 * and checks that {@code DataSetUtils.cifar10InstanceToMatrix} yields an array whose
 * shape is {32, 32, 3}.
 */
public void cifar10InstancesAttributesTest() {
    final int attributeCount = 32 * 32 * 3 + 1;

    ArrayList<Attribute> attributes = new ArrayList<>();
    int created = 0;
    while (created < attributeCount) {
        attributes.add(new Attribute("blub" + created));
        created++;
    }

    Instances dataset = new Instances("test", attributes, 1);

    // One dense instance with every attribute value set to 1.
    DenseInstance sample = new DenseInstance(attributes.size());
    int position = 0;
    while (position < sample.numAttributes()) {
        sample.setValue(position, 1d);
        position++;
    }
    sample.setDataset(dataset);
    dataset.add(sample);

    INDArray matrix = DataSetUtils.cifar10InstanceToMatrix(sample);
    long[] expectedShape = {32, 32, 3};
    Assert.assertArrayEquals(expectedShape, matrix.shape());
}
 
Example 2
Source File: AbstractEnsemble.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Collects the class distribution produced by every constituent module for one instance.
 * When a transform is configured, the instance is first placed into an empty copy of its
 * own dataset header, run through the transform, and the first converted instance is
 * classified instead.
 *
 * @param instance the instance to classify
 * @return the distributions of each individual module, i.e. [0] = first module's dist, [1] = second...
 * @throws Exception if the transform or one of the module classifiers fails
 */
public double[][] distributionForInstanceByConstituents(Instance instance) throws Exception{
    Instance toClassify = instance;
    if (this.transform != null) {
        // The transform works on datasets, so wrap the single instance in a one-element set.
        Instances holder = new Instances(instance.dataset(), 0);
        holder.add(instance);
        toClassify = transform.process(holder).instance(0);
    }

    double[][] perModule = new double[this.modules.length][];
    int moduleIdx = 0;
    while (moduleIdx < modules.length) {
        perModule[moduleIdx] = modules[moduleIdx].getClassifier().distributionForInstance(toClassify);
        moduleIdx++;
    }
    return perModule;
}
 
Example 3
Source File: BestConf.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Manual test driver for COMT2: loads a training set, trains COMT2 on it with an initial
 * LHS sample, prints an evaluation summary to stderr, and writes two files: the instance
 * reported as having the possible maximum Y, and the training set with each class value
 * replaced by the model's prediction.
 *
 * NOTE(review): the first output is written through saveDataToXrffFile but its path ends
 * in ".arff" - confirm that this is intended.
 *
 * @throws Exception if loading, training, evaluation or saving fails
 */
public static void testCOMT2() throws Exception{
	BestConf bestconf = new BestConf();

	// The last attribute of the training data is the class.
	Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
	trainingSet.setClassIndex(trainingSet.numAttributes()-1);

	// Initial sample points, extended with the training set's class attribute as last column.
	Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false);
	samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
	samplePoints.setClassIndex(samplePoints.numAttributes()-1);

	COMT2 comt = new COMT2(samplePoints, COMT2Iteration);
	comt.buildClassifier(trainingSet);

	Evaluation evaluation = new Evaluation(trainingSet);
	evaluation.evaluateModel(comt, trainingSet);
	System.err.println(evaluation.toSummaryString());

	// Persist the instance the model reports as best.
	Instances bestInstances = new Instances(trainingSet,2);
	bestInstances.add(comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance()));
	DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

	// Output the training set with the class value updated to the predicted value.
	Instances predicted = new Instances(trainingSet, trainingSet.numInstances());
	for (Enumeration<Instance> remaining = trainingSet.enumerateInstances(); remaining.hasMoreElements(); ) {
		Instance original = remaining.nextElement();
		double[] row = original.toDoubleArray();
		row[row.length-1] = comt.classifyInstance(original);
		predicted.add(original.copy(row));
	}
	DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", predicted);
}
 
Example 4
Source File: KDTree.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the k nearest neighbours of the supplied instance.
 * &gt;k neighbours are returned if more than one neighbour lies
 * at the kth boundary.
 *
 * @param target	The instance to find the nearest neighbours for.
 * @param k 		The number of neighbours to find.
 * @return The k nearest neighbours (or &gt;k if there is more than
 * one neighbour at the kth boundary).
 * @throws Exception 	if the nearest neighbour could not be found.
 */
public Instances kNearestNeighbours(Instance target, int k) throws Exception {
  checkMissing(target);

  if (m_Stats != null) {
    m_Stats.searchStart();
  }

  MyHeap heap = new MyHeap(k);
  findNearestNeighbours(target, m_Root, k, heap, 0.0);

  if (m_Stats != null) {
    m_Stats.searchFinish();
  }

  // Result size: regular heap entries plus the entries tied at the kth boundary.
  final int total = heap.size() + heap.noOfKthNearest();
  Instances neighbours = new Instances(m_Instances, total);
  m_DistanceList = new double[total];
  int[] indices = new int[total];

  // Both arrays are filled from the last slot towards the first:
  // the tied kth-boundary entries are taken first, then the remaining heap entries.
  int slot = total - 1;
  for (; heap.noOfKthNearest() > 0; slot--) {
    MyHeapElement tied = heap.getKthNearest();
    indices[slot] = tied.index;
    m_DistanceList[slot] = tied.distance;
  }
  for (; heap.size() > 0; slot--) {
    MyHeapElement element = heap.get();
    indices[slot] = element.index;
    m_DistanceList[slot] = element.distance;
  }
  m_DistanceFunction.postProcessDistances(m_DistanceList);

  for (int instanceIndex : indices) {
    neighbours.add(m_Instances.instance(instanceIndex));
  }

  return neighbours;
}
 
Example 5
Source File: ZooModelTest.java    From wekaDeeplearning4j with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a dataset named "shrinked" that holds the first ten instances of {@code data},
 * with the class index set to 1 and the class value of the i-th instance set to
 * {@code i % 10}.
 * <p>
 * Each instance is copied before it is modified, so the instances held by {@code data}
 * keep their original class values and dataset reference.
 *
 * @param data the source dataset; must contain at least ten instances
 * @return a new dataset with ten relabelled instances
 */
private Instances shrinkInstances(Instances data) {
    ArrayList<Attribute> atts = new ArrayList<>();
    for (int i = 0; i < data.numAttributes(); i++) {
        atts.add(data.attribute(i));
    }
    Instances shrunkenData = new Instances("shrinked", atts, 10);
    shrunkenData.setClassIndex(1);
    for (int i = 0; i < 10; i++) {
        // data.get(i) hands out the dataset's own instance; mutate a copy instead so
        // the caller's data is not relabelled or rebound to the new dataset.
        Instance inst = (Instance) data.get(i).copy();
        // The copy still references the source dataset here, so the class value is
        // written at the same attribute position as before.
        inst.setClassValue(i % 10);
        inst.setDataset(shrunkenData);
        shrunkenData.add(inst);
    }
    return shrunkenData;
}
 
Example 6
Source File: FeatureGeneratorTree.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Applies this preprocessor to a single instance by wrapping it in a one-element dataset
 * that shares the header of the instance's dataset, delegating to the dataset variant of
 * {@code apply}, and returning the first resulting instance.
 *
 * @param data the instance to process; it must belong to a dataset
 * @return the first instance of the processed one-element dataset
 * @throws PreprocessingException wrapping any exception raised while processing
 */
@Override
public Instance apply(final Instance data) throws PreprocessingException {
	try {
		// Copy only the header: the copy constructor would duplicate every instance of
		// the source dataset just to have them cleared again.
		Instances instances = new Instances(data.dataset(), 1);
		instances.add(data);
		return this.apply(instances).firstInstance();
	} catch (Exception e) {
		throw new PreprocessingException(e);
	}
}
 
Example 7
Source File: Standardization.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Applies the single-instance variant of {@code apply} to every instance of {@code data}
 * and collects the results in a new dataset that shares the header of {@code data}.
 *
 * @param data the dataset to process
 * @return a new dataset holding the processed instances in the original order
 * @throws PreprocessingException if processing an instance fails
 */
@Override
public Instances apply(final Instances data) throws PreprocessingException {
	// Header-only copy with room for all results; avoids copying every instance of the
	// input only to clear them immediately afterwards.
	Instances newInstances = new Instances(data, data.numInstances());
	for (Instance i : data) {
		newInstances.add(this.apply(i));
	}
	return newInstances;
}
 
Example 8
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates a two-class view of {@code data}: instances whose class label is contained in
 * {@code cluster1} get class value 0.0, instances whose label is contained in
 * {@code cluster2} get class value 1.0, and all other instances are left out.
 * {@code cluster1} is checked first, so a label present in both collections maps to 0.0.
 *
 * @param data the source instances (not modified; copies are relabelled)
 * @param cluster1 class labels to merge into class value 0.0
 * @param cluster2 class labels to merge into class value 1.0
 * @return a dataset obtained from {@code getEmptySetOfInstancesWithRefactoredClass}
 *         filled with the relabelled copies
 */
public static Instances mergeClassesOfInstances(final Instances data, final Collection<String> cluster1, final Collection<String> cluster2) {
	Instances merged = WekaUtil.getEmptySetOfInstancesWithRefactoredClass(data);
	for (Instance source : data) {
		Instance relabelled = (Instance) source.copy();
		int labelIndex = (int) Math.round(source.classValue());
		String label = source.classAttribute().value(labelIndex);

		final double mergedClass;
		if (cluster1.contains(label)) {
			mergedClass = 0.0;
		} else if (cluster2.contains(label)) {
			mergedClass = 1.0;
		} else {
			// Label belongs to neither cluster: the instance is dropped.
			continue;
		}
		relabelled.setClassValue(mergedClass);
		merged.add(relabelled);
	}
	return merged;
}
 
Example 9
Source File: BinaryTransform.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Binarises every non-class attribute of {@code data}: a value below the attribute's
 * split point becomes 0, any other value becomes 1. The class value is copied unchanged.
 * While {@code findNewSplits} is set, the split points are first recomputed from
 * {@code data} via {@code findSplitValue} and the flag is cleared.
 *
 * @param data the instances to transform
 * @return the output-format dataset filled with one binarised instance per input instance
 * @throws Exception if determining the output format or a split value fails
 */
@Override
    public Instances process(Instances data) throws Exception{
        Instances output = determineOutputFormat(data);

        if (findNewSplits) {
            splits = new double[data.numAttributes()];

            // Class value of every instance, in dataset order.
            double[] classes = new double[data.numInstances()];
            for (int row = 0; row < classes.length; row++) {
                classes[row] = data.instance(row).classValue();
            }

            for (int att = 0; att < data.numAttributes(); att++) {
                if (att == data.classIndex()) {
                    continue;
                }
                // Gather the values of attribute att ...
                double[] column = new double[data.numInstances()];
                for (int row = 0; row < data.numInstances(); row++) {
                    column[row] = data.instance(row).value(att);
                }
                // ... and find its split point.
                splits[att] = findSplitValue(data, column, classes);
            }
            findNewSplits = false;
        }

        // Build the binarised instances.
        for (int row = 0; row < data.numInstances(); row++) {
            Instance source = data.instance(row);
            Instance binarised = new DenseInstance(data.numAttributes());
            for (int att = 0; att < data.numAttributes(); att++) {
                if (att == data.classIndex()) {
                    binarised.setValue(att, source.classValue());
                } else {
                    binarised.setValue(att, source.value(att) < splits[att] ? 0 : 1);
                }
            }
            output.add(binarised);
        }
        return output;
    }
 
Example 10
Source File: MLSophisticatedPipeline.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the single-instance variant of {@code apply} on every instance of {@code data}
 * and gathers the results in a copy of the empty probing-result dataset.
 *
 * @param data the instances to process
 * @return a dataset containing one processed instance per input instance
 * @throws PreprocessingException if processing an instance fails
 */
@Override
public Instances apply(final Instances data) throws PreprocessingException{
	Instances collected = new Instances(this.getEmptyProbingResultDataset());
	for (Instance source : data) {
		Instance processed = this.apply(source);
		// Bind the result to the target dataset before it is added.
		processed.setDataset(collected);
		collected.add(processed);
	}
	return collected;
}
 
Example 11
Source File: C45Loader.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Return the full data set. If the structure hasn't yet been determined by a
 * call to getStructure then this method does so before reading the data.
 * Switches the loader to batch retrieval, reads instances until
 * {@code getInstance} returns null, and then closes the data reader.
 *
 * @return the structure of the data set filled with all instances that were read
 * @exception IOException if there is no source, if incremental retrieval has
 *              already been started, or if parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
  if (m_sourceFile == null) {
    throw new IOException("No source has been specified");
  }
  if (getRetrieval() == INCREMENTAL) {
    throw new IOException(
        "Cannot mix getting Instances in both incremental and batch modes");
  }
  setRetrieval(BATCH);

  if (m_structure == null) {
    getStructure();
  }

  StreamTokenizer tokenizer = new StreamTokenizer(m_dataReader);
  initTokenizer(tokenizer);

  Instances result = new Instances(m_structure);
  for (Instance next = getInstance(tokenizer); next != null; next = getInstance(tokenizer)) {
    result.add(next);
  }

  // A failure while closing is only reported; the data that was read is still returned.
  try {
    m_dataReader.close();
  } catch (Exception ex) {
    ex.printStackTrace();
  }
  return result;
}
 
Example 12
Source File: BoTSWEnsemble.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Classifies one instance with the SVM: builds the test bag for the instance, turns its
 * histogram (first {@code params.k} entries) plus its class value into a dense instance
 * in the format of {@code bagData}, and returns the SVM's distribution for it.
 *
 * @param instnc the instance to classify
 * @return the class distribution returned by the SVM
 * @throws Exception if building the bag or classifying fails
 */
public double[] distributionForInstanceSVM(Instance instnc) throws Exception {
    BoTSW_Bag bag = buildTestBag(instnc);

    Instances singleBag = new Instances(bagData, 1);

    // Layout: k histogram values followed by the class value in the last slot.
    double[] values = new double[params.k+1];
    int pos = 0;
    while (pos < params.k) {
        values[pos] = bag.hist[pos];
        ++pos;
    }
    values[values.length-1] = bag.classValue;

    singleBag.add(new DenseInstance(1, values));
    return svm.distributionForInstance(singleBag.get(0));
}
 
Example 13
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates a sample set of {@code sampleSetSize} points over the given attributes.
 * Several candidate index sets are generated and the one with the largest minimum
 * pairwise distance (as reported by {@code minDistForSet}) is turned into instances.
 *
 * At current version, we assume all attributes are numeric attributes with bounds
 *
 * Let PACE be upper-lower DIVided by the sampleSetSize
 *
 * If no candidate has a minimum distance above zero, the first generated candidate is
 * used instead of failing with a NullPointerException.
 *
 * @param atts the attributes to sample; each must provide lower and upper numeric bounds
 * @param sampleSetSize the number of points to generate, also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated points
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//fallback: keep the first candidate so that a set is always available,
			//without changing the threshold used for the remaining candidates
			setWithMaxMinDist = setPerm;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain chosen for attribute j in sample i
			int slot = setWithMaxMinDist[j].get(i);
			double lower = bounds[j][slot];
			double upper = bounds[j][slot+1];
			//the random generator is only consulted when a random point is requested
			vals[j] = useMid?
					(lower+upper)/2:
						lower+((upper-lower)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 14
Source File: UnionFilter.java    From AILibs with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Unites two data sets instance by instance.
 * <p>
 * If at least one of the data sets has no intermediate instances, the Weka instances are
 * merged: all attributes except the last one of each data set are copied (suffixed with
 * "u1" / "u2"), followed by a new nominal class attribute; the class value is taken from
 * the first data set. Otherwise the intermediate instances are merged by flattening and
 * horizontally stacking the arrays at equal positions, and the Weka instances of the
 * first data set are kept.
 *
 * @param coll1 the first data set, must not be null
 * @param coll2 the second data set, must not be null
 * @return the united data set
 * @throws IllegalArgumentException if a parameter is null, if the Weka data sets differ
 *             in their number of instances, or if a set of intermediate instances is empty
 */
static DataSet union(final DataSet coll1, final DataSet coll2) {
	if (coll1 == null || coll2 == null) {
		throw new IllegalArgumentException("Parameters 'coll1' and 'coll2' must not be null!");
	}

	if (coll1.getIntermediateInstances() == null || coll2.getIntermediateInstances() == null) {
		// Merge Weka instances
		Instances instances1 = coll1.getInstances();
		Instances instances2 = coll2.getInstances();

		if (instances1.numInstances() != instances2.numInstances()) {
			throw new IllegalArgumentException("Data sets to be united must have the same amount of instances!");
		}

		// All attributes but the last of each set, plus one class attribute
		ArrayList<Attribute> attributes = new ArrayList<>(
				coll1.getInstances().numAttributes() + coll2.getInstances().numAttributes() - 1);
		for (int i = 0; i < instances1.numAttributes() - 1; i++) {
			attributes.add(instances1.attribute(i).copy(instances1.attribute(i).name() + "u1"));
		}
		for (int i = 0; i < instances2.numAttributes() - 1; i++) {
			attributes.add(instances2.attribute(i).copy(instances2.attribute(i).name() + "u2"));
		}

		// Add class attribute; its labels are the value indices rendered as doubles ("0.0", "1.0", ...)
		List<String> classValues = IntStream.range(0, instances1.classAttribute().numValues()).asDoubleStream()
				.mapToObj(String::valueOf).collect(Collectors.toList());
		Attribute classAtt = new Attribute("classAtt", classValues);
		attributes.add(classAtt);

		Instances unitedInstances = new Instances("UnitedInstances", attributes, instances1.numInstances());
		unitedInstances.setClassIndex(unitedInstances.numAttributes() - 1);

		for (int i = 0; i < instances1.numInstances(); i++) {
			Instance instance = new DenseInstance(attributes.size());
			instance.setDataset(unitedInstances);

			// Copy values
			int runningIndex = 0;
			for (int j = 0; j < instances1.numAttributes() - 1; j++) {
				instance.setValue(runningIndex++, instances1.get(i).value(j));
			}
			for (int j = 0; j < instances2.numAttributes() - 1; j++) {
				instance.setValue(runningIndex++, instances2.get(i).value(j));
			}
			instance.setClassValue(instances1.get(i).classValue());

			unitedInstances.add(instance);
		}

		return new DataSet(unitedInstances, null);
	} else {
		if (coll1.getIntermediateInstances().isEmpty() || coll2.getIntermediateInstances().isEmpty()) {
			throw new IllegalArgumentException("There must be intermediate instances if the collection is set.");
		}

		// Merge intermediate instances
		List<INDArray> intermediateInsts1 = coll1.getIntermediateInstances();
		List<INDArray> intermediateInsts2 = coll2.getIntermediateInstances();

		// One united array per instance, so size the list by the number of instances
		// (not by the length of the individual arrays).
		List<INDArray> unitedIntermediateInsts = new ArrayList<>(intermediateInsts1.size());
		for (int i = 0; i < intermediateInsts1.size(); i++) {
			INDArray intermediateInst = Nd4j.hstack(intermediateInsts1.get(i).ravel(),
					intermediateInsts2.get(i).ravel());
			unitedIntermediateInsts.add(intermediateInst);
		}

		return new DataSet(coll1.getInstances(), unitedIntermediateInsts);
	}
}
 
Example 15
Source File: DecisionTreeEstimator.java    From jMetal with MIT License 4 votes vote down vote up
/**
 * Predicts a value for one variable of {@code testSolution} with a J48 decision tree
 * trained on the variables of all solutions in {@code solutionList}. If anything in the
 * Weka pipeline throws, the method falls back to the variable's current value.
 *
 * @param index the position of the test instance to classify (also the variable index
 *              used for the fallback value)
 * @param testSolution the solution to predict for; it is cast to DoubleSolution
 * @return the classifier's output for the test instance at {@code index}, or the value
 *         of variable {@code index} of {@code testSolution} if an exception occurred
 */
public double doPredictionVariable(int index,S testSolution) {
  double result = 0.0d;

  try {
    int numberOfVariables = solutionList.get(0).getNumberOfVariables();
    //Attributes
    //numeric
    Attribute attr = new Attribute("my-numeric");

    //nominal: one label per variable position
    ArrayList<String> myNomVals = new ArrayList<>();

    for (int i=0; i<numberOfVariables; i++)
      myNomVals.add(VALUE_STRING+i);
    Attribute attr1 = new Attribute(NOMINAL_STRING, myNomVals);

    //string
    Attribute attr2 = new Attribute(MY_STRING, (List<String>)null);

    //2.create dataset
    ArrayList<Attribute> attrs = new ArrayList<>();
    attrs.add(attr);
    attrs.add(attr1);
    attrs.add(attr2);
    Instances dataset = new Instances("my_dataset", attrs, 0);

    //Add instances: one training instance per (solution, variable) pair
    for (S solution : solutionList) {
      //instances
      for (int i = 0; i <numberOfVariables ; i++) {
        double[] attValues = new double[dataset.numAttributes()];
        attValues[0] = ((DoubleSolution)solution).getVariable(i);
        attValues[1] = dataset.attribute(NOMINAL_STRING).indexOfValue(VALUE_STRING+i);
        attValues[2] = dataset.attribute(MY_STRING).addStringValue(solution.toString()+i);
        dataset.add(new DenseInstance(1.0, attValues));
      }
    }


    //DataSet test: built from the same attribute list as the training set
    Instances datasetTest = new Instances("my_dataset_test", attrs, 0);

    //Add instances: one test instance per variable of the test solution
    for (int i = 0; i < numberOfVariables; i++) {
      Instance test = new DenseInstance(3);
      test.setValue(attr, ((DoubleSolution)testSolution).getVariable(i));
      test.setValue(attr1, VALUE_STRING+i);
      test.setValue(attr2, testSolution.toString()+i);
      datasetTest.add(test);
      //  dataset.add(test);
    }


    //split to 70:30 learn and test set
    //NOTE(review): no split is performed in this method; the comment above looks stale

    //Preprocess strings (almost no classifier supports them)
    //NOTE(review): only the training set is filtered; datasetTest keeps the original
    //three attributes - confirm the classifier can score unfiltered test instances
    StringToWordVector filter = new StringToWordVector();

    filter.setInputFormat(dataset);
    dataset = Filter.useFilter(dataset, filter);

    //Build classifier
    //NOTE(review): index 1 is used as class on the filtered data - presumably still the
    //nominal attribute; verify against the filter's output attribute order
    dataset.setClassIndex(1);
    Classifier classifier = new J48();
    classifier.buildClassifier(dataset);
    //resample if needed
    //dataset = dataset.resample(new Random(42));
    dataset.setClassIndex(1);
    datasetTest.setClassIndex(1);
    //do eval
    Evaluation eval = new Evaluation(datasetTest); //trainset
    eval.evaluateModel(classifier, datasetTest); //testset
    result = classifier.classifyInstance(datasetTest.get(index));
  } catch (Exception e) {
    //best-effort fallback: any failure above yields the variable's current value
    result = ((DoubleSolution)testSolution).getVariable(index);
  }
  return result;
}
 
Example 16
Source File: ADTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Recursively searches this part of the tree for the best test (splitter) to add,
  * aiming to minimize the Z value. A Z-pure cutoff is applied to reduce the search
  * space.
  *
  * @param currentNode the root of the subtree to be searched, and the current node
  * being considered as parent of a new split
  * @param posInstances the positive-class instances that apply at this node
  * @param negInstances the negative-class instances that apply at this node
  * @exception Exception if search fails
  */
 private void searchForBestTestSingle(PredictionNode currentNode,
			       Instances posInstances, Instances negInstances)
   throws Exception {

   // pure or empty nodes are not investigated any further
   if (posInstances.numInstances() == 0) {
     return;
   }
   if (negInstances.numInstances() == 0) {
     return;
   }

   // z-pure cutoff
   if (calcZpure(posInstances, negInstances) >= m_search_smallestZ) {
     return;
   }

   // keep stats
   m_nodesExpanded++;
   m_examplesCounted += posInstances.numInstances() + negInstances.numInstances();

   // static splitters (nominal attributes)
   for (int nominalIdx = 0; nominalIdx < m_nominalAttIndices.length; nominalIdx++) {
     evaluateNominalSplitSingle(m_nominalAttIndices[nominalIdx], currentNode,
			 posInstances, negInstances);
   }

   // dynamic splitters (numeric attributes)
   if (m_numericAttIndices.length > 0) {

     // merge both sets: start from a copy of the positives, then append the negatives
     Instances allInstances = new Instances(posInstances);
     Enumeration negatives = negInstances.enumerateInstances();
     while (negatives.hasMoreElements()) {
       allInstances.add((Instance) negatives.nextElement());
     }

     // find the optimal Z split-point for every numeric attribute
     for (int numericIdx = 0; numericIdx < m_numericAttIndices.length; numericIdx++) {
       evaluateNumericSplitSingle(m_numericAttIndices[numericIdx], currentNode,
			   posInstances, negInstances, allInstances);
     }
   }

   // nothing to descend into
   if (currentNode.getChildren().size() == 0) {
     return;
   }

   // keep searching along the configured search path
   switch (m_searchPath) {
   case SEARCHPATH_ALL:
     goDownAllPathsSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_HEAVIEST:
     goDownHeaviestPathSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_ZPURE:
     goDownZpurePathSingle(currentNode, posInstances, negInstances);
     break;
   case SEARCHPATH_RANDOM:
     goDownRandomPathSingle(currentNode, posInstances, negInstances);
     break;
   }
 }
 
Example 17
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates a sample set of {@code sampleSetSize} points over numeric and nominal
 * attributes. Several candidate index sets are generated and the one with the largest
 * minimum pairwise distance (as reported by {@code minDistForSet}) is turned into
 * instances.
 *
 * Assumptions:(1)Numeric is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 *
 * If no candidate has a minimum distance above zero, the first generated candidate is
 * used instead of failing with a NullPointerException.
 *
 * @param atts the attributes to sample
 * @param sampleSetSize the number of points to generate, also the number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a dataset named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist
		if(crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}else if(setWithMaxMinDist==null){
			//fallback: keep the first candidate so that a set is always available,
			//without changing the threshold used for the remaining candidates
			setWithMaxMinDist = setPerm;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//for nominal attributes the "bounds" row stores positions of nominal values
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain chosen for attribute j in sample i
			int slot = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lower = bounds[j][slot];
				double upper = bounds[j][slot+1];
				//the random generator is only consulted when a random point is requested
				vals[j] = useMid?
						(lower+upper)/2:
							lower+((upper-lower)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][slot];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 18
Source File: ArffFileFromRun.java    From NLIWOD with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds a multi-label training set for the QALD6 multilingual training questions and
 * writes it as ARFF text to ./src/main/resources/old/Train.arff. Each instance holds the
 * analyzer's features for a question plus one nominal attribute per QA system (hawk,
 * yoda, sina, qakis) that is switched when the system's f-measure for that question is
 * above zero.
 *
 * NOTE(review): the system attributes are declared with labels "1", "0" in that order and
 * are set with the numeric values 0 and 1 - presumably these are label indices, which
 * would make 0 select "1"; confirm the intended encoding.
 *
 * @param args unused
 * @throws Exception if loading the questions or the run data fails
 */
public static void main(String[] args) throws Exception {
	HAWK hawk = new HAWK();
	SINA sina = new SINA();
	QAKIS qakis = new QAKIS();
	YODA yoda = new YODA();

	/*
	 * For multilabel classification: one two-valued nominal attribute per system.
	 */
	ArrayList<String> hawkLabels = new ArrayList<>();
	hawkLabels.add("1");
	hawkLabels.add("0");
	Attribute hawkatt = new Attribute("hawk", hawkLabels);

	ArrayList<String> qakisLabels = new ArrayList<>();
	qakisLabels.add("1");
	qakisLabels.add("0");
	Attribute qakisatt = new Attribute("qakis", qakisLabels);

	ArrayList<String> yodaLabels = new ArrayList<>();
	yodaLabels.add("1");
	yodaLabels.add("0");
	Attribute yodaatt = new Attribute("yoda", yodaLabels);

	ArrayList<String> sinaLabels = new ArrayList<>();
	sinaLabels.add("1");
	sinaLabels.add("0");
	Attribute sinaatt = new Attribute("sina", sinaLabels);

	// 1. Learn on the training data for each system a classifier to find
	// out which system can answer which question

	// 1.1 load the questions and how good each system answers
	log.debug("Load the questions and how good each system answers");
	List<IQuestion> trainQuestions = LoaderController.load(Dataset.QALD6_Train_Multilingual);
	List<ASystem> systems = Lists.newArrayList(hawk, sina, qakis, yoda);
	JSONArray traindata = RunProducer.loadRunData(Dataset.QALD6_Train_Multilingual);

	// 1.2 calculate the features per question and system
	log.debug("Calculate the features per question and system");
	Analyzer analyzer = new Analyzer();
	ArrayList<Attribute> fvfinal = analyzer.fvWekaAttributes;

	// Each attribute is pushed to the front, so the final leading order is
	// qakis, sina, yoda, hawk.
	fvfinal.add(0, hawkatt);
	fvfinal.add(0, yodaatt);
	fvfinal.add(0, sinaatt);
	fvfinal.add(0, qakisatt);

	Instances trainingSet = new Instances("training_classifier: -C 4" , fvfinal, trainQuestions.size());
	log.debug("Start collection of training data for each system");

	for (int row = 0; row < traindata.size(); row++) {
		JSONObject questiondata = (JSONObject) traindata.get(row);
		JSONObject allsystemsdata = (JSONObject) questiondata.get("answers");
		String question = (String) questiondata.get("question");
		Instance features = analyzer.analyze(question);

		// default value for every system
		features.setValue(hawkatt, 0);
		features.setValue(yodaatt, 0);
		features.setValue(sinaatt, 0);
		features.setValue(qakisatt, 0);

		for (ASystem system : systems) {
			JSONObject systemdata = (JSONObject) allsystemsdata.get(system.name());
			double fmeasure = Double.parseDouble(systemdata.get("fmeasure").toString());
			if (fmeasure > 0) {
				switch (system.name()) {
				case "hawk": features.setValue(hawkatt, 1); break;
				case "yoda": features.setValue(yodaatt, 1); break;
				case "sina": features.setValue(sinaatt, 1); break;
				case "qakis": features.setValue(qakisatt, 1); break;
				}
			}
		}

		trainingSet.add(features);
	}
	log.debug(trainingSet.toString());

	try (FileWriter file = new FileWriter("./src/main/resources/old/Train.arff")) {
		file.write(trainingSet.toString());
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example 19
Source File: PropositionalToMultiInstance.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Signify that this batch of input to the filter is finished.
 * The pending input is sorted on its first attribute (the bag ID), consecutive
 * instances with the same bag ID are grouped into one bag, and every bag is
 * added to the output via {@code addBag} and then pushed to the output queue.
 *
 * @return true if there are instances pending output
 * @throws IllegalStateException if no input structure has been defined
 */
public boolean batchFinished() {

  if (getInputFormat() == null) {
    throw new IllegalStateException("No input instance format defined");
  }

  Instances input = getInputFormat();
  input.sort(0);   // make sure that bagID is sorted
  Instances output = getOutputFormat();
  Instances bagInsts = output.attribute(1).relation();

  // Scratch instance that is refilled for every input instance.
  Instance inst = new DenseInstance(bagInsts.numAttributes());
  inst.setDataset(bagInsts);

  // State of the bag currently being collected, seeded from the first instance.
  double bagIndex   = input.instance(0).value(0);
  double classValue = input.instance(0).classValue();
  double bagWeight  = 0.0;

  // Convert pending input instances
  for (int i = 0; i < input.numInstances(); i++) {
    Instance current = input.instance(i);
    double currentBagIndex = current.value(0);

    // copy the propositional instance value, except the bagIndex and the class value
    for (int j = 0; j < input.numAttributes() - 2; j++) {
      inst.setValue(j, current.value(j + 1));
    }
    inst.setWeight(current.weight());

    if (currentBagIndex != bagIndex) {
      // bag ID changed: emit the finished bag and start a new one with this instance
      addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

      bagInsts   = bagInsts.stringFreeStructure();
      bagInsts.add(inst);
      bagIndex   = currentBagIndex;
      classValue = input.instance(i).classValue();
      bagWeight  = inst.weight();
    }
    else {
      bagInsts.add(inst);
      bagWeight += inst.weight();
    }
  }

  // reach the last instance, create and add the last bag
  addBag(input, output, bagInsts, (int) bagIndex, classValue, bagWeight);

  if (getRandomize()) {
    output.randomize(new Random(getSeed()));
  }

  for (int i = 0; i < output.numInstances(); i++) {
    push(output.instance(i));
  }

  // Free memory
  flushInput();

  m_NewBatch = true;
  m_FirstBatchDone = true;

  return (numPendingOutput() != 0);
}
 
Example 20
Source File: SAX.java    From tsml with GNU General Public License v3.0 1 votes vote down vote up
/**
 * Performs a SAX transformation on a single instance; the input format must already
 * be known. The instance is wrapped in a one-element dataset built from the stored
 * input format, passed through {@code process}, and the first resulting instance is
 * returned.
 *
 * Generally to be used in the SAX_1NN classifier (essentially a wrapper classifier that
 * just feeds SAX-filtered data to a 1NN classifier) to filter individual instances
 * during testing.
 *
 * Instance objects need the header info as well as the basic data.
 *
 * @param inst the instance to convert
 * @param alphabetSize size of SAX alphabet (not read by this method)
 * @param numIntervals size of resulting word (not read by this method)
 * @return the first instance of the processed one-element dataset
 * @throws Exception if processing fails
 */
public Instance convertInstance(Instance inst, int alphabetSize, int numIntervals) throws Exception {
    Instances wrapper = new Instances(inputFormat, 1);
    wrapper.add(inst);

    Instances converted = process(wrapper);
    return converted.firstInstance();
}