Java Code Examples for weka.core.Attribute#isNumeric()

The following examples show how to use weka.core.Attribute#isNumeric(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContractRotationForest.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/** 
  * Adds randomly generated instances to the dataset.
  * <p>
  * For every new instance, a numeric attribute receives
  * <code>random.nextDouble()</code> and a nominal attribute receives a random
  * value index in <code>[0, numValues)</code>. Attributes of any other type
  * keep the array default of 0. Every instance is added with weight 1.
  * 
  * @param dataset the dataset to extend
  * @param numInstances the number of instances to add
  * @param random a random number generator
  */
 protected void addRandomInstances( Instances dataset, int numInstances, 
                                 Random random ) {
   int n = dataset.numAttributes();
   for( int i = 0; i < numInstances; i++ ) {
     // A fresh array per instance: DenseInstance stores a reference to the
     // array it is given, and the copy taken when a dense instance is added
     // to a dataset is shallow. Reusing one array would therefore make all
     // added instances share (and overwrite) the same values.
     double [] v = new double[ n ];
     for( int j = 0; j < n; j++ ) {
       Attribute att = dataset.attribute( j );
       if( att.isNumeric() ) {
         v[ j ] = random.nextDouble();
       }
       else if ( att.isNominal() ) { 
         v[ j ] = random.nextInt( att.numValues() );
       }
     }
     dataset.add( new DenseInstance( 1, v ) );
   }
 }
 
Example 2
Source File: ActiveHNode.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Updates this node with a training instance.
 * <p>
 * After delegating to <code>super.updateDistribution</code>, every non-class
 * attribute's conditional statistics (kept in <code>m_nodeStats</code>, keyed
 * by attribute name) are updated with the instance's value, class label and
 * weight. Statistics are created on first use: Gaussian for numeric
 * attributes, nominal otherwise.
 *
 * @param inst the instance to update the node with
 * @throws Exception if a problem occurs
 */
@Override
public void updateNode(Instance inst) throws Exception {
  super.updateDistribution(inst);

  for (int attIndex = 0; attIndex < inst.numAttributes(); attIndex++) {
    Attribute attribute = inst.attribute(attIndex);
    if (attIndex == inst.classIndex()) {
      continue; // no conditional statistics for the class attribute itself
    }

    String key = attribute.name();
    ConditionalSufficientStats attStats = m_nodeStats.get(key);
    if (attStats == null) {
      // lazily create the estimator matching the attribute type
      if (attribute.isNumeric()) {
        attStats = new GaussianConditionalSufficientStats();
      } else {
        attStats = new NominalConditionalSufficientStats();
      }
      m_nodeStats.put(key, attStats);
    }

    String classLabel = inst.classAttribute().value((int) inst.classValue());
    attStats.update(inst.value(attribute), classLabel, inst.weight());
  }
}
 
Example 3
Source File: Test.java    From tsml with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the test represented by a string in Prolog notation.
 * <p>
 * Numeric attributes are rendered as <code>name &gt;= split</code> (negated
 * test) or <code>name &lt; split</code>, with the split printed to 3 decimal
 * places. For any other attribute the value selected by the split index is
 * looked up: the value <code>"false"</code> yields <code>not(name)</code>,
 * every other value yields just the attribute name.
 *
 * @return a string representing the test in Prolog notation
 */   
public String toPrologString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);
  String attName = att.name();
  StringBuilder str = new StringBuilder();
  if (att.isNumeric()) {
    str.append(attName).append(' ');
    str.append(m_Not ? ">= " : "< ").append(Utils.doubleToString(m_Split, 3));
  } else {
    String value = att.value((int) m_Split);

    // Compare by content, not by reference: '==' only matches when both
    // strings happen to be the same (interned) object, which does not hold
    // for values read from a file or deserialized.
    if ("false".equals(value)) {
      str.append("not(").append(attName).append(')');
    } else {
      str.append(attName);
    }
  }
  return str.toString();
}
 
Example 4
Source File: RDG1.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Generates the list of tests for a new rule, one test per attribute that is
 * not flagged as irrelevant.
 * <p>
 * For a numeric attribute a random split point is drawn with
 * <code>random.nextDouble()</code>, and the last constructor argument of the
 * test is <code>true</code> when that split lies below the example's value.
 * For any other attribute the test is built from the example's own value
 * with that argument set to <code>false</code>.
 *
 * @param random random number generator
 * @param example the instance the tests are derived from
 * @return a list of tests
 * @throws Exception if dataset format not defined
 */
private FastVector generateTestList(Random random, Instance example) 
 throws Exception {

  Instances datasetFormat = getDatasetFormat();
  if (datasetFormat == null) {
    throw new Exception("Dataset format not defined.");
  }

  FastVector tests = new FastVector(getNumAttributes() - getNumIrrelevant());
  boolean[] isIrrelevant = getAttList_Irr();

  for (int attIndex = 0; attIndex < getNumAttributes(); attIndex++) {
    if (isIrrelevant[attIndex]) {
      continue;
    }

    Test test;
    if (example.attribute(attIndex).isNumeric()) {
      double split = random.nextDouble();
      boolean negate = split < example.value(attIndex);
      test = new Test(attIndex, split, datasetFormat, negate);
    } else {
      test = new Test(attIndex, example.value(attIndex), datasetFormat, false);
    }
    tests.addElement(test);
  }

  return tests;
}
 
Example 5
Source File: MekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Converts a WEKA attribute into the matching {@code IAttribute}.
 * A numeric attribute becomes a {@code NumericAttribute}; a nominal attribute
 * becomes an {@code IntBasedCategoricalAttribute} whose domain lists the
 * nominal values in index order.
 *
 * @param att the WEKA attribute to convert
 * @return the converted attribute, carrying the same name
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String name = att.name();

	if (att.isNumeric()) {
		return new NumericAttribute(name);
	}
	if (!att.isNominal()) {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}

	final List<String> categories = new LinkedList<>();
	final int numCategories = att.numValues();
	for (int idx = 0; idx < numCategories; idx++) {
		categories.add(att.value(idx));
	}
	return new IntBasedCategoricalAttribute(name, categories);
}
 
Example 6
Source File: WekaUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks whether every attribute returned by
 * {@code getAttributes(instances, false)} is numeric.
 * NOTE(review): the {@code false} flag presumably excludes the class
 * attribute - confirm against {@code getAttributes}.
 *
 * @param instances the dataset whose attributes are inspected
 * @return {@code true} if no inspected attribute is non-numeric (also when there are none)
 */
public static boolean hasOnlyNumericAttributes(final Instances instances) {
	boolean allNumeric = true;
	for (Attribute candidate : getAttributes(instances, false)) {
		if (candidate.isNumeric()) {
			continue;
		}
		allNumeric = false;
		break;
	}
	return allNumeric;
}
 
Example 7
Source File: WekaInstancesUtil.java    From AILibs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Translates a WEKA attribute into its {@code IAttribute} counterpart.
 * Numeric attributes map to {@code NumericAttribute}; nominal attributes map
 * to {@code IntBasedCategoricalAttribute} with the nominal values, in index
 * order, as the domain.
 *
 * @param att the WEKA attribute to translate
 * @return the translated attribute with the same name
 * @throws IllegalArgumentException if the attribute is neither numeric nor nominal
 */
public static IAttribute transformWEKAAttributeToAttributeType(final Attribute att) {
	final String label = att.name();
	final IAttribute converted;

	if (att.isNumeric()) {
		converted = new NumericAttribute(label);
	} else if (att.isNominal()) {
		final List<String> values = new LinkedList<>();
		int position = 0;
		while (position < att.numValues()) {
			values.add(att.value(position));
			position++;
		}
		converted = new IntBasedCategoricalAttribute(label, values);
	} else {
		throw new IllegalArgumentException("Can only transform numeric or categorical attributes");
	}
	return converted;
}
 
Example 8
Source File: Test.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Gives a string representation of the test in Prolog notation, starting
 * from the comparison symbol.
 * <p>
 * Numeric attributes yield <code>&gt;= split</code> (negated) or
 * <code>&lt; split</code>. Attributes without exactly two values yield
 * <code>!= value</code> (negated) or <code>= value</code>. For two-valued
 * attributes a negated test is written as equality with the other value.
 *
 * @return a string representing the test in Prolog notation
 */   
private String testPrologComparisonString() {
  Attribute att = m_Dataset.attribute(m_AttIndex);

  if (att.isNumeric()) {
    String operator = m_Not ? ">= " : "< ";
    return operator + Utils.doubleToString(m_Split, 3);
  }

  int valueIndex = (int) m_Split;
  if (att.numValues() != 2) {
    String operator = m_Not ? "!= " : "= ";
    return operator + att.value(valueIndex);
  }

  // Binary attribute: negation flips to the other of the two values.
  if (m_Not) {
    valueIndex = (valueIndex == 0) ? 1 : 0;
  }
  return "= " + att.value(valueIndex);
}
 
Example 9
Source File: Test.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Gives a string representation of the test, starting from the comparison
 * symbol.
 * <p>
 * The result is a comparison symbol followed by an operand: the split point
 * (3 decimal places) for numeric attributes, otherwise the attribute value
 * selected by the split index. For attributes with exactly two values a
 * negated test is expressed as equality with the complementary value.
 *
 * @return a string representing the test
 */   
private String testComparisonString() {
  final Attribute attribute = m_Dataset.attribute(m_AttIndex);
  final String symbol;
  final String operand;

  if (attribute.isNumeric()) {
    symbol = m_Not ? ">= " : "< ";
    operand = Utils.doubleToString(m_Split, 3);
  } else if (attribute.numValues() != 2) {
    symbol = m_Not ? "!= " : "= ";
    operand = attribute.value((int) m_Split);
  } else {
    final int chosen = (int) m_Split;
    final int complement = (chosen == 0) ? 1 : 0;
    symbol = "= ";
    operand = attribute.value(m_Not ? complement : chosen);
  }

  return symbol + operand;
}
 
Example 10
Source File: BinaryItem.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructor.
 * 
 * @param att the attribute that backs this item.
 * @param valueIndex the index of the value for this item.
 * @throws Exception if the backing attribute is not binary or unary.
 */
public BinaryItem(Attribute att, int valueIndex) throws Exception {
  super(att, valueIndex);
  
  if (att.isNumeric() || (att.isNominal() && att.numValues() > 2)) {
    throw new Exception("BinaryItem must be constructed using a nominal attribute" +
    		" with at most 2 values!");
  }
}
 
Example 11
Source File: NominalItem.java    From tsml with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Constructs a new NominalItem.
 * 
 * @param att the attribute that backs the item.
 * @param valueIndex the index of the value for this item.
 * @throws Exception if the NominalItem can't be constructed.
 */
public NominalItem(Attribute att, int valueIndex) throws Exception {
  
  super(att);
  
  if (att.isNumeric()) {
    throw new Exception("NominalItem must be constructed using a nominal attribute");
  }
  m_attribute = att;
  if (m_attribute.numValues() == 1) {
    m_valueIndex = 0; // unary attribute (? used to indicate absence from a basket)
  } else {
    m_valueIndex = valueIndex;
  }
}
 
Example 12
Source File: Analyzer.java    From NLIWOD with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Analyzes the question and extracts all features that were set for this Analyzer.
 * <p>
 * Each registered analyzer contributes to one shared feature vector.
 * {@code PartOfSpeechTags} and {@code Dependencies} analyzers fill several
 * attributes through their dedicated helpers; every other analyzer sets the
 * single attribute it reports, as a double for numeric attributes and as a
 * String for nominal or string attributes.
 *
 * @param q question string
 * @return feature vector for the input question
 */
public Instance analyze(String q) {
	Instance featureVector = new DenseInstance(fvWekaAttributes.size());

	for (IAnalyzer current : analyzers) {
		if (current instanceof PartOfSpeechTags) {
			// special case for PartOfSpeechTags, need to set 36 attributes
			analyzePOS(featureVector, (PartOfSpeechTags) current, q);
		} else if (current instanceof Dependencies) {
			// special case for Dependencies, need to set 18 attributes
			analyzeDeps(featureVector, (Dependencies) current, q);
		} else {
			Attribute target = current.getAttribute();
			if (target.isNumeric()) {
				featureVector.setValue(target, (double) current.analyze(q));
			} else if (target.isNominal() || target.isString()) {
				String textValue = (String) current.analyze(q);
				featureVector.setValue(target, textValue);
				featureVector.setDataset(null);
			}
		}
	}

	return featureVector;
}
 
Example 13
Source File: Ridor.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
    * Build one rule using the growing data.
    * <p>
    * Antecedents are added greedily: in each round every attribute that is
    * still available gets a candidate antecedent, the candidate with the
    * largest strictly positive information gain is appended to
    * <code>m_Antds</code>, and the growing data is reduced to the instances
    * that candidate covers. A non-numeric attribute is marked as used once
    * chosen; numeric attributes may be chosen again. Growing stops when no
    * candidate has positive gain, when the remaining data has zero weight,
    * when the accuracy rate reaches 1, or when no unused attribute is left.
    *
    * @param data the growing data used to build the rule
    */    
   private void grow(Instances data){
     Instances growData = new Instances(data);

     m_AccuG = computeDefAccu(growData);
     m_CoverG = growData.sumOfWeights();
     /* Compute the default accurate rate of the growing data */
     double defAcRt = m_AccuG / m_CoverG;

     /* Keep the record of which attributes have already been used
        (a new boolean array is all false, so no explicit reset is needed) */
     boolean[] used = new boolean[growData.numAttributes()];
     int numUnused = used.length;

     boolean isContinue = true; // The stopping criterion of this rule

     while (isContinue){
       double maxInfoGain = 0;       // We require that infoGain be positive

       /* Best antecedent found in this round and the data it covers */
       Antd oneAntd = null;
       Instances coverData = null;
       Enumeration enumAttr = growData.enumerateAttributes();

       /* Build one condition based on all attributes not used yet */
       while (enumAttr.hasMoreElements()){
         Attribute att = (Attribute)(enumAttr.nextElement());

         // Look the attribute up by its own index. The enumeration skips the
         // class attribute, so a running counter would drift away from the
         // positions written below via getAttr().index() whenever the class
         // attribute is not the last one.
         if (used[att.index()])
           continue;

         Antd antd;
         if (att.isNumeric())
           antd = new NumericAntd(att);
         else
           antd = new NominalAntd(att);

         /* Compute the best information gain for each attribute,
            it's stored in the antecedent formed by this attribute.
            This procedure returns the data covered by the antecedent */
         Instances coveredData = computeInfoGain(growData, defAcRt, antd);
         if (coveredData != null){
           double infoGain = antd.getMaxInfoGain();
           if (Utils.gr(infoGain, maxInfoGain)){
             oneAntd = antd;
             coverData = coveredData;
             maxInfoGain = infoGain;
           }
         }
       }

       if (oneAntd == null) return;

       // Numeric attributes can be used more than once
       if (!oneAntd.getAttr().isNumeric()){
         used[oneAntd.getAttr().index()] = true;
         numUnused--;
       }

       m_Antds.addElement((Object)oneAntd);
       growData = coverData; // Grow data size is shrinking

       defAcRt = oneAntd.getAccuRate();

       /* Stop if no more data, rule perfect, no more attributes */
       if (Utils.eq(growData.sumOfWeights(), 0.0) || Utils.eq(defAcRt, 1.0) || (numUnused == 0))
         isContinue = false;
     }
   }
 
Example 14
Source File: InputMappedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds an instance in the format of the model header from an incoming
 * instance.
 * <p>
 * The attribute mapping is regenerated when no input header is known yet,
 * when the incoming header differs from the stored one, or when no mapping
 * exists. Unless suppressed, the regenerated mapping is reported on standard
 * output. Each model attribute whose status is OK then receives the mapped
 * incoming value: numeric values are copied, nominal values are translated
 * through the nominal value map. Missing incoming values, unmatched nominal
 * values and attributes whose status is not OK become missing values. A
 * model attribute that is neither numeric nor nominal keeps the array
 * default of 0.
 *
 * @param incoming the instance to map
 * @return a new dense instance with the incoming weight, attached to the model header
 * @throws Exception if a problem occurs
 */
public Instance constructMappedInstance(Instance incoming) throws Exception {
  
  boolean mappingStale = false;
  
  if (m_inputHeader == null
      || !m_inputHeader.equalHeaders(incoming.dataset())) {
    // first instance seen, or the incoming structure changed
    m_inputHeader = incoming.dataset();
    mappingStale = true;
    m_initialTestStructureKnown = false;
  } else if (m_attributeMap == null) {
    mappingStale = true;
    m_initialTestStructureKnown = false;
  }
  
  if (mappingStale) {
    regenerateMapping();
    m_vals = null;
    
    if (!m_suppressMappingReport) {
      StringBuffer result = createMappingReport();
      System.out.println(result.toString());
    }
  }    
  
  m_vals = new double[m_modelHeader.numAttributes()];
  
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    if (m_attributeStatus[i] != OK) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    double incomingValue = incoming.value(m_attributeMap[i]);
    if (Utils.isMissingValue(incomingValue)) {
      m_vals[i] = Utils.missingValue();
      continue;
    }

    Attribute modelAtt = m_modelHeader.attribute(i);
    if (modelAtt.isNumeric()) {
      m_vals[i] = incomingValue;
    } else if (modelAtt.isNominal()) {
      // translate the incoming value index into the model's value index
      int mapVal = m_nominalValueMap[i][(int) incomingValue];
      m_vals[i] = (mapVal == NO_MATCH) ? Utils.missingValue() : mapVal;
    }
  }
  
  Instance newInst = new DenseInstance(incoming.weight(), m_vals);
  newInst.setDataset(m_modelHeader);

  return newInst;
}
 
Example 15
Source File: InputMappedClassifier.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Creates a textual report of how model attributes map to incoming
 * attributes.
 * <p>
 * The report has two columns. The left column lists every model attribute
 * with its type tag, padded to a common width (longest model attribute name
 * plus 12, at least 16). The right column shows the 1-based index, type tag
 * and name of the mapped incoming attribute, or a "missing" note when there
 * is no match or the types differ.
 *
 * @return the report
 */
private StringBuffer createMappingReport() {
  StringBuffer report = new StringBuffer();
  report.append("Attribute mappings:\n\n");
  
  // width of the left column
  int longestName = 0;
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    longestName = Math.max(longestName, m_modelHeader.attribute(i).name().length());
  }
  final int minimumWidth = 16;
  int columnWidth = longestName + 12;
  if (columnWidth < minimumWidth) {
    columnWidth = minimumWidth;
  }
  
  String header = getFixedLengthString("Model attributes", ' ', columnWidth);
  String rule = getFixedLengthString("----------------", '-', columnWidth);
  report.append(header + "\t    Incoming attributes\n");
  report.append(rule + "\t    ----------------\n");
  
  for (int i = 0; i < m_modelHeader.numAttributes(); i++) {
    Attribute modelAtt = m_modelHeader.attribute(i);
    String modelType = modelAtt.isNumeric() ? "numeric)" : "nominal)";
    String leftColumn =
      getFixedLengthString("(" + modelType + " " + modelAtt.name(), ' ', columnWidth);
    report.append(leftColumn + "\t--> ");

    if (m_attributeStatus[i] == NO_MATCH) {
      report.append("- " + "missing (no match)\n");
    } else if (m_attributeStatus[i] == TYPE_MISMATCH) {
      report.append((m_attributeMap[i] + 1) + " " + "missing (type mis-match)\n");
    } else {
      Attribute inAtt = m_inputHeader.attribute(m_attributeMap[i]);
      String inType = inAtt.isNumeric() ? "numeric)" : "nominal)";
      report.append((m_attributeMap[i] + 1) + " (" + inType + " " + inAtt.name() + "\n");
    }
  }
  
  return report;
}
 
Example 16
Source File: BFTree.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Splits the data into two subsets and stores the sorted indices and weights
  * for the two successor nodes.
  * <p>
  * For every non-class attribute the instances are visited in that
  * attribute's sorted order and routed to subset 0 or 1 according to the
  * split on <code>att</code>. An instance whose value for <code>att</code> is
  * missing goes to every subset whose entry in <code>m_Props</code> is
  * positive, with its weight multiplied by that entry.
  *
  * @param subsetIndices 	sorted indices of instances for each attribute for the two successor nodes (filled in here)
  * @param subsetWeights 	weights of instances for each attribute for the two successor nodes (filled in here)
  * @param att 		attribute the split is based on
  * @param splitPoint 		split point used if att is numeric
  * @param splitStr 		split subset used if att is nominal
  * @param sortedIndices 	sorted indices of the instances to be split
  * @param weights 		weights of the instances to be split
  * @param data 		training data
  * @throws Exception 		if something goes wrong  
  */
 protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
     Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
     double[][] weights, Instances data) throws Exception {

   // For each attribute
   for (int attIdx = 0; attIdx < data.numAttributes(); attIdx++) {
     if (attIdx == data.classIndex()) {
       continue;
     }

     // number of entries written so far into each of the two subsets
     int[] filled = new int[2];
     for (int side = 0; side < 2; side++) {
       subsetIndices[side][attIdx] = new int[sortedIndices[attIdx].length];
       subsetWeights[side][attIdx] = new double[weights[attIdx].length];
     }

     for (int pos = 0; pos < sortedIndices[attIdx].length; pos++) {
       int instIdx = sortedIndices[attIdx][pos];
       Instance inst = data.instance(instIdx);

       if (inst.isMissing(att)) {
         // Split instance up
         for (int side = 0; side < 2; side++) {
           if (m_Props[side] > 0) {
             subsetIndices[side][attIdx][filled[side]] = instIdx;
             subsetWeights[side][attIdx][filled[side]] =
               m_Props[side] * weights[attIdx][pos];
             filled[side]++;
           }
         }
         continue;
       }

       int target;
       if (att.isNumeric()) {
         target = (inst.value(att) < splitPoint) ? 0 : 1;
       } else { // nominal attribute
         String label = "(" + att.value((int) inst.value(att.index())) + ")";
         target = (splitStr.indexOf(label) != -1) ? 0 : 1;
       }
       subsetIndices[target][attIdx][filled[target]] = instIdx;
       subsetWeights[target][attIdx][filled[target]] = weights[attIdx][pos];
       filled[target]++;
     }

     // Trim arrays
     for (int side = 0; side < 2; side++) {
       int[] trimmedIndices = new int[filled[side]];
       System.arraycopy(subsetIndices[side][attIdx], 0, trimmedIndices, 0, filled[side]);
       subsetIndices[side][attIdx] = trimmedIndices;

       double[] trimmedWeights = new double[filled[side]];
       System.arraycopy(subsetWeights[side][attIdx], 0, trimmedWeights, 0, filled[side]);
       subsetWeights[side][attIdx] = trimmedWeights;
     }
   }
 }
 
Example 17
Source File: SimpleCart.java    From tsml with GNU General Public License v3.0 4 votes vote down vote up
/**
  * Splits the data into two subsets and stores the sorted indices and weights
  * for the two successor nodes.
  * <p>
  * Each non-class attribute keeps its own sorted view of the instances. Every
  * instance in that view is assigned to branch 0 or branch 1 by the split on
  * <code>att</code>. If the value of <code>att</code> is missing, the instance
  * is copied to each branch whose <code>m_Props</code> entry is positive and
  * its weight is scaled by that entry.
  * 
  * @param subsetIndices 	sorted indices of instances for each attribute 
  * 				for the two successor nodes (written by this method)
  * @param subsetWeights 	weights of instances for each attribute for 
  * 				the two successor nodes (written by this method)
  * @param att 		attribute the split is based on
  * @param splitPoint 		split point used if att is numeric
  * @param splitStr 		split subset used if att is nominal
  * @param sortedIndices 	sorted indices of the instances to be split
  * @param weights 		weights of the instances to be split
  * @param data 		training data
  * @throws Exception 		if something goes wrong  
  */
 protected void splitData(int[][][] subsetIndices, double[][][] subsetWeights,
     Attribute att, double splitPoint, String splitStr, int[][] sortedIndices,
     double[][] weights, Instances data) throws Exception {

   int a = 0;
   while (a < data.numAttributes()) {
     if (a != data.classIndex()) {
       // how many slots of each branch have been filled for attribute a
       int[] used = new int[2];
       for (int branch = 0; branch < 2; branch++) {
         subsetIndices[branch][a] = new int[sortedIndices[a].length];
         subsetWeights[branch][a] = new double[weights[a].length];
       }

       for (int row = 0; row < sortedIndices[a].length; row++) {
         Instance current = data.instance(sortedIndices[a][row]);
         if (!current.isMissing(att)) {
           boolean goesLeft;
           if (att.isNumeric()) {
             goesLeft = current.value(att) < splitPoint;
           } else { // nominal attribute
             goesLeft = splitStr.contains(
               "(" + att.value((int) current.value(att.index())) + ")");
           }
           int branch = goesLeft ? 0 : 1;
           subsetIndices[branch][a][used[branch]] = sortedIndices[a][row];
           subsetWeights[branch][a][used[branch]] = weights[a][row];
           used[branch]++;
         } else {
           // Missing split value: share the instance between the branches
           for (int branch = 0; branch < 2; branch++) {
             if (m_Props[branch] > 0) {
               subsetIndices[branch][a][used[branch]] = sortedIndices[a][row];
               subsetWeights[branch][a][used[branch]] = m_Props[branch] * weights[a][row];
               used[branch]++;
             }
           }
         }
       }

       // Trim arrays down to the filled part
       for (int branch = 0; branch < 2; branch++) {
         int size = used[branch];
         int[] keptIndices = new int[size];
         double[] keptWeights = new double[size];
         System.arraycopy(subsetIndices[branch][a], 0, keptIndices, 0, size);
         System.arraycopy(subsetWeights[branch][a], 0, keptWeights, 0, size);
         subsetIndices[branch][a] = keptIndices;
         subsetWeights[branch][a] = keptWeights;
       }
     }
     a++;
   }
 }
 
Example 18
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates a sample set of <code>sampleSetSize</code> instances over the given attributes.
 * <p>
 * Several candidate sample sets are generated (min(7, max(sampleSetSize, atts.size())) of them)
 * and the one with the largest minimum pairwise distance is used. Each attribute's domain is
 * then cut into <code>sampleSetSize</code> subdomains and every sample takes, per attribute,
 * the subdomain selected by the chosen set.
 * <p>
 * Assumptions: (1) Numeric is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of samples to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated samples as a dataset named "InitialSetByLHS"
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept as a
		//fallback so that a set is available even when no minimum distance exceeds 0
		//(otherwise the sample generation below would dereference null)
		boolean improves = crntMinDist>maxMinDist;
		if(improves || setWithMaxMinDist==null){
			setWithMaxMinDist = setPerm;
			if(improves)
				maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//for nominal attributes each slot holds the index of a nominal value, not a boundary
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			int slot = setWithMaxMinDist[j].get(i);//subdomain chosen for sample i in dimension j
			if(atts.get(j).isNumeric()){
				if(useMid){
					vals[j] = (bounds[j][slot]+bounds[j][slot+1])/2;
				}else{
					vals[j] = bounds[j][slot]+
							((bounds[j][slot+1]-bounds[j][slot])*uniRand.nextDouble());
				}
			}else{//isNominal()
				vals[j] = bounds[j][slot];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 19
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates an initial sample set of <code>sampleSetSize</code> instances over the given attributes.
 * <p>
 * min(7, max(sampleSetSize, atts.size())) candidate sample sets are generated and the one whose
 * minimum pairwise distance is largest is selected. The domain of every attribute is divided
 * into <code>sampleSetSize</code> subdomains, and each sample reads, per attribute, the
 * subdomain assigned to it by the selected set.
 * <p>
 * Assumptions: (1) Numeric is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * 
 * @param atts the attributes to sample over
 * @param sampleSetSize the number of samples to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated samples as a dataset named "InitialSetByLHS"
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; keep the very first candidate as a fallback,
		//because without it the selection stays null whenever no minimum distance exceeds 0
		//and the sample generation below fails with a NullPointerException
		boolean isBetter = crntMinDist>maxMinDist;
		if(isBetter || setWithMaxMinDist==null){
			setWithMaxMinDist = setPerm;
			if(isBetter)
				maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//nominal attributes store a value index per slot instead of a numeric boundary
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			int cell = setWithMaxMinDist[j].get(i);//subdomain picked for sample i in dimension j
			if(atts.get(j).isNumeric()){
				double lower = bounds[j][cell];
				double upper = bounds[j][cell+1];
				//the random draw is only made when a random point is requested
				vals[j] = useMid?
						(lower+upper)/2:
							lower+((upper-lower)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][cell];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 20
Source File: LabelWordVectors.java    From AffectiveTweets with GNU General Public License v3.0 2 votes vote down vote up
/**
 * Determines the output format: all attributes of the input format followed
 * by one attribute per numeric or nominal lexicon attribute, named
 * <code>lexiconName-attributeName</code>. Nominal lexicon attributes keep
 * their values in index order. The class index of the input format is
 * carried over.
 *
 * @param inputFormat the format of the incoming data
 * @return the format of the outgoing data (an empty dataset)
 * @throws Exception if a problem occurs
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat)
		throws Exception {

	ArrayList<Attribute> outputAtts = new ArrayList<Attribute>();

	// Start with every attribute of the input format
	int numInputAtts = inputFormat.numAttributes();
	for (int idx = 0; idx < numInputAtts; idx++) {
		outputAtts.add(inputFormat.attribute(idx));
	}

	// The dictionaries of the lexicons are initialized only in the first batch
	if (!this.isFirstBatchDone()) {
		this.initializeDicts();
	}

	// Append the lexicon attributes, prefixed with the lexicon name
	for (ArffLexiconWordLabeller labeller : this.lexiconLabs) {
		for (Attribute lexAtt : labeller.getAttributes()) {
			if (lexAtt.isNumeric()) {
				outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name()));
				continue;
			}
			if (!lexAtt.isNominal()) {
				continue; // other attribute types are not copied
			}

			List<String> labels = new ArrayList<String>();
			for (int v = 0; v < lexAtt.numValues(); v++) {
				labels.add(lexAtt.value(v));
			}
			outputAtts.add(new Attribute(labeller.getLexiconName() + "-" + lexAtt.name(), labels));
		}
	}

	Instances result = new Instances(inputFormat.relationName(), outputAtts, 0);

	// set the class index
	result.setClassIndex(inputFormat.classIndex());

	return result;
}