Java Code Examples for weka.core.Attribute#getUpperNumericBound()

The following examples show how to use weka.core.Attribute#getUpperNumericBound(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LHSSampler.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code bounds[0..sampleSetSize]} with equally spaced split points.
 *
 * <p>The first slot receives the attribute's lower numeric bound, the last slot its upper
 * numeric bound, and every interior slot is the previous slot plus a constant width.
 *
 * @param bounds output array; must have at least {@code sampleSetSize + 1} elements
 * @param crntAttr attribute whose lower/upper numeric bounds delimit the domain
 * @param sampleSetSize number of subdomains to cut the domain into
 */
private static void uniBoundsGeneration(double[] bounds, Attribute crntAttr, int sampleSetSize){
	final int last = sampleSetSize;
	bounds[0] = crntAttr.getLowerNumericBound();
	bounds[last] = crntAttr.getUpperNumericBound();

	// width of one subdomain
	final double width = (bounds[last] - bounds[0]) / sampleSetSize;

	int idx = 1;
	while (idx < last) {
		bounds[idx] = bounds[idx - 1] + width;
		idx++;
	}
}
 
Example 2
Source File: DDSSampler.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code bounds[0..sampleSetSize]} with equally spaced split points.
 *
 * <p>The first slot receives the attribute's lower numeric bound, the last slot its upper
 * numeric bound, and every interior slot is the previous slot plus a constant width.
 *
 * @param bounds output array; must have at least {@code sampleSetSize + 1} elements
 * @param crntAttr attribute whose lower/upper numeric bounds delimit the domain
 * @param sampleSetSize number of subdomains to cut the domain into
 */
private static void uniBoundsGeneration(double[] bounds, Attribute crntAttr, int sampleSetSize){
	final int last = sampleSetSize;
	bounds[0] = crntAttr.getLowerNumericBound();
	bounds[last] = crntAttr.getUpperNumericBound();

	// width of one subdomain
	final double width = (bounds[last] - bounds[0]) / sampleSetSize;

	int idx = 1;
	while (idx < last) {
		bounds[idx] = bounds[idx - 1] + width;
		idx++;
	}
}
 
Example 3
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code bounds[0..sampleSetSize]} with equally spaced split points.
 *
 * <p>The first slot receives the attribute's lower numeric bound, the last slot its upper
 * numeric bound, and every interior slot is the previous slot plus a constant width.
 *
 * @param bounds output array; must have at least {@code sampleSetSize + 1} elements
 * @param crntAttr attribute whose lower/upper numeric bounds delimit the domain
 * @param sampleSetSize number of subdomains to cut the domain into
 */
private static void uniBoundsGeneration(double[] bounds, Attribute crntAttr, int sampleSetSize){
	final int last = sampleSetSize;
	bounds[0] = crntAttr.getLowerNumericBound();
	bounds[last] = crntAttr.getUpperNumericBound();

	// width of one subdomain
	final double width = (bounds[last] - bounds[0]) / sampleSetSize;

	int idx = 1;
	while (idx < last) {
		bounds[idx] = bounds[idx - 1] + width;
		idx++;
	}
}
 
Example 4
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code sampleSetSize} instances over {@code atts} and returns them in a dataset
 * named "InitialSetByLHS".
 *
 * <p>Procedure: (1) build {@code L} candidate sets with {@code generateOneSampleSet} and keep the
 * one with the largest {@code minDistForSet} value; (2) split every attribute's domain into
 * {@code sampleSetSize} subdomains; (3) for each instance and attribute, look up the subdomain
 * index the kept set assigns and take a value from that subdomain.
 *
 * <p>Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * @param atts the attributes to generate values for
 * @param sampleSetSize the number of instances to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated instances
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept so that
		//a set is selected even when no candidate's minDist exceeds the initial threshold of 0
		//(otherwise the generation loop below would dereference null)
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//for nominal attributes each slot stores the index of a nominal value, not a boundary
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to instance i for attribute j
			int slot = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lo = bounds[j][slot];
				double hi = bounds[j][slot+1];
				vals[j] = useMid ? (lo+hi)/2 : lo+((hi-lo)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][slot];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 5
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Fills {@code bounds[0..sampleSetSize]} with split points between the lower and upper numeric
 * bound of {@code crntAttr}, picking one of four generation modes ({@code howGen}):
 * <ul>
 *   <li>0 (default): equal-width steps of {@code pace};</li>
 *   <li>1: logarithmic steps, multiplying by 10 once per round of {@code step} points;</li>
 *   <li>2: multiply by 10 while the bound is below {@code pace}, then equal-width steps;</li>
 *   <li>3: interior points drawn at random between the two bounds (they are not sorted).</li>
 * </ul>
 *
 * @param bounds output array; must have at least {@code sampleSetSize + 1} elements
 * @param crntAttr attribute supplying the lower and upper numeric bounds
 * @param sampleSetSize number of subdomains to create
 */
private static void flexibleBoundsGeneration(double[] bounds, Attribute crntAttr, int sampleSetSize){
	int howGen = 0;//0 = equal-width division (default mode)
	int step, crntStep;
	double pace;
	
	bounds[0] = crntAttr.getLowerNumericBound();
	bounds[sampleSetSize] = crntAttr.getUpperNumericBound();

	//width of one subdomain under equal-width division
	pace = (bounds[sampleSetSize] - bounds[0])/sampleSetSize;
	//number of powers of ten: log10(upper/lower) when lower>1, otherwise log10(upper)
	crntStep = bounds[0]>1?(int)Math.log10(bounds[sampleSetSize] / bounds[0]):(int)Math.log10(bounds[sampleSetSize]);
	if(crntStep>0)
		step = sampleSetSize/crntStep;//num of points drawn after the multiplication of 10
	else
		step = 11;//anything larger than 10
	
	//choose the generation mode
	if(sampleSetSize<crntStep){
		howGen = 3;
	}else if(0<step && step <10)//each hierarchy has fewer than 10 points
		howGen = 1;
	else if((bounds[0]>1 && (int)Math.log10(pace/bounds[0])> BigStepPower) || 
			(bounds[0]<1 && (int)Math.log10(pace)> BigStepPower) )//a big first step
		howGen = 2;
	else
		howGen = 0;
	
	switch (howGen) {
		case 1://use log
			//remainder of sampleSetSize after dividing by the number of powers of ten
			int left = sampleSetSize%crntStep;
			//a zero lower bound cannot be scaled by multiplication, so redraw until it is non-zero
			while(bounds[0]==0)
				bounds[0]=uniRand.nextInt(10);
			//from here on crntStep is reused as the position inside the current round
			crntStep = 1;
			double theBound = bounds[sampleSetSize]/10;
			for(int j=1;j<sampleSetSize;j++){
				//step is the number of points per round
				//start a new round only while the previous bound is at most upper/10
				if(crntStep>=step && bounds[j-1]<=theBound)
					crntStep=0;
				
				if(crntStep==0)
					bounds[j] = bounds[j-step] * 10;//ten times the bound one round earlier
				else if(crntStep<step)
					bounds[j] = bounds[j-crntStep] * ((double)crntStep*10./((double)step+1.));
				else//(crntStep>=step)
					bounds[j] = bounds[j-crntStep] * ((double)crntStep*10./(double)(left+step+1));
				
				//a bound that reaches the upper bound is replaced by a random point just below it
				if(bounds[j]>=bounds[sampleSetSize]){
					bounds[j] = bounds[sampleSetSize]-Math.random()*pace;
					System.err.println("============Be careful!!!!=============");
				}
				crntStep++;
			}
			break;
		case 2://first log, then pace
			//for smaller than pace
			int count = 0;
			while(bounds[count]<pace && count<sampleSetSize-1){
				count++;
				bounds[count] = bounds[count-1]*10;
			}
			//for larger than pace
			pace = (bounds[sampleSetSize] - bounds[count])/(sampleSetSize-count);
			//note: the loop starts at count, so bounds[count] set above is overwritten
			for(int j=count;j<sampleSetSize;j++){
				bounds[j] = bounds[j-1] + pace;
			}
			break;
		case 3://randomly choices
			//here pace is the full width of the domain
			pace = bounds[sampleSetSize] - bounds[0];
			for(int j=1;j<sampleSetSize;j++){
				bounds[j] = bounds[0] + Math.random() * pace;
			}
			break;
		default:
			//equal-width division
			for(int j=1;j<sampleSetSize;j++){
				bounds[j] = bounds[j-1] + pace;
			}
			break;
	}
}
 
Example 6
Source File: LHSSampler.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code sampleSetSize} instances over {@code atts} and returns them in a dataset
 * named "InitialSetByLHS".
 *
 * <p>At current version, we assume all attributes are numeric attributes with bounds.
 *
 * <p>Let PACE be upper-lower DIVided by the sampleSetSize: every attribute's domain is split
 * into {@code sampleSetSize} equal-width subdomains. Of the {@code L} candidate sets built with
 * {@code generateOneSampleSet}, the one with the largest {@code minDistForSet} value decides
 * which subdomain each instance uses for each attribute.
 *
 * @param atts the attributes to generate values for
 * @param sampleSetSize the number of instances to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated instances
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept so that
		//a set is selected even when no candidate's minDist exceeds the initial threshold of 0
		//(otherwise the generation loop below would dereference null)
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to instance i for attribute j
			int slot = setWithMaxMinDist[j].get(i);
			double lo = bounds[j][slot];
			double hi = bounds[j][slot+1];
			vals[j] = useMid ? (lo+hi)/2 : lo+((hi-lo)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 7
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code sampleSetSize} instances over {@code atts} and returns them in a dataset
 * named "InitialSetByLHS".
 *
 * <p>Procedure: (1) build {@code L} candidate sets with {@code generateOneSampleSet} and keep the
 * one with the largest {@code minDistForSet} value; (2) split every attribute's domain into
 * {@code sampleSetSize} subdomains; (3) for each instance and attribute, look up the subdomain
 * index the kept set assigns and take a value from that subdomain.
 *
 * <p>Assumptions: (1) numeric attributes are continuous and have lower/upper bounds;
 * (2) nominal attributes have permutable domains.
 *
 * @param atts the attributes to generate values for
 * @param sampleSetSize the number of instances to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated instances
 */
public static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept so that
		//a set is selected even when no candidate's minDist exceeds the initial threshold of 0
		//(otherwise the generation loop below would dereference null)
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		if(crntAttr.isNumeric()){
			bounds[i][0] = crntAttr.getLowerNumericBound();
			bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
			pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound())/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++){
				bounds[i][j] = bounds[i][j-1] + pace;
			}
		}else{//crntAttr.isNominal()
			//for nominal attributes each slot stores the index of a nominal value, not a boundary
			if(crntAttr.numValues()>=sampleSetSize){
				//randomly select among the set
				for(int j=0;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
			}else{
				//first round-robin
				int lastPart = sampleSetSize%crntAttr.numValues();
				for(int j=0;j<sampleSetSize-lastPart;j++)
					bounds[i][j] = j%crntAttr.numValues();
				//then randomly select
				for(int j=sampleSetSize-lastPart;j<=sampleSetSize;j++)
					bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
			}
		}//nominal attribute
	}//get all subdomains
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to instance i for attribute j
			int slot = setWithMaxMinDist[j].get(i);
			if(atts.get(j).isNumeric()){
				double lo = bounds[j][slot];
				double hi = bounds[j][slot+1];
				vals[j] = useMid ? (lo+hi)/2 : lo+((hi-lo)*uniRand.nextDouble());
			}else{//isNominal()
				vals[j] = bounds[j][slot];
			}
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 8
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code sampleSetSize} instances over {@code atts} using logarithmically spaced
 * subdomains and returns them in a dataset named "InitialSetByLHS".
 *
 * <p>At current version, we assume all attributes are numeric attributes with bounds.
 *
 * <p>For each attribute the number of powers of ten is taken as {@code (int)log10(upper-lower)}.
 * Bounds are then multiplied by 10 once per round of {@code sampleSetSize / powers} points.
 * When the range spans less than one power of ten (the count is not positive), logarithmic
 * spacing is impossible and the attribute falls back to equal-width subdomains instead of
 * failing with an integer division by zero.
 *
 * <p>NOTE(review): when {@code sampleSetSize} is smaller than the number of powers of ten,
 * {@code step} is 0 and the generated interior bounds look degenerate; this is left unchanged
 * here and should be confirmed against the intended behaviour.
 *
 * @param atts the attributes to generate values for
 * @param sampleSetSize the number of instances to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated instances
 */
public static Instances getMultiDimContinuousLog(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept so that
		//a set is selected even when no candidate's minDist exceeds the initial threshold of 0
		//(otherwise the generation loop below would dereference null)
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize subdomains
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	int step, crntStep;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		crntStep = (int)Math.log10(bounds[i][sampleSetSize] - bounds[i][0]);
		if(crntStep<=0){
			//less than one power of ten: dividing by crntStep would throw ArithmeticException
			//(or yield a negative step), so use equal-width subdomains for this attribute
			double pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
			for(int j=1;j<sampleSetSize;j++)
				bounds[i][j] = bounds[i][j-1] + pace;
			continue;
		}
		step = sampleSetSize/crntStep;//num of points drawn after the multiplication of 10
		int left = sampleSetSize%crntStep;//remainder not covered by full rounds
		//a zero lower bound cannot be scaled by multiplication; nextInt(10) may itself return 0,
		//so keep drawing until the bound is non-zero
		while(bounds[i][0]==0)
			bounds[i][0]=uniRand.nextInt(10);
		//from here on crntStep is reused as the position inside the current round
		crntStep = 1;
		double theBound = bounds[i][sampleSetSize]/10;
		for(int j=1;j<sampleSetSize;j++){
			//start a new round only while the previous bound is at most upper/10
			if(crntStep>=step && bounds[i][j-1]<=theBound)
				crntStep=0;
			
			if(crntStep==0)
				bounds[i][j] = bounds[i][j-step] * 10;
			else if(crntStep<step)
				bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./((double)step+1.));
			else//(crntStep>=step)
				bounds[i][j] = bounds[i][j-crntStep] * ((double)crntStep*10./(double)(left+step+1));
			
			if(bounds[i][j]>=bounds[i][sampleSetSize])
				System.err.println("be careful!!!!");
			crntStep++;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to instance i for attribute j
			int slot = setWithMaxMinDist[j].get(i);
			double lo = bounds[j][slot];
			double hi = bounds[j][slot+1];
			vals[j] = useMid ? (lo+hi)/2 : lo+((hi-lo)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}
 
Example 9
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Fills {@code bounds[0..sampleSetSize]} with split points between the lower and upper numeric
 * bound of {@code crntAttr}, picking one of four generation modes ({@code howGen}):
 * <ul>
 *   <li>0 (default): equal-width steps of {@code pace};</li>
 *   <li>1: logarithmic steps, multiplying by 10 once per round of {@code step} points;</li>
 *   <li>2: multiply by 10 while the bound is below {@code pace}, then equal-width steps;</li>
 *   <li>3: interior points drawn at random between the two bounds (they are not sorted).</li>
 * </ul>
 *
 * @param bounds output array; must have at least {@code sampleSetSize + 1} elements
 * @param crntAttr attribute supplying the lower and upper numeric bounds
 * @param sampleSetSize number of subdomains to create
 */
private static void flexibleBoundsGeneration(double[] bounds, Attribute crntAttr, int sampleSetSize){
	int howGen = 0;//0 = equal-width division (default mode)
	int step, crntStep;
	double pace;
	
	bounds[0] = crntAttr.getLowerNumericBound();
	bounds[sampleSetSize] = crntAttr.getUpperNumericBound();

	//width of one subdomain under equal-width division
	pace = (bounds[sampleSetSize] - bounds[0])/sampleSetSize;
	//number of powers of ten: log10(upper/lower) when lower>1, otherwise log10(upper)
	crntStep = bounds[0]>1?(int)Math.log10(bounds[sampleSetSize] / bounds[0]):(int)Math.log10(bounds[sampleSetSize]);
	if(crntStep>0)
		step = sampleSetSize/crntStep;//num of points drawn after the multiplication of 10
	else
		step = 11;//anything larger than 10
	
	//choose the generation mode
	if(sampleSetSize<crntStep){
		howGen = 3;
	}else if(0<step && step <10)//each hierarchy has fewer than 10 points
		howGen = 1;
	else if((bounds[0]>1 && (int)Math.log10(pace/bounds[0])> BigStepPower) || 
			(bounds[0]<1 && (int)Math.log10(pace)> BigStepPower) )//a big first step
		howGen = 2;
	else
		howGen = 0;
	
	switch (howGen) {
		case 1://use log
			int left = sampleSetSize%crntStep;//remainder of sampleSetSize after dividing by the number of powers of ten (original comment was mis-encoded; presumably "number of points in the last round" - TODO confirm)
			//a zero lower bound cannot be scaled by multiplication, so redraw until it is non-zero
			while(bounds[0]==0)
				bounds[0]=uniRand.nextInt(10);
			//from here on crntStep is reused as the position inside the current round
			crntStep = 1;
			double theBound = bounds[sampleSetSize]/10;
			for(int j=1;j<sampleSetSize;j++){
				//step is the number of points per round
				//start a new round only while the previous bound is at most upper/10
				if(crntStep>=step && bounds[j-1]<=theBound)
					crntStep=0;
				
				if(crntStep==0)
					bounds[j] = bounds[j-step] * 10;//ten times the bound one round earlier
				else if(crntStep<step)
					bounds[j] = bounds[j-crntStep] * ((double)crntStep*10./((double)step+1.));
				else//(crntStep>=step)
					bounds[j] = bounds[j-crntStep] * ((double)crntStep*10./(double)(left+step+1));
				
				//a bound that reaches the upper bound is replaced by a random point just below it
				if(bounds[j]>=bounds[sampleSetSize]){
					bounds[j] = bounds[sampleSetSize]-Math.random()*pace;
					System.err.println("============Be careful!!!!=============");
				}
				crntStep++;
			}
			break;
		case 2://first log, then pace
			//for smaller than pace
			int count = 0;
			while(bounds[count]<pace && count<sampleSetSize-1){
				count++;
				bounds[count] = bounds[count-1]*10;
			}
			//for larger than pace
			pace = (bounds[sampleSetSize] - bounds[count])/(sampleSetSize-count);
			//note: the loop starts at count, so bounds[count] set above is overwritten
			for(int j=count;j<sampleSetSize;j++){
				bounds[j] = bounds[j-1] + pace;
			}
			break;
		case 3://randomly choices
			//here pace is the full width of the domain
			pace = bounds[sampleSetSize] - bounds[0];
			for(int j=1;j<sampleSetSize;j++){
				bounds[j] = bounds[0] + Math.random() * pace;
			}
			break;
		default:
			//equal-width division
			for(int j=1;j<sampleSetSize;j++){
				bounds[j] = bounds[j-1] + pace;
			}
			break;
	}
}
 
Example 10
Source File: LHSInitializer.java    From bestconf with Apache License 2.0 4 votes vote down vote up
/**
 * Generates {@code sampleSetSize} instances over {@code atts} and returns them in a dataset
 * named "InitialSetByLHS".
 *
 * <p>At current version, we assume all attributes are numeric attributes with bounds.
 *
 * <p>Let PACE be upper-lower DIVided by the sampleSetSize: every attribute's domain is split
 * into {@code sampleSetSize} equal-width subdomains. Of the {@code L} candidate sets built with
 * {@code generateOneSampleSet}, the one with the largest {@code minDistForSet} value decides
 * which subdomain each instance uses for each attribute.
 *
 * @param atts the attributes to generate values for
 * @param sampleSetSize the number of instances to generate
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return the generated instances
 */
public static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid){
	
	int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
	double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
	ArrayList<Integer>[] setWithMaxMinDist=null;
	//generate L sets of sampleSetSize points
	for(int i=0; i<L; i++){
		ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
		//compute the minimum distance minDist between any sample pair for each set
		crntMinDist = minDistForSet(setPerm);
		//select the set with the maximum minDist; the first candidate is always kept so that
		//a set is selected even when no candidate's minDist exceeds the initial threshold of 0
		//(otherwise the generation loop below would dereference null)
		if(setWithMaxMinDist==null || crntMinDist>maxMinDist){
			setWithMaxMinDist = setPerm;
			maxMinDist = crntMinDist;
		}
	}
	
	//generate and output the set with the maximum minDist as the result
	
	//first, divide the domain of each attribute into sampleSetSize equal subdomain
	double[][] bounds = new double[atts.size()][sampleSetSize+1];//sampleSetSize+1 to include the lower and upper bounds
	Iterator<Attribute> itr = atts.iterator();
	Attribute crntAttr;
	double pace;
	for(int i=0;i<bounds.length;i++){
		crntAttr = itr.next();
		
		bounds[i][0] = crntAttr.getLowerNumericBound();
		bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
		pace = (bounds[i][sampleSetSize] - bounds[i][0])/sampleSetSize;
		for(int j=1;j<sampleSetSize;j++){
			bounds[i][j] = bounds[i][j-1] + pace;
		}
	}
	
	//second, generate the set according to setWithMaxMinDist
	Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
	for(int i=0;i<sampleSetSize;i++){
		double[] vals = new double[atts.size()];
		for(int j=0;j<vals.length;j++){
			//index of the subdomain assigned to instance i for attribute j
			int slot = setWithMaxMinDist[j].get(i);
			double lo = bounds[j][slot];
			double hi = bounds[j][slot+1];
			vals[j] = useMid ? (lo+hi)/2 : lo+((hi-lo)*uniRand.nextDouble());
		}
		data.add(new DenseInstance(1.0, vals));
	}
	
	//third, return the generated points
	return data;
}