Java Code Examples for org.apache.lucene.util.OpenBitSet#clone()

The following examples show how to use org.apache.lucene.util.OpenBitSet#clone(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Validator.java    From winter with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the candidate lhs obtained by adding {@code extensionAttr} to {@code lhs}
 * for the dependency {@code lhs -> rhs}, unless one of the pruning rules applies.
 *
 * @param lhs           the current left-hand side; it is not modified (a clone is extended)
 * @param rhs           the right-hand-side attribute of the candidate
 * @param extensionAttr the attribute to add to the left-hand side
 * @return a fresh bitset containing {@code lhs} plus {@code extensionAttr},
 *         or {@code null} if the candidate was pruned
 */
private OpenBitSet extendWith(OpenBitSet lhs, int rhs, int extensionAttr) {
	// Triviality: AA->C cannot be valid, because A->C is invalid
	if (lhs.get(extensionAttr))
		return null;
	
	// Triviality: AC->C cannot be valid, because A->C is invalid
	if (rhs == extensionAttr)
		return null;
	
	// Pruning: If A->B, then AB->C cannot be minimal
	// TODO: this pruning is not used in the Inductor when inverting the negCover; so either it is useless here or it is useful in the Inductor?
	if (this.posCover.containsFdOrGeneralization(lhs, extensionAttr))
		return null;
	
	// Pruning: If B->C, then AB->C cannot be minimal
	if ((this.posCover.getChildren() != null)
			&& (this.posCover.getChildren()[extensionAttr] != null)
			&& this.posCover.getChildren()[extensionAttr].isFd(rhs))
		return null;
	
	// TODO: This clone() could be avoided when done externally
	OpenBitSet extendedLhs = lhs.clone();
	extendedLhs.set(extensionAttr);
	
	// TODO: Add more pruning here
	
	// Pruning: If A->C, then AB->C cannot be minimal.
	// if contains FD: element was a child before and has already been added to the next level
	// if contains Generalization: element cannot be minimal, because generalizations have already been validated
	boolean alreadyCovered = this.posCover.containsFdOrGeneralization(extendedLhs, rhs);
	return alreadyCovered ? null : extendedLhs;
}
 
Example 2
Source File: Inductor.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Specializes the positive cover with every non-FD in {@code nonFds}, walking the
 * levels from the highest index down to 0 and emptying each level once processed.
 *
 * @param nonFds the non-FDs grouped by level; its levels are cleared by this call
 */
public void updatePositiveCover(FDList nonFds) {
		// No explicit sort by cardinality is done here. Per the original author's note:
		// THE SORTING IS NOT NEEDED AS THE UCCSet SORTS THE NONUCCS BY LEVEL ALREADY
		
		Logger.getInstance().writeln("Inducing FD candidates ...");
		int level = nonFds.getFdLevels().size();
		while (--level >= 0) {
			// If this level has been trimmed during iteration, skip it
			if (level >= nonFds.getFdLevels().size())
				continue;
			
			List<OpenBitSet> levelLhss = nonFds.getFdLevels().get(level);
			for (OpenBitSet nonFdLhs : levelLhss) {
				// Every attribute outside the lhs is a candidate rhs
				OpenBitSet candidateRhss = nonFdLhs.clone();
				candidateRhss.flip(0, this.posCover.getNumAttributes());
				
				int rhsAttr = candidateRhss.nextSetBit(0);
				while (rhsAttr >= 0) {
					this.specializePositiveCover(nonFdLhs, rhsAttr, nonFds);
					rhsAttr = candidateRhss.nextSetBit(rhsAttr + 1);
				}
			}
			levelLhss.clear();
		}
	}
 
Example 3
Source File: Sampler.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Runs one more sampling round with a window distance increased by one: within each
 * cluster, records that are {@code windowDistance} positions apart are compared, and
 * every agree-set not yet in the negative cover is recorded.
 *
 * @param newNonFds         receives the newly found non-FDs
 * @param compressedRecords the records, indexed by record id
 */
public void runNext(FDList newNonFds, int[][] compressedRecords) {
	this.windowDistance++;
	int comparisons = 0;
	// Reused as the output buffer for every comparison; clones are stored, never this instance
	OpenBitSet equalAttrs = new OpenBitSet(this.posCover.getNumAttributes());
	
	int sizeBefore = newNonFds.size();
	for (Iterator<IntArrayList> clusterIt = this.clusters.iterator(); clusterIt.hasNext(); ) {
		IntArrayList cluster = clusterIt.next();
		
		// A cluster with no pair at the current distance is dropped
		if (cluster.size() <= this.windowDistance) {
			clusterIt.remove();
			continue;
		}
		
		for (int pos = 0; pos < (cluster.size() - this.windowDistance); pos++) {
			int leftId = cluster.getInt(pos);
			int rightId = cluster.getInt(pos + this.windowDistance);
			
			this.sampler.match(equalAttrs, compressedRecords[leftId], compressedRecords[rightId]);
			comparisons++;
			
			if (this.negCover.contains(equalAttrs))
				continue;
			
			OpenBitSet snapshot = equalAttrs.clone();
			this.negCover.add(snapshot);
			newNonFds.add(snapshot);
			
			this.memoryGuardian.memoryChanged(1);
			this.memoryGuardian.match(this.negCover, this.posCover, newNonFds);
		}
	}
	
	// Record this round's statistics
	this.numNewNonFds.add(newNonFds.size() - sizeBefore);
	this.numComparisons.add(comparisons);
}
 
Example 4
Source File: FDTreeElement.java    From winter with Apache License 2.0 5 votes vote down vote up
/**
 * Adds specializations for this node's rhs attributes that are not marked as FDs,
 * then recurses into all children so that they add their own specializations.
 *
 * @param lhs    the lhs of this node; bits are set and cleared again during the call,
 *               so it holds its original content on return
 * @param fdTree the tree that receives the specializations
 */
public void grow(OpenBitSet lhs, FDTree fdTree) {
	// Add specializations of all nodes and mark them as isFD, but if specialization exists, then it is invalid and should not be marked; only add specializations of nodes not marked as isFD!
	OpenBitSet rhs = this.rhsAttributes;
	
	// rhs attributes of this node that are not marked as FDs
	OpenBitSet invalidRhs = rhs.clone();
	invalidRhs.remove(this.rhsFds);
	
	// Add specializations that are not invalid
	if (invalidRhs.cardinality() > 0) {
		for (int attr = 0; attr < this.numAttributes; attr++) {
			boolean alreadyUsed = lhs.get(attr) || rhs.get(attr);
			if (!alreadyUsed) {
				lhs.set(attr);
				fdTree.addFunctionalDependencyIfNotInvalid(lhs, invalidRhs);
				lhs.clear(attr);
			}
		}
	}
	
	// Traverse children and let them add their specializations
	if (this.children == null)
		return;
	
	for (int attr = 0; attr < this.numAttributes; attr++) {
		FDTreeElement child = this.children[attr];
		if (child == null)
			continue;
		
		lhs.set(attr);
		child.grow(lhs, fdTree);
		lhs.clear(attr);
	}
}
 
Example 5
Source File: HyFD.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a positive cover by starting from the most general dependencies and
 * specializing it with every non-FD lhs in {@code negCover}.
 *
 * @param negCover the non-FD left-hand sides; the list is consumed (emptied) by this call
 * @return the resulting positive cover
 */
private FDTree calculatePositiveCover(ArrayList<OpenBitSet> negCover) {
		FDTree posCover = new FDTree(this.numAttributes, this.maxLhsSize);
		posCover.addMostGeneralDependencies();
		
		// Consume the negative cover back to front, always removing the last element
		for (int i = negCover.size() - 1; i >= 0; i--) {
			OpenBitSet lhs = negCover.remove(i);
			
			// Every attribute outside the lhs is a candidate rhs. Only the first numAttributes
			// bits may be flipped: OpenBitSet.size() reports the allocated capacity (a multiple
			// of 64), so flipping up to size() would also set padding bits that do not
			// correspond to any attribute and would then be used as rhs below.
			OpenBitSet fullRhs = lhs.clone();
			fullRhs.flip(0, this.numAttributes);
			
			for (int rhs = fullRhs.nextSetBit(0); rhs >= 0; rhs = fullRhs.nextSetBit(rhs + 1)) {
				// NOTE: skipping an lhs that is subsumed by one of the two previously processed
				// lhs (same rhs) was tried by the original author and is currently disabled
				this.memoryGuardian.memoryChanged(this.specializePositiveCover(posCover, lhs, rhs));
			}
			
			// NOTE: trimming the positive cover via this.memoryGuardian.match(posCover) is
			// currently disabled here
		}
		return posCover;
	}
 
Example 6
Source File: FDTreeElement.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Recursive step of the negative-cover maximization for this node.
 * First descends into all existing children, then, for every rhs marked in
 * {@code rhsFds}, checks each one-attribute extension of {@code currentLhs}; an
 * extension that does not refine the rhs is added to {@code invalidFds} and processed
 * recursively.
 *
 * @param currentPli    the pli belonging to {@code currentLhs}
 * @param currentLhs    the lhs of this node; modified during the call and restored before returning
 * @param numAttributes the number of attributes
 * @param rhsPlis       per-attribute data passed to {@code intersect} and {@code refines}
 * @param invalidFds    the tree that receives newly found non-FDs
 */
protected void maximizeNegativeRecursive(PositionListIndex currentPli, OpenBitSet currentLhs, int numAttributes, int[][] rhsPlis, FDTree invalidFds) {
	// Child plis are stored locally so the checks below can reuse them
	PositionListIndex[] childPlis = new PositionListIndex[numAttributes];
	
	// Traverse the tree depth-first, left-first; generate plis for children and pass them over
	if (this.children != null) {
		for (int attr = 0; attr < numAttributes; attr++) {
			FDTreeElement child = this.children[attr];
			if (child == null)
				continue;
			
			childPlis[attr] = currentPli.intersect(rhsPlis[attr]);
			
			currentLhs.set(attr);
			child.maximizeNegativeRecursive(childPlis[attr], currentLhs, numAttributes, rhsPlis, invalidFds);
			currentLhs.clear(attr);
		}
	}
	
	// On the way back, check all rhs-FDs that all their possible supersets are valid FDs; check with refines or, if available, with previously calculated plis
	//     which supersets to consider: add all attributes A with A notIn lhs and A notequal rhs;
	//         for newLhs->rhs check that no FdOrSpecialization exists, because it is invalid then; this check might be slower than the FD check on high levels but faster on low levels in particular in the root! this check is faster on the negative cover, because we look for a non-FD
	int rhs = this.rhsFds.nextSetBit(0);
	while (rhs >= 0) {
		// Candidate extension attributes: everything outside the lhs, except the rhs itself
		OpenBitSet extensions = currentLhs.clone();
		extensions.flip(0, numAttributes);
		extensions.clear(rhs);
		
		for (int ext = extensions.nextSetBit(0); ext >= 0; ext = extensions.nextSetBit(ext + 1)) {
			currentLhs.set(ext);
			
			// Compute the pli of the extended lhs lazily and cache it
			PositionListIndex extendedPli = childPlis[ext];
			if (extendedPli == null) {
				extendedPli = currentPli.intersect(rhsPlis[ext]);
				childPlis[ext] = extendedPli;
			}
			
			// If a superset is a non-FD, mark this as not rhsFD, add the superset as a new node, filterGeneralizations() of the new node, call maximizeNegative() on the new supersets node
			//     if the superset node is in a right node of the tree, it will be checked anyways later; hence, only check supersets that are left or in the same tree path
			boolean supersetIsFd = extendedPli.refines(rhsPlis[rhs]);
			if (!supersetIsFd) {
				this.rhsFds.clear(rhs);
				
				FDTreeElement added = invalidFds.addFunctionalDependency(currentLhs, rhs);
				//invalidFds.filterGeneralizations(currentLhs, rhs); // TODO: test effect
				added.maximizeNegativeRecursive(extendedPli, currentLhs, numAttributes, rhsPlis, invalidFds);
			}
			
			currentLhs.clear(ext);
		}
		
		rhs = this.rhsFds.nextSetBit(rhs + 1);
	}
}
 
Example 7
Source File: FDTree.java    From winter with Apache License 2.0 4 votes vote down vote up
/**
 * Maximizes the negative cover stored in this tree, starting at the root.
 * The method runs three phases in order: (1) it descends depth-first into every
 * existing child, (2) it adds the invalid root FDs, and (3) for every rhs still
 * marked in {@code rhsFds} it tries each one-attribute lhs and, where that lhs is
 * (or must be treated as) a non-FD, adds it to this tree and processes it recursively.
 *
 * @param plis         one pli per attribute; its size is used as the number of attributes
 * @param invertedPlis per-attribute data forwarded to the recursive step and passed to
 *                     {@code refines}, indexed by rhs
 * @param numRecords   forwarded to {@code addInvalidRootFDs}
 */
public void maximizeNegative(List<PositionListIndex> plis, int[][] invertedPlis, int numRecords) {
	// Maximizing negative cover is better than maximizing positive cover, because we do not need to check minimality; inversion does this automatically, i.e., generating a non-FD that creates a valid, non-minimal FD is not possible
	int numAttributes = plis.size();
	// Shared, mutable lhs: each attribute is set before descending and cleared afterwards
	OpenBitSet currentLhs = new OpenBitSet(numAttributes);
	
	// Traverse the tree depth-first, left-first
	if (this.getChildren() != null) {
		for (int attr = 0; attr < numAttributes; attr++) {
			if (this.getChildren()[attr] != null) {
				currentLhs.set(attr);
				this.getChildren()[attr].maximizeNegativeRecursive(plis.get(attr), currentLhs, numAttributes, invertedPlis, this);
				currentLhs.clear(attr);
			}
		}
	}

	// Add invalid root FDs {} -/-> rhs to negative cover, because these are seeds for not yet discovered non-FDs
	this.addInvalidRootFDs(plis, numRecords); // TODO: These FDs make the search complex again :-(

	// On the way back, check all rhs-FDs that all their possible supersets are valid FDs; check with refines or, if available, with previously calculated plis
	//     which supersets to consider: add all attributes A with A notIn lhs and A notequal rhs; 
	//         for newLhs->rhs check that no FdOrSpecialization exists, because it is invalid then; this check might be slower than the FD check on high levels but faster on low levels in particular in the root! this check is faster on the negative cover, because we look for a non-FD
	for (int rhs = this.rhsFds.nextSetBit(0); rhs >= 0; rhs = this.rhsFds.nextSetBit(rhs + 1)) {
		// Candidate extensions: all attributes not in currentLhs, except the rhs itself
		// (presumably currentLhs is empty here, since every set() above is paired with a clear() — TODO confirm the recursive calls restore it)
		OpenBitSet extensions = currentLhs.clone();
		extensions.flip(0, numAttributes);
		extensions.clear(rhs);
		
		// If a superset is a non-FD, mark this as not rhsFD, add the superset as a new node, filterGeneralizations() of the new node, call maximizeNegative() on the new supersets node
		//     if the superset node is in a right node of the tree, it will be checked anyways later; hence, only check supersets that are left or in the same tree path
		for (int extensionAttr = extensions.nextSetBit(0); extensionAttr >= 0; extensionAttr = extensions.nextSetBit(extensionAttr + 1)) {
			currentLhs.set(extensionAttr);
			
			// The tree lookup is evaluated first; refines() is only called when the lookup returns false
			if (this.containsFdOrSpecialization(currentLhs, rhs) || !plis.get(extensionAttr).refines(invertedPlis[rhs])) { // Otherwise, it must be false and a specialization is already contained; Only needed in root node, because we already filtered generalizations of other nodes and use a depth-first search that always removes generalizations when a new node comes in!
				// Clearing the bit at the current position does not disturb the outer nextSetBit(rhs + 1) iteration
				this.rhsFds.clear(rhs);
				
				FDTreeElement newElement = this.addFunctionalDependency(currentLhs, rhs);
				//this.filterGeneralizations(currentLhs, rhs); // TODO: test effect
				newElement.maximizeNegativeRecursive(plis.get(extensionAttr), currentLhs, numAttributes, invertedPlis, this);
			}
			// Restore the shared lhs before trying the next extension
			currentLhs.clear(extensionAttr);
		}
	}
}