Java Code Examples for gnu.trove.set.hash.THashSet#add()

The following examples show how to use gnu.trove.set.hash.THashSet#add(). You can vote up the examples you find useful or vote down those you don't, and follow the links above each example to the original project or source file. You may also check out the related API usage on the sidebar.
Example 1
Source File: PartitionEquivalences.java    From metanome-algorithms with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a partition unless its exact index set was already observed or a
 * similar partition is already present. New partitions are bucketed first by
 * hash number, then by size, so that equivalence checks only compare
 * partitions that could possibly be equivalent.
 *
 * @param partition the partition to register
 */
public void addPartition(EquivalenceManagedPartition partition) {
	if (!this.observedPartitions.contains(partition.getIndices()) && !this.containsSimilarPartition(partition)) {
		this.observedPartitions.add(partition.getIndices());
		long hashNumber = partition.getHashNumber();
		// String.format boxes primitives itself; no explicit Long/Integer.valueOf needed.
		System.out.println(String.format("Partition[%s]\t%d\tSize: %d", partition.getIndices(), hashNumber, partition.size()));
		partitionHashes.putIfAbsent(hashNumber, new TIntObjectHashMap<THashSet<EquivalenceManagedPartition>>());
		// Fetch the size->group map once instead of re-querying partitionHashes for every step.
		TIntObjectHashMap<THashSet<EquivalenceManagedPartition>> groupsBySize = partitionHashes.get(hashNumber);
		groupsBySize.putIfAbsent(partition.size(), new THashSet<EquivalenceManagedPartition>());
		THashSet<EquivalenceManagedPartition> partitionGroup = groupsBySize.get(partition.size());

		if (partitionGroup.isEmpty()) {
			partitionGroup.add(partition);
		} else {
			// At least one partition shares this hash and size; check for real equivalences.
			checkPossibleEquivalences(partitionGroup, partition);
		}
	}
}
 
Example 2
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Collects all maximal proper subsets of {@code lhs} — each produced by
 * removing a single column — that have no recorded observation yet.
 *
 * @param lhs   the column combination whose subsets are generated
 * @param order supplies the column iteration order (highest distinct count first)
 * @return the unobserved one-column-removed subsets of {@code lhs}
 */
public THashSet<ColumnCollection> getUncheckedMaximalSubsets(ColumnCollection lhs, ColumnOrder order) {
	THashSet<ColumnCollection> uncheckedMaximalSubsets = new THashSet<>();
	for (int columnIndex : order.getOrderHighDistinctCount(lhs)) {
		ColumnCollection subsetIndices = lhs.removeColumnCopy(columnIndex);
		if (!this.containsKey(subsetIndices)) {
			uncheckedMaximalSubsets.add(subsetIndices);
		}
	}
	return uncheckedMaximalSubsets;
}
 
Example 3
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the maximal proper subsets of {@code lhs} (one column removed) that
 * are either unobserved or currently marked as candidate minimal dependencies.
 *
 * @param lhs   the column combination whose subsets are generated
 * @param order supplies the column iteration order (highest distinct count first)
 * @return the unchecked or candidate subsets of {@code lhs}
 */
public THashSet<ColumnCollection> getUncheckedOrCandidateMaximalSubsets(ColumnCollection lhs, ColumnOrder order) {
	THashSet<ColumnCollection> result = new THashSet<>();

	// Subsets must keep at least 2 columns, so lhs needs more than 2.
	if (lhs.cardinality() > 2) {
		for (int droppedColumn : order.getOrderHighDistinctCount(lhs)) {
			ColumnCollection subset = lhs.removeColumnCopy(droppedColumn);
			boolean unobserved = !this.containsKey(subset);
			if (unobserved || this.get(subset) == Observation.CANDIDATE_MINIMAL_DEPENDENCY) {
				result.add(subset);
			}
		}
	}
	return result;
}
 
Example 4
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Generates every maximal proper subset of {@code lhs} by dropping one column
 * at a time, provided {@code lhs} has more than two columns.
 *
 * @param lhs   the column combination whose subsets are generated
 * @param order supplies the column iteration order (highest distinct count first)
 * @return all one-column-removed subsets of {@code lhs}
 */
public THashSet<ColumnCollection> getMaximalSubsets(ColumnCollection lhs, ColumnOrder order) {
	THashSet<ColumnCollection> subsets = new THashSet<>();
	// Subsets of a 2-column (or smaller) lhs would have fewer than 2 columns; skip them.
	if (lhs.cardinality() <= 2) {
		return subsets;
	}
	for (int droppedColumn : order.getOrderHighDistinctCount(lhs)) {
		subsets.add(lhs.removeColumnCopy(droppedColumn));
	}
	return subsets;
}
 
Example 5
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Generates minimal proper supersets of {@code lhs} — each produced by adding a
 * single column outside of lhs and the RHS — that are either unobserved or
 * currently marked as candidate maximal non-dependencies.
 *
 * @param lhs      the column combination to extend
 * @param rhsIndex the RHS column, excluded from extension
 * @param order    supplies the column iteration order (lowest distinct count first)
 * @return the unchecked or candidate supersets of {@code lhs}
 */
public THashSet<ColumnCollection> getUncheckedOrCandidateMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) {
	THashSet<ColumnCollection> supersets = new THashSet<>();
	// Columns eligible for extension: everything outside lhs and the RHS column.
	ColumnCollection extensionColumns = lhs.setCopy(rhsIndex).complement();
	for (int addedColumn : order.getOrderLowDistinctCount(extensionColumns)) {
		ColumnCollection superset = lhs.setCopy(addedColumn);
		boolean unobserved = !this.containsKey(superset);
		if (unobserved || this.get(superset) == Observation.CANDIDATE_MAXIMAL_NON_DEPENDENCY) {
			supersets.add(superset);
		}
	}
	return supersets;
}
 
Example 6
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Generates minimal proper supersets of {@code lhs} (one column added, excluding
 * the RHS column) that have no recorded observation yet.
 *
 * @param lhs      the column combination to extend
 * @param rhsIndex the RHS column, excluded from extension
 * @param order    supplies the column iteration order (lowest distinct count first)
 * @return the unobserved one-column-added supersets of {@code lhs}
 */
public THashSet<ColumnCollection> getUncheckedMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) {
	THashSet<ColumnCollection> supersets = new THashSet<>();
	ColumnCollection extensionColumns = lhs.setCopy(rhsIndex).complement();
	for (int addedColumn : order.getOrderLowDistinctCount(extensionColumns)) {
		ColumnCollection superset = lhs.setCopy(addedColumn);
		if (this.containsKey(superset)) {
			continue; // already observed
		}
		supersets.add(superset);
	}
	return supersets;
}
 
Example 7
Source File: Observations.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Generates every minimal proper superset of {@code lhs} by adding one column
 * at a time, excluding the RHS column.
 *
 * @param lhs      the column combination to extend
 * @param rhsIndex the RHS column, excluded from extension
 * @param order    supplies the column iteration order (lowest distinct count first)
 * @return all one-column-added supersets of {@code lhs}
 */
public THashSet<ColumnCollection> getMinimalSupersets(ColumnCollection lhs, int rhsIndex, ColumnOrder order) {
	THashSet<ColumnCollection> supersets = new THashSet<>();
	ColumnCollection extensionColumns = lhs.setCopy(rhsIndex).complement();
	for (int addedColumn : order.getOrderLowDistinctCount(extensionColumns)) {
		supersets.add(lhs.setCopy(addedColumn));
	}
	return supersets;
}
 
Example 8
Source File: Dependencies.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Filters the given subsets down to those already represented in this
 * dependency structure.
 *
 * @param subsets candidate column combinations
 * @return the subsets for which {@code isRepresented} holds
 */
public THashSet<ColumnCollection> getPrunedSubsets(THashSet<ColumnCollection> subsets) {
	THashSet<ColumnCollection> represented = new THashSet<>();
	for (ColumnCollection candidate : subsets) {
		if (!this.isRepresented(candidate)) {
			continue; // drop subsets that are not represented
		}
		represented.add(candidate);
	}
	return represented;
}
 
Example 9
Source File: NonDependencies.java    From metanome-algorithms with Apache License 2.0 5 votes vote down vote up
/**
 * Filters the given supersets down to those already represented in this
 * non-dependency structure.
 *
 * @param supersets candidate column combinations
 * @return the supersets for which {@code isRepresented} holds
 */
public THashSet<ColumnCollection> getPrunedSupersets(THashSet<ColumnCollection> supersets) {
	THashSet<ColumnCollection> represented = new THashSet<>();
	for (ColumnCollection candidate : supersets) {
		if (!this.isRepresented(candidate)) {
			continue; // drop supersets that are not represented
		}
		represented.add(candidate);
	}
	return represented;
}
 
Example 10
Source File: DICT.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Converts a file in BMES (Begin/Middle/End/Single) character-tagging format
 * into a word dictionary and writes it to {@code dicfile}. Each input line holds
 * a character and its tag separated by whitespace; characters are accumulated
 * until an 'E' or 'S' tag closes the current word.
 *
 * @param file    path of the BMES-tagged input file (UTF-8)
 * @param dicfile path of the dictionary output file
 * @throws IOException if reading the input or writing the dictionary fails
 */
public static void BMES2DICT(String file, String dicfile) throws UnsupportedEncodingException,
FileNotFoundException, IOException {

	THashSet<String> dict = new THashSet<String>();
	// try-with-resources: the original never closed the reader and leaked it on exceptions
	try (BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf8"))) {
		String line;
		StringBuilder sb = new StringBuilder();
		while ((line = bfr.readLine()) != null) {
			if (line.length() == 0)
				continue;

			String[] toks = line.split("\\s+");
			String label = toks[1];
			String w = toks[0];
			if (w.equals(" ")) { // special handling for the space character
				if (sb.length() > 0) {
					dict.add(sb.toString());
					sb = new StringBuilder();
				}
				continue;
			}
			sb.append(w);
			// 'E' ends a multi-character word, 'S' is a single-character word
			if (label.equals("E") || label.equals("S")) {
				dict.add(sb.toString());
				sb = new StringBuilder();
			}
		}
	}
	MyCollection.write(dict, dicfile);
}
 
Example 11
Source File: IterativeConditionalAnalysis.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects the significant probes/genes reported by the previous iteration's
 * FDR output file. The file name depends on the configured FDR method.
 *
 * @param origOutputDir    base output directory containing the Iteration folders
 * @param currentIteration the current iteration; results of iteration-1 are read
 * @param fdr              the FDR threshold used in the output file name
 * @return the probe/gene identifiers found (empty set if the file does not exist)
 * @throws IOException if reading the iteration file fails
 */
private THashSet<String> collectEQTLProbes(String origOutputDir, int currentIteration, double fdr) throws IOException {

	THashSet<String> output = new THashSet<String>();
	// Common file-name prefix; only the suffix differs per FDR method.
	String prefix = origOutputDir + "/Iteration" + (currentIteration - 1) + "/eQTLProbesFDR" + fdr;
	String iterationFile = prefix + "-ProbeLevel.txt.gz";
	if (m_settings.fdrType.equals(FDR.FDRMethod.FULL)) {
		iterationFile = prefix + ".txt.gz";
	} else if (m_settings.fdrType.equals(FDR.FDRMethod.SNPLEVEL)) {
		iterationFile = prefix + "-SNPLevel.txt.gz";
	} else if (m_settings.fdrType.equals(FDR.FDRMethod.GENELEVEL)) {
		iterationFile = prefix + "-GeneLevel.txt.gz";
	}

	System.out.println("Trying to collect genes/probes from: " + iterationFile);
	if (Gpio.exists(iterationFile)) {
		TextFile tf = new TextFile(iterationFile, TextFile.R);
		try {
			tf.readLineElems(TextFile.tab); // skip the header line
			String[] elems = tf.readLineElems(TextFile.tab);
			while (elems != null) {
				// NOTE(review): column 4 is assumed to hold the probe/gene id — confirm against file format.
				output.add(elems[4]);
				elems = tf.readLineElems(TextFile.tab);
			}
		} finally {
			tf.close(); // original leaked the file handle
		}
		System.out.println("Iteration " + (currentIteration - 1) + " has " + output.size() + " significant probes.");
	}

	return output;
}
 
Example 12
Source File: DFDMiner.java    From metanome-algorithms with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the next seeds for the given RHS column: the minimal column
 * combinations that hit the complement of every maximal non-dependency
 * (a hitting-set construction), minus the already-known minimal dependencies.
 *
 * @param currentRHSIndex index of the RHS column being processed
 * @return a stack of seeds still worth exploring (may be empty)
 */
private Stack<Seed> nextSeeds(int currentRHSIndex) {
	THashSet<ColumnCollection> deps = new THashSet<>();
	ArrayList<ColumnCollection> currentMaximalNonDependencies = maximalNonDependencies.getLHSForRHS(currentRHSIndex);
	HashSet<ColumnCollection> currentMinimalDependencies = new HashSet<>(minimalDependencies.getLHSForRHS(currentRHSIndex));
	// Scratch list, refilled and cleared once per maximal non-dependency.
	ArrayList<ColumnCollection> newDeps = new ArrayList<>();

	for (ColumnCollection maximalNonDependency : currentMaximalNonDependencies) {
		// Columns NOT in this maximal non-dependency (and not the RHS itself):
		// every seed must contain at least one of them.
		ColumnCollection complement = maximalNonDependency.setCopy(currentRHSIndex).complement();
		if (deps.isEmpty()) {
			// First non-dependency: seed with each single column of the complement.
			ColumnCollection emptyColumnIndices = new ColumnCollection(numberOfColumns);
			for (int complementColumnIndex : complement.getSetBits()) {
				deps.add(emptyColumnIndices.setCopy(complementColumnIndex));
			}
		} else {
			// Cross existing seeds with the complement's columns...
			for (ColumnCollection dep : deps) {
				for (int setBit : complement.getSetBits()) {
					newDeps.add(dep.setCopy(setBit));
				}
			}
			// ...then keep only the minimal combinations to bound the set size.
			ArrayList<ColumnCollection> minimizedNewDeps = minimizeSeeds(newDeps);
			deps.clear();
			deps.addAll(minimizedNewDeps);
			newDeps.clear();
		}
	}

	// Return only elements that aren't already covered by the minimal dependencies.
	Stack<Seed> remainingSeeds = new Stack<>();
	deps.removeAll(currentMinimalDependencies);
	for (ColumnCollection remainingSeed : deps) {
		remainingSeeds.push(new Seed(remainingSeed));
	}

	return remainingSeeds;
}
 
Example 13
Source File: RLSeg.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Scans segmented tokens for runs of unknown words, verifies each run via a
 * web search ({@code getNewWords}), and updates the temporary dictionary and
 * the "not a word" dictionary accordingly.
 *
 * @param toks segmented tokens; may be null (treated as nothing to do)
 * @return the number of unknown tokens encountered
 * @throws IOException if writing to the output writers fails
 */
int update(String[] toks) throws IOException {
	if(toks==null)
		return 0;
	THashSet<String> newdict = new THashSet<String>();
	String nowords = "";
	int count = 0;
	for(int i=0;i<toks.length;i++){// extract the longest substrings made of unknown words
		if(Chars.isLetterOrDigitOrPunc(toks[i]))
			continue;

		if(!dict.contains(toks[i])&&!tempdict.contains(toks[i])){
			nowords += "" + toks[i];
			count++;
		}else{
			// A known token ends the current unknown run; record it.
			if(nowords.length()>0){
				System.out.println(nowords);
				newdict.add(nowords.trim());
				nowords = "";
			}
		}
	}
	// NOTE(review): a run of unknown words at the very end of toks is never
	// added to newdict (nowords is not flushed after the loop) — confirm
	// whether this is intentional.

	TObjectHashIterator<String> it = newdict.iterator();
	while(it.hasNext()){
		String s = it.next();
		if(nodict.contains(s))
			continue;
		System.out.println("搜索: "+s);
		// Query the search engine for candidate new words inside s.
		THashSet<String> sset = getNewWords(s);
		if(sset==null||sset.size()==0)
			continue;
		System.out.println(sset);
		tempdict.addAll(sset);
		// s itself was not confirmed as a word: remember it as a non-word.
		if(!sset.contains(s)&&!nodict.contains(s)){
			nodict.add(s);
			bwNo.write(s);
			bwNo.write("\n");
		}

	}
	bwNew.flush();
	bwNo.flush();
	return count;
}
 
Example 14
Source File: RLSeg.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Searches the web for the string {@code s}, tags the returned snippet text,
 * and extracts frequent unknown words as new dictionary candidates.
 *
 * @param s the candidate string to search for; empty input yields null
 * @return the set of newly found words, or null if the input is empty or the
 *         search returned nothing
 * @throws IOException if the search or writing to the output writer fails
 */
public THashSet<String> getNewWords(String s) throws IOException {
	if(s.length()==0)
		return null;
	THashSet<String> newset = new THashSet<String>();
	HashMap<String,Float> map = new HashMap<String, Float>();
	String q = genQuery(s);
	String res = SearchByBaidu.search(q);
	if(res.length()==0)
		return null;

	String[] words = tag.tag2Array(res);

	// Count occurrences of each unknown word (length >= 2) in the tagged result.
	for(int i=0;i<words.length;i++){
		String w = words[i];
		if(w.length()<2||dict.contains(w)||tempdict.contains(w))
			continue;
		//				if(dict.contains(words[i]))
		//					continue;
		if(map.containsKey(w))
			map.put(w, map.get(w)+1);
		else
			map.put(w, 1f);
	}
	//			Set<Entry<String, Float>> set = map.entrySet();
	//			for(Entry e:set){
	//				e.setValue((Float) e.getValue()/words.length);
	//			}
	List<Entry> list = MyCollection.sort(map);



	// Occurrences of the query string itself in the raw result.
	int num = getOccur(res, s);

	// Frequency threshold: proportional to the query's occurrence count,
	// but never below 50. NOTE(review): 'prop' is a field whose value is not
	// visible here — confirm its intended scale.
	float thres = num*prop;
	thres = thres<50?50:thres;
	for(Entry e:list){
		String ss = (String) e.getKey();
		if((Float) e.getValue()>thres&&ss.length()>1&&!dict.contains(ss)&&!tempdict.contains(ss)){				
			newset.add(ss);
			bwNew.write(ss);
			bwNew.write("\n");
		}

	}

	// "快照" ("snapshot") is boilerplate from the search result pages, not a word.
	newset.remove("快照");
	return newset;
}
 
Example 15
Source File: TriTyperGeneticalGenomicsDataset.java    From systemsgenetics with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a combined genotype + expression dataset: loads genotype metadata,
 * determines which expression samples are coupled to included genotype
 * individuals, loads the expression data for those samples, and optionally
 * loads and prunes a covariate matrix.
 *
 * @param settings           dataset locations and analysis options
 * @param pathwayDefinitions optional pathway definitions passed to the expression data
 * @param displayWarnings    whether the loaders should print warnings
 * @throws IOException if any of the data files cannot be read
 * @throws Exception   propagated from the underlying loaders
 */
public TriTyperGeneticalGenomicsDataset(TriTyperGeneticalGenomicsDatasetSettings settings, Pair<List<String>, List<List<String>>> pathwayDefinitions, boolean displayWarnings) throws IOException, Exception {

		this.settings = settings;

		settings.genotypeLocation = Gpio.formatAsDirectory(settings.genotypeLocation);

		// Default expression file lives next to the genotype data.
		if (settings.expressionLocation == null) {
			settings.expressionLocation = settings.genotypeLocation + "ExpressionData.txt";
		}

		// load the genotype metadata
		genotypeData = new TriTyperGenotypeData();
		genotypeData.displayWarnings = displayWarnings;
		genotypeData.load(settings.genotypeLocation, settings.snpmapFileLocation, settings.snpFileLocation);
		THashSet<String> includedExpressionIndividuals = new THashSet<String>();
		Boolean[] isIncluded = genotypeData.getIsIncluded();

		// preload the sample coupling file
		loadCouplings();

		// determine which expression samples to include: only those coupled to
		// an included genotype individual (-9 marks an unknown individual)
		Set<Entry<String, String>> entries = genotypeToExpressionCouplings.entrySet();
		for (Entry<String, String> entry : entries) {
			String genotypeIndividual = entry.getKey();
			Integer genotypeIndividualId = genotypeData.getIndividualId(genotypeIndividual);

			if (genotypeIndividualId != -9 && isIncluded[genotypeIndividualId] != null && isIncluded[genotypeIndividualId]) {
				includedExpressionIndividuals.add(entry.getValue());
			}
		}

		if (includedExpressionIndividuals.isEmpty()) {
			// Fixed typo in the user-facing error message ("zPlease" -> "Please").
			System.err.println("ERROR: none of the expression samples will be included with your current settings.\n" +
					"Please check the links between genotype and gene expression samples and/or your PhenotypeInformation.txt");
			System.exit(-1);
		}

		// load the expression data
		expressionData = new TriTyperExpressionData();
		expressionData.displayWarnings = displayWarnings;
		expressionData.confineToProbes(settings.tsProbesConfine);
		expressionData.setConfineToProbesThatMapToAnyChromosome(settings.confineProbesToProbesMappingToAnyChromosome);
		expressionData.setConfineToProbesThatMapToChromosome(settings.confineProbesToProbesThatMapToChromosome);
		expressionData.setIncludeIndividuals(includedExpressionIndividuals);
		expressionData.setPathwayDefinitions(pathwayDefinitions);
		expressionDataLoadedCorrectly = expressionData.load(settings.expressionLocation, settings.probeannotation, settings.expressionplatform, (settings.cisAnalysis && settings.transAnalysis));
		pruneGenotypeToExpressionCouplings();

		if (settings.covariateFile != null && Gpio.exists(settings.covariateFile)) {
			// load covariates..
			System.out.println("Loading covariates: " + settings.covariateFile);
			HashSet<String> individualSet = new HashSet<String>();
			individualSet.addAll(Arrays.asList(expressionData.getIndividuals()));
			covariates = new DoubleMatrixDataset<String, String>(settings.covariateFile, null, individualSet);

			if (covariates.colObjects.isEmpty()) {
				// try the transpose: covariate files may have samples on rows instead of columns
				System.out.println("Could not find matching sample identifiers between covariate file and expression file.\nTransposing your covariate file.");
				covariates = new DoubleMatrixDataset<String, String>(settings.covariateFile, individualSet);
				if (covariates.rowObjects.isEmpty()) {
					System.err.println("Could not find matching samples between expression data and covariate data.");
					System.exit(-1);
				} else {
					covariates.transposeDataset(); // put the covariates on the rows, samples on the columns
					covariates.recalculateHashMaps();
				}
			}

			covariates.removeColumnsWithNaNs();
			covariates.recalculateHashMaps();
			if (covariates.colObjects.isEmpty()) {
				System.err.println("ERROR: after removing samples with NaN values, no covariates remain");
				System.exit(-1);
			}

			System.out.println(covariates.rowObjects.size() + " covariates loaded for " + covariates.colObjects.size() + " samples");

			// remove expression samples without covariates, and reorder expression data
			expressionData.pruneAndReorderSamples(covariates.colObjects);

			// prune expression dataset to samples having covariates
			loadCouplings();
			pruneGenotypeToExpressionCouplings();
		}

	}