pitt.search.semanticvectors.FlagConfig Java Examples

The following examples show how to use pitt.search.semanticvectors.FlagConfig. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BeagleVectorSearcher.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates a searcher whose query vector is produced by
 * {@code BeagleCompoundVecBuilder.getNGramQueryVector} from the given query terms.
 *
 * @param queryVecStore Vector store to use for query generation.
 * @param searchVecStore The vector store to search.
 * @param luceneUtils LuceneUtils object to use for query weighting. (May be null.)
 * @param flagConfig Flag configuration handed to the superclass and to the n-gram vector builder.
 * @param queryTerms Terms that will be parsed into a query expression.
 * @throws ZeroVectorException if the generated query vector is a zero vector.
 */
public BeagleVectorSearcher(
    VectorStore queryVecStore,
    VectorStore searchVecStore,
    LuceneUtils luceneUtils,
    FlagConfig flagConfig,
    String[] queryTerms) throws ZeroVectorException {
  super(queryVecStore, searchVecStore, luceneUtils, flagConfig);

  BeagleCompoundVecBuilder ngramBuilder = new BeagleCompoundVecBuilder(flagConfig);
  this.queryVector = new RealVector(ngramBuilder.getNGramQueryVector(queryVecStore, queryTerms));

  // A zero query vector cannot be compared against anything, so fail fast.
  if (this.queryVector.isZeroVector()) {
    throw new ZeroVectorException("Query vector is zero ... no results.");
  }
}
 
Example #2
Source File: VectorUtilsTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Orthogonalizes four 3-dimensional real vectors and checks that the first three end up
 * with unit self-overlap and zero mutual overlap, while the fourth degenerates to NaN.
 */
@Test
public void testOrthogonalizeVectors() {
  FlagConfig flagConfig = FlagConfig.getFlagConfig(null);
  flagConfig.setDimension(3);

  ArrayList<Vector> list = new ArrayList<Vector>();
  list.add(new RealVector(new float[] {1, 2, 1}));
  list.add(new RealVector(new float[] {2, 3, 1}));
  list.add(new RealVector(new float[] {2, 1, 1}));
  list.add(new RealVector(new float[] {2, 1, 5}));

  VectorUtils.orthogonalizeVectors(list);

  // Each of the first three vectors has unit overlap with itself ...
  assertEquals(1.0, list.get(0).measureOverlap(list.get(0)), TOL);
  assertEquals(1.0, list.get(1).measureOverlap(list.get(1)), TOL);
  assertEquals(1.0, list.get(2).measureOverlap(list.get(2)), TOL);
  // ... and zero overlap with the others.
  assertEquals(0, list.get(0).measureOverlap(list.get(1)), TOL);
  assertEquals(0, list.get(0).measureOverlap(list.get(2)), TOL);
  assertEquals(0, list.get(1).measureOverlap(list.get(2)), TOL);

  // If we try to orthogonalize more vectors than dimensions, we expect degeneracy eventually!
  // Use the delta overload: with two float arguments, overload resolution picks JUnit 4's
  // deprecated assertEquals(double, double), which fails unconditionally. The delta overloads
  // treat NaN as equal to NaN.
  assertEquals(Float.NaN, ((RealVector) list.get(3)).getCoordinates()[0], TOL);
}
 
Example #3
Source File: VectorStoreRecommender.java    From seldon-server with Apache License 2.0 6 votes vote down vote up
/**
 * @param queryVecStore Vector store to use for query generation.
 * @param searchVecStore The vector store to search.
 * @param luceneUtils LuceneUtils object to use for query weighting. (May be null.)
 * @param queryTerms Terms that will be parsed into a query
 * expression. If the string "NOT" appears, terms after this will be negated.
 */
public VectorStoreRecommenderCosine(VectorStore queryVecStore,
                            VectorStore searchVecStore,
                            LuceneUtils luceneUtils,
                            String[] queryTerms,
                            Set<String> exclusions,
                            Set<String> inclusions,
                            String minDoc)
    throws ZeroVectorException {
  super(queryVecStore, searchVecStore, luceneUtils, exclusions,inclusions,minDoc);
  this.queryVector = CompoundVectorBuilder.getQueryVector(queryVecStore,
                                                          luceneUtils,
                                                          FlagConfig.getFlagConfig(null),
                                                          queryTerms);
  if (this.queryVector.isZeroVector()) {
    throw new ZeroVectorException("Query vector is zero ... no results.");
  }
}
 
Example #4
Source File: TypeSpec.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * For a field known to be numeric, generates appropriate bookend vectors.
 *
 * <p>A random generator is seeded from {@code Bobcat.asLong(columnName)}; the min and max
 * bookend vectors are regenerated from it until their overlap passes the acceptance check.
 *
 * @param flagConfig supplies the vector type, dimension and seed length used for generation.
 * @param columnName column name from which the random seed is derived.
 * @throws IllegalArgumentException if this type is not {@code SupportedType.DOUBLE}.
 */
public void addMinMaxVectors(FlagConfig flagConfig, String columnName) {
  if (this.getType() != SupportedType.DOUBLE) {
    throw new IllegalArgumentException("Min and max vectors only supported for type DOUBLE so far.");
  }

  Random random = new Random(Bobcat.asLong(columnName));
  for (;;) {
    minBookendVector = VectorFactory.generateRandomVector(
        flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);
    maxBookendVector = VectorFactory.generateRandomVector(
        flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);

    // NOTE(review): the BINARY clause (< 0.01) looks implied by the first clause (< 0.1),
    // so it appears to have no effect on the outcome -- confirm the intended thresholds.
    boolean dissimilarEnough =
        minBookendVector.measureOverlap(maxBookendVector) < 0.1
            || (flagConfig.vectortype().equals(VectorType.BINARY)
                && minBookendVector.measureOverlap(maxBookendVector) < 0.01);
    if (dissimilarEnough) {
      return;
    }
    VerbatimLogger.info("Bookend vectors too similar to each other ... repeating generation.\n");
  }
}
 
Example #5
Source File: NumberRepresentationTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks the number of vectors produced for two ranges sharing the same start, and that
 * the first and last vectors of both ranges coincide.
 */
@Test
public void testCreateAndGetNumberVectors() {
  String[] flags = {"-vectortype", "real", "-dimension", "200"};
  NumberRepresentation numbers = new NumberRepresentation(FlagConfig.getFlagConfig(flags));

  VectorStoreRAM zeroToTwo = numbers.getNumberVectors(0, 2);
  assertEquals(5, zeroToTwo.getNumVectors());

  VectorStoreRAM zeroToFour = numbers.getNumberVectors(0, 4);
  assertEquals(7, zeroToFour.getNumVectors());

  // The beginning and end vectors should be the same in all cases.
  assertEquals(1.0, zeroToTwo.getVector(0).measureOverlap(zeroToFour.getVector(0)), TOL);
  assertEquals(1.0, zeroToTwo.getVector(2).measureOverlap(zeroToFour.getVector(4)), TOL);
}
 
Example #6
Source File: NumberRepresentationTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * For binary number vectors over the range 0..4, checks that vector 1 overlaps more with the
 * start than with the end, that vector 3 overlaps more with the end than with the start, and
 * that the middle vector is roughly equidistant from both ends.
 */
@Test
public void testVectorsNearerToBeginningOrEnd() {
  String[] flags = {"-vectortype", "binary", "-dimension", "2048"};
  NumberRepresentation numbers = new NumberRepresentation(FlagConfig.getFlagConfig(flags));

  VectorStoreRAM store = numbers.getNumberVectors(0, 4);

  assertTrue(
      store.getVector(0).measureOverlap(store.getVector(1))
          > store.getVector(4).measureOverlap(store.getVector(1)));
  assertTrue(
      store.getVector(4).measureOverlap(store.getVector(3))
          > store.getVector(3).measureOverlap(store.getVector(0)));

  // This "half-way" equality isn't exact (hence the loose 0.05 tolerance), demonstrating
  // that I don't exactly understand the process. -DW.
  assertEquals(
      store.getVector(4).measureOverlap(store.getVector(2)),
      store.getVector(2).measureOverlap(store.getVector(0)),
      0.05);
}
 
Example #7
Source File: PsiUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Prints the nearest predicate for a particular flagConfig.
 *
 * <p>Opens the semantic and bound vector stores named in {@code flagConfig}, runs a
 * {@code VectorSearcherBoundProduct} search seeded with a zero query vector, and prints the
 * object of the single nearest neighbor (if any) to standard output. A
 * {@code ZeroVectorException} raised while searching is reported via its stack trace and
 * otherwise ignored.
 *
 * @param flagConfig configuration supplying vector type, dimension and the vector file names.
 * @throws IOException declared for the vector-store opening calls.
 */
public static void printNearestPredicate(FlagConfig flagConfig) throws IOException {
  VerbatimLogger.info("Printing predicate results.");
  Vector queryVector = VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension());
  try {
    VectorSearcher.VectorSearcherBoundProduct predicateFinder =
        new VectorSearcher.VectorSearcherBoundProduct(
            VectorStoreReader.openVectorStore(flagConfig.semanticvectorfile(), flagConfig),
            VectorStoreReader.openVectorStore(flagConfig.boundvectorfile(), flagConfig),
            null, flagConfig, queryVector);
    List<SearchResult> bestPredicate = predicateFinder.getNearestNeighbors(1);
    if (!bestPredicate.isEmpty()) {
      System.out.println(bestPredicate.get(0).getObjectVector().getObject().toString());
    }
  } catch (ZeroVectorException e) {
    e.printStackTrace();
  }
}
 
Example #8
Source File: VectorUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Utility method to perform superposition (hopefully) quickly using BLAS routines.
 * Arguably, this should be disseminated across the individual Vector classes.
 *
 * <p>REAL and COMPLEX vectors are combined with {@code blas.saxpy} over their coordinate
 * arrays; BINARY vectors tally votes on {@code toBeAdded} and then use
 * {@code toBeAltered.superpose}. Any other vector type is left untouched.
 *
 * @param toBeAdded vector to add; its type must equal that of {@code toBeAltered}
 * @param toBeAltered vector that is modified in place
 * @param flagConfig supplies the dimension used as the BLAS element count
 * @param blas BLAS implementation used for the REAL and COMPLEX cases
 * @param weight weight applied to {@code toBeAdded} (narrowed to float for BLAS)
 * @throws IncompatibleVectorsException if the two vectors have different vector types
 */
public static void superposeInPlace(Vector toBeAdded, Vector toBeAltered, FlagConfig flagConfig, BLAS blas, double weight) throws IncompatibleVectorsException {
  if (!toBeAdded.getVectorType().equals(toBeAltered.getVectorType())) {
    throw new IncompatibleVectorsException();
  }

  switch (toBeAdded.getVectorType()) {
    case REAL: {
      blas.saxpy(
          flagConfig.dimension(),
          (float) weight,
          ((RealVector) toBeAdded).getCoordinates(), 1,
          ((RealVector) toBeAltered).getCoordinates(), 1);
      break;
    }
    case COMPLEX: {
      // Twice the dimension is passed as the element count for the complex coordinate array.
      blas.saxpy(
          flagConfig.dimension()*2,
          (float) weight,
          ((ComplexVector) toBeAdded).getCoordinates(), 1,
          ((ComplexVector) toBeAltered).getCoordinates(), 1);
      break;
    }
    case BINARY: {
      // First attempt at this - add the results of the election multiplied by the number
      // of votes to date.
      ((BinaryVector) toBeAdded).tallyVotes();
      toBeAltered.superpose(toBeAdded, weight, null);
      break;
    }
    default:
      // Other vector types: nothing to do.
      break;
  }
}
 
Example #9
Source File: VectorUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Superposes {@code toBeAdded} onto {@code toBeAltered} in place, optionally routing each
 * source coordinate through a permutation.
 *
 * <p>With a {@code null} permutation this delegates to the five-argument overload. Otherwise
 * only REAL vectors are accepted, and coordinate {@code q} of {@code toBeAdded}, scaled by
 * {@code weight}, is added to coordinate {@code permutation[q]} of {@code toBeAltered}.
 *
 * @throws IncompatibleVectorsException if the vector types differ or, when a permutation is
 *     given, are not REAL
 */
public static void superposeInPlace(Vector toBeAdded, Vector toBeAltered, FlagConfig flagConfig, BLAS blas, double weight, int[] permutation) throws IncompatibleVectorsException {
  if (permutation == null) {
    superposeInPlace(toBeAdded, toBeAltered, flagConfig, blas, weight);
    return;
  }

  boolean bothReal =
      toBeAdded.getVectorType().equals(toBeAltered.getVectorType())
          && toBeAdded.getVectorType().equals(VectorType.REAL);
  if (!bothReal) {
    throw new IncompatibleVectorsException();
  }

  for (int source = 0; source < toBeAdded.getDimension(); source++) {
    ((RealVector) toBeAltered).getCoordinates()[permutation[source]]
        += ((RealVector) toBeAdded).getCoordinates()[source] * weight;
  }
}
 
Example #10
Source File: VectorUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Computes a scalar product, optionally reading the first vector through a permutation.
 *
 * <p>With a {@code null} permutation this delegates to the BLAS-based four-argument overload.
 * Otherwise only REAL vectors are accepted and the result is the sum over {@code q} of
 * {@code v1[permutations[q]] * v2[q]}, computed with a plain loop.
 *
 * @throws IncompatibleVectorsException if the vector types differ or, when a permutation is
 *     given, are not REAL
 */
public static double scalarProduct(Vector v1, Vector v2, FlagConfig flagConfig, BLAS blas, int[] permutations) throws IncompatibleVectorsException {
  if (permutations == null) {
    return scalarProduct(v1, v2, flagConfig, blas);
  }

  boolean bothReal =
      v1.getVectorType().equals(v2.getVectorType())
          && v1.getVectorType().equals(VectorType.REAL);
  if (!bothReal) {
    throw new IncompatibleVectorsException();
  }

  double total = 0;
  for (int index = 0; index < v1.getDimension(); index++) {
    total += ((RealVector) v1).getCoordinates()[permutations[index]]
        * ((RealVector) v2).getCoordinates()[index];
  }
  return total;
}
 
Example #11
Source File: VectorUtils.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Utility method to compute scalar product (hopefully) quickly using BLAS routines.
 * Arguably, this should be disseminated across the individual Vector classes.
 *
 * <p>REAL and COMPLEX vectors use {@code blas.sdot} over their coordinate arrays; BINARY
 * vectors tally votes on both operands and return {@code v1.measureOverlap(v2)}. Any other
 * vector type yields 0.
 *
 * @throws IncompatibleVectorsException if the two vectors have different vector types
 */
public static double scalarProduct(Vector v1, Vector v2, FlagConfig flagConfig, BLAS blas) throws IncompatibleVectorsException {
  if (!v1.getVectorType().equals(v2.getVectorType())) {
    throw new IncompatibleVectorsException();
  }

  double product;
  switch (v1.getVectorType()) {
    case REAL:
      product = blas.sdot(
          v1.getDimension(),
          ((RealVector) v1).getCoordinates(), 1,
          ((RealVector) v2).getCoordinates(), 1);
      break;
    case COMPLEX:
      // Hermitian scalar product: twice the dimension is passed as the element count.
      product = blas.sdot(
          v1.getDimension()*2,
          ((ComplexVector) v1).getCoordinates(), 1,
          ((ComplexVector) v2).getCoordinates(), 1);
      break;
    case BINARY:
      ((BinaryVector) v1).tallyVotes();
      ((BinaryVector) v2).tallyVotes();
      product = v1.measureOverlap(v2);
      break;
    default:
      product = 0;
      break;
  }
  return product;
}
 
Example #12
Source File: SemanticVectorSearcher.java    From uncc2014watsonsim with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Sets up the SemanticVectors configuration, vector-store readers and Lucene utilities, then
 * registers the three SEMVEC score names.
 *
 * <p>An {@code IOException} during setup is reported via its stack trace and otherwise
 * ignored, in which case whichever fields were not yet assigned keep their previous values.
 *
 * @param env environment passed to the superclass; also supplies the "lucene_index" setting.
 */
public SemanticVectorSearcher(Environment env) {
  super(env);

  try {
    // How to use SemanticVectors comes from their Wiki.
    // The search function takes many arguments, which are what we are
    // storing as fields here.
    String[] flags = {
        "-luceneindexpath", env.getConfOrDie("lucene_index"),
        "-docvectorsfile", "data/semanticvectors/docvectors.bin",
        "-termvectorsfile", "data/semanticvectors/termvectors.bin"};
    fconfig = FlagConfig.getFlagConfig(flags);
    queryVecReader = VectorStoreReader.openVectorStore(fconfig.termvectorsfile(), fconfig);
    resultsVecReader = VectorStoreReader.openVectorStore(fconfig.docvectorsfile(), fconfig);
    luceneUtils = new LuceneUtils(fconfig);
  } catch (IOException e) {
    e.printStackTrace();
  }

  Score.register("SEMVEC_RANK", -1, Merge.Mean);
  Score.register("SEMVEC_SCORE", -1, Merge.Mean);
  Score.register("SEMVEC_PRESENT", 0.0, Merge.Sum);
}
 
Example #13
Source File: BeagleNGramBuilder.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the shared builder, creating it on first use.
 *
 * @param flagConfig configuration for the builder; must be the very same object (compared by
 *     reference) that was used when the shared instance was first created.
 * @return the shared {@code BeagleNGramBuilder}.
 * @throws IllegalArgumentException if an instance already exists with a different
 *     {@code FlagConfig} object.
 */
public static BeagleNGramBuilder getInstance(FlagConfig flagConfig) {
  if (instance == null) {
    instance = new BeagleNGramBuilder(flagConfig);
    return instance;
  }
  if (instance.flagConfig != flagConfig) {
    throw new IllegalArgumentException(
        "Trying to create instances with two different FlagConfig objects. This is not supported.");
  }
  return instance;
}
 
Example #14
Source File: BeagleNGramBuilder.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
protected BeagleNGramBuilder(FlagConfig flagConfig)
{
  this.flagConfig = flagConfig;
	utils = BeagleUtils.getInstance();
	utils.setNormal( 0.0f, (float)(Math.sqrt(1.0/(double)flagConfig.dimension())));

	phi = utils.generateColtRandomVector(flagConfig.dimension());
	Permute1 = utils.makeScrambledIntArray(flagConfig.dimension());
	Permute2 = utils.makeScrambledIntArray(flagConfig.dimension());
}
 
Example #15
Source File: TypeSpec.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns a vector appropriate for this value by interpolating endpoints.
 * Only for type {@link SupportedType#DOUBLE}.
 *
 * @param flagConfig flag configuration (not used by this method).
 * @param value the value to represent; must lie within
 *     {@code [minDoubleValue, maxDoubleValue]}.
 * @return the weighted superposition of the min and max bookend vectors.
 * @throws IllegalArgumentException if this type is not DOUBLE or the value is out of bounds.
 */
public Vector getDoubleValueVector(FlagConfig flagConfig, double value) {
  if (this.getType() != SupportedType.DOUBLE) {
    throw new IllegalArgumentException("Bad call to getDoubleValue.");
  }
  if (value < minDoubleValue || value > maxDoubleValue) {
    throw new IllegalArgumentException("Value out of bounds: " + value);
  }
  // NOTE(review): minBookendVector is weighted by (value - min) and maxBookendVector by
  // (max - value), so value == min gives the min bookend weight 0. Confirm this orientation
  // is intended before changing it; stored vectors depend on it.
  return VectorUtils.weightedSuperposition(
      minBookendVector, (value - minDoubleValue),
      maxBookendVector, (maxDoubleValue - value));
}
 
Example #16
Source File: MarkedUpDocumentAnalyzer.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds vectors for the three sample documents and prints their pairwise overlaps.
 *
 * @param args command-line flags, parsed into a {@code FlagConfig}.
 */
public static void main(String[] args) throws ParserConfigurationException, IOException, SAXException {
  MarkedUpDocumentAnalyzer analyzer = new MarkedUpDocumentAnalyzer(FlagConfig.getFlagConfig(args));

  Vector othelloVec = analyzer.getVectorForString(othello);
  Vector midsummerVec = analyzer.getVectorForString(midsummerNightsDream);
  Vector twelfthNightVec = analyzer.getVectorForString(twelfthNight);

  System.out.println("Structural similarity of Othello with A Midsummer Night's Dream:");
  System.out.println(othelloVec.measureOverlap(midsummerVec));

  System.out.println("Structural similarity of Othello with Twelfth Night:");
  System.out.println(twelfthNightVec.measureOverlap(othelloVec));

  System.out.println("Structural similarity of A Midsummer Night's Dream with Twelfth Night:");
  System.out.println(twelfthNightVec.measureOverlap(midsummerVec));
}
 
Example #17
Source File: MarkedUpDocumentAnalyzer.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an analyzer with a deterministic elemental vector store, proportion vectors and
 * an XML document builder.
 *
 * <p>If the document builder cannot be created, the {@code ParserConfigurationException} is
 * reported via its stack trace and {@code documentBuilder} is left unassigned.
 *
 * @param flagConfig configuration used for the vector store and the proportion vectors.
 */
public MarkedUpDocumentAnalyzer(FlagConfig flagConfig) {
  this.flagConfig = flagConfig;
  this.elementalVectors = new VectorStoreDeterministic(flagConfig);
  this.proportionVectors = new ProportionVectors(flagConfig);

  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  try {
    this.documentBuilder = factory.newDocumentBuilder();
  } catch (ParserConfigurationException e) {
    e.printStackTrace();
  }
}
 
Example #18
Source File: TableTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Builds a three-row, three-column table of complex vectors shared by the tests. */
@Before
public void setUp() {
  columnNames = new String[] {"Name", "Start", "End"};
  dataRows = Arrays.asList(new String[][]{
      {"a", "1600", "1700"},
      {"b", "1600", "1800"},
      {"c", "1700", "1800"}
  });

  String[] flags = "-vectortype complex -dimension 1000 -seedlength 500".split(" ");
  FlagConfig flagConfig = FlagConfig.getFlagConfig(flags);
  table = new Table(flagConfig, columnNames, dataRows);
}
 
Example #19
Source File: NumberRepresentationTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that number vectors for the ranges 0..4 and 8..12 coincide at the start and at the
 * midpoint, i.e. only the position within the range matters.
 */
@Test
public void testAbsoluteValuesOfEndpointsDontMatter() {
  String[] flags = {"-vectortype", "binary", "-dimension", "2048"};
  NumberRepresentation numbers = new NumberRepresentation(FlagConfig.getFlagConfig(flags));

  VectorStoreRAM zeroToFour = numbers.getNumberVectors(0, 4);
  VectorStoreRAM eightToTwelve = numbers.getNumberVectors(8, 12);

  assertEquals(1.0, zeroToFour.getVector(0).measureOverlap(eightToTwelve.getVector(8)), TOL);
  assertEquals(1.0, zeroToFour.getVector(2).measureOverlap(eightToTwelve.getVector(10)), TOL);
}
 
Example #20
Source File: NumberRepresentationTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that, for binary number vectors over 0..4, the first two vectors overlap by 0.75
 * (within {@code TOL}).
 */
@Test
public void testBinaryVectorsChangeGradually() {
  String[] flags = {"-vectortype", "binary", "-dimension", "2048"};
  NumberRepresentation numbers = new NumberRepresentation(FlagConfig.getFlagConfig(flags));

  VectorStoreRAM store = numbers.getNumberVectors(0, 4);

  assertEquals(0.75, store.getVector(0).measureOverlap(store.getVector(1)), TOL);
}
 
Example #21
Source File: BeagleTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds BEAGLE n-gram vectors, prints timing and convolution statistics, and writes both the
 * n-gram vectors and their index vectors to files derived from {@code fileOut}.
 *
 * <p>Any exception is reported via its stack trace and otherwise ignored.
 *
 * @param fileOut prefix for the two output file names.
 * @param flagConfig configuration used for building and writing; its dimension is part of
 *     the output file names.
 * @param numGrams n-gram count passed to {@code BeagleNGramVectors}; also part of the
 *     output file names.
 */
public void createNGrams(String fileOut, FlagConfig flagConfig, int numGrams )
{
  BeagleUtils utils = BeagleUtils.getInstance();

  try {
    long startMillis = System.currentTimeMillis();
    BeagleNGramVectors ngramVectors = new BeagleNGramVectors(
        flagConfig, "index", 5, 2, new String[] {"contents"}, numGrams, "stoplist.txt");
    long elapsedMillis = System.currentTimeMillis() - startMillis;

    System.out.println("\nTime to process: " + elapsedMillis/1000 + " secs.");
    System.out.println("\nNumber of convolutions: " + utils.getNumConvolutions());

    VectorStoreWriter.writeVectors(
        fileOut + "_" + flagConfig.dimension() + "_" + numGrams + ".bin", flagConfig, ngramVectors);

    VectorStore indexVectors = ngramVectors.getIndexVectors();
    VectorStoreWriter.writeVectors(
        fileOut + "_" + flagConfig.dimension() + "_" + numGrams + "_index.bin", flagConfig, indexVectors);

    // Drop the reference and request a collection, as the original does.
    ngramVectors = null;
    System.gc();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
 
Example #22
Source File: BeagleTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Example driver: builds 3-gram vectors with real vectors of dimension 512 and runs one
 * sample query against the files that were written.
 *
 * @param args ignored.
 */
public static void main(String[] args)
{
  String[] flags = {"-vectortype", "real", "-dimension", "512"};
  BeagleTest beagleTest = new BeagleTest();
  FlagConfig flagConfig = FlagConfig.getFlagConfig(flags);

  // Some example method calls
  beagleTest.createNGrams("KJB", flagConfig, 3 );
  beagleTest.testQuery(flagConfig, "KJB_512_3.bin", "KJB_512_3_index.bin", "king ?" );
}
 
Example #23
Source File: ThreadSafetyTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Runs a search for {@code query} and logs every result at FINEST level.
 *
 * @param query the query term, appended after the fixed search flags.
 * @throws Exception declared for the search call.
 */
private static void outputSuggestions(String query) throws Exception  {
  String[] searchArgs = {
      "-queryvectorfile", "termvectors.bin",
      "-numsearchresults", "10",
      "-luceneindexpath", "permutation_index",
      query };
  List<SearchResult> results = Search.runSearch(FlagConfig.getFlagConfig(searchArgs));

  // Iterating an empty list does nothing, so no explicit size check is needed.
  for (SearchResult result : results) {
    String suggestion = ((ObjectVector)result.getObjectVector()).getObject().toString();
    logger.finest("query:"+query + " suggestion:" + suggestion + " score:" + result.getScore());
  }
}
 
Example #24
Source File: TableIndexer.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Reads a comma-separated table file, builds a {@code Table} from it, writes the row vectors
 * to the configured term vectors file, and runs the special-values query.
 *
 * <p>The first line of the file supplies the column headers; every following line must have
 * the same number of comma-separated entries.
 *
 * @param args command-line flags followed by exactly one argument, the table file path.
 * @throws IOException if the table file cannot be read.
 * @throws IllegalArgumentException if the flags are invalid, the number of remaining
 *     arguments is not one, the file is empty, or a row length differs from the header length.
 */
public static void main(String[] args) throws IOException {
  FlagConfig flagConfig = null;
  try {
    flagConfig = FlagConfig.getFlagConfig(args);
    args = flagConfig.remainingArgs;
  } catch (IllegalArgumentException e) {
    System.err.println(usageMessage);
    throw e;
  }

  if (flagConfig.remainingArgs.length != 1) {
    throw new IllegalArgumentException("Wrong number of arguments after parsing command line flags.\n" + usageMessage);
  }

  VerbatimLogger.info("Building vector index of table in file: " + args[0] + "\n");

  String[] columnHeaders;
  ArrayList<String[]> dataRows = new ArrayList<>();
  // try-with-resources closes the reader even when a malformed row aborts parsing.
  try (BufferedReader fileReader = new BufferedReader(new FileReader(args[0]))) {
    String headerLine = fileReader.readLine();
    if (headerLine == null) {
      // An empty file previously surfaced as a NullPointerException on split().
      throw new IllegalArgumentException(
          "Table file is empty, expected a header line: " + args[0]);
    }
    columnHeaders = headerLine.split(",");

    String dataLine;
    while ((dataLine = fileReader.readLine()) != null) {
      String[] dataEntries = dataLine.split(",");
      if (dataEntries.length != columnHeaders.length) {
        throw new IllegalArgumentException(String.format(
            "Column headers have length %d and this row has length %d. This indicates a data error or a csv parsing error."
            + "\nColumn headers:%s\nData row: %s\n",
            columnHeaders.length, dataEntries.length,
            StringUtils.join(columnHeaders), StringUtils.join(dataEntries)));
      }
      dataRows.add(dataEntries);
    }
  }

  Table table = new Table(flagConfig, columnHeaders, dataRows);
  VectorStoreWriter.writeVectors(flagConfig.termvectorsfile(), flagConfig, table.getRowVectorStore());

  queryForSpecialValues(table);
  //queryForName(table, "J. Adams");
  //queryForName(table, "T. Roosevelt");
}
 
Example #25
Source File: SemanticVectorsManager.java    From seldon-server with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a text-format vector store from {@code reader} into memory.
 *
 * <p>The first line is merged into a default {@code FlagConfig} via
 * {@code FlagConfig.mergeWriteableFlagsFromString}; the remaining content is read as vectors
 * through {@code VectorEnumerationText}.
 *
 * @param reader source positioned at the start of the vector file.
 * @return an in-memory store holding every vector that was read.
 * @throws IOException if reading the first line fails.
 */
private VectorStoreRAM createSVPeer(BufferedReader reader) throws IOException
{
  FlagConfig flagConfig = FlagConfig.getFlagConfig(null);
  String headerLine = reader.readLine();
  FlagConfig.mergeWriteableFlagsFromString(headerLine, flagConfig);

  VectorStoreRAM store = new VectorStoreRAM(flagConfig);
  VectorEnumerationText vectors = new VectorEnumerationText(reader, flagConfig);
  while (vectors.hasMoreElements()) {
    ObjectVector entry = vectors.nextElement();
    store.putVector(entry.getObject(), entry.getVector());
  }
  return store;
}
 
Example #26
Source File: LuceneIndexFromTriples.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a Lucene index from a triples text file.
 *
 * <p>The index is written to {@code INDEX_DIR}, which can be overridden with the
 * {@code -luceneindexpath} flag and must not exist yet. The input file is validated before
 * the index writer is created, so a missing input no longer leaves an index directory
 * behind. The writer is closed even if indexing fails. I/O failures are reported on
 * standard output rather than propagated.
 *
 * @param args command-line arguments; {@code args[0]} is used as the triples file path.
 * @throws IllegalArgumentException if the index directory already exists.
 */
public static void main(String[] args) {
  String usage = "java pitt.search.lucene.LuceneIndexFromTriples [triples text file] ";
  if (args.length == 0) {
    System.err.println("Usage: " + usage);
    System.exit(1);
  }
  FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
  // Allow for the specification of a directory to write the index to.
  if (flagConfig.luceneindexpath().length() > 0) {
    INDEX_DIR = FileSystems.getDefault().getPath(flagConfig.luceneindexpath());
  }
  if (Files.exists(INDEX_DIR)) {
     throw new IllegalArgumentException(
         "Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
  }

  try {
    // NOTE(review): args[0] is the raw first argument; if flags precede the file name this
    // is a flag, not the file. Presumably flagConfig.remainingArgs[0] is wanted -- confirm.
    final File triplesTextFile = new File(args[0]);
    if (!triplesTextFile.exists() || !triplesTextFile.canRead()) {
      throw new IOException("Document file '" + triplesTextFile.getAbsolutePath() +
          "' does not exist or is not readable, please check the path");
    }

    // Create IndexWriter using WhiteSpaceAnalyzer without any stopword list.
    IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
    try (IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), writerConfig)) {
      System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
      indexDoc(writer, triplesTextFile);
    }
  } catch (IOException e) {
    System.out.println(" caught a " + e.getClass() +
        "\n with message: " + e.getMessage());
  }
}
 
Example #27
Source File: AnalogyTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Stores the inputs this processor works on.
 *
 * @param flagConfig flag configuration kept for later use.
 * @param termVectors term vector store kept for later use.
 * @param inLine input line kept for later use.
 * @param threadno number identifying this processor's thread.
 */
public AnalogyProcessor(FlagConfig flagConfig, VectorStoreRAM termVectors, String inLine, int threadno)
{
  this.threadno = threadno;
  this.inLine = inLine;
  this.termVectors = termVectors;
  this.flagConfig = flagConfig;
}
 
Example #28
Source File: NumberRepresentation.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Initializes an instance of {@link NumberRepresentation} with its start and end vectors,
 * which are generated randomly, normalized, and then handed to
 * {@code VectorUtils.orthogonalizeVectors}.
 *
 * Allows for the specification of a start and end seed, so mutually near-orthogonal sets
 * of demarcator vectors can be created.
 *
 * @param flagConfig Flag configuration, used in particular to control vectortype and dimension.
 * @param startSeed seed string for the start vector, converted with {@code Bobcat.asLong}.
 * @param endSeed seed string for the end vector, converted with {@code Bobcat.asLong}.
 * @throws NullPointerException if {@code flagConfig} is null.
 */
public NumberRepresentation(FlagConfig flagConfig, String startSeed, String endSeed) {
  if (flagConfig == null) {
    throw new NullPointerException("flagConfig cannot be null");
  }

  // Enforce probabilistic normalization for binary vectors.
  if (flagConfig.vectortype().equals(VectorType.BINARY)) {
    BinaryVector.setNormalizationMethod(BinaryVector.NORMALIZE_METHOD.PROBABILISTIC);
  }

  this.flagConfig = flagConfig;
  this.startRandomSeed = startSeed;
  this.endRandomSeed = endSeed;

  // Generate a vector for the lowest number ...
  Random random = new Random(Bobcat.asLong(startRandomSeed));
  vL = VectorFactory.generateRandomVector(
      flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);
  vL.normalize();

  // ... and one for the highest, re-seeding the same generator from the end seed.
  random.setSeed(Bobcat.asLong(endRandomSeed));
  vR = VectorFactory.generateRandomVector(
      flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);
  vR.normalize();

  // Small routine to guarantee that end vector has low similarity with start vector.
  ArrayList<Vector> demarcators = new ArrayList<Vector>();
  demarcators.add(vL);
  demarcators.add(vR);
  VectorUtils.orthogonalizeVectors(demarcators);
}
 
Example #29
Source File: ProportionVectors.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Constructs an instance from the given flag config.
 *
 * <p>Start and end vectors are drawn from a generator seeded with {@code randomSeed} and
 * redrawn until their overlap is below 0.1.
 *
 * @param flagConfig supplies the vector type, dimension and seed length used for generation.
 */
public ProportionVectors(FlagConfig flagConfig) {
  Random random = new Random(randomSeed);
  for (;;) {
    this.vectorStart = VectorFactory.generateRandomVector(
        flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);
    this.vectorEnd = VectorFactory.generateRandomVector(
        flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength(), random);

    if (this.vectorStart.measureOverlap(this.vectorEnd) < 0.1) {
      return;
    }
    VerbatimLogger.info("Bookend vectors too similar to each other ... repeating generation.\n");
  }
}
 
Example #30
Source File: VectorStoreOrthographicalTest.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that repeated lookups of the same string give fully overlapping vectors, and that
 * "foo" overlaps more with "foot" than with "bar" while still differing from "foot".
 */
@Test
public void initAndRetrieveTest() {
  VectorStoreOrthographical store =
      new VectorStoreOrthographical(FlagConfig.getFlagConfig(null));

  Vector foo = store.getVector("foo");
  Vector fooAgain = store.getVector("foo");
  Assert.assertEquals(1, foo.measureOverlap(fooAgain), TOL);

  Vector foot = store.getVector("foot");
  Assert.assertTrue(1 > foo.measureOverlap(foot));

  Vector bar = store.getVector("bar");
  Assert.assertTrue(foo.measureOverlap(bar) < foo.measureOverlap(foot));
}