Python bsddb.btopen() Examples

The following are 6 code examples of bsddb.btopen(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bsddb, or try the search function.

Example #1

Source File: knowledge_resource.py From TaxoRL with MIT License

5 votes

def __init__(self, resource_prefix):
    """
    Open the knowledge resource databases in read-only mode.

    :param resource_prefix - the resource directory and file prefix; every
        database file name is this prefix followed by a fixed suffix
    """
    # (attribute name, file suffix) pairs, opened in this order.
    databases = (
        ('term_to_id', '_term_to_id.db'),
        ('id_to_term', '_id_to_term.db'),
        ('path_to_id', '_path_to_id.db'),
        ('id_to_path', '_id_to_path.db'),
        ('l2r_edges', '_l2r.db'),
    )
    for attribute, suffix in databases:
        setattr(self, attribute, bsddb.btopen(resource_prefix + suffix, 'r'))

Example #2

Source File: create_resource_from_corpus_2.py From TaxoRL with MIT License

5 votes

def main():
    """
    Creates a "knowledge resource" from triplets file

    Second step: reads the textual triplet file named on the command line,
    maps each term and path to its ID using the existing resource DBs, and
    writes the ID triplets (tab-separated, one per line) to
    ``<triplet_file>_id``. Lines that do not split into exactly three
    tab-separated fields are echoed to stdout and skipped; triplets whose
    path is not in the path DB are dropped silently.

    A term that is missing from the term DB is not handled here, so the
    lookup error propagates to the caller.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    # Get the arguments
    args = docopt("""Creates a knowledge resource from triplets file. Second step, uses the resource files
    already created and converts the textual triplet file to a triplet file with IDs.

    Usage:
        create_resource_from_corpus_2.py <triplet_file> <resource_prefix>

        <triplet_file> = a file containing the text triplets, formated as X\tY\tpath.
        You can run this script on multiple portions of the triplet file at once and concatenate the output.
        <resource_prefix> = the file names' prefix for the resource files
    """)

    triplet_file = args['<triplet_file>']
    resource_prefix = args['<resource_prefix>']

    # Load the resource DBs; closing() guarantees the handles are released
    # even when a lookup below raises.
    with closing(bsddb.btopen(resource_prefix + '_term_to_id.db')) as term_to_id_db, \
            closing(bsddb.btopen(resource_prefix + '_path_to_id.db')) as path_to_id_db:
        with codecs.open(triplet_file) as f_in, \
                codecs.open(triplet_file + '_id', 'w') as f_out:
            for line in f_in:
                try:
                    x, y, path = line.strip().split('\t')
                except ValueError:
                    # Wrong number of fields: report the raw line and move on.
                    print(line)
                    continue

                # Frequent path (-1 marks a path that is absent from the path DB)
                x_id, y_id, path_id = term_to_id_db[x], term_to_id_db[y], path_to_id_db.get(path, -1)
                if path_id != -1:
                    f_out.write('\t'.join(map(str, (x_id, y_id, path_id))) + '\n')

Example #3

Source File: test_bsddb.py From medicare-demo with Apache License 2.0

5 votes

def test_keyordering(self):
    """
    Assert that the database cursor visits keys in sorted order.

    Only runs its checks when the fixture was opened with bsddb.btopen;
    for any other open method the test does nothing.
    """
    if self.openmethod[0] is bsddb.btopen:
        expected = sorted(self.d.keys())
        cursor = self.f
        # The calls below move one shared cursor, so their order matters:
        # first -> next, then last -> previous.
        self.assertEqual(cursor.first()[0], expected[0])
        self.assertEqual(cursor.next()[0], expected[1])
        self.assertEqual(cursor.last()[0], expected[-1])
        self.assertEqual(cursor.previous()[0], expected[-2])
        self.assertEqual(list(cursor), expected)

Example #4

Source File: bdb.py From yumbootstrap with GNU General Public License v3.0

5 votes

def db_dump(filename, outfile = sys.stdout):
  """
  Write a textual dump of a Berkeley DB file to ``outfile``.

  The file is first opened read-only as a hash database; if bsddb rejects
  that, it is opened as a btree instead. The dump consists of a fixed
  header (VERSION, format, type, HEADER=END), then for every record one
  line with the hex-encoded key and one with the hex-encoded value, each
  prefixed by a single space, and finally a DATA=END line.

  :param filename: path of the database file to dump
  :param outfile: object with a ``write`` method that receives the dump;
      defaults to the ``sys.stdout`` bound when this function was defined
  """
  try:
    f = bsddb.hashopen(filename, 'r')
    db_type = "hash"
  except bsddb.error:
    # Not a hash database (or not openable as one): fall back to btree.
    f = bsddb.btopen(filename, 'r')
    db_type = "btree"

  try:
    outfile.write("VERSION=3\n") # magic
    outfile.write("format=bytevalue\n")
    outfile.write("type=%s\n" % (db_type))

    outfile.write("HEADER=END\n")
    for (key, value) in f.iteritems():
      # One write per line instead of one per byte; the output is unchanged.
      outfile.write(" %s\n" % "".join("%02x" % ord(c) for c in key))
      outfile.write(" %s\n" % "".join("%02x" % ord(c) for c in value))
    outfile.write("DATA=END\n")
  finally:
    # Release the database handle even if writing fails part-way.
    f.close()

#-----------------------------------------------------------------------------
# vim:ft=python

Example #5

Source File: create_resource_from_corpus_3.py From TaxoRL with MIT License

4 votes

def main():
    """
    Creates a "knowledge resource" from triplets file

    Third step: reads the ID-based triplet file named on the command line
    and fills ``<resource_prefix>_l2r.db``. Each DB key is ``X_id###Y_id``
    and its value is the comma-separated list of ``path_id:count`` entries
    seen for that pair, in file order. Lines that do not split into exactly
    four tab-separated fields are echoed to stdout and skipped.

    All entries are collected in memory first and written to the DB in one
    pass at the end.
    """

    # Get the arguments
    args = docopt("""Creates a knowledge resource from triplets file. Third step, uses the ID-based triplet file
    and converts it to the '_l2r.db' file.

    Usage:
        create_resource_from_corpus_3.py <id_triplet_file> <resource_prefix>

        <id_triplet_file> = a file containing the int triplets, formated as X_id\tY_id\tpath_id\tcount, where
        count is the number of times X and Y occurred together in this path. You can obtain such a file by
        counting the number of occurrences of each line in the file produced by the second step, e.g.:
        awk '{i[$0]++} END{for(x in i){print x"\t"i[x]}}' triplet_file > id_triplet_file

        If you split the files in the second step, apply this command to each one of them, and then sum them up, e.g.:
        for each i, run: awk '{i[$0]++} END{for(x in i){print x"\t"i[x]}}' triplet_file_i > id_triplet_file_i
        cat id_triplet_file_* > id_triplet_file_temp

        Then, run: awk -F$'\t' '{i[$1,"\t",$2,"\t",$3]+=$4} END{for(x in i){print x"\t"i[x]}}' id_triplet_file_temp > id_triplet_file

        <resource_prefix> = the file names' prefix for the resource files
    """)

    id_triplet_file = args['<id_triplet_file>']
    resource_prefix = args['<resource_prefix>']

    # Open (creating if needed) the target DB up front so a bad prefix fails
    # before the triplet file is processed.
    l2r_db = bsddb.btopen(resource_prefix + '_l2r.db', 'c')
    try:
        # Collect entries per key in lists and join once at the end, rather
        # than growing a string with += for every line.
        l2r_dict = defaultdict(list)

        with codecs.open(id_triplet_file) as f_in:
            for line in tqdm(f_in):
                try:
                    x, y, path, count = line.strip().split('\t')
                except ValueError:
                    # Wrong number of fields: report the raw line and move on.
                    print(line)
                    continue

                l2r_dict['%s###%s' % (x, y)].append('%s:%s' % (path, count))

        for key, entries in l2r_dict.items():
            l2r_db[key] = ','.join(entries)

        l2r_db.sync()
    finally:
        # Release the database handle even if processing fails part-way.
        l2r_db.close()

Example #6

Source File: manager.py From scraplat with Do What The F*ck You Want To Public License

4 votes

def __init_dbd__(self):
    """
    Recreate the two Berkeley DB files used by this object.

    Deletes ``all_sites.db`` and ``visited.db`` from the current working
    directory if they exist, then opens both afresh with
    ``bsddb.btopen(..., flag='c')`` and stores the handles on
    ``self.all_sites`` and ``self.visited``. Progress and the outcome are
    printed to stdout. A failure to open the databases is reported with a
    message rather than raised, so the attributes may be missing afterwards.
    """
    print("prepare to initial the bdb")
    print("[#] Check if the db exist")
    for db_file in ('all_sites.db', 'visited.db'):
        if os.path.exists(db_file):
            os.remove(db_file)
    print("[#] Cleared the db")

    # The handles returned by btopen() support dict-style access
    # (db[key], `key in db`, keys(), iteritems()), cursor calls
    # (first/next/last/previous/set_location) and sync().
    try:
        self.all_sites = bsddb.btopen(file='all_sites.db', flag='c')
        self.visited = bsddb.btopen(file='visited.db', flag='c')
        print("[*]Success init BDB")
    except Exception:
        # Best-effort: report the failure, but let KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        print("[!]Bad ! Can't create BDB!")