Python dataset.connect() Examples

The following are 30 code examples of dataset.connect(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module dataset, or try the search function.
Example #1
Source File: cli.py    From twick with MIT License 7 votes vote down vote up
def add_shared_args(parser):
    """Register the shared command-line arguments on *parser*.

    Adds the positional ``query`` argument and the ``--db``, ``--throttle``,
    ``--credentials``, ``--store-raw`` and ``--quiet`` options.
    """
    parser.add_argument("query")

    # The --db value is converted with dataset.connect.
    db_help = "SQLAlchemy connection string. Default: " + DEFAULT_DB
    parser.add_argument("--db", type=dataset.connect, help=db_help, default=DEFAULT_DB)

    throttle_help = """Wait X seconds between requests.
        Default: 15 (to stay under rate limits)"""
    parser.add_argument("--throttle", type=int, default=15, help=throttle_help)

    credentials_help = """
            Four space-separated strings for {}.
            Defaults to environment variables by those names.
        """.format(", ".join(CREDENTIAL_NAMES))
    parser.add_argument("--credentials", nargs=4, help=credentials_help)

    # Both remaining options are plain boolean switches.
    switches = (
        ("--store-raw", "Store raw tweet JSON, in addition to excerpted fields."),
        ("--quiet", "Silence logging."),
    )
    for flag, description in switches:
        parser.add_argument(flag, help=description, action="store_true")
Example #2
Source File: will.py    From W.I.L.L with MIT License 6 votes vote down vote up
def start(self):
        """
        Connect to the database, hand the connection to core, API and web,
        then initialize the core and call the sessions monitor.

        """
        global db
        log.info(":SYS:Starting W.I.L.L")
        url = self.configuration_data["db_url"]
        log.info(":SYS:Connecting to database")
        # The same connection object is bound to the module global and to
        # core, API and web, in that order.
        db = core.db = API.db = web.db = dataset.connect(
            url, engine_kwargs={"pool_recycle": 1}
        )
        # Formatted start timestamp, stored on web.
        web.start_time = self.now.strftime("%I:%M %p %A %m/%d/%Y")
        log.info(":SYS:Starting W.I.L.L core")
        core.initialize(db)
        log.info(":SYS:Starting sessions parsing thread")
        core.sessions_monitor(db)
        log.info(":SYS:W.I.L.L started")
Example #3
Source File: utils.py    From collect-social with MIT License 6 votes vote down vote up
def setup_db(connection_string):
    """Open the database and create the id-column indexes.

    Returns the database handle obtained from *connection_string*.
    """
    db = dataset.connect(connection_string)

    # Look up every table first, in a fixed order, before creating indexes.
    table_names = ('page', 'user', 'post', 'comment', 'interaction')
    tables = {name: db[name] for name in table_names}

    indexed_columns = (
        ('user', 'user_id'),
        ('post', 'post_id'),
        ('comment', 'comment_id'),
        ('comment', 'post_id'),
        ('interaction', 'comment_id'),
        ('interaction', 'post_id'),
        ('interaction', 'user_id'),
    )
    for table_name, column in indexed_columns:
        tables[table_name].create_index([column])

    return db
Example #4
Source File: data_process.py    From isdi with MIT License 6 votes vote down vote up
def create_app_info_dict():
    """Load every configured app-info CSV into the ``apps`` table.

    Each CSV in ``config.source_files`` is read with ``appId`` as the index
    and tagged with its store name. A placeholder ``permissions`` column is
    added when the CSV has none, and the column names are normalised. The
    frames are then concatenated and written to the ``apps`` table
    (replacing any existing one), and an index on ``appId`` is created.
    """
    frames = []
    conn = dataset.connect(config.APP_INFO_SQLITE_FILE)
    print("Creating app-info dict")
    for store, csv_path in config.source_files.items():
        frame = pd.read_csv(csv_path, index_col='appId')

        frame['store'] = store

        if 'permissions' not in frame.columns:
            print(store, csv_path, frame.columns)
            # DataFrame.assign returns a new frame, so the result has to be
            # kept; otherwise the placeholder column is silently lost.
            frame = frame.assign(permissions=["<not recorded>"] * len(frame))
        # Lower-case the names and turn spaces and hyphens into underscores.
        frame.columns = (
            frame.columns.str.lower().str.replace(' ', '-').str.replace('-', '_')
        )
        frames.append(frame)
    pd.concat(frames).to_sql('apps', conn.engine, if_exists='replace')
    conn.engine.execute('create index idx_appId on apps(appId)')
Example #5
Source File: utils.py    From collect-social with MIT License 6 votes vote down vote up
def setup_db(connection_string):
    """Open the database and create the id-column indexes.

    Returns the database handle obtained from *connection_string*.
    """
    db = dataset.connect(connection_string)

    # Look up every table first, in a fixed order, before creating indexes.
    table_names = ('connection', 'user', 'tweet', 'media', 'mention', 'url',
                   'hashtag')
    tables = {name: db[name] for name in table_names}

    # (table, columns to index), applied in this exact order.
    index_plan = (
        ('tweet', ('tweet_id',)),
        ('media', ('tweet_id',)),
        ('mention', ('user_id', 'mentioned_user_id')),
        ('url', ('tweet_id',)),
        ('hashtag', ('tweet_id',)),
        ('user', ('user_id',)),
        ('connection', ('friend_id', 'follower_id')),
    )
    for table_name, columns in index_plan:
        for column in columns:
            tables[table_name].create_index([column])

    return db
Example #6
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def all_complementary(self):
        """Return every row of the complementary information table.

        The rows come back as a list whose order is undefined.
        """
        gc.collect()
        connection = dataset.connect(self.url)
        table = connection[self.complementary_table_name]
        return list(table.all())
Example #7
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def __init__(self, url, result_table="results", complementary_table="complementary", space_table="space"):
        """Validate *url*, store the table names and create the file lock.

        Args:
            url: SQLite connection url of the form ``sqlite:///<path>``.
            result_table: Name of the result table.
            complementary_table: Name of the complementary information table.
            space_table: Name of the space table.

        Raises:
            RuntimeError: If *url* ends with ``/`` or with a space or tab,
                does not start with ``sqlite://``, designates an in-memory
                database, or contains no database path.
        """
        super(SQLiteConnection, self).__init__()
        if url.endswith("/"):
            raise RuntimeError("Empty database name {}".format(url))

        if url.endswith((" ", "\t")):
            raise RuntimeError("Database name ends with space {}".format(url))

        if not url.startswith("sqlite://"):
            # The message now carries a placeholder so the offending url is
            # actually reported (format() used to be called with none).
            raise RuntimeError("Missing 'sqlite:///' at the begin of url {}".format(url))

        if url == "sqlite://" or url == "sqlite:///:memory:":
            raise RuntimeError("Cannot use memory database as it exists only for the time of the connection")

        # Everything after the "sqlite:///" prefix is the database path.
        match = re.search("sqlite:///(.*)", url)
        if match is not None:
            db_path = match.group(1)
        else:
            raise RuntimeError("Cannot find sqlite db path in {}".format(url))

        self.url = url
        self.result_table_name = result_table
        self.complementary_table_name = complementary_table
        self.space_table_name = space_table

        # The lock file sits next to the database file ("<db_path>.lock").
        self._lock = filelock.FileLock("{}.lock".format(db_path))
        self.hold_lock = False

        # with self.lock():
        #     db = dataset.connect(self.url)

        #     # Initialize a result table and ensure float for loss
        #     results = db[self.result_table_name]
        #     results.create_column("_loss", sqlalchemy.Float)
Example #8
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def all_results(self):
        """Return every row of the result table as a list.

        The order of the rows is undefined.
        """
        # Forcing a garbage collection is the only way to ensure old db
        # instances are closed.
        # See dataset note : https://dataset.readthedocs.io/en/latest/api.html#notes
        gc.collect()
        results = dataset.connect(self.url)[self.result_table_name]
        return list(results.all())
Example #9
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def find_results(self, filter):
        """Return the result rows matching *filter* as a list.

        *filter* is expanded into keyword arguments of the table's ``find``.
        The order of the returned rows is undefined.
        """
        gc.collect()
        connection = dataset.connect(self.url)
        matches = connection[self.result_table_name].find(**filter)
        return list(matches)
Example #10
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def insert_result(self, document):
        """Add *document* as a new row of the result table.

        Columns neither have to exist beforehand nor all be supplied: any new
        column will be added to the database and any missing column will get
        value None.
        """
        gc.collect()
        results = dataset.connect(self.url)[self.result_table_name]
        return results.insert(document)
Example #11
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def update_result(self, filter, values):
        """Update or add *values* on the result rows selected by *filter*.

        Args:
            filter: An identifier of the rows to update.
            values: A mapping of values to update or add.
        """
        gc.collect()
        # Work on a copy so the caller's filter is left untouched. Its keys
        # select the rows; the merged mapping carries keys and new values.
        row = filter.copy()
        match_keys = list(row.keys())
        row.update(values)
        table = dataset.connect(self.url)[self.result_table_name]
        return table.update(row, match_keys)
Example #12
Source File: AlarmDb.py    From LightUpPi-Alarm with MIT License 5 votes vote down vote up
def __connect_alarms(self):
        """ Open the database at self.db_file and return its 'alarms' table. """
        database = dataset.connect(self.db_file)
        return database['alarms']
Example #13
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def insert_complementary(self, document):
        """Add *document* as a new row of the complementary information table.
        """
        gc.collect()
        connection = dataset.connect(self.url)
        table = connection[self.complementary_table_name]
        return table.insert(document)
Example #14
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def find_complementary(self, filter):
        """Look up one row of the complementary information table.

        *filter* is expanded into keyword arguments of the table's
        ``find_one``.
        """
        gc.collect()
        table = dataset.connect(self.url)[self.complementary_table_name]
        return table.find_one(**filter)
Example #15
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_space(self):
        """Return the space stored by previous experiments.

        Returns None when the space table is empty; otherwise the unpickled
        content of the stored row's ``space`` field.

        Raises:
            AssertionError: If there are more than one space in the database.
        """
        gc.collect()
        spaces = dataset.connect(self.url)[self.space_table_name]
        stored = spaces.count()
        if stored == 0:
            return None

        assert stored == 1, "Space table unexpectedly contains more than one space."
        row = spaces.find_one()
        return pickle.loads(row["space"])
Example #16
Source File: sqlite.py    From chocolate with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def insert_space(self, space):
        """Store the pickled *space* in the space table.

        Raises:
            AssertionError: If a space is already present in the database.
        """
        gc.collect()
        spaces = dataset.connect(self.url)[self.space_table_name]
        already_stored = spaces.count()
        assert already_stored == 0, ("Space table cannot contain more than one space, "
                                     "clear table first.")
        serialized = pickle.dumps(space)
        return spaces.insert({"space": serialized})
Example #17
Source File: core.py    From memorious with MIT License 5 votes vote down vote up
def load_datastore():
    """Return the datastore connection, creating it on the first call.

    The connection is cached on ``settings._datastore``.
    """
    if hasattr(settings, '_datastore'):
        return settings._datastore

    # do not pool connections for the datastore
    datastore = dataset.connect(settings.DATASTORE_URI,
                                engine_kwargs={'poolclass': NullPool})
    settings._datastore = datastore
    # Use bigint to store integers by default
    datastore.types.integer = datastore.types.bigint
    return settings._datastore
Example #18
Source File: db.py    From discord-roombot with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, connection_string):
            """Connect to the database and bind the three tables used here.

            ``rooms`` is keyed by ``role_id`` and ``settings`` by ``guild_id``,
            both with a bigint primary key; ``invites`` uses the defaults.
            """
            db = dataset.connect(connection_string)
            bigint = db.types.bigint
            self.rooms = db.get_table('rooms', primary_id='role_id', primary_type=bigint)
            self.settings = db.get_table('settings', primary_id='guild_id', primary_type=bigint)
            self.invites = db.get_table('invites')
Example #19
Source File: AlarmDb.py    From LightUpPi-Alarm with MIT License 5 votes vote down vote up
def __connect_settings(self):
        """ Open the database at self.db_file and return its 'settings' table. """
        database = dataset.connect(self.db_file)
        return database['settings']

    #
    # member functions to set settings
    # 
Example #20
Source File: Data_Grabber.py    From Crypto_Trader with MIT License 5 votes vote down vote up
def __init__(self):
        """Load market data and start the background price updater.

        For every market symbol of the exchange this fetches the daily OHLCV
        data (when the exchange supports it), then fills a per-symbol deque
        with the (bid, ask, spread, time) rows already stored in the
        database. Finally a daemon thread running ``__update`` is started.
        """
        self.exchange = ccxt.poloniex()
        self.exchange.load_markets()
        # rateLimit is divided by 1000 to obtain the delay in seconds.
        self.delay_seconds = self.exchange.rateLimit / 1000
        self.symbols = self.exchange.markets
        self.timeframe = '1d'
        self.db_url = 'sqlite:///databases/market_prices.db'
        self.deques = dict()
        self.ohlcv = dict()
        # The directory has to exist before the SQLite file inside it is
        # opened, so it is ensured ahead of the connection.
        ensure_dir('databases')
        self.database = dataset.connect(self.db_url)
        for symbol in self.symbols:
            if self.exchange.has['fetchOHLCV']:
                print('Obtaining OHLCV data')
                data = self.exchange.fetch_ohlcv(symbol, self.timeframe)
                # Transpose the rows into columns; column 0 holds millisecond
                # timestamps, which are converted to datetime objects.
                data = list(zip(*data))
                data[0] = [datetime.datetime.fromtimestamp(ms / 1000)
                           for ms in data[0]]
                self.ohlcv[symbol] = data
                time.sleep(self.delay_seconds)
            else:
                print('No OHLCV data available')
            self.deques[symbol] = deque()
            if len(self.database[symbol]):
                for e in self.database[symbol]:
                    entry = (e['bid'], e['ask'], e['spread'], e['time'])
                    self.deques[symbol].append(entry)
        # NOTE(review): the connection is dropped here, presumably so that the
        # updater thread opens its own connection -- confirm.
        del self.database
        self.thread = threading.Thread(target=self.__update)
        self.thread.daemon = True
        self.thread.start()
Example #21
Source File: main.py    From W.I.L.L with MIT License 5 votes vote down vote up
def help(bot, update):
    '''Send the help string to the chat the update came from'''
    chat_id = update.message.chat_id
    bot.sendMessage(chat_id, help_str)

#def socket_io_thread(bot,session_id, chat_id ):
#    socketIO = SocketIO(SERVER_URL, 80)
#    log.info("In socket_io thread")
#    socketIO.on('connect', lambda: socketIO.emit("get_updates", session_id))
#    socketIO.on('update', lambda x: bot.sendMessage(chat_id, (x["value"])))
#    socketIO.on('disconnect', lambda x: bot.sendMessage(chat_id, "Update server has disconnected"))
#    socketIO.on('debug', lambda x: log.info("Got debug message {0} from socketIO".format(x["value"])))
#    socketIO.wait() 
Example #22
Source File: get_profiles.py    From collect-social with MIT License 5 votes vote down vote up
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string):
    """Fetch and store profiles for users whose profile is not collected.

    Users with a non-zero ``user_id`` and ``profile_collected=0`` are read
    from the ``user`` table. Their ids are looked up in batches of 100, each
    batch is upserted into the database, and the function sleeps 5 seconds
    after every full batch.

    Args:
        consumer_key, consumer_secret, access_key, access_secret:
            Credentials passed to ``get_api``.
        connection_string: Database connection string.

    Returns:
        None.
    """
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)

    user_table = db['user']
    users = list(user_table.find(user_table.table.columns.user_id != 0,
                                 profile_collected=0))

    if not users:
        print('No users without profiles')
        return None

    ids_to_lookup = []
    for user in users:
        ids_to_lookup.append(user['user_id'])
        if len(ids_to_lookup) == 100:
            print('Getting profiles')
            profiles = get_profiles(api, user_ids=ids_to_lookup)
            print('Updating 100 profiles')
            upsert_profiles(db, profiles)
            ids_to_lookup = []
            print('Sleeping, timestamp: ' + str(datetime.now()))
            time.sleep(5)

    # When the user count is a multiple of 100 nothing is left over, so the
    # trailing lookup is skipped instead of being sent with no ids.
    if ids_to_lookup:
        print('Getting profiles')
        profiles = get_profiles(api, user_ids=ids_to_lookup)
        print('Updating ' + str(len(ids_to_lookup)) + ' profiles')
        upsert_profiles(db, profiles)

    print('Finished getting profiles')
Example #23
Source File: get_friends.py    From collect-social with MIT License 5 votes vote down vote up
def run(consumer_key, consumer_secret, access_key, access_secret,
        connection_string, threshold=5000, seed_only=True):
    """Collect friend ids for users whose friends are not collected yet.

    Users with ``friends_count`` below *threshold*, ``friends_collected=0``
    and the requested ``is_seed`` value are read from the ``user`` table.
    For each one the friend ids are fetched, missing users are inserted,
    the connections are created and the user is marked as collected. The
    function sleeps one minute after each successfully processed user.

    Args:
        consumer_key, consumer_secret, access_key, access_secret:
            Credentials passed to ``get_api``.
        connection_string: Database connection string.
        threshold: Exclusive upper bound on ``friends_count``.
        seed_only: When true, select users with ``is_seed=1``; otherwise
            users with ``is_seed=0``.
    """
    db = dataset.connect(connection_string)
    api = get_api(consumer_key, consumer_secret, access_key, access_secret)

    is_seed = 1 if seed_only else 0

    user_table = db['user']
    users = list(user_table.find(
        user_table.table.columns.friends_count < threshold,
        friends_collected=0, is_seed=is_seed))
    remaining = len(users)

    for u in users:
        try:
            print('Getting friend ids for ' + u['screen_name'])
            # The two cursors are unused; the names avoid shadowing next().
            _next_cursor, _prev_cursor, friend_ids = get_friend_ids(
                api, screen_name=u['screen_name'])

            print('Adding ' + str(len(friend_ids)) + ' user ids to db')
            insert_if_missing(db, user_ids=friend_ids)

            print('Creating relationships for ' + str(u['user_id']))
            create_connections(db, u['user_id'], friend_ids=friend_ids)

            update_dict = dict(id=u['id'], friends_collected=1)
            user_table.update(update_dict, ['id'])

            # Can only make 15 calls in a 15 minute window to this endpoint
            remaining -= 1
            # One user per minute, so remaining / 60 is the hours left.
            time_left = remaining / 60.0
            print(str(time_left) + ' hours to go')
            print('Sleeping for 1 minute, timestamp: ' + str(datetime.now()))
            time.sleep(60)
        except Exception as exc:
            # Best effort: a failing user is reported and skipped. Catching
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working during the long sleeps.
            print('Skipping user after error: ' + repr(exc))
            continue
Example #24
Source File: cli.py    From collectors with MIT License 5 votes vote down vote up
def cli(argv):
    """Run the collector named by ``argv[1]``.

    The collector's ``collect`` is called with the conf dict, the conn dict
    and the remaining command-line arguments.
    """
    # Prepare conf dict
    conf = helpers.get_variables(config, str.isupper)

    # Prepare conn dict
    warehouse = dataset.connect(config.WAREHOUSE_URL)
    conn = {'warehouse': warehouse}

    # Get and call collector
    module = importlib.import_module('collectors.%s' % argv[1])
    extra_args = argv[2:]
    module.collect(conf, conn, *extra_args)
Example #25
Source File: pipelines.py    From collectors with MIT License 5 votes vote down vote up
def open_spider(self, spider):
        """Take conf and conn from *spider*, or build them from config.

        When either ``spider.conf`` or ``spider.conn`` is falsy, the conf is
        read from ``config`` and a new warehouse connection is opened.
        """
        if not (spider.conf and spider.conn):
            # For runs triggered by scrapy CLI utility
            self.__conf = helpers.get_variables(config, str.isupper)
            self.__conn = {'warehouse': dataset.connect(config.WAREHOUSE_URL)}
            return
        self.__conf = spider.conf
        self.__conn = spider.conn
Example #26
Source File: conftest.py    From collectors with MIT License 5 votes vote down vote up
def conn():
    """Return the conn dict after deleting the rows of every warehouse table."""
    warehouse = dataset.connect(config.WAREHOUSE_URL)
    for table_name in warehouse.tables:
        warehouse[table_name].delete()
    return dict(warehouse=warehouse)
Example #27
Source File: github_v3_spider_async_parent_class_complex.py    From ruia with Apache License 2.0 5 votes vote down vote up
def start(cls):
        """Open the database at ``cls.database_url``, then run the parent ``start``."""
        connection = dataset.connect(cls.database_url)
        cls.db = connection
        super().start()
Example #28
Source File: Data_Grabber.py    From Crypto_Trader with MIT License 5 votes vote down vote up
def __update(self):
        """
        Poll the order book of every symbol forever and record best prices.

        For each symbol the best bid, best ask and their spread are appended
        to the symbol's deque and inserted into the symbol's database table.
        The loop then sleeps for the rest of ``self.delay_seconds``.

        https://github.com/ccxt-dev/ccxt/wiki/Manual#market-price
        """
        self.database = dataset.connect(self.db_url)
        while True:
            for symbol in self.symbols:

                # time.clock() was removed in Python 3.8; perf_counter()
                # measures the elapsed time of this iteration.
                start_time = time.perf_counter()
                orders = self.exchange.fetch_order_book(symbol)
                bid = orders['bids'][0][0] if len(orders['bids']) > 0 else None
                ask = orders['asks'][0][0] if len(orders['asks']) > 0 else None
                spread = (ask - bid) if (bid and ask) else None
                dtime = datetime.datetime.now()
                self.deques[symbol].append((bid, ask, spread, dtime))
                self.database.begin()
                try:
                    self.database[symbol].insert({
                        'ask': ask,
                        'bid': bid,
                        'spread': spread,
                        'time': dtime
                    })
                    self.database.commit()
                except Exception:
                    # Best effort: a failed insert is rolled back and the loop
                    # goes on. KeyboardInterrupt/SystemExit are not swallowed.
                    self.database.rollback()

                # Clamp at zero: time.sleep() raises ValueError for a negative
                # duration, which happens when the iteration ran long.
                elapsed = time.perf_counter() - start_time
                time.sleep(max(0.0, self.delay_seconds - elapsed))
Example #29
Source File: __init__.py    From CTFd with Apache License 2.0 4 votes vote down vote up
def export_ctf():
    """Export the database tables and the uploaded files into a zip archive.

    Every table is written to ``db/<table>.json``. A ``db/alembic_version.json``
    entry is added when the table list has no ``alembic_version``. Every file
    under the upload folder is stored under ``uploads/<parent dir>/<file>``.

    Returns:
        The temporary file holding the archive, rewound to its start.
    """
    # TODO: For some unknown reason dataset is only able to see alembic_version during tests.
    # Even using a real sqlite database. This makes this test impossible to pass in sqlite.
    db = dataset.connect(get_app_config("SQLALCHEMY_DATABASE_URI"))

    # Backup database
    backup = tempfile.NamedTemporaryFile()

    # The context manager closes the archive even when an export step fails.
    with zipfile.ZipFile(backup, "w") as backup_zip:
        tables = db.tables
        for table in tables:
            result = db[table].all()
            result_file = BytesIO()
            freeze_export(result, fileobj=result_file)
            result_file.seek(0)
            backup_zip.writestr("db/{}.json".format(table), result_file.read())

        # # Guarantee that alembic_version is saved into the export
        if "alembic_version" not in tables:
            result = {
                "count": 1,
                "results": [{"version_num": get_current_revision()}],
                "meta": {},
            }
            # json.dump() writes text and fails on a BytesIO under Python 3;
            # serialize to a string and pass it to writestr() directly.
            backup_zip.writestr("db/alembic_version.json", json.dumps(result))

        # Backup uploads
        uploader = get_uploader()
        uploader.sync()

        upload_folder = os.path.join(
            os.path.normpath(app.root_path), app.config.get("UPLOAD_FOLDER")
        )
        for root, _dirs, files in os.walk(upload_folder):
            # Only the immediate parent directory name is kept in the archive.
            parent_dir = os.path.basename(root)
            for filename in files:
                backup_zip.write(
                    os.path.join(root, filename),
                    arcname=os.path.join("uploads", parent_dir, filename),
                )

    backup.seek(0)
    return backup
Example #30
Source File: dataset_backend.py    From design-patterns with MIT License 4 votes vote down vote up
def connect_to_db(db_name=None, db_engine="sqlite"):
    """Connect to a database. Create the database if there isn't one yet.

    The database can be a SQLite DB (either a DB file or an in-memory DB), or
    a PostgreSQL DB. In order to connect to a PostgreSQL DB you first have to
    create a database, create a user, and finally grant that user all
    necessary privileges on that database and its tables.
    'postgresql://<username>:<password>@localhost:<PostgreSQL port>/<db name>'
    Note: at the moment it looks like it's not possible to close a connection
    manually (e.g. like calling conn.close() in sqlite3).

    Parameters
    ----------
    db_name : str or None
        database name (without file extension .db). When None, an in-memory
        SQLite database is used regardless of db_engine.
    db_engine : str
        database engine ('sqlite' or 'postgres')

    Returns
    -------
    dataset.persistence.database.Database
        connection to a database

    Raises
    ------
    UnsupportedDatabaseEngine
        if db_name is given and db_engine is not a supported engine
    """
    engines = {"sqlite", "postgres"}
    if db_name is None:
        db_string = "sqlite:///:memory:"
        print("New connection to in-memory SQLite DB...")
    else:
        if db_engine == "sqlite":
            # Use the db_name parameter (the code referenced an undefined
            # name, DB_name, here).
            db_string = "sqlite:///{}.db".format(db_name)
            print("New connection to SQLite DB...")
        elif db_engine == "postgres":
            # The PostgreSQL url is hard-coded; db_name is not used for it.
            db_string = "postgresql://test_user:test_password@localhost:5432/testdb"
            # db_string = \
            #     'postgresql://test_user2:test_password2@localhost:5432/testdb'
            print("New connection to PostgreSQL DB...")
        else:
            raise UnsupportedDatabaseEngine(
                "No database engine with this name. "
                "Choose one of the following: {}".format(engines)
            )

    return dataset.connect(db_string)