Python config.db() Examples

The following are 30 code examples of config.db(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module config, or try the search function.
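Note that config.db is not the same object in every project below: in the rxivist examples it is a plain dict of connection settings, in the sentinel examples it is a peewee database object, and in the cc98 examples db appears to be a factory function that returns a fresh connection. For the rxivist snippets, a minimal config.py sketch consistent with the keys they read might look like the following (all values are placeholder assumptions, not the project's real settings):

# config.py -- hypothetical sketch; keys inferred from the rxivist examples below
db = {
  "host": "localhost",            # passed to db.Connection(...) in Example #14
  "db": "rxdb",                   # database name
  "user": "root",
  "password": "mysecretpassword",
  "schema": "prod",               # interpolated into queries as config.db['schema']
  "connection": {                 # retry settings read in Example #28
    "timeout": 3,                 # seconds, passed to psycopg2.connect
    "max_attempts": 10,
    "attempt_pause": 5,           # seconds to sleep between attempts
  },
}
user_agent = "rxivist-spider"     # Example #14
delete_csv = True                 # Example #5
author_ranks_limit = 200          # Example #22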
Example #1
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def get_urls(self):
    # fills in URLs for papers that are for some reason missing them. Determines URLs
    # by resolving the DOI.
    self.log.record('Fetching URLs for papers without them', 'info')
    to_save = []
    with self.connection.db.cursor() as cursor:
      cursor.execute(f"SELECT id, doi FROM {config.db['schema']}.articles WHERE url IS NULL OR url='';")
      for x in cursor:
        try:
          r = requests.get(f"https://doi.org/{x[1]}", timeout=10)
        except Exception as e:
          self.log.record(f'Problem resolving DOI: {e}', 'error')
          continue
        if r.status_code != 200:
          self.log.record(f"Got weird status code resolving DOI {x[1]}: {r.status_code}", "error")
          continue
        to_save.append((r.url, x[0]))
        self.log.record(f'Found URL for {x[0]}: {r.url}', 'debug')
    with self.connection.db.cursor() as cursor:
      self.log.record(f'Saving {len(to_save)} URLS.', 'info')
      cursor.executemany(f"UPDATE {config.db['schema']}.articles SET url=%s WHERE id=%s;", to_save) 
Example #2
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def __init__(self, host, db, user, password):
    dbname = db
    self.db = None
    self._ensure_database_exists(dbname, host, user, password)

    self.db = psycopg2.connect(
      host=host,
      dbname=dbname,
      user=user,
      password=password,
      options=f'-c search_path={config.db["schema"]}'
    )
    self.db.set_session(autocommit=True)
    self.cursor = self.db.cursor()

    self._ensure_tables_exist() 
Example #3
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def fill_in_author_vectors(self):
    self.log.record("Filling in empty author_vector fields.", 'debug')
    article_ids = []
    with self.connection.db.cursor() as cursor:
      cursor.execute("SELECT id FROM articles WHERE author_vector IS NULL;")
      for record in cursor:
        if len(record) > 0:
          article_ids.append(record[0])

    to_do = len(article_ids)
    if to_do > 0:
      self.log.record(f"Obtained {to_do} article IDs.", 'debug')
    with self.connection.db.cursor() as cursor:
      for article in article_ids:
        author_string = ""
        cursor.execute("SELECT authors.name FROM article_authors as aa INNER JOIN authors ON authors.id=aa.author WHERE aa.article=%s;", (article,))
        for record in cursor:
          author_string += f"{record[0]}, "
        cursor.execute(f"UPDATE {config.db['schema']}.articles SET author_vector=to_tsvector(coalesce(%s,'')) WHERE id=%s;", (author_string, article))
        to_do -= 1
        # if to_do % 100 == 0:
        #   self.log.record(f"{datetime.now()} - {to_do} left to go.", 'debug') 
Example #4
Source File: init.py    From sentinel with MIT License
def is_database_correctly_configured():
    import sys
    import peewee
    import config

    configured = False

    cannot_connect_message = "Cannot connect to database. Please ensure database service is running and user access is properly configured in 'sentinel.conf'."

    try:
        db = config.db
        db.connect()
        configured = True
    except (peewee.ImproperlyConfigured, peewee.OperationalError, ImportError) as e:
        print("[error]: %s" % e)
        print(cannot_connect_message)
        sys.exit(1)

    return configured 
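In the sentinel examples (#4 and #6), config.db is a peewee database object, which is why the code can call connect(), get_tables(), and drop_tables() on it. A minimal sketch of such a config, assuming a PostgreSQL backend (the credentials are placeholders, not sentinel's real settings):

# config.py -- hypothetical sketch for the sentinel-style usage above
import peewee

db = peewee.PostgresqlDatabase(
    'sentinel',          # database name (assumed)
    host='localhost',
    user='sentinel',
    password='password',
)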
Example #5
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def activate_tables(self, table):
    self.log.record(f"Activating tables for {table}", 'debug')
    queries = [
      f"ALTER TABLE {table} RENAME TO {table}_temp",
      f"ALTER TABLE {table}_working RENAME TO {table}",
      f"ALTER TABLE {table}_temp RENAME TO {table}_working"
    ]
    to_delete = f"{table}_working.csv"
    with self.connection.db.cursor() as cursor:
      for query in queries:
        cursor.execute(query)
    if config.delete_csv:
      self.log.record(f"Deleting {to_delete}", 'debug')
      try:
        os.remove(to_delete)
      except Exception as e:
        if to_delete not in [ # HACK These aren't there on the last loop
          'category_ranks_working.csv',
          'author_ranks_category_working.csv'
        ]:
          self.log.record(f"Problem deleting {to_delete}: {e}", "warn") 
Example #6
Source File: models.py    From sentinel with MIT License
def check_db_schema_version():
    """ Ensure DB schema is correct version. Drop tables if not. """
    db_schema_version = None

    try:
        db_schema_version = Setting.get(Setting.name == 'DB_SCHEMA_VERSION').value
    except (peewee.OperationalError, peewee.DoesNotExist, peewee.ProgrammingError) as e:
        printdbg("[info]: Can't get DB_SCHEMA_VERSION...")

    printdbg("[info]: SCHEMA_VERSION (code) = [%s]" % SCHEMA_VERSION)
    printdbg("[info]: DB_SCHEMA_VERSION = [%s]" % db_schema_version)
    if (SCHEMA_VERSION != db_schema_version):
        printdbg("[info]: Schema version mis-match. Syncing tables.")
        try:
            existing_table_names = db.get_tables()
            existing_models = [m for m in db_models() if m._meta.db_table in existing_table_names]
            if (existing_models):
                printdbg("[info]: Dropping tables...")
                db.drop_tables(existing_models, safe=False, cascade=False)
        except (peewee.InternalError, peewee.OperationalError, peewee.ProgrammingError) as e:
            print("[error] Could not drop tables: %s" % e) 
Example #7
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def _rank_articles_categories(self, category):
    self.log.record(f"Ranking papers by popularity in category {category}...", 'debug')
    with self.connection.db.cursor() as cursor:
      query = """
        SELECT t.article, SUM(t.pdf) as downloads
        FROM article_traffic AS t
        INNER JOIN articles AS a ON t.article=a.id
        WHERE a.collection=%s
        GROUP BY t.article
        ORDER BY downloads DESC
      """
      cursor.execute(query, (category,))
      params = [(record[0], rank) for rank, record in enumerate(cursor, start=1)]
    record_ranks_file(params, "category_ranks_working") 
Example #8
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def fetch_abstracts(self):
    with self.connection.db.cursor() as cursor:
      # find abstracts for any articles without them
      cursor.execute(f"SELECT id, url FROM {config.db['schema']}.articles WHERE abstract IS NULL OR abstract='';")
      for article in cursor:
        url = article[1]
        article_id = article[0]
        try:
          abstract = self.get_article_abstract(url)
          self.update_article(article_id, abstract)
        except ValueError as e:
          self.log.record(f"Error retrieving abstract for {article[1]}: {e}", "error") 
Example #9
Source File: xinling.py    From cc98 with MIT License
def createTable(boardid, big=""):
    """
    Table-creation helper. Takes a board id and a big-table prefix ("big" or ""),
    attempts the CREATE TABLE statement, and ignores errors (e.g. the table already exists).
    :param boardid: board id, e.g. "100"
    :param big: pass "" for a regular table such as bbs_100, or "big" for a historical big table such as bigbbs_100
    :return:
    """
    sql = """
CREATE TABLE `{big}bbs_{boardid}` (
  `id` int(11) NOT NULL,
  `lc` int(255) NOT NULL,
  `posttime` datetime NOT NULL,
  `edittime` datetime NOT NULL,
  `user` varchar(66) NOT NULL,
  `content` longtext NOT NULL,
  `gettime` datetime NOT NULL,
  PRIMARY KEY (`id`,`lc`,`edittime`,`posttime`,`user`),
  KEY `a1` (`posttime`),
  KEY `a2` (`user`),
  KEY `a3` (`gettime`),
  KEY `a4` (`id`),
  KEY `a5` (`lc`),
  KEY `a6` (`edittime`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;
""".format(big=big, boardid=boardid)
    global conn
    conn = db()  # force a reconnect to the database  TODO: is this necessary?
    cur = conn.cursor()
    try:
        cur.execute(sql)
        conn.commit()
    except Exception:  # ignore errors, e.g. the table already exists
        pass
    conn = db() 
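In the cc98 examples, db is a zero-argument factory that hands back a fresh DB-API connection each time it is called; the snippets then use .cursor(), .execute(), and .commit() on the result. A minimal sketch of such a factory, assuming pymysql as the driver (driver and credentials are assumptions; the utf8mb4 charset matches the CREATE TABLE above):

# config.py -- hypothetical sketch of the cc98-style connection factory
import pymysql

def db():
    # return a brand-new connection on every call; callers commit explicitly
    return pymysql.connect(
        host='localhost',
        user='cc98',
        password='password',
        database='cc98',
        charset='utf8mb4',
    )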
Example #10
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def save_article_stats(self, article_id, stats):
    # First, delete the most recently fetched month, because it was probably recorded before
    # that month was complete:
    with self.connection.db.cursor() as cursor:
      cursor.execute("SELECT MAX(year) FROM article_traffic WHERE article=%s;", (article_id,))
      max_year = cursor.fetchone()
    if max_year is not None and len(max_year) > 0:
      max_year = max_year[0]
      with self.connection.db.cursor() as cursor:
        cursor.execute("SELECT MAX(month) FROM article_traffic WHERE year = %s AND article=%s;", (max_year, article_id))
        month = cursor.fetchone()
        if month is not None and len(month) > 0:
          cursor.execute("DELETE FROM article_traffic WHERE year = %s AND month = %s AND article=%s", (max_year, month[0], article_id))

    with self.connection.db.cursor() as cursor:
      # we check for which ones are already recorded because
      # the postgres UPSERT feature is bananas
      cursor.execute("SELECT month, year FROM article_traffic WHERE article=%s", (article_id,))
      # associate each year with which months are already recorded
      done = defaultdict(list)
      for record in cursor:
        done[record[1]].append(record[0])
      # make a list that excludes the records we already know about
      to_record = []
      for i, record in enumerate(stats):
        month = record[0]
        year = record[1]
        if year not in done.keys() or month not in done[year]:
          to_record.append(record)
      # save the remaining ones in the DB
      sql = f"INSERT INTO {config.db['schema']}.article_traffic (article, month, year, abstract, pdf) VALUES (%s, %s, %s, %s, %s);"
      params = [(article_id, x[0], x[1], x[2], x[3]) for x in to_record]
      cursor.executemany(sql, params)

      cursor.execute(f"UPDATE {config.db['schema']}.articles SET last_crawled = CURRENT_DATE WHERE id=%s", (article_id,))

      self.log.record(f"Recorded {len(to_record)} stats for ID {article_id}", "debug") 
Example #11
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def _ensure_tables_exist(self):
    """Creates any missing tables that are expected to exist in the application database.
    Does NOT verify whether the current columns are accurate.

    """
    self.cursor.execute("CREATE TABLE IF NOT EXISTS articles (id SERIAL PRIMARY KEY, url text UNIQUE, title text NOT NULL, abstract text, doi text UNIQUE, posted date, collection text, title_vector tsvector, abstract_vector tsvector, author_vector tsvector, last_crawled DATE NOT NULL DEFAULT CURRENT_DATE);")

    # The "doi" column can't have a "UNIQUE" constraint because sometimes a paper will be
    # posted to bioRxiv under two different titles, so they will show up as two different
    # bioRxiv papers that will eventually share a single journal DOI.
    self.cursor.execute("CREATE TABLE IF NOT EXISTS article_publications (article integer PRIMARY KEY, doi text, publication text);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS publication_dates (article integer PRIMARY KEY, date date);")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS authors (id SERIAL PRIMARY KEY, name text NOT NULL, institution text, orcid text UNIQUE, noperiodname text);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS author_emails (id SERIAL PRIMARY KEY, author integer NOT NULL, email text);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS article_authors (id SERIAL PRIMARY KEY, article integer NOT NULL, author integer NOT NULL, institution text, UNIQUE (article, author));")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS institutions (id SERIAL PRIMARY KEY, name text NOT NULL, ror text, grid text, country text);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS affiliation_institutions (affiliation text PRIMARY KEY, institution integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS countries (alpha2 text PRIMARY KEY, name text NOT NULL, continent text);")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS article_traffic (id SERIAL PRIMARY KEY, article integer NOT NULL, month integer, year integer NOT NULL, abstract integer, pdf integer, UNIQUE (article, month, year));")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS crossref_daily (id SERIAL PRIMARY KEY, source_date DATE, doi text NOT NULL, count integer, crawled DATE NOT NULL DEFAULT CURRENT_DATE, UNIQUE(doi, source_date));")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS alltime_ranks          (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS alltime_ranks_working  (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS category_ranks         (article integer PRIMARY KEY, rank integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS category_ranks_working (article integer PRIMARY KEY, rank integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS ytd_ranks              (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS ytd_ranks_working      (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS month_ranks            (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS month_ranks_working    (article integer PRIMARY KEY, rank integer NOT NULL, downloads integer NOT NULL);")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS author_ranks           (author integer PRIMARY KEY, rank integer NOT NULL, tie boolean, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS author_ranks_working   (author integer PRIMARY KEY, rank integer NOT NULL, tie boolean, downloads integer NOT NULL);")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS author_ranks_category  (id SERIAL PRIMARY KEY, author integer, category text NOT NULL, rank integer NOT NULL, tie boolean, downloads integer NOT NULL, UNIQUE (author, category));")
    self.cursor.execute("CREATE TABLE IF NOT EXISTS author_ranks_category_working   (id SERIAL PRIMARY KEY, author integer, category text NOT NULL,  rank integer NOT NULL, tie boolean, downloads integer NOT NULL, UNIQUE (author, category));")

    self.cursor.execute("CREATE TABLE IF NOT EXISTS download_distribution (id SERIAL PRIMARY KEY, bucket integer NOT NULL, count integer NOT NULL, category text NOT NULL);")
    self.db.commit() 
Example #12
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def fetch_category_list(self):
    categories = []
    with self.connection.db.cursor() as cursor:
      cursor.execute("SELECT DISTINCT collection FROM articles ORDER BY collection;")
      for cat in cursor:
        if len(cat) > 0:
          categories.append(cat[0])
    return categories 
Example #13
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def _ensure_database_exists(self, dbname, host, user, password):
    """Connects to the database server and makes sure the specified database exists there;
    if it doesn't, this method creates it.

    """
    params = f'host={host} dbname={dbname} user={user} password={password}'
    db = psycopg2.connect(params)
    db.autocommit = True  # CREATE DATABASE cannot run inside a transaction block
    cursor = db.cursor()
    cursor.execute("SELECT datname FROM pg_database WHERE datistemplate = false;")
    for result in cursor:
      if result[0] == dbname: break
    else:
      # identifiers can't be passed as query parameters; quote the name safely instead
      from psycopg2 import sql
      cursor.execute(sql.SQL("CREATE DATABASE {};").format(sql.Identifier(dbname)))
    db.close() 
Example #14
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def __init__(self):
    self.connection = db.Connection(config.db["host"], config.db["db"], config.db["user"], config.db["password"])
    self.session = HTMLSession(mock_browser=False)
    self.session.headers['User-Agent'] = config.user_agent
    self.log = Logger() 
Example #15
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def _rank_articles_ytd(self):
    self.log.record("Ranking papers by popularity, year to date...")
    with self.connection.db.cursor() as cursor:
      cursor.execute("SELECT MAX(year) FROM article_traffic;")
      max_year = cursor.fetchone()
      if max_year is None or len(max_year) == 0:
        self.log.record("Could not determine current year for ranking YTD; exiting", "fatal")
      max_year = max_year[0]  # unpack the single-column row, as the monthly ranking does
    with self.connection.db.cursor() as cursor:
      cursor.execute("TRUNCATE ytd_ranks_working")
      cursor.execute("SELECT article, SUM(pdf) as downloads FROM article_traffic WHERE year = %s GROUP BY article ORDER BY downloads DESC", (max_year,))
      params = [(record[0], rank, record[1]) for rank, record in enumerate(cursor, start=1)]

    record_ranks_file(params, "ytd_ranks_working") 
Example #16
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def _rank_articles_month(self):
    with self.connection.db.cursor() as cursor:
      cursor.execute("SELECT MAX(year) FROM article_traffic")
      year = cursor.fetchone()
      if year is None or len(year) == 0:
        self.log.record("Couldn't determine year for monthly rankings; bailing on this ranking", "error")
        return
      year = year[0]
    self.log.record("Ranking papers by popularity, since last month...")
    with self.connection.db.cursor() as cursor:
      # Determine most recent month
      cursor.execute("SELECT MAX(month) FROM article_traffic WHERE year = %s;", (year,))
      month = cursor.fetchone()
      if month is None or len(month) < 1:
        self.log.record("Could not determine current month.", "error")
        return
      month = month[0] - 1 # "since LAST month" prevents nonsense results early in the current month
      if month == 0: # if it's January, roll back one year
        month = 12
        year = year - 1
    with self.connection.db.cursor() as cursor:
      self.log.record(f"Ranking articles based on traffic since {month}/{year}")
      cursor.execute("TRUNCATE month_ranks_working")
      cursor.execute("SELECT article, SUM(pdf) as downloads FROM article_traffic WHERE year = %s AND month >= %s GROUP BY article ORDER BY downloads DESC", (year, month))
      params = [(record[0], rank, record[1]) for rank, record in enumerate(cursor, start=1)]

    record_ranks_file(params, "month_ranks_working") 
Example #17
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def _rank_authors_alltime(self):
    # NOTE: The main query of this function (three lines down from here)
    # relies on data generated during the spider._rank_articles_alltime()
    # method, so that one should be called first.
    self.log.record("Ranking authors by popularity...")
    with self.connection.db.cursor() as cursor:
      cursor.execute("TRUNCATE author_ranks_working")
      cursor.execute("""
      SELECT article_authors.author, SUM(alltime_ranks.downloads) as downloads
      FROM article_authors
      LEFT JOIN alltime_ranks ON article_authors.article=alltime_ranks.article
      WHERE downloads > 0
      GROUP BY article_authors.author
      ORDER BY downloads DESC, article_authors.author DESC
      """)
      self.log.record("Retrieved download data.", "debug")
      ranks = []
      rankNum = 0
      for record in cursor:
        rankNum = rankNum + 1
        tie = False
        rank = rankNum # changes if it's a tie

        # if the author has the same download count as the
        # previous author in the list, record a tie:
        if len(ranks) > 0:
          if record[1] == ranks[len(ranks) - 1]["downloads"]:
            ranks[len(ranks) - 1]["tie"] = True
            tie = True
            rank = ranks[len(ranks) - 1]["rank"]
        ranks.append({
          "id": record[0],
          "downloads": record[1],
          "rank": rank,
          "tie": tie
        })
    params = [(record["id"], record["rank"], record["downloads"], record["tie"]) for record in ranks]

    record_ranks_file(params, "author_ranks_working") 
Example #18
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def _rank_authors_category(self, category):
    self.log.record(f"Ranking authors by popularity in category {category}...", 'debug')
    with self.connection.db.cursor() as cursor:
      cursor.execute("""
      SELECT article_authors.author, SUM(alltime_ranks.downloads) as downloads
      FROM article_authors
      LEFT JOIN alltime_ranks ON article_authors.article=alltime_ranks.article
      LEFT JOIN articles ON article_authors.article=articles.id
      WHERE downloads > 0 AND
      articles.collection=%s
      GROUP BY article_authors.author
      ORDER BY downloads DESC, article_authors.author DESC
      """, (category,))
      ranks = []
      rankNum = 0
      for record in cursor:
        rankNum = rankNum + 1
        tie = False
        rank = rankNum # changes if it's a tie

        # if the author has the same download count as the
        # previous author in the list, record a tie:
        if len(ranks) > 0:
          if record[1] == ranks[len(ranks) - 1]["downloads"]:
            ranks[len(ranks) - 1]["tie"] = True
            tie = True
            rank = ranks[len(ranks) - 1]["rank"]
        ranks.append({
          "id": record[0],
          "downloads": record[1],
          "rank": rank,
          "tie": tie
        })

    params = [(record["id"], category, record["rank"], record["downloads"], record["tie"]) for record in ranks]

    record_ranks_file(params, "author_ranks_category_working") 
Example #19
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def update_article(self, article_id, abstract):
    with self.connection.db.cursor() as cursor:
      cursor.execute(f"UPDATE {config.db['schema']}.articles SET abstract = %s WHERE id = %s;", (abstract, article_id))
      self.connection.db.commit()
      self.log.record(f"Recorded abstract for ID {article_id}", "debug") 
Example #20
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def calculate_vectors(self):
    self.log.record("Calculating vectors...")
    with self.connection.db.cursor() as cursor:
      cursor.execute(f"UPDATE {config.db['schema']}.articles SET title_vector = to_tsvector(coalesce(title,'')) WHERE title_vector IS NULL;")
      cursor.execute(f"UPDATE {config.db['schema']}.articles SET abstract_vector = to_tsvector(coalesce(abstract,'')) WHERE abstract_vector IS NULL;")
      self.fill_in_author_vectors() 
Example #21
Source File: spider.py    From rxivist with GNU Affero General Public License v3.0
def load_rankings_from_file(batch, log):
  os.environ["PGPASSWORD"] = config.db["password"]
  to_delete = None
  log.record(f"Loading {batch} from file.", 'debug')
  if batch in ["alltime_ranks", "ytd_ranks", "month_ranks"]:
    query = f'\copy {config.db["schema"]}.{batch}_working (article, rank, downloads) FROM \'{batch}_working.csv\' with (format csv);'
  elif batch == "author_ranks":
    query = f'\copy {config.db["schema"]}.author_ranks_working (author, rank, downloads, tie) FROM \'author_ranks_working.csv\' with (format csv);'
  elif batch == "author_ranks_category":
    query = f'\copy {config.db["schema"]}.author_ranks_category_working (author, category, rank, downloads, tie) FROM \'author_ranks_category_working.csv\' with (format csv);'
    to_delete = "author_ranks_category_working.csv"
  elif batch == "category_ranks":
    query = f'\copy {config.db["schema"]}.category_ranks_working (article, rank) FROM \'category_ranks_working.csv\' with (format csv);'
    to_delete = "category_ranks_working.csv"
  else:
    log.record(f'Unrecognized rankings source passed to load_rankings_from_file: {batch}', "warn")
    return

  subprocess.run(["psql", "-h", config.db["host"], "-U", config.db["user"], "-d", config.db["db"], "-c", query], check=True)
  # Some files get rewritten a bunch of times; if we encounter one of those,
  # delete it before the next iteration starts.
  if to_delete is not None:
    os.remove(to_delete) 
Example #22
Source File: endpoints.py    From rxivist with GNU Affero General Public License v3.0
def author_rankings(connection, category=""):
  """Fetches a list of authors with the most cumulative downloads.

  Arguments:
    - connection: a database Connection object.
    - category: Optionally, a single bioRxiv collection on which to base the download rankings.
  Returns:
    - A list of Author objects that meet the search criteria.

  """
  if category == "": # all time, all categories
    table = "author_ranks"
    where = ""
    params = ()
  else:
    table = "author_ranks_category"
    where = "WHERE r.category=%s"
    params = (category,)
  query = f"""
    SELECT a.id, a.name, r.rank, r.downloads, r.tie
    FROM authors AS a
    INNER JOIN {config.db["schema"]}.{table} r ON a.id=r.author
    {where}
    ORDER BY r.rank
    LIMIT {config.author_ranks_limit}
  """

  authors = connection.read(query, params)
  return [models.SearchResultAuthor(*a) for a in authors] 
Example #23
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def __del__(self):
    """Closes the database connection when the Connection object
    is destroyed."""

    if self.db is not None:
      self.db.close() 
Example #24
Source File: getBoardId.py    From cc98 with MIT License
def runsql(sql):
    global conn
    conn = db()
    cur = conn.cursor()
    cur.execute(sql)
    conn.commit()
Example #25
Source File: getBoardId.py    From cc98 with MIT License
def getworkset():
    conn = db()
    workset = []
    for i in rawlist2:
        try:
            runsql("desc bigbbs_{}".format(i))
        except Exception:  # if the table doesn't exist, it still needs fetching, so add it to the workset
            workset.append(i)
    return workset
Example #26
Source File: update_big_data.py    From cc98 with MIT License
def runsql(sql):
    global conn
    conn = db()
    cur = conn.cursor()
    try:
        cur.execute(sql)
        conn.commit()
    except Exception as e:
        print("Error:")
        print(e) 
Example #27
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def read(self, query, params=None):
    """Helper function that converts results returned stored in a
    Psycopg cursor into a less temperamental list format. Note that
    there IS recursive retry logic here; when the connection to the
    database is dropped, the query will fail, prompting this method
    to re-connect and try the query again. This will continue trying
    to reconnect indefinitely. This is probably not ideal.

    Arguments:
      - query: The SQL query to be executed.
      - params: Any parameters to be substituted into the query. It's
          important to let Psycopg handle this rather than using Python
          string interpolation because it helps mitigate SQL injection.
    Returns:
      - A list of tuples, one for each row of results.

    """

    results = []
    try:
      with self.db.cursor() as cursor:
        if params is not None:
          cursor.execute(query, params)
        else:
          cursor.execute(query)
        for result in cursor:
          results.append(result)
      return results
    except psycopg2.OperationalError as e:
      print(f"ERROR with db query execution: {e}")
      print("Reconnecting.")
      self._attempt_connect()
      print("Sending query again.")
      return self.read(query, params) 
Example #28
Source File: db.py    From rxivist with GNU Affero General Public License v3.0
def _attempt_connect(self, attempts=0):
    """Initiates a connection to the database and tracks retry attempts.

    Arguments:
      - attempts: How many failed attempts have already happened.

    Side effects:
      - self.db: Set on a successful connection attempt.
    """

    attempts += 1
    print(f'Connecting. Attempt {attempts} of {config.db["connection"]["max_attempts"]}.')
    try:
      self.db = psycopg2.connect(
        host=self.host,
        dbname=self.dbname,
        user=self.user,
        password=self.password,
        connect_timeout=config.db["connection"]["timeout"],
        options=f'-c search_path={config.db["schema"]}'
      )
      self.db.set_session(autocommit=True)
    except Exception:
      if attempts >= config.db["connection"]["max_attempts"]:
        print("Giving up.")
        raise RuntimeError("Failed to connect to database.")
      print(f'Connection to DB failed. Retrying in {config.db["connection"]["attempt_pause"]} seconds.')
      time.sleep(config.db["connection"]["attempt_pause"])
      self._attempt_connect(attempts) 
Example #29
Source File: app.py    From shorty with MIT License
def search():
	s_tag = request.form.get('search_url')
	if s_tag == "":
		return render_template('index.html', error="Please enter a search term")
	else:
		conn = MySQLdb.connect(host, user, passwrd, db)
		cursor = conn.cursor()

		search_tag_sql = "SELECT * FROM WEB_URL WHERE TAG = %s"
		cursor.execute(search_tag_sql, (s_tag,))
		search_tag_fetch = cursor.fetchall()
		conn.close()
		return render_template('search.html', host=shorty_host, search_tag=s_tag, table=search_tag_fetch)
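The shorty examples read host, user, passwrd, db, and shorty_host at module scope, presumably imported from a settings module. A minimal sketch consistent with the positional MySQLdb.connect(host, user, passwd, db) calls above (names follow the snippets; the values are placeholders):

# settings sketch -- hypothetical; shorty's real configuration may differ
host = "localhost"
user = "shorty"
passwrd = "password"        # the project's own spelling, reused verbatim above
db = "shorty_db"
shorty_host = "http://localhost:5000/"  # base URL passed to the templates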
Example #30
Source File: display_list.py    From shorty with MIT License
def list_data(shorty_url):
	"""
	Takes short_url for input.
	Returns counter, browser, platform ticks.
	"""
	conn = MySQLdb.connect(host, user, passwrd, db)
	cursor = conn.cursor()
	su = [shorty_url]
	info_sql = "SELECT URL, S_URL, TAG FROM WEB_URL WHERE S_URL = %s;"
	counter_sql = "SELECT COUNTER FROM WEB_URL WHERE S_URL = %s;"
	browser_sql = "SELECT CHROME, FIREFOX, SAFARI, OTHER_BROWSER FROM WEB_URL WHERE S_URL = %s;"
	platform_sql = "SELECT ANDROID, IOS, WINDOWS, LINUX, MAC, OTHER_PLATFORM FROM WEB_URL WHERE S_URL = %s;"

	# MySQLdb's execute() expects a sequence of parameters, so we pass [arg];
	# for sqlite, (arg,) works as well.
	cursor.execute(info_sql, su)
	info_fetch = cursor.fetchone()
	cursor.execute(counter_sql, su)
	counter_fetch = cursor.fetchone()
	cursor.execute(browser_sql, su)
	browser_fetch = cursor.fetchone()
	cursor.execute(platform_sql, su)
	platform_fetch = cursor.fetchone()
	conn.close()
	return info_fetch, counter_fetch, browser_fetch, platform_fetch