Python fuzzywuzzy.process.extractOne() Examples

The following are 21 code examples for showing how to use fuzzywuzzy.process.extractOne(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module fuzzywuzzy.process, or try the search function.

Example 1
Project: gnewsclient   Author: nikhilkumarsingh   File: gnewsclient.py    License: MIT License 6 votes vote down vote up
def params_dict(self):
        """
        Build the query-parameter dict for the HTTP request.

        The defaults are location 'US' and language 'en'. A non-empty
        ``self.location`` / ``self.language`` is fuzzy-matched against
        ``self.locations`` / ``self.languages`` and the best match is
        translated to its code through ``locationMap`` / ``langMap``.

        Returns a dict with the keys 'hl', 'gl' and 'ceid'.
        """
        location_code, language_code = 'US', 'en'

        if len(self.location):
            best_location = process.extractOne(self.location, self.locations)[0]
            location_code = locationMap[best_location]

        if len(self.language):
            best_language = process.extractOne(self.language, self.languages)[0]
            language_code = langMap[best_language]

        ceid = '{}:{}'.format(location_code, language_code)
        return {'hl': language_code, 'gl': location_code, 'ceid': ceid}
Example 2
Project: anime-downloader   Author: vn-ki   File: watch.py    License: The Unlicense 5 votes vote down vote up
def get(self, anime_name):
        """
        Look up a tracked anime by list index or by fuzzy name.

        An ``int`` is used directly as an index into the watch list. Any
        other value is fuzzy-matched against the list with a score cutoff
        of 40; when nothing clears the cutoff the method returns ``None``.
        A matched entry whose ``_timestamp`` is more than four days old is
        passed through ``self.update_anime`` before being returned.
        """
        watch_list = self._read_from_watch_file()

        if isinstance(anime_name, int):
            return watch_list[anime_name]

        best = process.extractOne(anime_name, watch_list, score_cutoff=40)
        if not best:
            return None

        found = best[0]
        logger.debug('Anime: {!r}, episodes_done: {}'.format(
            found, found.episodes_done))

        four_days = 4*24*60*60  # in seconds
        age = time() - found._timestamp
        if age > four_days:
            found = self.update_anime(found)
        return found
Example 3
Project: geemusic   Author: stevenleeg   File: music.py    License: GNU General Public License v3.0 5 votes vote down vote up
def get_artist(self, name):
        """
        Look up an artist by name.

        Store mode: search the store and return ``get_artist_info`` for the
        first hit (up to 100 top tracks), or ``False`` when the search is
        empty.

        Library mode: fuzzy-match ``name`` against ``self.artists`` (minimum
        score 70), gather every library song by that artist that carries an
        'artistId', and return a dict with 'topTracks' plus the artist
        details copied from the first such song. Returns ``False`` when the
        match is too weak or no song qualifies.
        """
        if self.use_store:
            hits = self._search("artist", name)
            if len(hits) == 0:
                return False
            first_hit = hits[0]
            return self._api.get_artist_info(first_hit['artistId'],
                                             max_top_tracks=100)

        # Library mode: pick the closest known artist, then collect their songs
        result = {'topTracks': []}
        best_artist, score = process.extractOne(name, self.artists)
        if score < 70:
            return False

        for song in self.library.values():
            if 'artist' not in song:
                continue
            if song['artist'].lower() != best_artist.lower():
                continue
            if 'artistId' not in song:
                continue

            if not result['topTracks']:
                # The first matching song supplies the artist-level details
                try:
                    result['artistArtRef'] = song['artistArtRef'][0]['url']
                except KeyError:
                    pass
                result['name'] = song['artist']
                result['artistId'] = song['artistId']
            result['topTracks'].append(song)

        # This is all music, not top, but the user probably would prefer it shuffled.
        random.shuffle(result['topTracks'])
        if not result['topTracks']:
            return False

        return result
Example 4
Project: geemusic   Author: stevenleeg   File: music.py    License: GNU General Public License v3.0 5 votes vote down vote up
def get_album(self, name, artist_name=None):
        """
        Look up an album by name, optionally narrowed by artist.

        Store mode: search the store for the album name (with the artist
        name appended when given) and return ``get_album_info`` for the
        first hit, or ``False`` when the search is empty.

        Library mode: fuzzy-match the artist (if given) and the album
        against ``self.artists`` / ``self.albums`` (minimum score 70 each),
        then collect the matching library songs into a dict with 'tracks'
        plus album details taken from the first matching song. Returns
        ``False`` when a match is too weak or no song qualifies.
        """
        if self.use_store:
            query = "%s %s" % (name, artist_name) if artist_name else name
            hits = self._search("album", query)
            if len(hits) == 0:
                return False
            return self._api.get_album_info(hits[0]['albumId'])

        result = {'tracks': []}

        matched_artist = artist_name
        if matched_artist:
            matched_artist, score = process.extractOne(artist_name, self.artists)
            if score < 70:
                return False

        matched_album, score = process.extractOne(name, self.albums)
        if score < 70:
            return False

        for song in self.library.values():
            if 'album' not in song or song['album'].lower() != matched_album.lower():
                continue
            # When an artist was requested the song has to be by that artist
            if matched_artist and not ('artist' in song and song['artist'].lower() == matched_artist.lower()):
                continue

            if not result['tracks']:
                # The first matching song supplies the album-level details
                result['albumArtist'] = song['albumArtist']
                result['name'] = song['album']
                try:
                    result['albumId'] = song['albumId']
                except KeyError:
                    pass
            result['tracks'].append(song)

        if not result['tracks']:
            return False

        return result
Example 5
Project: geemusic   Author: stevenleeg   File: music.py    License: GNU General Public License v3.0 5 votes vote down vote up
def get_song(self, name, artist_name=None, album_name=None):
        """
        Look up a single song by name, optionally narrowed by artist/album.

        Store mode: search the store for the song name (prefixed with the
        artist name, or else the album name, when given). When an album name
        was given, the first hit whose 'album' contains it is returned;
        otherwise, or when no hit is on that album, the first hit is
        returned. Returns ``False`` when the search is empty.

        Library mode: fuzzy-match the artist and album (when given) against
        ``self.artists`` / ``self.albums``, restrict the library to songs by
        that artist / on that album, and fuzzy-match the lower-cased song
        name against the remaining titles. Every match needs a score of at
        least 70.

        Returns the song entry, or ``False`` when nothing suitable is found.
        """
        if self.use_store:
            if artist_name:
                name = "%s %s" % (artist_name, name)
            elif album_name:
                name = "%s %s" % (album_name, name)

            search = self._search("song", name)

            if len(search) == 0:
                return False

            if album_name:
                # Look at every hit; the previous index loop stopped one
                # short and never considered the last search result.
                for hit in search:
                    if album_name in hit['album']:
                        return hit
            return search[0]
        else:
            if not name:
                return False
            if artist_name:
                artist_name, score = process.extractOne(artist_name, self.artists)
                if score < 70:
                    return False
            if album_name:
                album_name, score = process.extractOne(album_name, self.albums)
                if score < 70:
                    return False
            possible_songs = {song_id: song['title'] for song_id, song in self.library.items() if (not artist_name or ('artist' in song and song['artist'].lower() == artist_name.lower())) and (not album_name or ('album' in song and song['album'].lower() == album_name.lower()))}
            # With a dict of choices extractOne yields (value, score, key);
            # it yields None when there is nothing to choose from, e.g. when
            # the artist/album filters removed every song.
            best = process.extractOne(name.lower(), possible_songs)
            if not best:
                return False
            song, score, song_id = best
            if score < 70:
                return False
            return self.library[song_id]
Example 6
Project: pyinrail   Author: nikhilkumarsingh   File: pyinrail.py    License: MIT License 5 votes vote down vote up
def get_stn_code(self, query):
        """
        utility function to get the correct station code

        The upper-cased query is first looked up directly in
        ``self.stations``; when that key is missing, the query is
        fuzzy-matched against the values of ``self.stations`` and the best
        match is returned.
        """
        try:
            exact_key = query.upper()
            code = self.stations[exact_key]
        except KeyError:
            best = process.extractOne(query, self.stations.values())
            code = best[0]
        return code
Example 7
Project: fluids   Author: CalebBell   File: friction.py    License: MIT License 5 votes vote down vote up
def fuzzy_match(name, strings):
    """Return the entry of `strings` that most closely resembles `name`.

    The matching backend is chosen on the first call and cached in the
    module-level `fuzzy_match_fun`: fuzzywuzzy's `process.extractOne` with
    the `partial_ratio` scorer when fuzzywuzzy can be imported, otherwise
    `difflib.get_close_matches` with no cutoff.
    """
    global fuzzy_match_fun
    if fuzzy_match_fun is None:
        try:
            from fuzzywuzzy import process, fuzz
        except ImportError: # pragma: no cover
            import difflib

            def closest(name, strings):
                return difflib.get_close_matches(name, strings, n=1, cutoff=0)[0]
        else:
            def closest(name, strings):
                return process.extractOne(name, strings, scorer=fuzz.partial_ratio)[0]

        fuzzy_match_fun = closest
    return fuzzy_match_fun(name, strings)
Example 8
Project: gnewsclient   Author: nikhilkumarsingh   File: gnewsclient.py    License: MIT License 5 votes vote down vote up
def get_news(self):
        """
        Fetch and parse the news articles for the configured topic.

        A topic of ``None`` or 'Top Stories' uses the top-news URL. Any
        other topic is fuzzy-matched against ``self.topics``, translated to
        its code through ``topicMap`` and inserted into the topic URL. The
        response body is handed to ``self.parse_feed``.
        """
        wants_top_stories = self.topic is None or self.topic == 'Top Stories'
        if wants_top_stories:
            url = top_news_url
        else:
            matched_topic = process.extractOne(self.topic, self.topics)[0]
            url = topic_url.format(topicMap[matched_topic])

        resp = requests.get(url, params=self.params_dict)
        return self.parse_feed(resp.content)
Example 9
Project: mopidy_skill   Author: forslund   File: __init__.py    License: GNU General Public License v3.0 5 votes vote down vote up
def query_song(self, song):
        """Find the best fuzzy match for `song` across all track libraries.

        Only matches with a confidence above 50 are accepted, and the
        highest-confidence one wins.

        Returns (match, confidence, 'song', library_type); match and
        library_type are None and confidence is 0 when nothing qualifies.
        """
        top_match, top_conf, top_library = None, 0, None
        for library in self.track_names:
            titles = self.track_names[library].keys()
            candidate = extract_one(song, titles)
            # extract_one may come back empty; treat that as "no match"
            match, conf = candidate if candidate else (None, 0)
            if conf > 50 and conf > top_conf:
                top_match, top_conf, top_library = match, conf, library
        return top_match, top_conf, 'song', top_library
Example 10
Project: mopidy_skill   Author: forslund   File: __init__.py    License: GNU General Public License v3.0 5 votes vote down vote up
def query_artist(self, artist):
        """Find the best fuzzy match for `artist` across all artist libraries.

        Only matches with a confidence above 50 are accepted, and the
        highest-confidence one wins.

        Returns (match, confidence, 'artist', library_type); match and
        library_type are None and confidence is 0.0 when nothing qualifies.
        """
        top_match, top_conf, top_library = None, 0.0, None
        for library in self.artists:
            names = self.artists[library].keys()
            candidate = extract_one(artist, names)
            # extract_one may come back empty; treat that as "no match"
            match, conf = candidate if candidate else (None, 0)
            if conf > 50 and conf > top_conf:
                top_match, top_conf, top_library = match, conf, library
        return top_match, top_conf, 'artist', top_library
Example 11
Project: mopidy_skill   Author: forslund   File: __init__.py    License: GNU General Public License v3.0 5 votes vote down vote up
def query_album(self, album):
        """Find the best fuzzy match for `album` across all album libraries.

        Only matches with a confidence above 50 are accepted, and the
        highest-confidence one wins. The candidate titles of every library
        and the final result are written to the log.

        Returns (match, confidence, 'album', library_type); match and
        library_type are None and confidence is 0 when nothing qualifies.
        """
        top_match, top_conf, top_library = None, 0, None
        for library in self.albums:
            self.log.info(self.albums[library].keys())
            candidate = extract_one(album, self.albums[library].keys())
            # extract_one may come back empty; treat that as "no match"
            match, conf = candidate if candidate else (None, 0)
            if conf > 50 and conf > top_conf:
                top_match, top_conf, top_library = match, conf, library
        self.log.info('ALBUMS')
        self.log.info((top_match, top_conf))
        return top_match, top_conf, 'album', top_library
Example 12
Project: mopidy_skill   Author: forslund   File: __init__.py    License: GNU General Public License v3.0 5 votes vote down vote up
def generic_query(self, phrase):
        """Fuzzy-match `phrase` against the known playlist names.

        Returns (match, confidence, 'generic', '') when the confidence is
        above 50, otherwise NOTHING_FOUND.
        """
        # extract_one can come back empty (the query_song / query_artist /
        # query_album helpers guard against this in the same way); fall back
        # to a zero-confidence result instead of failing on the unpacking.
        found, conf = (extract_one(phrase, self.playlist.keys()) or
                       (None, 0))
        if conf > 50:
            return found, conf, 'generic', ''
        return NOTHING_FOUND
Example 13
Project: HoshinoBot   Author: Ice-Cirno   File: chara.py    License: GNU General Public License v3.0 5 votes vote down vote up
def guess_id(self, name):
        """Fuzzy-match `name` against all known names.

        @return: id, name, score -- the roster id of the best-matching name,
        that name itself, and its match score
        """
        best_name, score = process.extractOne(name, self._all_name_list)
        chara_id = self._roster[best_name]
        return chara_id, best_name, score
Example 14
Project: squeeze-alexa   Author: declension   File: main.py    License: GNU General Public License v3.0 5 votes vote down vote up
def on_play_playlist(self, intent, session, pid=None):
        """Handle an intent asking for a playlist to be played.

        The playlist name is read from the intent's 'Playlist' slot and
        fuzzy-matched against the server's playlists. A match that reaches
        ``MinConfidences.PLAYLIST`` is resumed on the player `pid`. In every
        other case (no slot, no playlists, weak match) a spoken response
        explains the problem, suggesting a random playlist where one exists.
        """
        server = self._server
        try:
            slot = intent['slots']['Playlist']['value']
            print_d("Extracted playlist slot: {slot}", slot=slot)
        except KeyError:
            # No usable slot value: suggest a random playlist, if there is one
            print_d("Couldn't process playlist from: {intent}", intent=intent)
            if not server.playlists:
                return speech_response(speech=_("There are no playlists"))
            suggestion = random.choice(server.playlists)
            text = _("Didn't hear a playlist there. "
                     "You could try the \"{name}\" playlist?").format(name=suggestion)
            return speech_response(speech=text)

        if not server.playlists:
            return speech_response(
                speech=_("No Squeezebox playlists found"))

        guess = process.extractOne(slot, server.playlists)
        print_d("{guess} was the best guess for '{slot}' from {choices}",
                guess=str(guess), slot=slot, choices=server.playlists)

        if guess and int(guess[1]) >= MinConfidences.PLAYLIST:
            chosen = guess[0]
            server.playlist_resume(chosen, player_id=pid)
            name = sanitise_text(chosen)
            return self.smart_response(
                speech=_("Playing \"{name}\" playlist").format(name=name),
                text=_("Playing \"{name}\" playlist").format(name=name))

        # The match was too weak: offer a random playlist instead
        suggestion = random.choice(server.playlists)
        title = (_("Couldn't find a playlist matching \"{name}\".")
                 .format(name=slot))
        extra = (_("How about the \"{suggestion}\" playlist?")
                 .format(suggestion=suggestion))
        return speech_response(title=title, text=extra,
                               speech=title + extra)
Example 15
Project: squeeze-alexa   Author: declension   File: main.py    License: GNU General Public License v3.0 5 votes vote down vote up
def player_id_from(self, intent, defaulting=True):
        """Work out which player an intent refers to.

        The player name is read from the intent's 'Player' slot and
        fuzzy-matched against the names of the server's players; a match
        that reaches ``MinConfidences.PLAYER`` yields that player's id.
        Without a slot value or a confident match, the server's current
        player id is returned when `defaulting` is true, else ``None``.
        """
        server = self._server
        try:
            requested = intent['slots']['Player']['value']
        except KeyError:
            return server.cur_player_id if defaulting else None

        players_by_name = {p.name: p for p in server.players.values()}
        names = players_by_name.keys()
        guess = process.extractOne(requested, names)
        print_d("{guess} was the best guess for '{value}' from {choices}",
                guess=guess, value=requested, choices=set(names))
        if guess and int(guess[1]) >= MinConfidences.PLAYER:
            return players_by_name.get(guess[0]).id
        return server.cur_player_id if defaulting else None
Example 16
Project: tf_CFO   Author: AlexYangLi   File: preprocess_dataset.py    License: MIT License 5 votes vote down vote up
def reverseLinking(sent, text_candidate):
    """Tag the tokens of `sent` that correspond to one of the candidate texts.

    Candidates are tried longest first; the first one that occurs in `sent`
    as a whole-word match has its tokens labelled 'I' (exact match). When
    no candidate occurs verbatim, the candidate closest to the sentence by
    fuzzywuzzy `partial_ratio` is chosen and the sentence n-gram most
    similar to it is labelled instead.

    Parameters:
        sent: the sentence, as a whitespace-separated string.
        text_candidate: collection of candidate entity strings; may be
            None or empty.

    Returns:
        (entity_text, label, exact_match). Normally `entity_text` and
        `label` are space-joined strings. On the two '<UNK>' early returns
        (no candidates, or fuzzy extraction failed) `label` is the raw list
        of 'O' tags rather than a joined string.
    """
    tokens = sent.split()
    label = ["O"] * len(tokens)
    text_attention_indices = None
    exact_match = False

    if text_candidate is None or len(text_candidate) == 0:
        return '<UNK>', label, exact_match

    # sorted by length, longest first
    for text in sorted(text_candidate, key=len, reverse=True):
        pattern = r'(^|\s)(%s)($|\s)' % (re.escape(text))
        if re.search(pattern, sent):
            text_attention_indices = get_indices(tokens, text.split())
            break
    if text_attention_indices:
        exact_match = True
        for i in text_attention_indices:
            label[i] = 'I'
    else:
        try:
            v, _ = process.extractOne(sent, text_candidate, scorer=fuzz.partial_ratio)
        except Exception:
            # Best effort: report and fall back to '<UNK>'. Catching Exception
            # rather than everything lets KeyboardInterrupt and SystemExit
            # propagate.
            print("Extraction Error with FuzzyWuzzy : {} || {}".format(sent, text_candidate))
            return '<UNK>', label, exact_match
        v = v.split()
        n_gram_candidate = get_ngram(tokens)
        n_gram_candidate = sorted(n_gram_candidate, key=lambda x: fuzz.ratio(x[0], v), reverse=True)
        # presumably each n-gram entry is (text, start, end) -- verify against get_ngram
        top = n_gram_candidate[0]
        for i in range(top[1], top[2]):
            label[i] = 'I'

    entity_text = " ".join(t for l, t in zip(label, tokens) if l == 'I')
    label = " ".join(label)
    return entity_text, label, exact_match
Example 17
Project: plexMusicPlayer   Author: Tyzer34   File: methods.py    License: GNU General Public License v3.0 5 votes vote down vote up
def fuzzy_match(query, media_type):
    """Return the library name that best matches `query`, or None.

    Candidates are the names of the given `media_type` that start with the
    first letter of the query. The best fuzzy match is returned only when
    its score is above 60.

    An empty query returns None, since there is no first letter to select
    candidates by.
    """
    if not query:
        return None

    dirs = get_music_directories()
    names = get_names_by_first_letter(dirs, query[0].upper(), media_type)
    # The length check avoids indexing past the end of a query that is
    # nothing but 'the '.
    if query.lower().startswith('the ') and len(query) > 4:
        # dropping 'the ' off of queries where it might have been mistakenly added
        # (i.e. play the red house painters > red house painters)
        # the reverse is already handled by plex not sorting on the, los, la, etc...
        # (i.e. play head and the heart > the head and the heart)
        names.extend(get_names_by_first_letter(dirs, query[4].upper(), media_type))
    best_match = process.extractOne(query, names)
    return best_match[0] if best_match and best_match[1] > 60 else None
Example 18
Project: marvin   Author: sdss   File: results.py    License: BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def merge_tables(self, tables, direction='vert', **kwargs):
        ''' Merges a list of Astropy tables of results together

        Stacks the given Astropy tables with either the Astropy vstack or
        hstack method.  vstack stacks table rows vertically; hstack stacks
        table columns horizontally and assumes that the rows in each table
        refer to the same object.  Buyer beware: stacking tables without a
        proper understanding of your rows and columns may give misleading
        results.

        merge_tables also accepts all keyword arguments that the Astropy vstack and hstack methods do.
        See `vstack <http://docs.astropy.org/en/stable/table/operations.html#stack-vertically>`_
        See `hstack <http://docs.astropy.org/en/stable/table/operations.html#stack-horizontally>`_

        Parameters:
            tables (list):
                A list of Astropy Table objects.  Required.
            direction (str):
                The direction of the table stacking, either vertical ('vert') or horizontal ('hor').
                Default is 'vert'.  The string is fuzzy-matched against
                'vertical' and 'horizontal'.

        Returns:
            A new Astropy table that is the stacked combination of all input tables

        Example:
            >>> # query 1
            >>> q, r = doQuery(search_filter='nsa.z < 0.1', returnparams=['g_r', 'cube.ra', 'cube.dec'])
            >>> # query 2
            >>> q2, r2 = doQuery(search_filter='nsa.z < 0.1')
            >>>
            >>> # convert to tables
            >>> table_1 = r.toTable()
            >>> table_2 = r2.toTable()
            >>> tables = [table_1, table_2]
            >>>
            >>> # vertical (row) stacking
            >>> r.merge_tables(tables, direction='vert')
            >>> # horizontal (column) stacking
            >>> r.merge_tables(tables, direction='hor')

        '''
        best_match = process.extractOne(direction, ['vertical', 'horizontal'])
        stackdir, _ = best_match
        if stackdir == 'horizontal':
            return hstack(tables, **kwargs)
        if stackdir == 'vertical':
            return vstack(tables, **kwargs)
Example 19
Project: eegsynth   Author: eegsynth   File: outputcvgate.py    License: GNU General Public License v3.0 4 votes vote down vote up
def _start():
    '''Start the module
    This uses the global variables from setup and adds a set of global variables

    Opens the serial port that most closely matches the configured device
    name and starts one TriggerThread for every 'cv1'..'cv4' and
    'gate1'..'gate4' entry present in the 'trigger' section of the patch.

    Raises RuntimeError when the serial port cannot be opened.
    '''
    # every name assigned below is declared global on purpose, see the check at the end
    global parser, args, config, r, response, patch, name
    global monitor, duration_scale, duration_offset, serialdevice, s, lock, trigger, chanindx, chanstr, redischannel, thread

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # values between 0 and 1 work well for the duration
    duration_scale = patch.getfloat('duration', 'scale', default=1)
    duration_offset = patch.getfloat('duration', 'offset', default=0)

    # get the specified serial device, or the one that is the closest match
    serialdevice = patch.getstring('serial', 'device')
    serialdevice = EEGsynth.trimquotes(serialdevice)
    # NOTE(review): presumably extractOne returns None when no serial ports are
    # listed, in which case the [0] below fails with a TypeError -- TODO confirm
    serialdevice = process.extractOne(serialdevice, [comport.device for comport in serial.tools.list_ports.comports()])[0] # select the closest match

    try:
        s = serial.Serial(serialdevice, patch.getint('serial', 'baudrate'), timeout=3.0)
        monitor.success("Connected to serial port")
    except:
        # any failure in the block above is reported as this generic RuntimeError
        raise RuntimeError("cannot connect to serial port")

    # this is to prevent two triggers from being activated at the same time
    lock = threading.Lock()

    trigger = []
    # configure the trigger threads for the control voltages
    # (range(1, 5) covers cv1 up to and including cv4)
    for chanindx in range(1, 5):
        chanstr = "cv%d" % chanindx
        if patch.hasitem('trigger', chanstr):
            redischannel = patch.getstring('trigger', chanstr)
            trigger.append(TriggerThread(redischannel, chanindx, chanstr))
            monitor.info("configured " + redischannel + " on " + str(chanindx))
    # configure the trigger threads for the gates
    # (range(1, 5) covers gate1 up to and including gate4)
    for chanindx in range(1, 5):
        chanstr = "gate%d" % chanindx
        if patch.hasitem('trigger', chanstr):
            redischannel = patch.getstring('trigger', chanstr)
            trigger.append(TriggerThread(redischannel, chanindx, chanstr))
            monitor.info("configured " + redischannel + " on " + str(chanindx))

    # start the thread for each of the triggers
    for thread in trigger:
        thread.start()

    # there should not be any local variables in this function, they should all be global
    # (any name that was accidentally left local is printed here)
    if len(locals()):
        print('LOCALS: ' + ', '.join(locals().keys()))
Example 20
Project: eegsynth   Author: eegsynth   File: endorphines.py    License: GNU General Public License v3.0 4 votes vote down vote up
def _start():
    """Start the module
    This uses the global variables from setup and adds a set of global variables

    Opens the MIDI output port that most closely matches the configured
    device name, starts one TriggerThread for every 'channel1'..'channel16'
    option present in the 'gate' section, and initialises the
    previous-value bookkeeping for all 16 channels.

    Raises RuntimeError when the MIDI output cannot be opened.
    """
    # every name assigned below is meant to be global, see the check at the end
    global parser, args, config, r, response, patch, name
    global monitor, debug, mididevice, outputport, lock, trigger, port, channel, previous_val, previous_port_val

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # get the options from the configuration file
    debug = patch.getint('general', 'debug')
    mididevice = patch.getstring('midi', 'device')
    mididevice = EEGsynth.trimquotes(mididevice)
    # NOTE(review): presumably extractOne returns None when no MIDI outputs are
    # listed, in which case the [0] below fails with a TypeError -- TODO confirm
    mididevice = process.extractOne(mididevice, mido.get_output_names())[0]  # select the closest match

    # this is only for debugging, check which MIDI devices are accessible
    monitor.info('------ OUTPUT ------')
    for port in mido.get_output_names():
        monitor.info(port)
    monitor.info('-------------------------')

    try:
        outputport = mido.open_output(mididevice)
        monitor.success('Connected to MIDI output')
    except:
        # any failure in the block above is reported as this generic RuntimeError
        raise RuntimeError("cannot connect to MIDI output")

    # this is to prevent two messages from being sent at the same time
    lock = threading.Lock()

    # each of the gates that can be triggered is mapped onto a different message
    trigger = []
    for channel in range(0, 16):

        # channels are one-offset in the ini file, zero-offset in the code
        # NOTE(review): `name` is declared global above, so this assignment
        # overwrites the module-level `name` that was passed to the monitor
        name = 'channel{}'.format(channel + 1)
        if config.has_option('gate', name):

            # start the background thread that deals with this channel
            # NOTE(review): `this` is not in the global declarations, so it is a
            # local and will be reported by the LOCALS check at the end
            this = TriggerThread(patch.getstring('gate', name), channel)
            trigger.append(this)
            monitor.debug(name + ' trigger configured')

    # start the thread for each of the notes
    # NOTE(review): `thread` is not in the global declarations either, so it is
    # a local as well
    for thread in trigger:
        thread.start()

    # control values are only relevant when different from the previous value
    previous_val = {}
    previous_port_val = {}
    for channel in range(0, 16):
        name = 'channel{}'.format(channel + 1)
        previous_val[name] = None
        previous_port_val[name] = None

    # there should not be any local variables in this function, they should all be global
    # (any name that was accidentally left local is printed here)
    if len(locals()):
        print("LOCALS: " + ", ".join(locals().keys()))
Example 21
Project: eegsynth   Author: eegsynth   File: outputdmx.py    License: GNU General Public License v3.0 4 votes vote down vote up
def _start():
    '''Start the module
    This uses the global variables from setup and adds a set of global variables

    Opens the serial port that most closely matches the configured device
    name, derives the DMX universe size from the highest 'channelNNN' entry
    in the 'input' section (with a minimum of 16), and sends an all-zero
    frame to blank the output.

    Raises RuntimeError when the serial port cannot be opened.
    '''
    # every name assigned below is declared global on purpose, see the check at the end
    global parser, args, config, r, response, patch, name
    global monitor, debug, serialdevice, s, dmxsize, chanlist, chanvals, chanindx, chanstr, dmxframe, prevtime, START_VAL, END_VAL, TX_DMX_PACKET, FRAME_PAD

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # get the options from the configuration file
    debug = patch.getint('general', 'debug')

    # get the specified serial device, or the one that is the closest match
    serialdevice = patch.getstring('serial', 'device')
    serialdevice = EEGsynth.trimquotes(serialdevice)
    # NOTE(review): presumably extractOne returns None when no serial ports are
    # listed, in which case the [0] below fails with a TypeError -- TODO confirm
    serialdevice = process.extractOne(serialdevice, [comport.device for comport in serial.tools.list_ports.comports()])[0]  # select the closest match

    try:
        s = serial.Serial(serialdevice, patch.getint('serial', 'baudrate'), timeout=3.0)
        monitor.info("Connected to serial port")
    except:
        # any failure in the block above is reported as this generic RuntimeError
        raise RuntimeError("cannot connect to serial port")

    # determine the size of the universe
    dmxsize = 0
    # transpose the (option, value) pairs of the 'input' section into two lists
    # NOTE(review): an empty 'input' section would make this unpacking fail -- TODO confirm
    chanlist, chanvals = list(map(list, list(zip(*config.items('input')))))
    # channels are named channel001 up to channel512
    for chanindx in range(0, 512):
        chanstr = "channel%03d" % (chanindx + 1)
        if chanstr in chanlist:
            # the last channel determines the size
            dmxsize = chanindx + 1

    # my fixture won't work if the frame size is too small
    dmxsize = max(dmxsize, 16)
    monitor.info("universe size = %d" % dmxsize)

    # make an empty frame
    dmxframe = [0] * dmxsize
    # blank out
    sendframe(s, dmxframe)

    # keep a timer to send a packet every now and then
    prevtime = time.time()

    # there should not be any local variables in this function, they should all be global
    # (any name that was accidentally left local is printed here)
    if len(locals()):
        print('LOCALS: ' + ', '.join(locals().keys()))