Python fuzzywuzzy.process.extractOne() Examples

The following are 21 code examples of fuzzywuzzy.process.extractOne(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module fuzzywuzzy.process, or try the search function.
Example #1
Source File: gnewsclient.py    From gnewsclient with MIT License 6 votes vote down vote up
def params_dict(self):
        """
        Build the query parameters for the HTTP request.

        The codes default to 'US' (location) and 'en' (language). When
        ``self.location`` / ``self.language`` is non-empty, the closest fuzzy
        match among ``self.locations`` / ``self.languages`` is looked up in
        ``locationMap`` / ``langMap`` and that code is used instead.

        :return: dict holding the 'hl', 'gl' and 'ceid' parameters
        """
        if len(self.location):
            nearest_location = process.extractOne(self.location, self.locations)[0]
            location_code = locationMap[nearest_location]
        else:
            location_code = 'US'

        if len(self.language):
            nearest_language = process.extractOne(self.language, self.languages)[0]
            language_code = langMap[nearest_language]
        else:
            language_code = 'en'

        return {
            'hl': language_code,
            'gl': location_code,
            'ceid': '{}:{}'.format(location_code, language_code)
        }
Example #2
Source File: __init__.py    From mopidy_skill with GNU General Public License v3.0 5 votes vote down vote up
def query_artist(self, artist):
        """Search every library type for the artist closest to ``artist``.

        A candidate is only accepted when its confidence is above 50 and
        strictly higher than the best confidence seen so far.

        Returns:
            tuple ``(name, confidence, 'artist', library_type)``. ``name`` and
            ``library_type`` are None and ``confidence`` is 0.0 when no
            candidate qualified.
        """
        top_name, top_conf, top_library = None, 0.0, None
        for library in self.artists:
            candidate = extract_one(artist, self.artists[library].keys())
            # a falsy result from extract_one counts as "no match"
            name, conf = candidate or (None, 0)
            if conf > 50 and conf > top_conf:
                top_name, top_conf, top_library = name, conf, library
        return top_name, top_conf, 'artist', top_library
Example #3
Source File: methods.py    From plexMusicPlayer with GNU General Public License v3.0 5 votes vote down vote up
def fuzzy_match(query, media_type):
    """Return the library name that best fuzzy-matches ``query``, or None.

    Candidate names are fetched by the first letter of the query. When the
    query starts with 'the ', names starting with the letter that follows the
    article are added as candidates as well.

    Args:
        query: the name to look for; an empty query yields None.
        media_type: passed through to ``get_names_by_first_letter``.

    Returns:
        The best-matching name when its score is above 60, otherwise None.
    """
    if not query:
        # nothing to match on; indexing query[0] below would raise IndexError
        return None
    dirs = get_music_directories()
    names = get_names_by_first_letter(dirs, query[0].upper(), media_type)
    # len(query) > 4 guards the bare query 'the ', which has no query[4]
    if query.lower().startswith('the ') and len(query) > 4:
        # dropping 'the ' off of queries where it might have been mistakenly added
        # (i.e. play the red house painters > red house painters)
        # the reverse is already handled by plex not sorting on the, los, la, etc...
        # (i.e. play head and the heart > the head and the heart)
        names.extend(get_names_by_first_letter(dirs, query[4].upper(), media_type))
    best_match = process.extractOne(query, names)
    return best_match[0] if best_match and best_match[1] > 60 else None
Example #4
Source File: preprocess_dataset.py    From tf_CFO with MIT License 5 votes vote down vote up
def reverseLinking(sent, text_candidate):
    """Tag the tokens of ``sent`` that mention one of ``text_candidate``.

    The candidates are tried longest-first for an exact, whitespace-delimited
    occurrence in ``sent``. If none occurs, the candidate with the best fuzzy
    partial-ratio score is taken and the n-gram of ``sent`` closest to it is
    tagged instead.

    Args:
        sent: the sentence, tokenised on whitespace.
        text_candidate: collection of candidate entity strings; may be None
            or empty.

    Returns:
        tuple ``(entity_text, label, exact_match)``. On the normal path
        ``label`` is a space-joined string of 'I'/'O' tags and
        ``entity_text`` is the space-joined tagged tokens. On the two early
        '<UNK>' returns (no candidates, or fuzzy extraction failed) ``label``
        is still the *list* of 'O' tags.
    """
    tokens = sent.split()
    label = ["O"] * len(tokens)
    text_attention_indices = None
    exact_match = False

    if text_candidate is None or len(text_candidate) == 0:
        return '<UNK>', label, exact_match

    # longest candidates first, so the most specific exact match wins
    for text in sorted(text_candidate, key=len, reverse=True):
        pattern = r'(^|\s)(%s)($|\s)' % (re.escape(text))
        if re.search(pattern, sent):
            text_attention_indices = get_indices(tokens, text.split())
            break
    if text_attention_indices:
        exact_match = True
        for i in text_attention_indices:
            label[i] = 'I'
    else:
        try:
            v, _score = process.extractOne(sent, text_candidate, scorer=fuzz.partial_ratio)
        except Exception:
            # Deliberate best-effort: any extraction failure (including a None
            # result that cannot be unpacked) degrades to '<UNK>'. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate.
            print("Extraction Error with FuzzyWuzzy : {} || {}".format(sent, text_candidate))
            return '<UNK>', label, exact_match
        v = v.split()
        # presumably get_ngram yields (text, start, end) entries -- inferred
        # from the x[0] / top[1] / top[2] accesses below; TODO confirm
        n_gram_candidate = get_ngram(tokens)
        n_gram_candidate = sorted(n_gram_candidate, key=lambda x: fuzz.ratio(x[0], v), reverse=True)
        top = n_gram_candidate[0]
        for i in range(top[1], top[2]):
            label[i] = 'I'

    entity_text = " ".join(t for l, t in zip(label, tokens) if l == 'I')
    label = " ".join(label)
    return entity_text, label, exact_match
Example #5
Source File: main.py    From squeeze-alexa with GNU General Public License v3.0 5 votes vote down vote up
def player_id_from(self, intent, defaulting=True):
        """Resolve the player named in the intent's 'Player' slot to its id.

        The slot value is fuzzy-matched against the names of the server's
        players; the match is used only if its score reaches
        ``MinConfidences.PLAYER``.

        :param intent: intent dict, read as intent['slots']['Player']['value']
        :param defaulting: whether to fall back to the server's current player
        :return: the matched player's id; otherwise the server's current
                 player id when ``defaulting`` is true, else None
        """
        server = self._server
        try:
            wanted = intent['slots']['Player']['value']
        except KeyError:
            # no player slot supplied
            return server.cur_player_id if defaulting else None

        players = {p.name: p for p in server.players.values()}
        names = players.keys()
        best = process.extractOne(wanted, names)
        print_d("{guess} was the best guess for '{value}' from {choices}",
                guess=best, value=wanted, choices=set(names))
        if best and int(best[1]) >= MinConfidences.PLAYER:
            return players.get(best[0]).id
        return server.cur_player_id if defaulting else None
Example #6
Source File: main.py    From squeeze-alexa with GNU General Public License v3.0 5 votes vote down vote up
def on_play_playlist(self, intent, session, pid=None):
        """Handle a request to play the playlist named in the intent.

        The 'Playlist' slot value is fuzzy-matched against the server's
        playlists. A match scoring at least ``MinConfidences.PLAYLIST`` is
        resumed on player ``pid``; otherwise a random playlist is suggested.

        :param intent: intent dict, read as intent['slots']['Playlist']['value']
        :param session: not used by this handler
        :param pid: player id handed to ``playlist_resume``
        :return: the response built by ``speech_response`` / ``smart_response``
        """
        server = self._server
        try:
            slot = intent['slots']['Playlist']['value']
            print_d("Extracted playlist slot: {slot}", slot=slot)
        except KeyError:
            print_d("Couldn't process playlist from: {intent}", intent=intent)
            if not server.playlists:
                return speech_response(speech=_("There are no playlists"))
            suggestion = random.choice(server.playlists)
            prompt = _("Didn't hear a playlist there. "
                       "You could try the \"{name}\" playlist?")
            return speech_response(speech=prompt.format(name=suggestion))

        if not server.playlists:
            return speech_response(
                speech=_("No Squeezebox playlists found"))

        best = process.extractOne(slot, server.playlists)
        print_d("{guess} was the best guess for '{slot}' from {choices}",
                guess=str(best), slot=slot, choices=server.playlists)
        if best and int(best[1]) >= MinConfidences.PLAYLIST:
            chosen = best[0]
            server.playlist_resume(chosen, player_id=pid)
            spoken_name = sanitise_text(chosen)
            return self.smart_response(
                speech=_("Playing \"{name}\" playlist").format(name=spoken_name),
                text=_("Playing \"{name}\" playlist").format(name=spoken_name))

        # no confident match: offer a random playlist instead
        suggestion = random.choice(server.playlists)
        title = _("Couldn't find a playlist matching \"{name}\".").format(
            name=slot)
        extra = _("How about the \"{suggestion}\" playlist?").format(
            suggestion=suggestion)
        return speech_response(title=title, text=extra,
                               speech=title + extra)
Example #7
Source File: chara.py    From HoshinoBot with GNU General Public License v3.0 5 votes vote down vote up
def guess_id(self, name):
        """Fuzzy-match ``name`` against all known names and look up its id.

        @return: id, name, score -- the roster entry of the best-matching
                 name, the matched name itself, and its match score
        """
        best_name, best_score = process.extractOne(name, self._all_name_list)
        roster_id = self._roster[best_name]
        return roster_id, best_name, best_score
Example #8
Source File: __init__.py    From mopidy_skill with GNU General Public License v3.0 5 votes vote down vote up
def generic_query(self, phrase):
        """Fuzzy-match ``phrase`` against the known playlist names.

        Returns:
            ``(name, confidence, 'generic', '')`` when the best match has a
            confidence above 50, otherwise ``NOTHING_FOUND``.
        """
        # A falsy result from extract_one is treated as "no match", the same
        # way query_artist/query_album/query_song do, instead of failing on
        # tuple unpacking.
        found, conf = (extract_one(phrase, self.playlist.keys()) or
                       (None, 0))
        if conf > 50:
            return found, conf, 'generic', ''
        return NOTHING_FOUND
Example #9
Source File: __init__.py    From mopidy_skill with GNU General Public License v3.0 5 votes vote down vote up
def query_album(self, album):
        """Search every library type for the album closest to ``album``.

        The album names of each library and the final result are written to
        ``self.log`` at info level. A candidate is only accepted when its
        confidence is above 50 and strictly higher than the best so far.

        Returns:
            tuple ``(name, confidence, 'album', library_type)``. ``name`` and
            ``library_type`` are None and ``confidence`` is 0 when no
            candidate qualified.
        """
        top_name, top_conf, top_library = None, 0, None
        for library in self.albums:
            self.log.info(self.albums[library].keys())
            candidate = extract_one(album, self.albums[library].keys())
            # a falsy result from extract_one counts as "no match"
            name, conf = candidate or (None, 0)
            if conf > 50 and conf > top_conf:
                top_name, top_conf, top_library = name, conf, library
        self.log.info('ALBUMS')
        self.log.info((top_name, top_conf))
        return top_name, top_conf, 'album', top_library
Example #10
Source File: watch.py    From anime-downloader with The Unlicense 5 votes vote down vote up
def get(self, anime_name):
        """Fetch an entry from the watch file by position or by fuzzy name.

        :param anime_name: an int is used directly as an index into the
            entries read from the watch file; anything else is fuzzy-matched
            against them with a score cutoff of 40.
        :return: the matching entry, refreshed through ``update_anime`` when
            its ``_timestamp`` is more than 4*24*60*60 old (four days, if
            timestamps are in seconds); None when nothing meets the cutoff.
        """
        watched = self._read_from_watch_file()

        if isinstance(anime_name, int):
            return watched[anime_name]

        found = process.extractOne(anime_name, watched, score_cutoff=40)
        if not found:
            return None

        entry = found[0]
        logger.debug('Anime: {!r}, episodes_done: {}'.format(
            entry, entry.episodes_done))

        age = time() - entry._timestamp
        if age > 4*24*60*60:
            entry = self.update_anime(entry)
        return entry
Example #11
Source File: __init__.py    From mopidy_skill with GNU General Public License v3.0 5 votes vote down vote up
def query_song(self, song):
        """Search every library type for the track closest to ``song``.

        A candidate is only accepted when its confidence is above 50 and
        strictly higher than the best confidence seen so far.

        Returns:
            tuple ``(name, confidence, 'song', library_type)``. ``name`` and
            ``library_type`` are None and ``confidence`` is 0 when no
            candidate qualified.
        """
        top_name, top_conf, top_library = None, 0, None
        for library in self.track_names:
            candidate = extract_one(song, self.track_names[library].keys())
            # a falsy result from extract_one counts as "no match"
            name, conf = candidate or (None, 0)
            if conf > 50 and conf > top_conf:
                top_name, top_conf, top_library = name, conf, library
        return top_name, top_conf, 'song', top_library
Example #12
Source File: gnewsclient.py    From gnewsclient with MIT License 5 votes vote down vote up
def get_news(self):
        """
        Download and parse the news feed for the configured topic.

        With no topic, or the topic 'Top Stories', the top-news URL is
        requested. Otherwise the topic is fuzzy-matched against
        ``self.topics`` and the matching code from ``topicMap`` is formatted
        into the topic URL.

        :return: whatever ``self.parse_feed`` returns for the response body
        """
        wants_top_stories = self.topic is None or self.topic == 'Top Stories'
        if wants_top_stories:
            url = top_news_url
        else:
            best_topic = process.extractOne(self.topic, self.topics)[0]
            url = topic_url.format(topicMap[best_topic])
        resp = requests.get(url, params=self.params_dict)
        return self.parse_feed(resp.content)
Example #13
Source File: friction.py    From fluids with MIT License 5 votes vote down vote up
def fuzzy_match(name, strings):
    """Return the entry of ``strings`` that most closely matches ``name``.

    The matcher is built on first use and cached in the module-level
    ``fuzzy_match_fun``: fuzzywuzzy's partial-ratio scorer when that package
    can be imported, otherwise ``difflib.get_close_matches`` with a cutoff
    of 0.
    """
    global fuzzy_match_fun
    if fuzzy_match_fun is None:
        try:
            from fuzzywuzzy import process, fuzz
        except ImportError:  # pragma: no cover
            import difflib

            def _closest(name, strings):
                return difflib.get_close_matches(name, strings, n=1, cutoff=0)[0]
        else:
            def _closest(name, strings):
                return process.extractOne(name, strings, scorer=fuzz.partial_ratio)[0]
        fuzzy_match_fun = _closest
    return fuzzy_match_fun(name, strings)
Example #14
Source File: pyinrail.py    From pyinrail with MIT License 5 votes vote down vote up
def get_stn_code(self, query):
        """
        Utility function to get the correct station code.

        The upper-cased query is first tried as an exact key of
        ``self.stations``. On a miss, the closest fuzzy match among the
        values of ``self.stations`` is returned.
        """
        try:
            code = self.stations[query.upper()]
        except KeyError:
            best = process.extractOne(query, self.stations.values())
            code = best[0]
        return code
Example #15
Source File: music.py    From geemusic with GNU General Public License v3.0 5 votes vote down vote up
def get_song(self, name, artist_name=None, album_name=None):
        """Find a song by title, optionally narrowed by artist and/or album.

        Store mode (``self.use_store`` true): searches the store for the
        title prefixed with the artist (or, failing that, the album). When an
        album is given, the first hit whose 'album' contains it is preferred;
        otherwise the first hit is returned.

        Library mode: the artist and album are fuzzy-matched against
        ``self.artists`` / ``self.albums``, the library is filtered by the
        matched names, and the title is fuzzy-matched among the remaining
        songs. Every fuzzy match must score at least 70.

        Returns:
            The store hit or the library entry of the song, or False when
            nothing suitable was found.
        """
        if self.use_store:
            if artist_name:
                name = "%s %s" % (artist_name, name)
            elif album_name:
                name = "%s %s" % (album_name, name)

            search = self._search("song", name)

            if not search:
                return False

            if album_name:
                # inspect every hit, the last one included
                for hit in search:
                    if album_name in hit['album']:
                        return hit
            return search[0]

        if not name:
            return False

        # extractOne returns None when it is given no choices, so each result
        # is checked before it is unpacked.
        if artist_name:
            artist_match = process.extractOne(artist_name, self.artists)
            if artist_match is None or artist_match[1] < 70:
                return False
            artist_name = artist_match[0]
        if album_name:
            album_match = process.extractOne(album_name, self.albums)
            if album_match is None or album_match[1] < 70:
                return False
            album_name = album_match[0]

        def _in_scope(song):
            """Tell whether the song belongs to the matched artist/album."""
            if artist_name and not ('artist' in song and song['artist'].lower() == artist_name.lower()):
                return False
            if album_name and not ('album' in song and song['album'].lower() == album_name.lower()):
                return False
            return True

        possible_songs = {song_id: song['title']
                          for song_id, song in self.library.items()
                          if _in_scope(song)}
        # with a dict of choices extractOne yields (title, score, key)
        song_match = process.extractOne(name.lower(), possible_songs)
        if song_match is None or song_match[1] < 70:
            return False
        return self.library[song_match[2]]
Example #16
Source File: music.py    From geemusic with GNU General Public License v3.0 5 votes vote down vote up
def get_album(self, name, artist_name=None):
        """Find an album by name, optionally narrowed by artist.

        Store mode (``self.use_store`` true): searches the store for
        "<name> <artist_name>" and returns the album info of the first hit.

        Library mode: the artist (if given) and the album name are
        fuzzy-matched against ``self.artists`` / ``self.albums`` (each match
        must score at least 70), then every library song of that album, and
        of that artist if one was given, is collected.

        Returns:
            The store's album info, or a dict with 'tracks', 'albumArtist',
            'name' and (when the first song has one) 'albumId'; False when
            nothing was found.
        """
        if self.use_store:
            query = "%s %s" % (name, artist_name) if artist_name else name
            hits = self._search("album", query)
            if len(hits) == 0:
                return False
            return self._api.get_album_info(hits[0]['albumId'])

        if artist_name:
            artist_name, artist_score = process.extractOne(artist_name, self.artists)
            if artist_score < 70:
                return False
        name, album_score = process.extractOne(name, self.albums)
        if album_score < 70:
            return False

        album = {'tracks': []}
        for song in self.library.values():
            if 'album' not in song or song['album'].lower() != name.lower():
                continue
            if artist_name and not ('artist' in song and song['artist'].lower() == artist_name.lower()):
                continue
            if not album['tracks']:
                # the first matching song supplies the album-level details
                album['albumArtist'] = song['albumArtist']
                album['name'] = song['album']
                try:
                    album['albumId'] = song['albumId']
                except KeyError:
                    pass
            album['tracks'].append(song)

        if not album['tracks']:
            return False
        return album
Example #17
Source File: music.py    From geemusic with GNU General Public License v3.0 5 votes vote down vote up
def get_artist(self, name):
        """
        Fetches information about an artist given its name.

        Store mode (``self.use_store`` true): returns the store's artist info
        (requested with max_top_tracks=100) for the first search hit.

        Library mode: the name is fuzzy-matched against ``self.artists``
        (the match must score at least 70) and every library song by that
        artist carrying an 'artistId' is collected, in shuffled order, under
        'topTracks'.

        Returns False when nothing was found.
        """
        if self.use_store:
            hits = self._search("artist", name)
            if len(hits) == 0:
                return False
            top_hit = hits[0]
            return self._api.get_artist_info(top_hit['artistId'],
                                             max_top_tracks=100)

        # Find the best artist we have, and then match songs to that artist
        likely_artist, score = process.extractOne(name, self.artists)
        if score < 70:
            return False

        artist = {'topTracks': []}
        for song in self.library.values():
            by_artist = ('artist' in song
                         and song['artist'].lower() == likely_artist.lower()
                         and 'artistId' in song)
            if not by_artist:
                continue
            if not artist['topTracks']:
                # The first matching song supplies the artist-level details
                try:
                    artist['artistArtRef'] = song['artistArtRef'][0]['url']
                except KeyError:
                    pass
                artist['name'] = song['artist']
                artist['artistId'] = song['artistId']
            artist['topTracks'].append(song)

        # This is all music, not top, but the user probably would prefer it shuffled.
        random.shuffle(artist['topTracks'])
        if not artist['topTracks']:
            return False
        return artist
Example #18
Source File: results.py    From marvin with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
def merge_tables(self, tables, direction='vert', **kwargs):
        ''' Stacks a list of Astropy tables of results into one table

        The tables are combined with either the Astropy vstack or the Astropy
        hstack method.  vstack stacks the table rows vertically.  hstack
        stacks the table columns horizontally and assumes that the rows in
        each table refer to the same object.  Buyer beware: stacking tables
        without a proper understanding of your rows and columns may produce
        deleterious results.

        All keyword arguments accepted by the Astropy vstack and hstack
        methods are passed through.
        See `vstack <http://docs.astropy.org/en/stable/table/operations.html#stack-vertically>`_
        See `hstack <http://docs.astropy.org/en/stable/table/operations.html#stack-horizontally>`_

        Parameters:
            tables (list):
                A list of Astropy Table objects.  Required.
            direction (str):
                The direction of the table stacking, either vertical ('vert') or horizontal ('hor').
                Default is 'vert'.  The string is fuzzy-matched against
                'vertical' and 'horizontal'.

        Returns:
            A new Astropy table that is the stacked combination of all input tables

        Example:
            >>> # query 1
            >>> q, r = doQuery(search_filter='nsa.z < 0.1', returnparams=['g_r', 'cube.ra', 'cube.dec'])
            >>> # query 2
            >>> q2, r2 = doQuery(search_filter='nsa.z < 0.1')
            >>>
            >>> # convert to tables
            >>> table_1 = r.toTable()
            >>> table_2 = r2.toTable()
            >>> tables = [table_1, table_2]
            >>>
            >>> # vertical (row) stacking
            >>> r.merge_tables(tables, direction='vert')
            >>> # horizontal (column) stacking
            >>> r.merge_tables(tables, direction='hor')

        '''
        best = process.extractOne(direction, ['vertical', 'horizontal'])
        stackdir, _ = best
        if stackdir == 'vertical':
            stack = vstack
        elif stackdir == 'horizontal':
            stack = hstack
        else:
            return None
        return stack(tables, **kwargs)
Example #19
Source File: outputcvgate.py    From eegsynth with GNU General Public License v3.0 4 votes vote down vote up
def _start():
    '''Start the module: open the serial port and launch the trigger threads.

    This uses the global variables from setup and adds a set of global variables.
    Every name assigned here is declared global on purpose; the check at the
    end of the function reports any name that ended up local instead.

    Raises:
        RuntimeError: when the serial port cannot be opened.
    '''
    global parser, args, config, r, response, patch, name
    global monitor, duration_scale, duration_offset, serialdevice, s, lock, trigger, chanindx, chanstr, redischannel, thread

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # values between 0 and 1 work well for the duration
    duration_scale = patch.getfloat('duration', 'scale', default=1)
    duration_offset = patch.getfloat('duration', 'offset', default=0)

    # get the specified serial device, or the one that is the closest match
    serialdevice = patch.getstring('serial', 'device')
    serialdevice = EEGsynth.trimquotes(serialdevice)
    # NOTE(review): extractOne returns None when there are no choices, so with
    # no serial ports present the [0] below raises TypeError -- confirm
    serialdevice = process.extractOne(serialdevice, [comport.device for comport in serial.tools.list_ports.comports()])[0] # select the closest match

    try:
        s = serial.Serial(serialdevice, patch.getint('serial', 'baudrate'), timeout=3.0)
        monitor.success("Connected to serial port")
    except:
        # NOTE(review): the bare except also turns KeyboardInterrupt and
        # SystemExit into this RuntimeError
        raise RuntimeError("cannot connect to serial port")

    # this is to prevent two triggers from being activated at the same time
    lock = threading.Lock()

    trigger = []
    # configure the trigger threads for the control voltages
    # (items cv1 .. cv4 of the 'trigger' section)
    for chanindx in range(1, 5):
        chanstr = "cv%d" % chanindx
        if patch.hasitem('trigger', chanstr):
            redischannel = patch.getstring('trigger', chanstr)
            trigger.append(TriggerThread(redischannel, chanindx, chanstr))
            monitor.info("configured " + redischannel + " on " + str(chanindx))
    # configure the trigger threads for the gates
    # (items gate1 .. gate4 of the 'trigger' section)
    for chanindx in range(1, 5):
        chanstr = "gate%d" % chanindx
        if patch.hasitem('trigger', chanstr):
            redischannel = patch.getstring('trigger', chanstr)
            trigger.append(TriggerThread(redischannel, chanindx, chanstr))
            monitor.info("configured " + redischannel + " on " + str(chanindx))

    # start the thread for each of the triggers
    for thread in trigger:
        thread.start()

    # there should not be any local variables in this function, they should all be global
    if len(locals()):
        print('LOCALS: ' + ', '.join(locals().keys())) 
Example #20
Source File: endorphines.py    From eegsynth with GNU General Public License v3.0 4 votes vote down vote up
def _start():
    """Start the module: open the MIDI output and launch the trigger threads.

    This uses the global variables from setup and adds a set of global variables.
    Every name assigned here is meant to be global; the check at the end of
    the function reports any name that ended up local instead.

    Raises:
        RuntimeError: when the MIDI output cannot be opened.
    """
    global parser, args, config, r, response, patch, name
    global monitor, debug, mididevice, outputport, lock, trigger, port, channel, previous_val, previous_port_val

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # get the options from the configuration file
    debug = patch.getint('general', 'debug')
    mididevice = patch.getstring('midi', 'device')
    mididevice = EEGsynth.trimquotes(mididevice)
    # NOTE(review): extractOne returns None when there are no choices, so with
    # no MIDI outputs present the [0] below raises TypeError -- confirm
    mididevice = process.extractOne(mididevice, mido.get_output_names())[0]  # select the closest match

    # this is only for debugging, check which MIDI devices are accessible
    monitor.info('------ OUTPUT ------')
    for port in mido.get_output_names():
        monitor.info(port)
    monitor.info('-------------------------')

    try:
        outputport = mido.open_output(mididevice)
        monitor.success('Connected to MIDI output')
    except:
        # NOTE(review): the bare except also turns KeyboardInterrupt and
        # SystemExit into this RuntimeError
        raise RuntimeError("cannot connect to MIDI output")

    # this is to prevent two messages from being sent at the same time
    lock = threading.Lock()

    # each of the gates that can be triggered is mapped onto a different message
    trigger = []
    for channel in range(0, 16):

        # channels are one-offset in the ini file, zero-offset in the code
        # NOTE(review): `name` is declared global above and was passed to the
        # monitor as the module name; it is overwritten here -- confirm that
        # this is intended
        name = 'channel{}'.format(channel + 1)
        if config.has_option('gate', name):

            # start the background thread that deals with this channel
            # NOTE(review): `this` is not in the global statements, so it is a
            # local and will be reported by the LOCALS check below
            this = TriggerThread(patch.getstring('gate', name), channel)
            trigger.append(this)
            monitor.debug(name + ' trigger configured')

    # start the thread for each of the notes
    # NOTE(review): `thread` is not in the global statements either, so it is
    # also reported by the LOCALS check whenever a trigger was configured
    for thread in trigger:
        thread.start()

    # control values are only relevant when different from the previous value
    previous_val = {}
    previous_port_val = {}
    for channel in range(0, 16):
        name = 'channel{}'.format(channel + 1)
        previous_val[name] = None
        previous_port_val[name] = None

    # there should not be any local variables in this function, they should all be global
    if len(locals()):
        print("LOCALS: " + ", ".join(locals().keys())) 
Example #21
Source File: outputdmx.py    From eegsynth with GNU General Public License v3.0 4 votes vote down vote up
def _start():
    '''Start the module: open the serial port and send an initial blank DMX frame.

    This uses the global variables from setup and adds a set of global variables.
    Every name assigned here is declared global on purpose; the check at the
    end of the function reports any name that ended up local instead.

    Raises:
        RuntimeError: when the serial port cannot be opened.
    '''
    global parser, args, config, r, response, patch, name
    global monitor, debug, serialdevice, s, dmxsize, chanlist, chanvals, chanindx, chanstr, dmxframe, prevtime, START_VAL, END_VAL, TX_DMX_PACKET, FRAME_PAD

    # this can be used to show parameters that have changed
    monitor = EEGsynth.monitor(name=name, debug=patch.getint('general', 'debug'))

    # get the options from the configuration file
    debug = patch.getint('general', 'debug')

    # get the specified serial device, or the one that is the closest match
    serialdevice = patch.getstring('serial', 'device')
    serialdevice = EEGsynth.trimquotes(serialdevice)
    # NOTE(review): extractOne returns None when there are no choices, so with
    # no serial ports present the [0] below raises TypeError -- confirm
    serialdevice = process.extractOne(serialdevice, [comport.device for comport in serial.tools.list_ports.comports()])[0]  # select the closest match

    try:
        s = serial.Serial(serialdevice, patch.getint('serial', 'baudrate'), timeout=3.0)
        monitor.info("Connected to serial port")
    except:
        # NOTE(review): the bare except also turns KeyboardInterrupt and
        # SystemExit into this RuntimeError
        raise RuntimeError("cannot connect to serial port")

    # determine the size of the universe
    dmxsize = 0
    # transpose the (name, value) pairs of the 'input' section into two lists
    # NOTE(review): the two-way unpacking looks like it fails when the section
    # has no items -- confirm
    chanlist, chanvals = list(map(list, list(zip(*config.items('input')))))
    for chanindx in range(0, 512):
        chanstr = "channel%03d" % (chanindx + 1)
        if chanstr in chanlist:
            # the last channel determines the size
            dmxsize = chanindx + 1

    # my fixture won't work if the frame size is too small
    dmxsize = max(dmxsize, 16)
    monitor.info("universe size = %d" % dmxsize)

    # make an empty frame
    dmxframe = [0] * dmxsize
    # blank out
    sendframe(s, dmxframe)

    # keep a timer to send a packet every now and then
    prevtime = time.time()

    # there should not be any local variables in this function, they should all be global
    if len(locals()):
        print('LOCALS: ' + ', '.join(locals().keys()))