Python string.punctuation Examples

The following are 30 code examples showing how to use string.punctuation. Note that string.punctuation is a string constant, not a callable. These examples are extracted from open source projects; the project, author, source file, and license are noted above each example.

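Before the examples, a quick look at the constant's value and at the strip-punctuation pattern that recurs throughout the snippets below:

import string

print(string.punctuation)
# !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

# Common pattern: remove all punctuation from a string in one pass.
table = str.maketrans('', '', string.punctuation)
print('Hello, world!'.translate(table))  # Hello world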

Example 1
Project: vault   Author: gabfl   File: Encryption.py    License: MIT License
# Needs (presumably module-level in the source): import string; from random import choice, randint
def gen_salt(self, set_=True):
        """
            Generate a random salt
        """

        min_char = 8
        max_char = 12
        allchar = string.ascii_letters + string.punctuation + string.digits
        salt = "".join(choice(allchar)
                       for x in range(randint(min_char, max_char))).encode()

        # Set the salt in the same instance if required
        if set_:
            self.set_salt(salt)

        return salt 
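Note that gen_salt draws from the random module, which is not cryptographically secure. A minimal standalone sketch of the same idea (an illustration, not the project's code) using the stdlib secrets module, the safer choice for salts:

import secrets
import string

def gen_salt(min_char=8, max_char=12):
    # Same character pool as above, but drawn from a CSPRNG.
    allchar = string.ascii_letters + string.punctuation + string.digits
    length = min_char + secrets.randbelow(max_char - min_char + 1)
    return "".join(secrets.choice(allchar) for _ in range(length)).encode()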
Example 2
Project: Image-Caption-Generator   Author: dabasajay   File: preprocessing.py    License: MIT License
def clean_captions(captions):
	# Prepare translation table for removing punctuation
	table = str.maketrans('', '', string.punctuation)
	for _, caption_list in captions.items():
		for i in range(len(caption_list)):
			caption = caption_list[i]
			# Tokenize, i.e. split on whitespace
			caption = caption.split()
			# Convert to lowercase
			caption = [word.lower() for word in caption]
			# Remove punctuation from each token
			caption = [w.translate(table) for w in caption]
			# Remove hanging 's' and 'a'
			caption = [word for word in caption if len(word)>1]
			# Remove tokens with numbers in them
			caption = [word for word in caption if word.isalpha()]
			# Store as string
			caption_list[i] = ' '.join(caption)
Example 3
Project: BAMnet   Author: hugochan   File: generic_utils.py    License: Apache License 2.0
def normalize_answer(s):
    """Lower text and remove extra whitespace."""
    def remove_articles(text):
        return re_art.sub(' ', text)

    def remove_punc(text):
        return re_punc.sub(' ', text)  # convert punctuation to spaces

    def white_space_fix(text):
        return ' '.join(text.split())

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
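This excerpt relies on module-level patterns re_art and re_punc that are not shown. Plausible definitions, consistent with the docstring and the "convert punctuation to spaces" comment (an assumption, not necessarily the project's exact code):

import re
import string

re_art = re.compile(r'\b(a|an|the)\b')
re_punc = re.compile(r'[%s]' % re.escape(string.punctuation))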
Example 4
Project: ICDAR-2019-SROIE   Author: zzzDavid   File: my_utils.py    License: MIT License
# Needs (presumably module-level in the source): import random; from string import ascii_uppercase, digits, punctuation
def random_string(n):
    if n == 0:
        return ""

    x = random.random()
    if x > 0.5:
        pad = " " * n
    elif x > 0.3:
        pad = "".join(random.choices(digits + " \t\n", k=n))
    elif x > 0.2:
        pad = "".join(random.choices(ascii_uppercase + " \t\n", k=n))
    elif x > 0.1:
        pad = "".join(random.choices(ascii_uppercase + digits + " \t\n", k=n))
    else:
        pad = "".join(
            random.choices(ascii_uppercase + digits + punctuation + " \t\n", k=n)
        )

    return pad 
Example 5
Project: edm   Author: Wluper   File: data_structures.py    License: GNU General Public License v2.0
def tokenize_sentence(sentence):
    """
    Splits a sentence into words, strips punctuation and turns it to lowercase.

    :param sentence           : the sentence to tokenize.
    :type sentence            : str

    :return                   : list of words
    """

    # Get rid of non-ascii characters to avoid errors with unrecognised characters
    sentence = "".join([c for c in sentence if 0 < ord(c) < 127])

    sentence = sentence.encode("ascii", errors="ignore").decode()

    # str.maketrans only exists in Python 3 (Python 2 used string.maketrans)
    sentenceNoPunctuation = sentence.translate(str.maketrans("", "", string.punctuation))

    sentenceLower         = sentenceNoPunctuation.lower()
    sentenceWords         = sentenceLower.split()

    return sentenceWords 
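For example, the non-ASCII character is stripped before punctuation removal:

print(tokenize_sentence("Hello, World! Café?"))
# ['hello', 'world', 'caf']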
Example 6
Project: interpret-text   Author: interpretml   File: utils_classical.py    License: MIT License
def __init__(
        self,
        parser,
        stop_words=spacy.lang.en.stop_words.STOP_WORDS,
        punctuations=string.punctuation,
    ):
        """Initialize the BOWTokenizer object.

        Arguments:
            parser {spacy.lang.en.English - by default} -- Any parser object
                that supports parser(sentence) call on it.

        Keyword Arguments:
            stop_words {iterable over str} -- Set of stop words to be removed.
            (default: {spacy.lang.en.stop_words.STOP_WORDS})
            punctuations {iterable over str} -- Set of punctuations to be
            removed. (default: {string.punctuation})
        """
        self.parser = parser
        # list of stop words and punctuation marks
        self.stop_words = stop_words
        self.punctuations = punctuations 
Example 7
Project: MnemonicReader   Author: HKUST-KnowComp   File: evaluate-v1.1.py    License: BSD 3-Clause "New" or "Revised" License
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 8
Project: MnemonicReader   Author: HKUST-KnowComp   File: utils.py    License: BSD 3-Clause "New" or "Revised" License
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 9
Project: timefhuman   Author: alvinwan   File: tokenize.py    License: Apache License 2.0
def get_character_type(character):
    """
    >>> get_character_type('a')
    'alpha'
    >>> get_character_type('1')
    'numeric'
    >>> get_character_type('.')
    'punctuation'
    >>> get_character_type(' ')
    """
    if character.isalpha():
        return 'alpha'
    elif character.isnumeric():
        return 'numeric'
    elif character in string.punctuation:
        return 'punctuation'
    return None 
Example 10
Project: wanggeService   Author: pchaos   File: base.py    License: MIT License
def getRandomStr(types='letter', length=8):
        """ Generate a random string of the given length

        :param types: type of random string
        types in ['letter', 'ascii']: return a string of letters
        types in ['digit', 'num']: return a string of digits
        otherwise: return a string mixing letters and digits

        :param length: length of the returned string
        :return: a string of length `length` and type `types`

        todo string.punctuation

        """
        import random
        import string
        # Note: random.sample draws without replacement, so characters never
        # repeat and `length` cannot exceed the size of the character pool.
        if types in ['letter', 'ascii']:
            return ''.join(random.sample(string.ascii_letters, length))
        if types in ['digit', 'num']:
            return ''.join(random.sample(string.digits, length))
        else:
            return ''.join(random.sample(string.ascii_letters + string.digits, length))
Example 11
Project: BERT-for-Chinese-Question-Answering   Author: eva-n27   File: eval.py    License: Apache License 2.0
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        # `zh` is imported by the project and supplies Chinese punctuation
        # (likely the zhon package or a similar constants module).
        exclude = set(string.punctuation + zh.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 12
Project: mipsqa   Author: google   File: squad_data.py    License: Apache License 2.0
def _normalize_answer(s):
  """Lower text and remove punctuation, articles and extra whitespace.

  Directly copied from official SQuAD eval script, SHOULD NOT BE MODIFIED.

  Args:
    s: Input text.
  Returns:
    Normalized text.
  """

  def remove_articles(text):
    return re.sub(r'\b(a|an|the)\b', ' ', text)

  def white_space_fix(text):
    return ' '.join(text.split())

  def remove_punc(text):
    exclude = set(string.punctuation)
    return ''.join(ch for ch in text if ch not in exclude)

  def lower(text):
    return text.lower()

  return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 13
Project: cs224n-win18-squad   Author: abisee   File: evaluate.py    License: Apache License 2.0
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 14
Project: indic_nlp_library   Author: anoopkunchukuttan   File: indic_tokenize.py    License: MIT License
def trivial_tokenize_urdu(text):
    """tokenize Urdu string

    A trivial tokenizer which just tokenizes on punctuation boundaries.
    This also includes punctuation for the Urdu script.
    These punctuation characters were identified from the Unicode database
    for the Arabic script by looking for punctuation symbols.

    Args:
        text (str): text to tokenize

    Returns:
        list: list of tokens
    """
    # `triv_tokenizer_urdu_pat` is a module-level compiled regex in the library.
    tok_str = triv_tokenizer_urdu_pat.sub(r' \1 ', text.replace('\t', ' '))
    return re.sub(r'[ ]+', ' ', tok_str).strip(' ').split(' ')
Example 15
Project: cdQA   Author: cdqa-suite   File: evaluation.py    License: Apache License 2.0
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""

    def remove_articles(text):
        return re.sub(r"\b(a|an|the)\b", " ", text)

    def white_space_fix(text):
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 16
Project: OpenQA   Author: thunlp   File: utils.py    License: MIT License
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 17
Project: ReadableWebProxy   Author: fake-name   File: titleParseNew.py    License: BSD 3-Clause "New" or "Revised" License
def get_preceeding_text(self, prefix_arr):
		intermediate = ""
		consumed = 0

		# print("Get preceeding text:", prefix_arr)
		for idx in range(len(prefix_arr) - 1, -1, -1):
			if isinstance(prefix_arr[idx], TokenBase):
				# print("Get preceeding text returning:", (prefix_arr[:idx+1], None, intermediate))
				return prefix_arr[:idx+1], None, intermediate
			if all([char in string.punctuation+string.whitespace for char in prefix_arr[idx]]):
				intermediate = prefix_arr[idx] + intermediate
				consumed += 1
			else:
				# print("Get preceeding text returning:", (prefix_arr[:idx], prefix_arr[idx], intermediate))
				return prefix_arr[:idx], prefix_arr[idx], intermediate

		# print("get_preceeding_text", ([], None, intermediate))
		return [], None, intermediate 
Example 18
Project: justcopy-backend   Author: ailabstw   File: utils.py    License: MIT License
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 19
Project: pytorch_pretrained_BERT   Author: Meelfy   File: evaluate-v1.1.py    License: Apache License 2.0
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    def remove_articles(text):
        return re.sub(r'\b(a|an|the)\b', ' ', text)

    def white_space_fix(text):
        return ' '.join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s)))) 
Example 20
Project: borgmatic   Author: witten   File: test_validate.py    License: GNU General Public License v3.0
def test_parse_configuration_passes_through_quoted_punctuation():
    escaped_punctuation = string.punctuation.replace('\\', r'\\').replace('"', r'\"')

    mock_config_and_schema(
        '''
        location:
            source_directories:
                - /home

            repositories:
                - "{}.borg"
        '''.format(
            escaped_punctuation
        )
    )

    result = module.parse_configuration('config.yaml', 'schema.yaml')

    assert result == {
        'location': {
            'source_directories': ['/home'],
            'repositories': ['{}.borg'.format(string.punctuation)],
        }
    } 
Example 21
Project: galaxy-sdk-python   Author: XiaoMi   File: requestchecker.py    License: Apache License 2.0
def isJavaIdentifierPart(c):
    # Python 2 code: `unicode` does not exist in Python 3, and `category`
    # comes from the unicodedata module.
    if c in string.ascii_letters:
      return True
    if c in string.digits:
      return True
    if c in string.punctuation:
      return True
    if category(unicode(c)) == 'Sc':
      return True
    if category(unicode(c)) == 'Mn':
      return True
    # The original reads 'N1', which is not a valid Unicode general
    # category; 'Nl' (letter number) is presumably intended.
    if category(unicode(c)) == 'Nl':
      return True
    if category(unicode(c)) == 'Mc':
      return False
    return False
Example 22
Project: locality-sensitive-hashing   Author: singhj   File: strings_utils.py    License: MIT License
def normalize(str):
    """
        Normalizes the string: lower-cases it and removes all punctuation.
        :param str: string to be normalized
        :return: normalized string; if str is None or empty, the original string is returned
    """

    # Python 2 code: `unicode` and `string.maketrans` were removed in
    # Python 3 (use str.maketrans there). Note that `str` shadows the builtin.
    if str:
        if isinstance(str, unicode):
            not_letters_or_digits = u'!"#%\'()*+,-./:;<=>?@[\\]^_`{|}~'
            translate_to = u''
            translate_table = dict((ord(char), translate_to) for char in not_letters_or_digits)
            # Unlike the branch below, this branch does not lower-case.
            return str.translate(translate_table)
        else:
            return str.lower().translate(string.maketrans("", ""), string.punctuation)
    else:
        return str
Example 23
Project: news-corpus-builder   Author: skillachie   File: news_corpus_generator.py    License: MIT License
def _remove_punctuations(self,title):
        # TODO optimize for python 3
        #title.translate(title.maketrans("",""), string.punctuation)
        return "".join(char for char in title if char not in string.punctuation) 
Example 24
Project: fine-lm   Author: akzaidi   File: wikisum.py    License: MIT License
def _normalize_text(text):
  text = text.lower()
  # Space around punctuation
  text = re.sub("[%s]" % re.escape(string.punctuation), r" \g<0> ", text)
  text = re.sub(r"\s+", " ", text)
  text = text.strip()
  return text 
Example 25
Project: steppy-toolkit   Author: minerva-ml   File: text.py    License: MIT License
def punctuation_count(x):
    return occurrence(x, string.punctuation) 
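occurrence is a helper defined elsewhere in steppy-toolkit; from this call site it evidently counts how many characters of x fall within a given character set. A plausible stand-in (an assumption, not the project's actual helper):

def occurrence(text, characters):
    # Count the characters of `text` that appear in `characters`.
    return sum(1 for char in text if char in characters)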
Example 26
Project: mercari-price-suggestion   Author: aerdem4   File: main.py    License: MIT License
def preprocess(df):
    df["name_ori"] = df["name"].values
    df["item_description_ori"] = df["item_description"].values

    df["strange_char"] = df["name"].apply(lambda x: max([ord(c) for c in list(x)]) > 1000)
    df["item_description"] = df["item_description"].replace('No description yet', "missing2")

    translator = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    df["name"] = df["name"].apply(lambda x: normalize_text(translator, x))
    df["item_description"] = df["item_description"].apply(lambda x: normalize_text(translator, x))
    return df 
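normalize_text is not shown in this excerpt. Given the punctuation-to-space translator built above, a plausible definition (assumed, not the project's code) would be:

def normalize_text(translator, text):
    # Map punctuation to spaces, lower-case, and collapse whitespace.
    return " ".join(text.translate(translator).lower().split())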
Example 27
Project: product-classifier   Author: georgetown-analytics   File: features.py    License: MIT License
def __init__(self, stoplist=None, punct=None, lemmatizer=None):
        # Load stopwords, punctuation, and lemmatizer
        # This takes a bit of work, so we only want to do it once!
        self.stopwords   = stoplist or stopwords.words('english')
        self.punctuation = punct or string.punctuation
        self.lemmatizer  = lemmatizer or WordNetLemmatizer() 
Example 28
Project: product-classifier   Author: georgetown-analytics   File: features.py    License: MIT License
def tokenize(self, text):
        """
        Yields individual tokens from the text using NLTK's built-in
        wordpunct_tokenize utility (far better than splitting on spaces). It also
        removes any stopwords and punctuation from the text, and ensures
        that every token is normalized.

        For now, token = word as in bag of words (the feature we're using).
        """
        for token in wordpunct_tokenize(text):
            token = self.normalize(token)
            if token in self.punctuation: continue
            if token in self.stopwords: continue
            yield token 
Example 29
Project: razzy-spinner   Author: rafasashi   File: vader.py    License: GNU General Public License v3.0
def __init__(self, text):
        if not isinstance(text, str):
            text = str(text.encode('utf-8'))
        self.text = text
        self.words_and_emoticons = self._words_and_emoticons()
        # note: this doesn't separate words from adjacent punctuation
        # (keeps emoticons & contractions)
        self.is_cap_diff = allcap_differential(self.words_and_emoticons) 
Example 30
Project: razzy-spinner   Author: rafasashi   File: vader.py    License: GNU General Public License v3.0
def _words_only(self):
        # REGEX_REMOVE_PUNCTUATION is a module-level regex in the same file,
        # built from string.punctuation.
        text_mod = REGEX_REMOVE_PUNCTUATION.sub('', self.text)
        # removes punctuation (but loses emoticons & contractions)
        words_only = text_mod.split()
        # get rid of empty items or single letter "words" like 'a' and 'I'
        words_only = [word for word in words_only if len(word) > 1]
        return words_only