Python codecs.open() Examples

The following are 30 code examples showing how to use codecs.open(). They are extracted from open source projects; the project, author, file, and license are listed above each example.

You may also want to check out the other available functions and classes of the codecs module.
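
Before the project examples, here is a minimal sketch of the basic call: codecs.open(filename, mode, encoding) returns a wrapped file object that decodes bytes on read and encodes text on write. The file name demo.txt below is only a placeholder.

import codecs

# Write UTF-8 text; codecs.open encodes the string on write.
with codecs.open('demo.txt', 'w', 'utf-8') as f:
    f.write(u'héllo wörld\n')

# Read it back; undecodable bytes can be skipped with errors='ignore'.
with codecs.open('demo.txt', 'r', 'utf-8', errors='ignore') as f:
    for line in f:
        print(line.strip())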

Example 1
Project: Financial-NLP   Author: Coldog2333   File: NLP.py   License: Apache License 2.0
def txt2sentence(self, filename):
        """
        Read a <cut_file> and return its sentences as a list of lists,
        where each inner list contains the words of one sentence.
        """
        sentences = []
        try:
            fp = open(filename, 'r', encoding='utf-8')
            lines = fp.readlines()
        except UnicodeDecodeError:
            fp = open(filename, 'r', encoding='gbk')
            lines = fp.readlines()
        fp.close()

        for line in lines:
            line = line.strip()
            if len(line) <= 1:
                continue
            line = line.replace('\n', '').replace('\r', '').split(' ')
            sentences.append(line)
        return sentences
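
Since this page is about codecs.open(), note that Example 1 uses the built-in open() instead; an equivalent UTF-8/GBK fallback read with codecs.open() might look like the sketch below (the helper name read_lines_with_fallback is ours, for illustration).

import codecs

def read_lines_with_fallback(filename):
    # Hypothetical helper mirroring Example 1: try UTF-8, fall back to GBK.
    try:
        with codecs.open(filename, 'r', encoding='utf-8') as fp:
            return fp.readlines()
    except UnicodeDecodeError:
        with codecs.open(filename, 'r', encoding='gbk') as fp:
            return fp.readlines()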
Example 2
Project: L.E.S.M.A   Author: NatanaelAntonioli   File: setup.py   License: Apache License 2.0
def find_version(*file_paths):
    # Open in Latin-1 so that we avoid encoding errors.
    # Use codecs.open for Python 2 compatibility.
    # Let I/O errors surface instead of masking them behind a misleading
    # "Unable to find version string" RuntimeError.
    with codecs.open(os.path.join(here, *file_paths), 'r', 'latin1') as f:
        version_file = f.read()

    # The version line must have the form
    # __version__ = 'ver'
    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                              version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


# Get the long description from the relevant file 
Example 3
Project: Financial-NLP   Author: Coldog2333   File: NLP.py   License: Apache License 2.0
def loadWordNet(self):
        """
        Load the Chinese WordNet into the object: aggregate the entries of
        the cow-not-full file into a dict mapping each lemma to its synsets.
        """
        self.known = dict()
        with codecs.open(self.wordnet_txt, "rb", "utf-8") as f:
            for l in f:
                # Skip the BOM/comment header and blank lines.
                if l.startswith('\ufeff#') or not l.strip():
                    continue
                # Each line has the form "synset<TAB>lemma".
                (synset, lemma) = l.strip().split("\t")
                lemma = lemma.strip()
                if lemma not in self.known:
                    self.known[lemma] = []
                self.known[lemma].append(synset)
Example 4
Project: Att-ChemdNER   Author: lingluodlut   File: utils.py   License: Apache License 2.0
def get_perf(filename):
    ''' Run the conlleval.pl Perl script to obtain
    precision/recall and the F1 score. '''
    _conlleval = PREFIX + 'conlleval'
    if not isfile(_conlleval):
        #download('http://www-etud.iro.umontreal.ca/~mesnilgr/atis/conlleval.pl')
        os.system('wget https://www.comp.nus.edu.sg/%7Ekanmy/courses/practicalNLP_2008/packages/conlleval.pl')
        chmod('conlleval.pl', stat.S_IRWXU)  # grant execute permission
    
    out = []
    proc = subprocess.Popen(["perl", _conlleval], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    stdout, _ = proc.communicate(open(filename).read())
    for line in stdout.split('\n'):
        if 'accuracy' in line:
            out = line.split()
            break
    
    # out = ['accuracy:', '16.26%;', 'precision:', '0.00%;', 'recall:', '0.00%;', 'FB1:', '0.00']
    precision = float(out[3][:-2])
    recall    = float(out[5][:-2])
    f1score   = float(out[7])

    return {'p':precision, 'r':recall, 'f1':f1score} 
Example 5
Project: Att-ChemdNER   Author: lingluodlut   File: model.py   License: Apache License 2.0
def save_mappings(self, id_to_word, id_to_char, id_to_tag):
#{{{
        """
        We need to save the mappings if we want to use the model later.
        """
        self.id_to_word = id_to_word
        self.id_to_char = id_to_char
        self.id_to_tag = id_to_tag
        with open(self.mappings_path, 'wb') as f:
            mappings = {
                'id_to_word': self.id_to_word,
                'id_to_char': self.id_to_char,
                'id_to_tag': self.id_to_tag,
            }
            cPickle.dump(mappings, f)
#}}} 
Example 6
Project: Att-ChemdNER   Author: lingluodlut   File: loader.py   License: Apache License 2.0
def load_sentences(path, lower, zeros):
#{{{
    """
    Load sentences. A line must contain at least a word and its tag.
    Sentences are separated by empty lines.
    """
    sentences = []
    sentence = []
    for line in codecs.open(path, 'r', 'utf8'):
        line = zero_digits(line.rstrip()) if zeros else line.rstrip()
        if not line:
            if len(sentence) > 0:
                if 'DOCSTART' not in sentence[0][0]:
                    sentences.append(sentence)
                sentence = []
        else:
            word = line.split()
            assert len(word) >= 2
            sentence.append(word)
    if len(sentence) > 0:
        if 'DOCSTART' not in sentence[0][0]:
            sentences.append(sentence)
    return sentences
#}}} 
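
The expected input is CoNLL-style: one token per line followed by its tag(s), with blank lines separating sentences. A hedged usage sketch, with an illustrative file name and contents borrowed from the familiar CoNLL-2003 opening:

# sample.conll (illustrative contents):
#   EU B-ORG
#   rejects O
#   German B-MISC
#   call O
#
sentences = load_sentences('sample.conll', lower=False, zeros=False)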
Example 7
Project: ciocheck   Author: ContinuumIO   File: formatters.py   License: MIT License
def _add_missing_init_py(self, paths):
        """Add missing __init__.py files in the module subdirectories."""
        results = []
        folders = [os.path.dirname(p) for p in paths]

        # Avoid adding an init on repo level if setup.py or other script on the
        # top level has changed
        if self.cmd_root in folders:
            folders.remove(self.cmd_root)

        for folder in folders:
            init_py = os.path.join(folder, "__init__.py")
            exists = os.path.exists(init_py)
            if not exists:
                with codecs.open(init_py, 'w', 'utf-8') as handle:
                    handle.flush()
                result = {
                    'path': init_py,
                    'created': not exists,
                    'diff': diff('', ''),
                    'error': None,
                }
                results.append(result)
        return results 
Example 8
Project: text-rank   Author: ouprince   File: Segmentation.py   License: MIT License
def __init__(self, stop_words_file = None, allow_speech_tags = util.allow_speech_tags):
        """
        Keyword arguments:
        stop_words_file    -- path of the stop-word file (UTF-8, one stop word
                              per line); if it is not a str, the default
                              stop-word file is used
        allow_speech_tags  -- list of part-of-speech tags used for filtering
        """
        allow_speech_tags = [util.as_text(item) for item in allow_speech_tags]

        self.default_speech_tag_filter = allow_speech_tags
        self.stop_words = set()
        self.stop_words_file = get_default_stop_words_file()
        if type(stop_words_file) is str:
            self.stop_words_file = stop_words_file
        for word in codecs.open(self.stop_words_file, 'r', 'utf-8', 'ignore'):
            self.stop_words.add(word.strip())
Example 9
Project: open-sesame   Author: swabhs   File: preprocess.py   License: Apache License 2.0
def write_to_conll(outf, fsp, firstex, sentid):
    mode = "a"
    if firstex:
        mode = "w"

    with codecs.open(outf, mode, "utf-8") as outf:
        for i in xrange(fsp.sent.size()):
            token, postag, nltkpostag, nltklemma, lu, frm, role = fsp.info_at_idx(i)

            outf.write(str(i+1) + "\t") # ID = 0
            outf.write(token + "\t") # FORM = 1 (the codecs writer handles encoding)
            outf.write("_\t" + nltklemma + "\t") # LEMMA PLEMMA = 2,3
            outf.write(postag + "\t" + nltkpostag + "\t") # POS PPOS = 4,5
            outf.write(str(sentid-1) + "\t_\t") # FEAT PFEAT = 6,7 ~ replacing FEAT with sentence number
            outf.write("_\t_\t") # HEAD PHEAD = 8,9
            outf.write("_\t_\t") # DEPREL PDEPREL = 10,11
            outf.write(lu + "\t" + frm + "\t") # FILLPRED PRED = 12,13
            outf.write(role + "\n") # APREDS = 14

        outf.write("\n") # end of sentence; the with-block closes the file
Example 10
def load_data_and_labels():
    """
    Loads MR polarity data from files, splits the data into words and generates labels.
    Returns split sentences and labels.
    """
    # download dataset
    get_chinese_text()

    # Load data from files
    positive_examples = codecs.open("./data/pos.txt", "r", "utf-8").readlines()
    positive_examples = [s.strip() for s in positive_examples]
    positive_examples = [pe for pe in positive_examples if len(pe) < 100]
    negative_examples = codecs.open("./data/neg.txt", "r", "utf-8").readlines()
    negative_examples = [s.strip() for s in negative_examples]
    negative_examples = [ne for ne in negative_examples if len(ne) < 100]
    # Split by words
    x_text = positive_examples + negative_examples
    # x_text = [clean_str(sent) for sent in x_text]
    x_text = [list(s) for s in x_text]

    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y] 
Example 11
Project: CyberTK-Self   Author: CyberTKR   File: Self.py   License: GNU General Public License v2.0
def sendImage(self, to_, path):
      M = Message(to=to_,contentType = 1)
      M.contentMetadata = None
      M.contentPreview = None
      M_id = self.Talk.client.sendMessage(0,M).id
      files = {
         'file': open(path, 'rb'),
      }
      params = {
         'name': 'media',
         'oid': M_id,
         'size': len(open(path, 'rb').read()),
         'type': 'image',
         'ver': '1.0',
      }
      data = {
         'params': json.dumps(params)
      }
      r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
      if r.status_code != 201:
         raise Exception('Upload image failure.')
      return True 
Example 12
Project: CyberTK-Self   Author: CyberTKR   File: Self.py   License: GNU General Public License v2.0
def sendAudio(self, to_, path):
        M = Message(to=to_, text=None, contentType = 3)
        M_id = self.Talk.client.sendMessage(0,M).id
        files = {
            'file': open(path, 'rb'),
        }
        params = {
            'name': 'media',
            'oid': M_id,
            'size': len(open(path, 'rb').read()),
            'type': 'audio',
            'ver': '1.0',
        }
        data = {
            'params': json.dumps(params)            
        }       

        r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
        if r.status_code != 201:
            raise Exception('Upload audio failure.')
        return True
Example 13
Project: CyberTK-Self   Author: CyberTKR   File: Self.py   License: GNU General Public License v2.0
def sendVoice(self, to_, path):
        M = Message(to=to_, text=None, contentType = 3)
        M.contentPreview = None
        M_id = self._client.sendMessage(0,M).id
        files = {
            'file': open(path, 'rb'),
        }
        params = {
            'name': 'voice_message',
            'oid': M_id,
            'size': len(open(path, 'rb').read()),
            'type': 'audio',
            'ver': '1.0',
        }
        data = {
            'params': json.dumps(params)
        }
        r = self.post_content('https://os.line.naver.jp/talk/m/upload.nhn', data=data, files=files)
        if r.status_code != 201:
            raise Exception('Upload voice failure.')
        return True 
Example 14
Project: DOTA_models   Author: ringringyi   File: utils.py   License: Apache License 2.0
def bod2darknet(subpath, label, extractclassname):
    labelpath = os.path.join(subpath, label)
    filelist = GetFileFromThisRootDir(labelpath)
    outpath = r'/home/dj/data/bod-subset/labels'
    for fullname in filelist:
        objects = parse_bod_poly(fullname)
        name = os.path.splitext(os.path.basename(fullname))[0]
        with open(os.path.join(outpath, name + '.txt'), 'w') as f_out:
            for obj in objects:
                poly = obj['poly']
                bbox = np.array(dots4ToRecC(poly)) / 1024
                if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
                    continue
                if obj['name'] in extractclassname:
                    cls_id = extractclassname.index(obj['name'])  # avoid shadowing the id() builtin
                else:
                    continue
                outline = str(cls_id) + ' ' + ' '.join(list(map(str, bbox)))
                f_out.write(outline + '\n') 
Example 15
Project: DOTA_models   Author: ringringyi   File: utils.py   License: Apache License 2.0
def bodpolyToRec(self, label):
        Recpath = os.path.join(self.basepath, r'ReclabelTxt')
        for basename in self.namelist:
            objects = parse_bod_poly(os.path.join(self.basepath, label, basename + '.txt'))
            f_out = codecs.open(os.path.join(Recpath, basename + '.txt'), 'w', 'utf_16')
            for obj in objects:
                bbox = dots4ToRec8(obj['poly'])
                name = obj['name']
                difficult = obj['difficult']
                bbox = list(map(str, bbox))
                outline = ' '.join(bbox)
                outline = outline + ' ' + name
                if difficult:
                    outline = outline + ' ' + str(difficult)
                f_out.write(outline + '\n')
            f_out.close()
Example 16
Project: DOTA_models   Author: ringringyi   File: utils.py   License: Apache License 2.0
def TransTo15Word_gt(self):
        dstpath = r'wordlabel'
        self.ParseTxtAndWrite(self.labelpath, dstpath, datamap_15)
    # def TransTo15class(self, path):
    #     filelist = GetFileFromThisRootDir(self.labelpath)
    #     for fullname in filelist:
    #         objects = parse_bod_poly2(fullname)
    #         name = mybasename(fullname)
    #         outname = os.path.join(self.basepath, path, name + '.txt')
    #         f_out = codecs.open(outname, 'w', 'utf_16')
    #
    #         for obj in objects:
    #             if obj['name'] in classname_15:
    #                 if path == 'wordlabel':
    #                     outline = ' '.join(map(str, obj['poly'])) + ' ' + datamap_15[obj['name']] + ' ' + str(obj['difficult'])
    #                     print('outline:', outline)
    #                     #f_out.write(outline + '\n')
    #                 elif path == 'label15Txt':
    #                     outline = ' '.join(map(str, obj['poly'])) + ' ' + obj['name'] + ' ' + str(obj['difficult'])
    #                     print('outline:', outline)
    #                     f_out.write(outline + '\n') 
Example 17
Project: DOTA_models   Author: ringringyi   File: utils.py   License: Apache License 2.0
def wordlabel2dark(self):
        filelist = GetFileFromThisRootDir(self.wordlabelpath)
        #print(filelist)
        for fullname in filelist:
            objects = parse_bod_poly(fullname)
            name = mybasename(fullname)
            with open(os.path.join(self.darkpath, name + '.txt'), 'w') as f_out:
                for obj in objects:
                    poly = obj['poly']
                    bbox = np.array(dots4ToRecC(poly)) / 1024
                    ## note: the box is x_center, y_center, w, h, that means the whole box can be out of border
                    if (str(obj['difficult']) == '1'):
                        continue
                    if (sum(bbox <= 0) + sum(bbox >= 1)) >= 1:
                        continue
                    if obj['name'] in wordname_15:
                        cls_id = wordname_15.index(obj['name'])  # avoid shadowing the id() builtin
                    else:
                        continue
                    outline = str(cls_id) + ' ' + ' '.join(list(map(str, bbox)))
                    f_out.write(outline + '\n') 
Example 18
Project: DOTA_models   Author: ringringyi   File: utils.py   License: Apache License 2.0
def bodpolyToRec(srcpath, dstpath):
    #dstpath = os.path.join(r'E:\bod-dataset\patches\subcategorylabel\results\ReclabelTxt')
    filelist = GetFileFromThisRootDir(srcpath)
    namelist = [mybasename(x.strip()) for x in filelist]
    for basename in namelist:
        objects = parse_bod_poly(os.path.join(srcpath, basename + '.txt'))
        f_out = codecs.open(os.path.join(dstpath, basename + '.txt'), 'w', 'utf_16')
        for obj in objects:
            bbox = dots4ToRec8(obj['poly'])
            name = obj['name']
            difficult = obj['difficult']
            bbox = list(map(str, bbox))
            outline = ' '.join(bbox)
            outline = outline + ' ' + name
            if difficult:
                outline = outline + ' ' + str(difficult)
            f_out.write(outline + '\n')
        f_out.close()
Example 19
def getreview(domain, cityid, activity, reviewid, timeout, maxretries, basepath, force, pause):
    baseurl = 'http://www.tripadvisor.' + domain + '/ShowUserReviews-g'
    reviewurl = '%s%s-d%s-r%s' % (baseurl, cityid, activity, reviewid)

    path = os.sep.join((basepath, domain, str(cityid), str(activity)))
    filename = os.sep.join((path, str(reviewid) + '.html'))
    if force or not os.path.exists(filename):
        htmlpage = download_page(reviewurl, maxretries, timeout, pause)

        if htmlpage is None:
            print('Error downloading the review URL: ' + reviewurl)
        else:
            if not os.path.exists(path):
                os.makedirs(path)

            with codecs.open(filename, mode='w', encoding='utf8') as file:
                file.write(htmlpage.decode('utf-8')) 
Example 20
Project: keras-gpt-2   Author: CyberZHG   File: bpe.py   License: MIT License
def get_bpe_from_files(encoder_path, vocab_path):
    """Get initialized BPE.

    :param encoder_path: Path to 'encoder.json'.
    :param vocab_path: Path to 'vocab.bpe'
    :return: The object from encode and decode strings.
    """
    with codecs.open(encoder_path, 'r', 'utf8') as reader:
        token_dict = json.load(reader)
    bpe_rank = {}
    with codecs.open(vocab_path, 'r', 'utf8') as reader:
        reader.readline()
        for rank, line in enumerate(reader):
            line = line.strip()
            if line:
                bpe_rank[tuple(line.split())] = rank
    return BytePairEncoding(token_dict, bpe_rank) 
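
For context, a hedged usage sketch: in the GPT-2 release layout, encoder.json and vocab.bpe live in the model directory (the paths below are placeholders), and the returned object exposes encode/decode as the docstring above suggests.

bpe = get_bpe_from_files('models/117M/encoder.json', 'models/117M/vocab.bpe')
ids = bpe.encode('codecs example')  # assuming an encode() method, per the docstring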
Example 21
Project: lyrebird-api-coverage   Author: Meituan-Dianping   File: load_base.py   License: MIT License
def auto_load_base():
    lyrebird_conf = lyrebird.context.application.conf
    # Read the configured base file and write it to base.json
    if lyrebird_conf.get('hunter.base'):
        base_path = lyrebird_conf.get('hunter.base')
        with codecs.open(base_path, 'r', 'utf-8') as src:
            base = src.read()
        with codecs.open(DEFAULT_BASE, 'w', 'utf-8') as f:
            f.write(base)
        app_context.base_sha1 = get_file_sha1(DEFAULT_BASE)
        return json.loads(base)
    # Otherwise fall back to the local default base file
    elif not os.path.exists(DEFAULT_BASE):
        copy_file(DEFAULT_BASE)
    with codecs.open(DEFAULT_BASE, 'r', 'utf-8') as f:
        json_obj = json.load(f)
        app_context.base_sha1 = get_file_sha1(DEFAULT_BASE)
        return json_obj
Example 22
Project: mutatest   Author: EvanKepner   File: conf.py   License: MIT License
def read(*parts):
    """
    Build an absolute path from *parts* and return the contents of the
    resulting file.  Assume UTF-8 encoding.
    """
    with codecs.open(os.path.join(HERE, *parts), "rb", "utf-8") as f:
        return f.read() 
Example 23
Project: mutatest   Author: EvanKepner   File: setup.py   License: MIT License
def read(*parts):
    """
    Build an absolute path from *parts* and return the contents of the
    resulting file.  Assume UTF-8 encoding.
    """
    with codecs.open(os.path.join(HERE, *parts), "rb", "utf-8") as f:
        return f.read() 
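
A helper like this is commonly used to feed setuptools the package's long description. A minimal sketch, assuming the project keeps a README.rst next to setup.py (the file name is an assumption, for illustration):

from setuptools import setup

setup(
    name="mutatest",
    long_description=read("README.rst"),  # assumed filename, for illustration
)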
Example 24
Project: Financial-NLP   Author: Coldog2333   File: NLP.py   License: Apache License 2.0
def loadstopwords(self):
        """
        Load the stop words into the object.
        """
        self.stop_words = list()
        with open(self.stopwords_txt, 'r', encoding='utf-8') as stop_f:
            for line in stop_f:
                line = line.strip()
                if not len(line):
                    continue
                self.stop_words.append(line)
Example 25
Project: Financial-NLP   Author: Coldog2333   File: NLP.py   License: Apache License 2.0
def txt2wordbag(self, origin_file, cutflag=False, remove_stopwords=True): #testing
        """
        Please remember to set a corresponding processing file.
        """
        if origin_file.split('.')[0][-3:] != 'cut':
            cut_file = self.cut(origin_file, remove_stopwords=True, swith_to_newtxt=True)
        else:
            cut_file = origin_file

        try:
            fp = open(cut_file, 'r', encoding='utf-8')
            rawtxt = fp.read()
        except UnicodeDecodeError:
            fp = open(cut_file, 'r', encoding='gbk')
            rawtxt = fp.read()
        fp.close()
        words_list = rawtxt.split(' ')
        new_words_list = []
        for word in words_list:
            # Skip empty tokens and tokens starting below U+0400 (non-Chinese text).
            if word == '' or ord(word[0]) < 1024:
                continue
            else:
                new_words_list.append(word)
        # The original compared the whole list to '\u3000'; checking the first
        # element is presumably what was intended.
        if new_words_list and new_words_list[0] == '\u3000':
            return new_words_list[1:]
        else:
            return new_words_list
Example 26
Project: Att-ChemdNER   Author: lingluodlut   File: tagger.py   License: Apache License 2.0
def load_sentences(path):
    sentences = []
    for line in codecs.open(path, 'r', 'utf8'):
        sentence = []
        line = line.rstrip()
        if line:
            word = line.split()
            for elem in word:
                sentence.append([elem])
            sentences.append(sentence)
    return sentences
Example 27
Project: Att-ChemdNER   Author: lingluodlut   File: utils.py   License: Apache License 2.0
def findNotSame(fNameX, fNameY):
#{{{
    """
    Check whether two files contain the same characters,
    treating blank lines as a special 'space' token.
    """
    space = 'space'
    def loadFile(fName):
        word = []
        import codecs
        for line in codecs.open(fName, 'r', 'utf8'):
            line = line.rstrip()
            if len(line) > 0:
                word.append(line[0])
            else:
                word.append(space)
        return word
    word1 = loadFile(fNameX)
    word2 = loadFile(fNameY)
    i = 0
    j = 0
    while i < len(word1) and j < len(word2):
        if word1[i] == word2[j]:
            i += 1
            j += 1
            continue
        elif word1[i] == space:
            i += 1
        elif word2[j] == space:
            j += 1
        else:
            print("not same, X:", word1[i], ", line:", i, ", Y:", word2[j], ", line:", j)
            break
#}}}
Example 28
Project: Att-ChemdNER   Author: lingluodlut   File: utils.py   License: Apache License 2.0
def evaluate(parameters, f_eval, raw_sentences, parsed_sentences,
             id_to_tag, dictionary_tags,filename,
             useAttend=True):
#{{{
    """
    Evaluate current model using CoNLL script.
    """
    n_tags = len(id_to_tag)
    predictions = []
    count = np.zeros((n_tags, n_tags), dtype=np.int32)

    for raw_sentence, data in zip(raw_sentences, parsed_sentences):
        inputs = create_input(data, parameters, False, useAttend=useAttend)  # avoid shadowing input()
        if parameters['crf']:
            y_preds = np.array(f_eval(*inputs))
        else:
            y_preds = f_eval(*inputs).argmax(axis=1)
        y_reals = np.array(data['tags']).astype(np.int32)
        assert len(y_preds) == len(y_reals)
        p_tags = [id_to_tag[y_pred] for y_pred in y_preds]
        r_tags = [id_to_tag[y_real] for y_real in y_reals]
        if parameters['tag_scheme'] == 'iobes':
            p_tags = iobes_iob(p_tags)
            r_tags = iobes_iob(r_tags)
        for i, (y_pred, y_real) in enumerate(zip(y_preds, y_reals)):
            new_line = " ".join(raw_sentence[i][:-1] + [r_tags[i], p_tags[i]])
            predictions.append(new_line)
            count[y_real, y_pred] += 1
        predictions.append("")
    #write to file 
    with codecs.open(filename, 'w', 'utf8') as f:
        f.write("\n".join(predictions))
    return get_perf(filename) 
#}}} 
Example 29
Project: Att-ChemdNER   Author: lingluodlut   File: Atten_tagger.py   License: Apache License 2.0
def load_sentences(path):
    sentences = []
    for line in codecs.open(path, 'r', 'utf8'):
        sentence = []
        line = line.rstrip()
        if line:
            word = line.split()
            for elem in word:
                sentence.append([elem])
            sentences.append(sentence)
    return sentences
Example 30
Project: Att-ChemdNER   Author: lingluodlut   File: model.py   License: Apache License 2.0
def reload_mappings(self):
#{{{
        """
        Load mappings from disk.
        """
        with open(self.mappings_path, 'rb') as f:
            mappings = cPickle.load(f)
        self.id_to_word = mappings['id_to_word']
        self.id_to_char = mappings['id_to_char']
        self.id_to_tag = mappings['id_to_tag']
#}}}