Python collections.defaultdict() Examples
The following are 30 code examples for showing how to use collections.defaultdict(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
You may check out the related API usage on the sidebar.
You may also want to check out all available functions/classes of the module collections, or try the search function.
Example 1
Project: svviz Author: svviz File: datahub.py License: MIT License | 6 votes |
def __init__(self):
    """Give every attribute of the hub its empty default, then call ``reset()``."""
    self.args = None
    self.alignDistance = 0
    self.samples = collections.OrderedDict()
    self.genome = None
    self.sources = dict()
    self.annotationSets = collections.OrderedDict()

    # for storing axes, annotations, etc, by allele; an allele that has not
    # been seen yet gets a fresh OrderedDict on first access
    self.alleleTracks = collections.defaultdict(collections.OrderedDict)

    self.trackCompositor = None
    self.dotplots = dict()
    self.info = dict()

    self.reset()
Example 2
Project: EDeN Author: fabriziocosta File: graph.py License: MIT License | 6 votes |
def _add_sparse_vector_labes(self, graph, vertex_v, node_feature_list):
    """Fold the vertex's sparse vector into its discrete feature encoding.

    When the vertex carries a non-empty mapping under ``self.key_svec``, every
    (feature, sparse index) pair is hashed with ``fast_hash_2`` into a new key
    and the products ``feature value * sparse value`` are accumulated there.
    Without such a mapping ``node_feature_list`` is returned untouched.
    """
    sparse_vec = graph.nodes[vertex_v].get(self.key_svec, None)
    if not sparse_vec:
        return node_feature_list

    combined = defaultdict(lambda: defaultdict(float))
    for radius_dist_key in node_feature_list:
        for feature in node_feature_list[radius_dist_key]:
            weight = node_feature_list[radius_dist_key][feature]
            for index in sparse_vec:
                hashed_key = fast_hash_2(feature, index, self.bitmask)
                combined[radius_dist_key][hashed_key] += weight * sparse_vec[index]
    return combined
Example 3
Project: EDeN Author: fabriziocosta File: iterated_maximum_subarray.py License: MIT License | 6 votes |
def extract_sequence_and_score(graph=None):
    """Return the label sequence and mean importance score of a graph.

    Nodes are grouped by their ``'position'`` attribute. For every position
    the (shared) ``'label'`` is stored in ``seq`` and the mean of the
    ``'importance'`` attributes in ``score``; both lists are indexed by
    position, so positions are expected to be the integers
    ``0 .. number_of_positions - 1``.

    Node attributes are read through ``graph.nodes[...]`` because the
    ``graph.node`` alias was removed in NetworkX 2.4.

    :param graph: graph whose nodes all carry a ``'position'`` attribute
        (a graph must be supplied; the ``None`` default is not usable).
    :return: tuple ``(seq, score)`` of two lists of equal length.
    :raises Exception: if a node has no ``'position'`` attribute.
    :raises AssertionError: if nodes sharing a position have different labels.
    """
    # make dict with positions as keys and lists of ids as values
    pos_to_ids = defaultdict(list)
    for u in graph.nodes():
        attributes = graph.nodes[u]
        if 'position' not in attributes:
            # a missing position cannot be recovered from, so fail loudly
            raise Exception('Missing "position" attribute in node:%s %s'
                            % (u, attributes))
        # accumulate all node ids
        pos_to_ids[attributes['position']].append(u)

    # extract sequence of labels and importances
    seq = [None] * len(pos_to_ids)
    score = [0] * len(pos_to_ids)
    for pos in sorted(pos_to_ids):
        ids = pos_to_ids[pos]
        labels = [graph.nodes[u].get('label', 'N/A') for u in ids]
        # check that all labels for the same position are identical
        assert all(label == labels[0] for label in labels), \
            'ERROR: non identical labels referring to same position: %s %s' % (pos, labels)
        seq[pos] = labels[0]
        # average all importance score for the same position
        importances = [graph.nodes[u].get('importance', 0) for u in ids]
        score[pos] = np.mean(importances)
    return seq, score
Example 4
Project: EDeN Author: fabriziocosta File: __init__.py License: MIT License | 6 votes |
def compute_matching_neighborhoods_fraction(GA, GB, pairings):
    """Return the fraction of GA nodes whose whole neighborhood is matched.

    ``pairings[i]`` is the node of ``GB`` paired with node ``i`` of ``GA``.
    An edge ``(i, j)`` of ``GA`` is "matching" when the paired nodes are also
    joined by an edge in ``GB``. A node counts when it has at least one
    matching edge and every one of its neighbors in ``GA`` is reached through
    a matching edge.

    :param GA: graph whose nodes are the indices ``0 .. len(pairings) - 1``.
    :param GB: graph the pairing maps into.
    :param pairings: sequence mapping GA node index to GB node.
    :return: float in ``[0, 1]``; ``0.0`` for a graph without nodes.
    """
    node_count = len(GA.nodes())
    if node_count == 0:
        # nothing to match; avoids a ZeroDivisionError on empty graphs
        return 0.0

    matches = dict(enumerate(pairings))
    # sets give constant-time neighbor membership tests below
    matching_edges = defaultdict(set)
    for i, j in GA.edges():
        # has_edge is a direct adjacency lookup instead of a scan of GB.edges()
        if GB.has_edge(matches[i], matches[j]):
            matching_edges[i].add(j)
            matching_edges[j].add(i)

    count = 0
    for u in GA.nodes():
        # .get avoids inserting empty entries for unmatched nodes
        matched = matching_edges.get(u)
        if matched and all(v in matched for v in GA.neighbors(u)):
            count += 1
    return float(count) / node_count
Example 5
Project: aegea Author: kislyuk File: s3.py License: Apache License 2.0 | 6 votes |
def lifecycle(args):
    """Delete, update and/or print the lifecycle configuration of a bucket.

    With ``args.delete`` set, the bucket's lifecycle configuration is deleted
    and the result of that call is returned. Otherwise a single rule is built
    from the transition / expiry / multipart options that are not ``None``
    and uploaded if at least one option was given; afterwards every rule
    currently configured on the bucket is printed as JSON.
    """
    if args.delete:
        return resources.s3.BucketLifecycle(args.bucket_name).delete()

    rule = defaultdict(list, Prefix=args.prefix, Status="Enabled")
    transitions = (
        ("STANDARD_IA", args.transition_to_infrequent_access),
        ("GLACIER", args.transition_to_glacier),
    )
    for storage_class, days in transitions:
        if days is not None:
            rule["Transitions"].append(dict(StorageClass=storage_class, Days=days))
    if args.expire is not None:
        rule["Expiration"] = dict(Days=args.expire)
    if args.abort_incomplete_multipart_upload is not None:
        rule["AbortIncompleteMultipartUpload"] = dict(
            DaysAfterInitiation=args.abort_incomplete_multipart_upload)

    # Prefix and Status are always present, so more than two keys means
    # that at least one lifecycle option was requested.
    rule_has_settings = len(rule) > 2
    if rule_has_settings:
        clients.s3.put_bucket_lifecycle_configuration(
            Bucket=args.bucket_name,
            LifecycleConfiguration=dict(Rules=[rule]))

    try:
        for existing_rule in resources.s3.BucketLifecycle(args.bucket_name).rules:
            print(json.dumps(existing_rule))
    except ClientError as e:
        expect_error_codes(e, "NoSuchLifecycleConfiguration")
        logger.error("No lifecycle configuration for bucket %s", args.bucket_name)
Example 6
Project: DDPAE-video-prediction Author: jthsieh File: DDPAE.py License: MIT License | 6 votes |
def sample_latent(self, input, input_latent_mu, input_latent_sigma, pred_latent_mu, pred_latent_sigma, initial_pose_mu, initial_pose_sigma, sample=True):
    '''
    Return latent variables: dictionary containing pose and content.

    The pose is obtained from the transitions returned by
    ``get_transitions`` (accumulated with ``accumulate_pose``), offset by a
    sampled initial pose and passed through ``constrain_pose``. Objects are
    then cropped from ``input`` at the poses of the first
    ``self.n_frames_input`` frames and encoded into the content variable z.

    :param input: images the objects are cropped from
        (presumably a batch of input frames; verify against caller).
    :param input_latent_mu, input_latent_sigma: forwarded to ``get_transitions``.
    :param pred_latent_mu, pred_latent_sigma: forwarded to ``get_transitions``.
    :param initial_pose_mu, initial_pose_sigma: parameters of the Normal
        distribution used for the 'initial_pose' sample site.
    :param sample: forwarded unchanged to every sampling helper.
    :return: defaultdict with the keys 'pose' and 'content'; any other key
        evaluates to None.
    '''
    # Missing keys resolve to None instead of raising KeyError.
    latent = defaultdict(lambda: None)
    beta = self.get_transitions(input_latent_mu, input_latent_sigma, pred_latent_mu, pred_latent_sigma, sample)
    pose = self.accumulate_pose(beta)
    # Sample initial pose
    initial_pose = self.pyro_sample('initial_pose', dist.Normal, initial_pose_mu, initial_pose_sigma, sample)
    # In-place add: the initial pose is reshaped so that it lines up with the
    # component and pose-latent axes of ``pose``
    # (assumes axis 1 of ``pose`` is the frame axis - TODO confirm).
    pose += initial_pose.view(-1, 1, self.n_components, self.pose_latent_size)
    pose = self.constrain_pose(pose)
    # Get input objects
    input_pose = pose[:, :self.n_frames_input, :, :]
    input_obj = self.get_objects(input, input_pose)
    # Encode the sampled objects
    z = self.object_encoder(input_obj)
    z = self.sample_content(z, sample)
    latent.update({'pose': pose, 'content': z})
    return latent
Example 7
Project: iSDX Author: sdn-ixp File: replay.py License: Apache License 2.0 | 6 votes |
def __init__(self, config, flows_dir, ports_dir, num_timesteps, debug=False):
    """Create the containers of the log history and fill them from the logs.

    After the logger and the empty containers are set up, the configuration
    and the log directories are parsed, ``info()`` is called and
    ``self.data`` is handed to ``pretty``.

    :param config: passed to ``parse_config``.
    :param flows_dir: passed to ``parse_logs``.
    :param ports_dir: passed to ``parse_logs``.
    :param num_timesteps: stored as ``total_timesteps`` and passed to ``parse_logs``.
    :param debug: when true, the logger level is set to DEBUG.
    """
    self.logger = logging.getLogger("LogHistory")
    if debug:
        self.logger.setLevel(logging.DEBUG)

    self.log_entry = namedtuple("LogEntry", "source destination type")
    self.ports = defaultdict(list)
    self.flows = defaultdict(list)

    def _counters():
        # innermost level: integer counters starting at 0
        return defaultdict(int)

    def _counter_tables():
        # middle level: mapping to counter tables
        return defaultdict(_counters)

    # three-level mapping whose leaves are integer counters
    self.data = defaultdict(_counter_tables)

    self.current_timestep = 0
    self.total_timesteps = num_timesteps

    self.parse_config(config)
    self.parse_logs(num_timesteps, flows_dir, ports_dir)
    self.info()
    pretty(self.data)
Example 8
Project: QA Author: shuaihuaiyi File: qaData.py License: GNU General Public License v3.0 | 6 votes |
def loadEmbedding(filename):
    """
    Load a word-embedding text file.

    Each line holds a word followed by its vector components, separated by
    single spaces.

    :param filename: path of the embedding file (read as UTF-8)
    :return: tuple ``(embeddings, word2idx)``: the list of vectors, one per
        line, and a mapping from each word to the index of its vector in
        that list. If a word occurs on several lines the index of its last
        occurrence is kept.
    """
    embeddings = []
    # Kept as defaultdict(list) for backward compatibility with callers; note
    # that indexing an unknown word returns (and stores) an empty list.
    word2idx = defaultdict(list)
    with open(filename, mode="r", encoding="utf-8") as rf:
        for line in rf:
            arr = line.split(" ")
            # The last token is dropped: presumably every line ends with a
            # space before the newline - TODO confirm against the data files.
            embedding = [float(val) for val in arr[1: -1]]
            # Index by row position, not by len(word2idx): the two diverge as
            # soon as a word repeats, which would shift every later index.
            word2idx[arr[0]] = len(embeddings)
            embeddings.append(embedding)
    return embeddings, word2idx
Example 9
Project: mmdetection Author: open-mmlab File: analyze_logs.py License: Apache License 2.0 | 6 votes |
def load_json_logs(json_logs):
    """Parse JSON-lines training logs into one dictionary per log file.

    Every result dictionary maps an epoch to a sub-dictionary; the keys of
    the sub-dictionary are the metric names found in the log (e.g. memory,
    bbox_mAP) and each value is the list of that metric's values over all
    lines of the epoch. Lines without an ``epoch`` field are ignored.

    :param json_logs: paths of the log files, one JSON object per line.
    :return: list of dictionaries, in the order of ``json_logs``.
    """
    log_dicts = [{} for _ in json_logs]
    for path, per_epoch in zip(json_logs, log_dicts):
        with open(path, 'r') as log_file:
            for raw_line in log_file:
                record = json.loads(raw_line.strip())
                # skip lines without `epoch` field
                if 'epoch' not in record:
                    continue
                epoch = record.pop('epoch')
                if epoch not in per_epoch:
                    per_epoch[epoch] = defaultdict(list)
                metrics = per_epoch[epoch]
                for name, value in record.items():
                    metrics[name].append(value)
    return log_dicts
Example 10
Project: neural-fingerprinting Author: StephanZheng File: fingerprint.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, tau=0, name="", ds_name=""):
    """Create empty id sets and zero-initialised counters.

    :param tau: threshold stored on the instance (see ``ids_legal``).
    :param name: name stored on the instance.
    :param ds_name: dataset name stored on the instance.
    """
    self.name = name
    self.ds_name = ds_name
    self.tau = tau

    self.ids = set()
    self.ids_correct = set()
    self.ids_correct_fp = set()
    self.ids_agree = set()
    # Legal = there is a fingerprint match below threshold tau
    self.ids_legal = set()

    # ``int`` yields the same default (0) as ``lambda: 0`` but, unlike a
    # lambda, keeps the counters picklable.
    self.counts = defaultdict(int)
    self.counts_legal = defaultdict(int)
    self.counts_correct = defaultdict(int)

    # Total number of examples
    self.i = 0
Example 11
Project: neural-fingerprinting Author: StephanZheng File: master.py License: BSD 3-Clause "New" or "Revised" License | 6 votes |
def _save_sorted_results(self, run_stats, scores, image_count, filename):
    """Writes the evaluation results to a CSV file, highest score first.

    Args:
      run_stats: dictionary with runtime statistics for submissions,
        can be generated by WorkPiecesBase.compute_work_statistics
      scores: dictionary mapping submission ids to scores
      image_count: dictionary with number of images processed by submission
      filename: output filename
    """
    def score_of(item):
        # items are (submission id, score) pairs
        return item[1]

    def missing_stats():
        # stand-in for submissions without statistics: every field is NaN
        return collections.defaultdict(lambda: float('NaN'))

    header = ['SubmissionID', 'ExternalTeamId', 'Score',
              'MedianTime', 'ImageCount']
    with open(filename, 'w') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(header)
        ranked = sorted(iteritems(scores), key=score_of, reverse=True)
        for submission_id, score in ranked:
            external_id = self.submissions.get_external_id(submission_id)
            stats = run_stats.get(submission_id, missing_stats())
            csv_out.writerow([submission_id, external_id, score,
                              stats['median_eval_time'],
                              image_count[submission_id]])
Example 12
Project: query-exporter Author: albertodonato File: test_loop.py License: GNU General Public License v3.0 | 6 votes |
def metric_values(metric, by_labels=()):
    """Return values for the metric.

    Only samples whose suffix matches the metric type are considered
    ("" for a gauge, "_total" for a counter).

    Without ``by_labels`` the result is the list of matching sample values.
    With ``by_labels`` it is a mapping from the tuple of the requested label
    values to the sample value (a later sample with the same label values
    replaces an earlier one).
    """
    metric_type = metric._type
    if metric_type == "gauge":
        suffix = ""
    elif metric_type == "counter":
        suffix = "_total"

    values = defaultdict(list)
    for sample_suffix, labels, value in metric._samples():
        if sample_suffix != suffix:
            continue
        if by_labels:
            key = tuple(labels[label] for label in by_labels)
            values[key] = value
        else:
            values[sample_suffix].append(value)

    if by_labels:
        return values
    return values[suffix]
Example 13
Project: news-corpus-builder Author: skillachie File: news_corpus_generator.py License: MIT License | 6 votes |
def __init__(self, corpus_dir, datastore_type='file', db_name='corpus.db'):
    '''
    Set up the article extractor, the storage settings and the counters.

    The corpus directory is created through ``_create_corpus_dir``; for the
    'sqlite' datastore the database inside that directory is set up as well.

    Args:
        corpus_dir (str): The directory to save the generated corpus
        datastore_type (Optional[str]): Format to save generated corpus.
            Specify either 'file' or 'sqlite'.
        db_name (Optional[str]): Name of database if 'sqlite' is selected.
    '''
    extractor_options = {'browser_user_agent': 'Mozilla', 'parser_class': 'soup'}
    self.g = Goose(extractor_options)

    self.corpus_dir = corpus_dir
    self.datastore_type = datastore_type
    self.db_name = db_name
    self.stats = defaultdict(int)

    self._create_corpus_dir(self.corpus_dir)

    self.db = None
    if self.datastore_type != 'sqlite':
        return
    # the database file lives inside the corpus directory
    self.db = self.corpus_dir + '/' + self.db_name
    self._set_up_db(self.db)
Example 14
Project: goodtables-py Author: frictionlessdata File: unique_constraint.py License: MIT License | 6 votes |
def _create_unique_fields_cache(cells):
    """Build the empty cache used to detect duplicated values.

    The cache gets one ``defaultdict(list)`` entry, keyed by a one-element
    tuple of the column number, for every cell whose field has a truthy
    ``unique`` constraint, plus one entry keyed by the tuple of all
    primary-key column numbers when there is at least one such column.
    Cells without a field are ignored.
    """
    cache = {}
    primary_key_column_numbers = []

    # Unique
    for cell in cells:
        field = cell.get('field')
        if field is None:
            continue
        column_number = cell.get('column-number')
        if field.descriptor.get('primaryKey'):
            primary_key_column_numbers.append(column_number)
        if field.constraints.get('unique'):
            cache[(column_number,)] = defaultdict(list)

    # Primary key
    if primary_key_column_numbers:
        cache[tuple(primary_key_column_numbers)] = defaultdict(list)

    return cache
Example 15
Project: dynamic-training-with-apache-mxnet-on-aws Author: awslabs File: coco.py License: Apache License 2.0 | 6 votes |
def __init__(self, annotation_file=None):
    """
    Constructor of Microsoft COCO helper class for reading and visualizing annotations.

    Without an annotation file only the empty containers are created.
    Otherwise the file is parsed as JSON, stored in ``self.dataset`` and
    ``createIndex()`` is called.

    :param annotation_file (str): location of annotation file
    :raises AssertionError: if the top-level JSON value is not a dict.
    """
    # load dataset
    self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
    self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
    if annotation_file is not None:
        print('loading annotations into memory...')
        tic = time.time()
        # the context manager closes the file as soon as it has been parsed
        with open(annotation_file, 'r') as annotation_handle:
            dataset = json.load(annotation_handle)
        assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
        print('Done (t={:0.2f}s)'.format(time.time()- tic))
        self.dataset = dataset
        self.createIndex()
Example 16
Project: deep-summarization Author: harpribot File: bleu_scorer.py License: MIT License | 6 votes |
def precook(s, n=4, out=False):
    """
    Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.

    :param s: sentence; it is split on whitespace into words
    :param n: maximum n-gram length that is counted
    :param out: not used by this function
    :return: tuple ``(number of words, counts)`` where ``counts`` maps every
        n-gram (a tuple of 1 to ``n`` words) to its number of occurrences
    """
    words = s.split()
    counts = defaultdict(int)
    # ``range`` instead of ``xrange`` so that the function also runs on Python 3
    for k in range(1, n + 1):
        for i in range(len(words) - k + 1):
            ngram = tuple(words[i:i + k])
            counts[ngram] += 1
    return (len(words), counts)
Example 17
Project: svviz Author: svviz File: summarystats.py License: MIT License | 5 votes |
def addVariantResults(self, dataHub):
    """Append per-sample, per-allele summary rows for the hub's variant.

    For every sample the rows hold, in this order: the number of alignment
    collections per allele, the number of collections per (allele, reason),
    mean and standard deviation of the summed alignment scores, and mean and
    standard deviation of the sizes of the chosen alignment sets. Every row
    is ``[variant, sample name, allele, statistic name, value]``.
    """
    variant = str(dataHub.variant)

    for sampleName, sample in dataHub.samples.items():
        counts = collections.Counter()
        reasons = {}
        alnScores = collections.defaultdict(list)
        insertSizes = collections.defaultdict(list)

        # collect stats
        for alnCollection in sample.alnCollections:
            allele = alnCollection.choice
            counts[allele] += 1
            if allele not in reasons:
                reasons[allele] = collections.Counter()
            reasons[allele][alnCollection.why] += 1

            total_score = sum(aln.score for aln in alnCollection.chosenSet().getAlignments())
            alnScores[allele].append(total_score)
            insertSizes[allele].append(len(alnCollection.chosenSet()))

        def record(allele, stat_name, value):
            self.stats.append([variant, sampleName, allele, stat_name, value])

        # record stats
        for allele, count in counts.items():
            record(allele, "count", count)

        for allele, why_counts in reasons.items():
            for reason, reason_count in why_counts.items():
                record(allele, "reason_{}".format(reason), reason_count)

        for allele, scores in alnScores.items():
            record(allele, "alnScore_mean", numpy.mean(scores))
            record(allele, "alnScore_std", numpy.std(scores))

        for allele, sizes in insertSizes.items():
            record(allele, "insertSize_mean", numpy.mean(sizes))
            record(allele, "insertSize_std", numpy.std(sizes))
Example 18
Project: svviz Author: svviz File: pairfinder.py License: MIT License | 5 votes |
def getToMatch(self):
    """Collect the reads of all regions and group them by query name.

    Returns:
        ``(tomatch, readsByID)``: the set of all loaded reads and a mapping
        from ``read.qname`` to the ``ReadSet`` of reads with that name, or
        ``(None, None)`` once more than ``self.sampleReads`` distinct read
        names have been seen (only checked when ``sampleReads`` is not None).

    Raises:
        TooManyReadsException: if ``self.maxReads`` is truthy and more than
            that many reads were loaded.
    """
    # NOTE(review): the original indentation was lost; the two limit checks
    # are assumed to run after every read - confirm against the project.
    tomatch = set()
    # a read name seen for the first time gets an empty ReadSet
    readsByID = collections.defaultdict(ReadSet)
    for region in self.regions:
        for read in self.loadRegion(region.chr(), region.start(), region.end()):
            tomatch.add(read)
            readsByID[read.qname].add(read)
            # the sampling limit counts distinct read names, not reads
            if self.sampleReads is not None and len(readsByID) > self.sampleReads:
                return None, None
            # a maxReads of 0 or None disables the hard limit
            if self.maxReads and len(tomatch) > self.maxReads:
                raise TooManyReadsException
    return tomatch, readsByID
Example 19
Project: svviz Author: svviz File: gff.py License: MIT License | 5 votes |
def getAnnotations(self, chrom, start, end, clip=False, extension=1000000):
    """Return the genes annotated around the interval ``start``-``end``.

    The tabix index is queried for the interval widened by ``extension`` on
    both sides. The fetched lines are grouped by the transcript id matched
    by ``RE_TRANSCRIPT`` (lines without a match form their own group) and
    every group becomes one ``GTFGene``. With a positive ``extension`` only
    genes overlapping the original interval are kept; with ``clip`` the
    returned genes are clipped to it.
    """
    chrom = self.fixChromFormat(chrom)
    window_start = max(0, start-extension)
    records = self.tabix.fetch(chrom, window_start, end+extension)

    transcriptsToLines = collections.defaultdict(list)
    for index, record in enumerate(records):
        if len(record) < 2:
            continue
        try:
            transcript_id = re.match(RE_TRANSCRIPT, record).group(1)
        except AttributeError:
            # no transcript id on this line: use a name unique to the line
            transcript_id = "anno{}".format(index)
        transcriptsToLines[transcript_id].append(record)

    genes = [GTFGene(group) for group in transcriptsToLines.values()]

    if extension > 0:
        # drop genes that lie completely outside the requested interval
        genes = [gene for gene in genes if not (end<gene.start or start>gene.end)]

    if clip:
        for gene in genes:
            gene.clip(start, end)

    return genes
Example 20
Project: incubator-spot Author: apache File: proxy.py License: Apache License 2.0 | 5 votes |
def create_storyboard(uri, date, title, text, expanded_search, top_results):
    """Build the storyboard artifacts for a threat URI.

    The rows of ``expanded_search`` are tallied per client IP, request
    method, response content type, referer and referred URI. The tallies
    and the requests made to ``uri`` are then passed on to the incident
    progression and timeline builders, and the comments are saved.

    Returns True.
    """
    clientips = defaultdict(int)
    reqmethods = defaultdict(int)
    rescontype = defaultdict(int)
    referers = defaultdict(int)
    refered = defaultdict(int)
    requests = []

    for row in expanded_search:
        clientips[row['clientIp']] += 1
        reqmethods[row['requestMethod']] += 1
        rescontype[row['responseContentType']] += 1

        if row['uri'] != uri:
            # Destination URIs referred by the threat
            refered[row['uri']] += 1
            continue

        # Source URIs that referred the user to the threat
        referers[row['referer']] += 1
        requests.append({
            'clientip': row['clientIp'],
            'referer': row['referer'],
            'reqmethod': row['requestMethod'],
            'resconttype': row['responseContentType'],
        })

    create_incident_progression(uri, requests, refered, date)
    create_timeline(uri, clientips, date, top_results)
    save_comments(uri, title, text, date)
    return True
Example 21
Project: EDeN Author: fabriziocosta File: graph.py License: MIT License | 5 votes |
def _transform(self, original_graph):
    """Return the normalized features of all 'node' vertices of a graph.

    The graph is preprocessed, the features of every vertex flagged as
    'node' are accumulated into one shared two-level mapping, the
    preprocessed graph is cleaned and the normalized features are returned.
    """
    graph = self._graph_preprocessing(original_graph)

    # collect all features for all vertices for each label_index
    feature_list = defaultdict(lambda: defaultdict(float))
    for vertex in graph.nodes():
        # only for vertices of type 'node', i.e. not for the 'edge' type
        if not graph.nodes[vertex].get('node', False):
            continue
        self._transform_vertex(graph, vertex, feature_list)

    _clean_graph(graph)
    return self._normalization(feature_list)
Example 22
Project: EDeN Author: fabriziocosta File: graph.py License: MIT License | 5 votes |
def _add_vector_labes(self, graph, vertex_v, node_feature_list):
    """Spread every feature of a vertex over the entries of its vector.

    When the vertex carries a non-empty vector under ``self.key_vec``, each
    feature is expanded into one key per vector entry - the feature offset
    by the entry's index, modulo ``self.bitmask`` - and the products
    ``feature value * vector entry`` are accumulated there. Without such a
    vector ``node_feature_list`` is returned untouched.
    """
    vector = graph.nodes[vertex_v].get(self.key_vec, None)
    if not vector:
        return node_feature_list

    expanded = defaultdict(lambda: defaultdict(float))
    for radius_dist_key in node_feature_list:
        for feature in node_feature_list[radius_dist_key]:
            weight = node_feature_list[radius_dist_key][feature]
            for offset, entry in enumerate(vector):
                shifted_key = (feature + offset) % self.bitmask
                expanded[radius_dist_key][shifted_key] += weight * entry
    return expanded
Example 23
Project: EDeN Author: fabriziocosta File: graph.py License: MIT License | 5 votes |
def _compute_vertex_based_features(self, graph):
    """Return the data matrix with one row of features per 'node' vertex.

    Each vertex flagged as 'node' gets its own feature mapping, which is
    normalized separately; the normalized rows are converted together into
    a sparse matrix.
    """
    feature_rows = []
    for vertex in graph.nodes():
        # only for vertices of type 'node', i.e. not for the 'edge' type
        if not graph.nodes[vertex].get('node', False):
            continue
        vertex_features = defaultdict(lambda: defaultdict(float))
        self._transform_vertex(graph, vertex, vertex_features)
        feature_rows.append(self._normalization(vertex_features))
    return self._convert_dict_to_sparse_matrix(feature_rows)
Example 24
Project: aegea Author: kislyuk File: cost.py License: Apache License 2.0 | 5 votes |
def cost(args):
    """Print a table of costs per group and time period.

    Costs are grouped by the requested dimensions and tags (by SERVICE when
    neither is given). The table has one row per group with one column per
    time period and a TOTAL column; only rows whose total exceeds
    ``args.min_total`` are shown, largest total first.
    """
    if not (args.group_by or args.group_by_tag):
        args.group_by = ["SERVICE"]

    get_cost_and_usage_args = dict(get_common_method_args(args), Metrics=args.metrics)
    grouping = [dict(Type="DIMENSION", Key=k) for k in args.group_by]
    grouping += [dict(Type="TAG", Key=k) for k in args.group_by_tag]
    get_cost_and_usage_args["GroupBy"] = grouping

    rows = collections.defaultdict(dict)

    try:
        account_name = clients.iam.list_account_aliases()["AccountAliases"][0]
    except Exception:
        # no usable account alias: fall back to the session's profile name
        account_name = boto3.session.Session().profile_name

    first_group = args.group_by[0] if args.group_by else "Tag:" + args.group_by_tag[0]
    title = "{} ({})".format(first_group, account_name)
    args.columns, cell_transforms = [title], {"TOTAL": format_float}

    for page in clients.ce.get_cost_and_usage(**get_cost_and_usage_args)["ResultsByTime"]:
        period_start = page["TimePeriod"]["Start"]
        args.columns.append(period_start)
        cell_transforms[period_start] = format_float
        for group in page["Groups"]:
            value = group["Metrics"][args.metrics[0]]
            if isinstance(value, dict) and "Amount" in value:
                value = float(value["Amount"])
            group_key = group["Keys"][0]
            row = rows[group_key]
            row.setdefault(title, group_key)
            row.setdefault("TOTAL", 0)
            row["TOTAL"] += value
            row[period_start] = value
    args.columns.append("TOTAL")

    visible_rows = [row for row in rows.values() if row["TOTAL"] > args.min_total]
    visible_rows = sorted(visible_rows, key=lambda row: -row["TOTAL"])
    page_output(tabulate(visible_rows, args, cell_transforms=cell_transforms))
Example 25
Project: arm_now Author: nongiach File: download.py License: MIT License | 5 votes |
def scrawl_kernel(arch):
    """Find the download links of a test system for ``arch``.

    The directory listing of the toolchain site is fetched and the links
    starting with ``arch`` are grouped by version, libc and file type (the
    first link seen for a combination wins). The "stable" version is
    preferred over "bleeding-edge" and the first available libc of glibc,
    uclibc, musl is used.

    :return: tuple ``(kernel, dtb, rootfs)`` of URLs, ``None`` for missing
        entries; ``(None, None, None)`` if a link has no known file type.
    """
    re_href = re.compile('href="?({arch}[^ <>"]*)"?'.format(arch=arch))
    url = "https://toolchains.bootlin.com/downloads/releases/toolchains/{arch}/test-system/".format(arch=arch)
    listing = requests.get(url + "?C=M;O=D").text

    links_dict = defaultdict(lambda: defaultdict(dict))
    for link in re_href.findall(listing):
        version = get_link_version(link)
        libc = get_link_libc(link)
        filetype = get_link_filetype(link)

        # TODO: make sure they have been compiled at the same time
        known_files = links_dict[version][libc]
        if filetype in known_files:
            continue
        if filetype is None:
            return None, None, None
        known_files[filetype] = url + link

    state = "stable" if "stable" in links_dict else "bleeding-edge"

    # first libc that has files for the chosen state, None if there is none
    libc = next((candidate for candidate in ["glibc", "uclibc", "musl"]
                 if candidate in links_dict[state]), None)

    target = links_dict[state][libc]
    dtb = target.get("dtb", None)
    rootfs = target.get("rootfs", None)
    kernel = target.get("kernel", None)
    return kernel, dtb, rootfs
Example 26
Project: BASS Author: Cisco-Talos File: binary_database.py License: GNU General Public License v2.0 | 5 votes |
def __init__(self, data):
    """Store ``data`` and index the called functions by call site.

    ``self.callees`` maps every entry of a function's ``called_from`` list
    to the set of ``entry_point`` values of the functions called from there.
    """
    self.data = data
    self.callees = call_index = defaultdict(set)
    for function in self.data["functions"]:
        for call_site in function["called_from"]:
            call_index[call_site].add(function["entry_point"])
Example 27
Project: BASS Author: Cisco-Talos File: binary_database.py License: GNU General Public License v2.0 | 5 votes |
def __init__(self, data):
    """Keep ``data`` and build the call-site index ``self.callees``.

    Each value found in a function's ``called_from`` list becomes a key
    whose value is the set of ``entry_point`` values of all functions that
    list it.
    """
    self.data = data
    self.callees = index = defaultdict(set)
    for func_record in self.data["functions"]:
        for caller in func_record["called_from"]:
            index[caller].add(func_record["entry_point"])
Example 28
Project: Att-ChemdNER Author: lingluodlut File: common.py License: Apache License 2.0 | 5 votes |
def reset_uids():
    """Replace the module-level UID prefix table with a fresh, empty counter table."""
    global _UID_PREFIXES
    _UID_PREFIXES = defaultdict(int)
Example 29
Project: everyclass-server Author: everyclass File: views.py License: Mozilla Public License 2.0 | 5 votes |
def get_classroom(url_rid, url_semester): """教室查询""" # decrypt identifier in URL try: _, room_id = decrypt(url_rid, resource_type='room') except ValueError: return render_template("common/error.html", message=MSG_INVALID_IDENTIFIER) # todo 支持没有学期的room # RPC to get classroom timetable try: room = entity_service.get_classroom_timetable(url_semester, room_id) except Exception as e: return handle_exception_with_error_page(e) with tracer.trace('process_rpc_result'): cards = defaultdict(list) for card in room.cards: day, time = lesson_string_to_tuple(card.lesson) cards[(day, time)].append(card) empty_5, empty_6, empty_sat, empty_sun = _empty_column_check(cards) available_semesters = semester_calculate(url_semester, room.semesters) return render_template('entity/room.html', room=room, cards=cards, empty_sat=empty_sat, empty_sun=empty_sun, empty_6=empty_6, empty_5=empty_5, available_semesters=available_semesters, current_semester=url_semester)
Example 30
Project: everyclass-server Author: everyclass File: service.py License: Mozilla Public License 2.0 | 5 votes |
def generate_ics_file(type_: str, identifier: str, semester: str) -> str:
    """Generate the ics file and return its file name.

    :param type_: 'student' selects the student timetable; any other value
        selects the teacher timetable.
    :param identifier: identifier passed to the timetable service.
    :param semester: semester string; part of the file name.
    :return: name of the ics file, ``{type_}_{identifier}_{semester}.ics``.
    """
    from everyclass.server import statsd  # must be imported here, otherwise the imported result is None
    cal_filename = f"{type_}_{identifier}_{semester}.ics"
    cal_full_path = os.path.join(calendar_dir(), cal_filename)
    # Cache exists, is less than a day old, and no forced refresh is needed
    if os.path.exists(cal_full_path) \
            and use_cache(cal_filename):
        logger.info("ics cache hit")
        statsd.increment("calendar.ics.cache.hit")
        return cal_filename
    statsd.increment("calendar.ics.cache.miss")
    # No cache, or the cache must be force-refreshed
    # NOTE(review): the original indentation was lost; the traced block is
    # assumed to cover only the RPC call - confirm.
    with tracer.trace('rpc'):
        # get the original student ID or staff ID
        if type_ == 'student':
            rpc_result = entity_service.get_student_timetable(identifier, semester)
        else:
            # teacher
            rpc_result = entity_service.get_teacher_timetable(identifier, semester)
    # from here on ``semester`` is a Semester object, no longer the string
    semester = Semester(semester)
    # cards grouped by the tuple returned by lesson_string_to_tuple
    cards: Dict[Tuple[int, int], List[Dict]] = defaultdict(list)
    for card in rpc_result.cards:
        cards[lesson_string_to_tuple(card.lesson)].append(dict(name=card.name, teacher=teacher_list_to_name_str(card.teachers), week=card.weeks, week_string=card.week_string, classroom=card.room, cid=card.card_id_encoded))
    ics_generator.generate(name=rpc_result.name, cards=cards, semester=semester, filename=cal_filename)
    return cal_filename