Python collections.defaultdict() Examples
The following are 30 code examples of collections.defaultdict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module collections, or try the search function.
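
Before diving into the project examples, here is a minimal sketch of the two defaultdict patterns that recur below, grouping with defaultdict(list) and counting with defaultdict(int); the factory passed to the constructor is called to create a value the first time a missing key is accessed (the sample data here is illustrative):

from collections import defaultdict

# group words by their first letter: missing keys start as an empty list
groups = defaultdict(list)
for word in ["apple", "avocado", "banana"]:
    groups[word[0]].append(word)
print(dict(groups))  # {'a': ['apple', 'avocado'], 'b': ['banana']}

# count occurrences: missing keys start at 0
counts = defaultdict(int)
for word in ["a", "b", "a"]:
    counts[word] += 1
print(dict(counts))  # {'a': 2, 'b': 1}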

Example #1
Source File: datahub.py From svviz with MIT License | 6 votes |
def __init__(self):
    self.args = None
    self.alignDistance = 0
    self.samples = collections.OrderedDict()
    self.genome = None
    self.sources = {}
    self.annotationSets = collections.OrderedDict()

    # for storing axes, annotations, etc, by allele
    self.alleleTracks = collections.defaultdict(collections.OrderedDict)
    self.trackCompositor = None

    self.dotplots = {}
    self.info = {}

    self.reset()
Example #2
Source File: graph.py From EDeN with MIT License | 6 votes |
def _add_sparse_vector_labes(self, graph, vertex_v, node_feature_list):
    # add the vector with a feature resulting from hashing
    # the discrete labeled graph sparse encoding with the sparse vector
    # feature, the val is then multiplied.
    svec = graph.nodes[vertex_v].get(self.key_svec, None)
    if svec:
        vec_feature_list = defaultdict(lambda: defaultdict(float))
        for radius_dist_key in node_feature_list:
            for feature in node_feature_list[radius_dist_key]:
                val = node_feature_list[radius_dist_key][feature]
                for i in svec:
                    vec_val = svec[i]
                    key = fast_hash_2(feature, i, self.bitmask)
                    vec_feature_list[radius_dist_key][key] += val * vec_val
        node_feature_list = vec_feature_list
    return node_feature_list
Example #3
Source File: iterated_maximum_subarray.py From EDeN with MIT License | 6 votes |
def extract_sequence_and_score(graph=None):
    # make dict with positions as keys and lists of ids as values
    pos_to_ids = defaultdict(list)
    for u in graph.nodes():
        if 'position' not in graph.node[u]:
            # no position attributes in graph, use the vertex id instead
            raise Exception('Missing "position" attribute in node:%s %s' % (u, graph.node[u]))
        else:
            pos = graph.node[u]['position']
        # accumulate all node ids
        pos_to_ids[pos] += [u]
    # extract sequence of labels and importances
    seq = [None] * len(pos_to_ids)
    score = [0] * len(pos_to_ids)
    for pos in sorted(pos_to_ids):
        ids = pos_to_ids[pos]
        labels = [graph.node[u].get('label', 'N/A') for u in ids]
        # check that all labels for the same position are identical
        assert(sum([1 for label in labels if label == labels[0]]) == len(labels)), \
            'ERROR: non identical labels referring to same position: %s %s' % (pos, labels)
        seq[pos] = labels[0]
        # average all importance score for the same position
        importances = [graph.node[u].get('importance', 0) for u in ids]
        score[pos] = np.mean(importances)
    return seq, score
Example #4
Source File: __init__.py From EDeN with MIT License | 6 votes |
def compute_matching_neighborhoods_fraction(GA, GB, pairings):
    count = 0
    matches = dict([(i, j) for i, j in enumerate(pairings)])
    matching_edges = defaultdict(list)
    for i, j in GA.edges():
        ii = matches[i]
        jj = matches[j]
        if (ii, jj) in GB.edges():
            matching_edges[i].append(j)
            matching_edges[j].append(i)
    for u in GA.nodes():
        if matching_edges.get(u, False):
            neighbors = nx.neighbors(GA, u)
            matches_neighborhood = True
            for v in neighbors:
                if v not in matching_edges[u]:
                    matches_neighborhood = False
                    break
            if matches_neighborhood:
                count += 1
    return float(count) / len(GA.nodes())
Example #5
Source File: s3.py From aegea with Apache License 2.0 | 6 votes |
def lifecycle(args):
    if args.delete:
        return resources.s3.BucketLifecycle(args.bucket_name).delete()
    rule = defaultdict(list, Prefix=args.prefix, Status="Enabled")
    if args.transition_to_infrequent_access is not None:
        rule["Transitions"].append(dict(StorageClass="STANDARD_IA", Days=args.transition_to_infrequent_access))
    if args.transition_to_glacier is not None:
        rule["Transitions"].append(dict(StorageClass="GLACIER", Days=args.transition_to_glacier))
    if args.expire is not None:
        rule["Expiration"] = dict(Days=args.expire)
    if args.abort_incomplete_multipart_upload is not None:
        rule["AbortIncompleteMultipartUpload"] = dict(DaysAfterInitiation=args.abort_incomplete_multipart_upload)
    if len(rule) > 2:
        clients.s3.put_bucket_lifecycle_configuration(Bucket=args.bucket_name,
                                                      LifecycleConfiguration=dict(Rules=[rule]))
    try:
        for rule in resources.s3.BucketLifecycle(args.bucket_name).rules:
            print(json.dumps(rule))
    except ClientError as e:
        expect_error_codes(e, "NoSuchLifecycleConfiguration")
        logger.error("No lifecycle configuration for bucket %s", args.bucket_name)
Example #6
Source File: DDPAE.py From DDPAE-video-prediction with MIT License | 6 votes |
def sample_latent(self, input, input_latent_mu, input_latent_sigma, pred_latent_mu,
                  pred_latent_sigma, initial_pose_mu, initial_pose_sigma, sample=True):
    '''
    Return latent variables: dictionary containing pose and content.
    Then, crop objects from the images and encode into z.
    '''
    latent = defaultdict(lambda: None)
    beta = self.get_transitions(input_latent_mu, input_latent_sigma,
                                pred_latent_mu, pred_latent_sigma, sample)
    pose = self.accumulate_pose(beta)
    # Sample initial pose
    initial_pose = self.pyro_sample('initial_pose', dist.Normal, initial_pose_mu,
                                    initial_pose_sigma, sample)
    pose += initial_pose.view(-1, 1, self.n_components, self.pose_latent_size)
    pose = self.constrain_pose(pose)

    # Get input objects
    input_pose = pose[:, :self.n_frames_input, :, :]
    input_obj = self.get_objects(input, input_pose)
    # Encode the sampled objects
    z = self.object_encoder(input_obj)
    z = self.sample_content(z, sample)

    latent.update({'pose': pose, 'content': z})
    return latent
Example #7
Source File: replay.py From iSDX with Apache License 2.0 | 6 votes |
def __init__(self, config, flows_dir, ports_dir, num_timesteps, debug=False):
    self.logger = logging.getLogger("LogHistory")
    if debug:
        self.logger.setLevel(logging.DEBUG)

    self.log_entry = namedtuple("LogEntry", "source destination type")
    self.ports = defaultdict(list)
    self.flows = defaultdict(list)
    self.data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    self.current_timestep = 0
    self.total_timesteps = num_timesteps

    self.parse_config(config)
    self.parse_logs(num_timesteps, flows_dir, ports_dir)
    self.info()

    pretty(self.data)
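
The self.data attribute above chains three defaultdicts, so the intermediate levels are created automatically on first access. A standalone sketch of that nesting pattern (the key names here are illustrative, not taken from iSDX):

from collections import defaultdict

# three-level mapping: the outer and middle dicts are created on demand,
# and the innermost values default to 0
data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
data["switch1"]["port3"]["packets"] += 1  # no KeyError at any level
print(data["switch1"]["port3"]["packets"])  # prints 1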
Example #8
Source File: analyze_logs.py From mmdetection with Apache License 2.0 | 6 votes |
def load_json_logs(json_logs):
    # load and convert json_logs to log_dict, key is epoch, value is a sub dict
    # keys of sub dict is different metrics, e.g. memory, bbox_mAP
    # value of sub dict is a list of corresponding values of all iterations
    log_dicts = [dict() for _ in json_logs]
    for json_log, log_dict in zip(json_logs, log_dicts):
        with open(json_log, 'r') as log_file:
            for line in log_file:
                log = json.loads(line.strip())
                # skip lines without `epoch` field
                if 'epoch' not in log:
                    continue
                epoch = log.pop('epoch')
                if epoch not in log_dict:
                    log_dict[epoch] = defaultdict(list)
                for k, v in log.items():
                    log_dict[epoch][k].append(v)
    return log_dicts
Example #9
Source File: fingerprint.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 6 votes |
def __init__(self, tau=0, name="", ds_name=""):
    self.name = name
    self.ds_name = ds_name
    self.tau = tau
    self.ids = set()
    self.ids_correct = set()
    self.ids_correct_fp = set()
    self.ids_agree = set()

    # Legal = there is a fingerprint match below threshold tau
    self.ids_legal = set()

    self.counts = defaultdict(lambda: 0)
    self.counts_legal = defaultdict(lambda: 0)
    self.counts_correct = defaultdict(lambda: 0)

    # Total number of examples
    self.i = 0
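
The defaultdict(lambda: 0) counters above behave the same as defaultdict(int), since any zero-argument callable can serve as the default factory. A small sketch comparing the two (illustrative only):

from collections import defaultdict

a = defaultdict(lambda: 0)  # factory is a lambda returning 0
b = defaultdict(int)        # int() also returns 0
a["x"] += 1
b["x"] += 1
assert a["x"] == b["x"] == 1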
Example #10
Source File: master.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _save_sorted_results(self, run_stats, scores, image_count, filename):
    """Saves sorted (by score) results of the evaluation.

    Args:
      run_stats: dictionary with runtime statistics for submissions,
        can be generated by WorkPiecesBase.compute_work_statistics
      scores: dictionary mapping submission ids to scores
      image_count: dictionary with number of images processed by submission
      filename: output filename
    """
    with open(filename, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['SubmissionID', 'ExternalTeamId', 'Score',
                         'MedianTime', 'ImageCount'])
        get_second = lambda x: x[1]
        for s_id, score in sorted(iteritems(scores),
                                  key=get_second, reverse=True):
            external_id = self.submissions.get_external_id(s_id)
            stat = run_stats.get(
                s_id, collections.defaultdict(lambda: float('NaN')))
            writer.writerow([s_id, external_id, score,
                             stat['median_eval_time'], image_count[s_id]])
Example #11
Source File: test_loop.py From query-exporter with GNU General Public License v3.0 | 6 votes |
def metric_values(metric, by_labels=()):
    """Return values for the metric."""
    if metric._type == "gauge":
        suffix = ""
    elif metric._type == "counter":
        suffix = "_total"
    values = defaultdict(list)
    for sample_suffix, labels, value in metric._samples():
        if sample_suffix == suffix:
            if by_labels:
                label_values = tuple(labels[label] for label in by_labels)
                values[label_values] = value
            else:
                values[sample_suffix].append(value)
    return values if by_labels else values[suffix]
Example #12
Source File: news_corpus_generator.py From news-corpus-builder with MIT License | 6 votes |
def __init__(self,corpus_dir,datastore_type='file',db_name='corpus.db'):
    '''
    Read links and associated categories for specified articles
    in text file seperated by a space

    Args:
        corpus_dir (str): The directory to save the generated corpus
        datastore_type (Optional[str]): Format to save generated corpus.
                                        Specify either 'file' or 'sqlite'.
        db_name (Optional[str]): Name of database if 'sqlite' is selected.
    '''
    self.g = Goose({'browser_user_agent': 'Mozilla','parser_class':'soup'})
    #self.g = Goose({'browser_user_agent': 'Mozilla'})
    self.corpus_dir = corpus_dir
    self.datastore_type = datastore_type
    self.db_name = db_name
    self.stats = defaultdict(int)

    self._create_corpus_dir(self.corpus_dir)

    self.db = None
    if self.datastore_type == 'sqlite':
        self.db = self.corpus_dir + '/' + self.db_name
        self._set_up_db(self.db)
Example #13
Source File: unique_constraint.py From goodtables-py with MIT License | 6 votes |
def _create_unique_fields_cache(cells):
    primary_key_column_numbers = []
    cache = {}

    # Unique
    for _, cell in enumerate(cells, start=1):
        field = cell.get('field')
        column_number = cell.get('column-number')
        if field is not None:
            if field.descriptor.get('primaryKey'):
                primary_key_column_numbers.append(column_number)
            if field.constraints.get('unique'):
                cache[tuple([column_number])] = defaultdict(list)

    # Primary key
    if primary_key_column_numbers:
        cache[tuple(primary_key_column_numbers)] = defaultdict(list)

    return cache
Example #14
Source File: coco.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def __init__(self, annotation_file=None):
    """
    Constructor of Microsoft COCO helper class for reading and visualizing annotations.
    :param annotation_file (str): location of annotation file
    :param image_folder (str): location to the folder that hosts images.
    :return:
    """
    # load dataset
    self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
    self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
    if not annotation_file == None:
        print('loading annotations into memory...')
        tic = time.time()
        dataset = json.load(open(annotation_file, 'r'))
        assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
        print('Done (t={:0.2f}s)'.format(time.time()- tic))
        self.dataset = dataset
        self.createIndex()
Example #15
Source File: bleu_scorer.py From deep-summarization with MIT License | 6 votes |
def precook(s, n=4, out=False):
    """
    Takes a string as input and returns an object that can be given to
    either cook_refs or cook_test. This is optional: cook_refs and cook_test
    can take string arguments as well.
    :param s:
    :param n:
    :param out:
    :return:
    """
    words = s.split()
    counts = defaultdict(int)
    for k in xrange(1,n+1):
        for i in xrange(len(words)-k+1):
            ngram = tuple(words[i:i+k])
            counts[ngram] += 1
    return (len(words), counts)
Example #16
Source File: summarystats.py From svviz with MIT License | 5 votes |
def addVariantResults(self, dataHub):
    variant = str(dataHub.variant)

    for sampleName, sample in dataHub.samples.items():
        counts = collections.Counter()
        reasons = {}
        alnScores = collections.defaultdict(list)
        insertSizes = collections.defaultdict(list)

        # collect stats
        for alnCollection in sample.alnCollections:
            allele = alnCollection.choice
            counts[allele] += 1
            if not allele in reasons:
                reasons[allele] = collections.Counter()
            reasons[allele][alnCollection.why] += 1

            alnScores[allele].append(sum(aln.score for aln in alnCollection.chosenSet().getAlignments()))
            insertSizes[allele].append(len(alnCollection.chosenSet()))

        # record stats
        for allele, count in counts.items():
            self.stats.append([variant, sampleName, allele, "count", count])

        for allele in reasons:
            for reason in reasons[allele]:
                self.stats.append([variant, sampleName, allele, "reason_{}".format(reason), reasons[allele][reason]])

        for allele in alnScores:
            self.stats.append([variant, sampleName, allele, "alnScore_mean", numpy.mean(alnScores[allele])])
            self.stats.append([variant, sampleName, allele, "alnScore_std", numpy.std(alnScores[allele])])

        for allele in insertSizes:
            self.stats.append([variant, sampleName, allele, "insertSize_mean", numpy.mean(insertSizes[allele])])
            self.stats.append([variant, sampleName, allele, "insertSize_std", numpy.std(insertSizes[allele])])
Example #17
Source File: pairfinder.py From svviz with MIT License | 5 votes |
def getToMatch(self):
    tomatch = set()
    readsByID = collections.defaultdict(ReadSet)

    for region in self.regions:
        for read in self.loadRegion(region.chr(), region.start(), region.end()):
            tomatch.add(read)
            readsByID[read.qname].add(read)

        if self.sampleReads is not None and len(readsByID) > self.sampleReads:
            return None, None

        if self.maxReads and len(tomatch) > self.maxReads:
            raise TooManyReadsException

    return tomatch, readsByID
Example #18
Source File: gff.py From svviz with MIT License | 5 votes |
def getAnnotations(self, chrom, start, end, clip=False, extension=1000000):
    chrom = self.fixChromFormat(chrom)
    lines = self.tabix.fetch(chrom, max(0, start-extension), end+extension)

    transcriptsToLines = collections.defaultdict(list)

    for i, line in enumerate(lines):
        if len(line) < 2:
            continue
        try:
            tx = re.match(RE_TRANSCRIPT, line).group(1)
        except AttributeError:
            tx = "anno{}".format(i)
        transcriptsToLines[tx].append(line)

    genes = []
    for transcript, lines in transcriptsToLines.items():
        genes.append(GTFGene(lines))

    if extension > 0:
        genes = [gene for gene in genes
                 if not (end<gene.start or start>gene.end)]  # start<=gene.start<=end or start<=gene.end<=end

    if clip:
        for gene in genes:
            gene.clip(start, end)

    return genes
Example #19
Source File: proxy.py From incubator-spot with Apache License 2.0 | 5 votes |
def create_storyboard(uri,date,title,text,expanded_search,top_results):

    clientips = defaultdict(int)
    reqmethods = defaultdict(int)
    rescontype = defaultdict(int)
    referers = defaultdict(int)
    refered = defaultdict(int)
    requests = []

    for row in expanded_search:
        clientips[row['clientIp']]+=1
        reqmethods[row['requestMethod']]+=1
        rescontype[row['responseContentType']]+=1

        if row['uri'] == uri:
            # Source URI's that refered the user to the threat
            referers[row['referer']]+=1
            requests += [{'clientip':row['clientIp'], 'referer':row['referer'],
                          'reqmethod':row['requestMethod'], 'resconttype':row['responseContentType']}]
        else:
            # Destination URI's refered by the threat
            refered[row['uri']]+=1

    create_incident_progression(uri,requests,refered,date)
    create_timeline(uri,clientips,date,top_results)
    save_comments(uri,title,text,date)

    return True
Example #20
Source File: graph.py From EDeN with MIT License | 5 votes |
def _transform(self, original_graph):
    graph = self._graph_preprocessing(original_graph)
    # collect all features for all vertices for each label_index
    feature_list = defaultdict(lambda: defaultdict(float))
    for v in graph.nodes():
        # only for vertices of type 'node', i.e. not for the 'edge' type
        if graph.nodes[v].get('node', False):
            self._transform_vertex(graph, v, feature_list)
    _clean_graph(graph)
    return self._normalization(feature_list)
Example #21
Source File: graph.py From EDeN with MIT License | 5 votes |
def _add_vector_labes(self, graph, vertex_v, node_feature_list):
    # add the vector with an offset given by the feature, multiplied by val
    vec = graph.nodes[vertex_v].get(self.key_vec, None)
    if vec:
        vec_feature_list = defaultdict(lambda: defaultdict(float))
        for radius_dist_key in node_feature_list:
            for feature in node_feature_list[radius_dist_key]:
                val = node_feature_list[radius_dist_key][feature]
                for i, vec_val in enumerate(vec):
                    key = (feature + i) % self.bitmask
                    vec_feature_list[radius_dist_key][key] += val * vec_val
        node_feature_list = vec_feature_list
    return node_feature_list
Example #22
Source File: graph.py From EDeN with MIT License | 5 votes |
def _compute_vertex_based_features(self, graph):
    feature_rows = []
    for v in graph.nodes():
        # only for vertices of type 'node', i.e. not for the 'edge' type
        if graph.nodes[v].get('node', False):
            feature_list = defaultdict(lambda: defaultdict(float))
            self._transform_vertex(graph, v, feature_list)
            feature_rows.append(self._normalization(feature_list))
    data_matrix = self._convert_dict_to_sparse_matrix(feature_rows)
    return data_matrix
# -------------------------------------------------------------------
Example #23
Source File: cost.py From aegea with Apache License 2.0 | 5 votes |
def cost(args):
    if not (args.group_by or args.group_by_tag):
        args.group_by = ["SERVICE"]
    get_cost_and_usage_args = dict(get_common_method_args(args), Metrics=args.metrics)
    get_cost_and_usage_args["GroupBy"] = [dict(Type="DIMENSION", Key=k) for k in args.group_by]
    get_cost_and_usage_args["GroupBy"] += [dict(Type="TAG", Key=k) for k in args.group_by_tag]
    rows = collections.defaultdict(dict)
    try:
        account_name = clients.iam.list_account_aliases()["AccountAliases"][0]
    except Exception:
        account_name = boto3.session.Session().profile_name
    title = "{} ({})".format(args.group_by[0] if args.group_by else "Tag:" + args.group_by_tag[0], account_name)
    args.columns, cell_transforms = [title], {"TOTAL": format_float}
    for page in clients.ce.get_cost_and_usage(**get_cost_and_usage_args)["ResultsByTime"]:
        args.columns.append(page["TimePeriod"]["Start"])
        cell_transforms[page["TimePeriod"]["Start"]] = format_float
        for i, group in enumerate(page["Groups"]):
            value = group["Metrics"][args.metrics[0]]
            if isinstance(value, dict) and "Amount" in value:
                value = float(value["Amount"])
            rows[group["Keys"][0]].setdefault(title, group["Keys"][0])
            rows[group["Keys"][0]].setdefault("TOTAL", 0)
            rows[group["Keys"][0]]["TOTAL"] += value
            rows[group["Keys"][0]][page["TimePeriod"]["Start"]] = value
    args.columns.append("TOTAL")
    rows = [row for row in rows.values() if row["TOTAL"] > args.min_total]
    rows = sorted(rows, key=lambda row: -row["TOTAL"])
    page_output(tabulate(rows, args, cell_transforms=cell_transforms))
Example #24
Source File: download.py From arm_now with MIT License | 5 votes |
def scrawl_kernel(arch):
    re_href = re.compile('href="?({arch}[^ <>"]*)"?'.format(arch=arch))
    url = "https://toolchains.bootlin.com/downloads/releases/toolchains/{arch}/test-system/".format(arch=arch)
    response = requests.get(url + "?C=M;O=D")
    text = response.text
    links = re_href.findall(text)
    links_dict = defaultdict(lambda: defaultdict(dict))
    for link in links:
        version = get_link_version(link)
        libc = get_link_libc(link)
        filetype = get_link_filetype(link)

        # TODO: make sure they have been compiled at the same time
        if filetype not in links_dict[version][libc]:
            if filetype is None:
                return None, None, None
            links_dict[version][libc][filetype] = url + link

    state = "bleeding-edge"
    if "stable" in links_dict:
        state = "stable"

    for libc in ["glibc", "uclibc", "musl"]:
        if libc in links_dict[state]:
            break
    else:
        libc = None

    target = links_dict[state][libc]
    dtb = target.get("dtb", None)
    rootfs = target.get("rootfs", None)
    kernel = target.get("kernel", None)
    return kernel, dtb, rootfs
Example #25
Source File: binary_database.py From BASS with GNU General Public License v2.0 | 5 votes |
def __init__(self, data):
    self.data = data
    self.callees = defaultdict(set)
    for func in self.data["functions"]:
        for call in func["called_from"]:
            self.callees[call].add(func["entry_point"])
Example #26
Source File: binary_database.py From BASS with GNU General Public License v2.0 | 5 votes |
def __init__(self, data):
    self.data = data
    self.callees = defaultdict(set)
    for func in self.data["functions"]:
        for call in func["called_from"]:
            self.callees[call].add(func["entry_point"])
Example #27
Source File: common.py From Att-ChemdNER with Apache License 2.0 | 5 votes |
def reset_uids():
    global _UID_PREFIXES
    _UID_PREFIXES = defaultdict(int)
Example #28
Source File: views.py From everyclass-server with Mozilla Public License 2.0 | 5 votes |
def get_classroom(url_rid, url_semester):
    """Classroom timetable query"""
    # decrypt identifier in URL
    try:
        _, room_id = decrypt(url_rid, resource_type='room')
    except ValueError:
        return render_template("common/error.html", message=MSG_INVALID_IDENTIFIER)

    # todo: support rooms without a semester

    # RPC to get classroom timetable
    try:
        room = entity_service.get_classroom_timetable(url_semester, room_id)
    except Exception as e:
        return handle_exception_with_error_page(e)

    with tracer.trace('process_rpc_result'):
        cards = defaultdict(list)
        for card in room.cards:
            day, time = lesson_string_to_tuple(card.lesson)
            cards[(day, time)].append(card)
        empty_5, empty_6, empty_sat, empty_sun = _empty_column_check(cards)
        available_semesters = semester_calculate(url_semester, room.semesters)

    return render_template('entity/room.html',
                           room=room,
                           cards=cards,
                           empty_sat=empty_sat,
                           empty_sun=empty_sun,
                           empty_6=empty_6,
                           empty_5=empty_5,
                           available_semesters=available_semesters,
                           current_semester=url_semester)
Example #29
Source File: service.py From everyclass-server with Mozilla Public License 2.0 | 5 votes |
def generate_ics_file(type_: str, identifier: str, semester: str) -> str:
    """Generate the ics file and return the filename"""
    from everyclass.server import statsd  # must be imported here, otherwise the imported value is None

    cal_filename = f"{type_}_{identifier}_{semester}.ics"
    cal_full_path = os.path.join(calendar_dir(), cal_filename)

    # cached, cache is less than a day old, and no forced refresh needed
    if os.path.exists(cal_full_path) and use_cache(cal_filename):
        logger.info("ics cache hit")
        statsd.increment("calendar.ics.cache.hit")
        return cal_filename
    statsd.increment("calendar.ics.cache.miss")

    # no cache, or a forced refresh is required
    with tracer.trace('rpc'):
        # get the original student or staff ID
        if type_ == 'student':
            rpc_result = entity_service.get_student_timetable(identifier, semester)
        else:  # teacher
            rpc_result = entity_service.get_teacher_timetable(identifier, semester)

    semester = Semester(semester)

    cards: Dict[Tuple[int, int], List[Dict]] = defaultdict(list)
    for card in rpc_result.cards:
        cards[lesson_string_to_tuple(card.lesson)].append(
            dict(name=card.name,
                 teacher=teacher_list_to_name_str(card.teachers),
                 week=card.weeks,
                 week_string=card.week_string,
                 classroom=card.room,
                 cid=card.card_id_encoded))

    ics_generator.generate(name=rpc_result.name,
                           cards=cards,
                           semester=semester,
                           filename=cal_filename)

    return cal_filename
Example #30
Source File: test.py From pywarp with Apache License 2.0 | 5 votes |
def __init__(self):
    self.users = collections.defaultdict(collections.defaultdict)
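
Here collections.defaultdict itself is the default factory, so each missing user key yields a fresh inner defaultdict. Note that the inner defaultdict is created without a factory of its own, so missing keys at the second level still raise KeyError; a brief illustration with hypothetical keys:

import collections

users = collections.defaultdict(collections.defaultdict)
users["alice"]["credential"] = "key-handle"  # outer level auto-creates the inner dict
print(users["alice"])  # defaultdict(None, {'credential': 'key-handle'})
# users["alice"]["missing"] would raise KeyError: the inner defaultdict has no factory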