Python toolz.first() Examples
The following are 18 code examples of toolz.first().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module toolz, or try the search function.
Example #1
Source File: experimental.py From sidekick with MIT License | 6 votes |
def peek(seq: Seq, default=NOT_GIVEN) -> (object, Seq):
    """
    Return the first element of ``seq`` together with an equivalent sequence.

    The head is split off with ``uncons`` and then put back in front of the
    remainder with ``toolz.cons``, so the returned sequence yields the same
    elements as the input.

    If ``uncons`` raises ``ValueError`` and a ``default`` was supplied, the
    pair ``(default, <empty iterator>)`` is returned; without a default the
    ``ValueError`` propagates.

    >>> fst, seq = peek(range(5))
    >>> fst, list(seq)
    (0, [0, 1, 2, 3, 4])
    """
    try:
        head, tail = uncons(seq)
    except ValueError:
        if default is not NOT_GIVEN:
            return default, iter(())
        raise
    else:
        return head, toolz.cons(head, tail)
Example #2
Source File: experimental.py From sidekick with MIT License | 6 votes |
def first_repeated(key: Func, seq: Seq):
    """
    Find the first element whose key has already been seen.

    ``key`` is converted with ``to_callable`` and applied to every element;
    the function returns ``(index, element)`` for the first element whose
    key value was produced by an earlier element.

    Raises:
        ValueError: if the sequence is exhausted without a repeated key.

    Examples:
        >>> first_repeated(None, [1, 2, 3, 1])
        (3, 1)
    """
    key_fn = to_callable(key)
    visited = set()
    for position, item in enumerate(seq):
        tag = key_fn(item)
        if tag in visited:
            return position, item
        visited.add(tag)
    raise ValueError("no repeated element in sequence")
Example #3
Source File: asset_writer.py From catalyst with Apache License 2.0 | 6 votes |
def _write_df_to_table(
    self,
    tbl,
    df,
    txn,
    chunk_size,
    idx=True,
    idx_label=None,
):
    """Append ``df`` to the SQL table ``tbl`` through ``txn.connection``.

    When ``idx_label`` is not given, the index column is labelled with the
    name of the first column of ``tbl.primary_key``. Rows are written in
    chunks of ``chunk_size`` and appended to any existing table contents.
    """
    table_name = tbl.name
    connection = txn.connection
    if idx_label is None:
        # Fall back to the name of the table's first primary-key column.
        label = first(tbl.primary_key.columns).name
    else:
        label = idx_label
    df.to_sql(
        table_name,
        connection,
        index=idx,
        index_label=label,
        if_exists='append',
        chunksize=chunk_size,
    )
Example #4
Source File: __init__.py From attention-lvcsr with MIT License | 5 votes |
def _print_attributes(self, attribute_tuples):
    """Print ``name: value`` lines for attributes, sorted by name.

    ``attribute_tuples`` is a mapping; its items are ordered by their first
    element (the attribute name). An attribute is printed only when
    ``self._attribute_filter(name)`` is falsy.
    """
    ordered = sorted(attribute_tuples.items(), key=first)
    for name, val in ordered:
        if self._attribute_filter(name):
            continue
        print("\t", "{}:".format(name), val)
Example #5
Source File: _incremental.py From dask-ml with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _adapt(self, info):
    """Decide which models to keep and how many further steps to give them.

    Parameters
    ----------
    info : mapping
        Maps a model key to an indexable history; the last history entry
        must provide the keys ``"partial_fit_calls"`` and ``"score"``.

    Returns
    -------
    dict
        ``{key: steps}`` for every retained key, where ``steps`` is
        ``next_time_step - current_time_step``. When exactly one key is
        retained the result is ``{key: 0}``.
        NOTE(review): ``steps`` is presumably the number of additional
        partial_fit calls requested per model -- confirm against the caller.
    """
    # First, have an adaptive algorithm
    # The starting model count is the size of the parameter grid when
    # n_initial_parameters == "grid", otherwise the configured number.
    if self.n_initial_parameters == "grid":
        start = len(ParameterGrid(self.parameters))
    else:
        start = self.n_initial_parameters

    def inverse(time):
        """ Decrease target number of models inversely with time """
        return int(start / (1 + time) ** self.decay_rate)

    # Only the first history in `info` is consulted for the current step
    # count -- presumably all models share it; TODO confirm.
    example = toolz.first(info.values())
    time_step = example[-1]["partial_fit_calls"]

    current_time_step = time_step + 1
    next_time_step = current_time_step

    if inverse(current_time_step) == 0:
        # we'll never get out of here
        next_time_step = 1

    # Advance next_time_step while the target model count is unchanged AND
    # either (decay_rate is truthy and patience is falsy) or fewer than
    # fits_per_score steps have been added. `and` binds tighter than `or`
    # inside the parenthesised group.
    while inverse(current_time_step) == inverse(next_time_step) and (
        self.decay_rate
        and not self.patience
        or next_time_step - current_time_step < self.fits_per_score
    ):
        next_time_step += 1

    # Keep at least one model: the `target` keys with the highest latest score.
    target = max(1, inverse(next_time_step))
    best = toolz.topk(target, info, key=lambda k: info[k][-1]["score"])

    if len(best) == 1:
        # A single survivor is returned with zero further steps.
        [best] = best
        return {best: 0}
    steps = next_time_step - current_time_step
    instructions = {b: steps for b in best}
    return instructions
Example #6
Source File: experimental.py From sidekick with MIT License | 5 votes |
def index(x, seq):
    """Return the position of the first element of ``seq`` equal to ``x``.

    Works with any iterable, including one-shot iterators; iteration stops
    as soon as a match is found.

    Raises:
        IndexError: if no element of ``seq`` compares equal to ``x``.
    """
    # An explicit loop is used instead of ``toolz.first`` on a generator:
    # ``first`` signals an empty iterable with StopIteration (not ValueError),
    # so the previous ``except ValueError`` never produced the IndexError.
    for position, candidate in enumerate(seq):
        if x == candidate:
            return position
    raise IndexError("element not found in sequence")
Example #7
Source File: experimental.py From sidekick with MIT License | 5 votes |
def indexed_map(func: Func, *seqs: Seq, start=0) -> Seq:
    """
    Map ``func`` over ``seqs``, passing a running index as its first argument.

    The index is produced by ``itertools.count(start)``, so it begins at
    ``start`` and increases by one per element.

    Examples:
        >>> ''.join(indexed_map((X * Y), 'hello', start=1))
        'heelllllllooooo'

    See Also:
        map
    """
    indices = itertools.count(start)
    return _map(func, indices, *seqs)
Example #8
Source File: umis.py From umis with MIT License | 5 votes |
def detect_fastq_annotations(fastq_file):
    """
    Detect the annotations present in a FASTQ file by examining its first read.

    Returns the set of ``BARCODEINFO`` keys whose ``readprefix`` occurs in
    the first read returned by ``read_fastq(fastq_file)``.
    """
    first_read = tz.first(read_fastq(fastq_file))
    return {
        name
        for name, info in BARCODEINFO.items()
        if info.readprefix in first_read
    }
Example #9
Source File: __init__.py From attention-lvcsr with MIT License | 5 votes |
def main_loop(self):
    """Return the main loop stored in ``self._main_loop``.

    Raises:
        ValueError: if no ``_main_loop`` attribute has been set yet.
    """
    if hasattr(self, '_main_loop'):
        return self._main_loop
    raise ValueError("main loop must be assigned to extension first")
Example #10
Source File: groupby.py From ibis with Apache License 2.0 | 5 votes |
def __init__(
    self, table, by, having=None, order_by=None, window=None, **expressions
):
    """Store the table and assemble the list of group-by keys.

    ``by`` is promoted to a list (``None`` becomes an empty list). Each
    keyword expression is resolved with ``_get_group_by_key``, renamed to
    its keyword, and appended in keyword-sorted order. ``order_by`` and
    ``having`` default to empty lists when falsy.
    """
    self.table = table

    explicit_keys = util.promote_list(by if by is not None else [])
    named_keys = [
        _get_group_by_key(table, expr).name(name)
        for name, expr in sorted(expressions.items(), key=toolz.first)
    ]
    self.by = explicit_keys + named_keys

    self._order_by = order_by or []
    self._having = having or []
    self._window = window
Example #11
Source File: strings.py From ibis with Apache License 2.0 | 5 votes |
def execute_string_group_by_find_in_set(op, needle, haystack, **kwargs):
    """Find the position of ``needle`` within ``haystack``.

    ``haystack`` is a list whose pieces may be scalars, ``pd.Series`` or
    ``SeriesGroupBy`` objects.

    * With no series-like pieces, the result is
      ``ibis.util.safe_index(haystack, needle)``.
    * Otherwise the pieces are combined row-wise into a series of lists and
      ``safe_index`` is applied to each row. If the series-like pieces are
      group-bys, the result is re-grouped using the groupings of the first
      piece that has a ``grouper`` attribute.

    Raises:
        ValueError: if the series-like pieces are not all of one type.
    """
    # `haystack` could contain series, series groupbys, or scalars
    # mixing series and series groupbys is not allowed
    series_in_haystack = [
        type(piece)
        for piece in haystack
        if isinstance(piece, (pd.Series, SeriesGroupBy))
    ]

    if not series_in_haystack:
        return ibis.util.safe_index(haystack, needle)

    # `series_in_haystack` already holds the piece types. Applying `type`
    # to them again would always give `{type}`, which hides mixed inputs
    # and makes the issubclass checks below fail.
    try:
        (collection_type,) = frozenset(series_in_haystack)
    except ValueError:
        raise ValueError('Mixing Series and SeriesGroupBy is not allowed')

    pieces = haystack_to_series_of_lists(
        [getattr(piece, 'obj', piece) for piece in haystack]
    )

    result = pieces.map(toolz.flip(ibis.util.safe_index)(needle))
    if issubclass(collection_type, pd.Series):
        return result

    assert issubclass(collection_type, SeriesGroupBy)

    return result.groupby(
        toolz.first(
            piece.grouper.groupings
            for piece in haystack
            if hasattr(piece, 'grouper')
        )
    )
Example #12
Source File: strings.py From ibis with Apache License 2.0 | 5 votes |
def haystack_to_series_of_lists(haystack, index=None):
    """Combine the pieces of ``haystack`` into one series of lists.

    Each piece is wrapped in a ``pd.Series`` over ``index`` (using the
    piece's ``values`` attribute when it has one), every element is promoted
    to a list, and the resulting series are added together.

    When ``index`` is ``None`` it is taken from the first piece that has an
    ``index`` attribute.
    """
    if index is None:
        index = toolz.first(
            piece.index for piece in haystack if hasattr(piece, 'index')
        )

    def as_list_series(piece):
        # One series per piece, with each element promoted to a list.
        values = getattr(piece, 'values', piece)
        return pd.Series(values, index=index).map(ibis.util.promote_list)

    return reduce(operator.add, map(as_list_series, haystack))
Example #13
Source File: selection.py From ibis with Apache License 2.0 | 5 votes |
def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Map possibly suffixed column names back to their unsuffixed names.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        A map from possibly-suffixed column names to column names without
        suffixes, in schema order. ``None`` when ``table_op`` is not a join.
    """
    if not isinstance(table_op, ops.Join):
        return None

    left_root, right_root = ops.distinct_roots(table_op.left, table_op.right)
    suffixes = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }

    mapping = OrderedDict()
    for name in root_table.schema.names:
        # Either the bare name or its suffixed form may be in the data.
        available = {name, name + suffixes[root_table]} & data_columns
        if available:
            mapping[first(available)] = name
    return mapping
Example #14
Source File: client.py From ibis with Apache License 2.0 | 5 votes |
def create_table(self, table_name, obj=None, schema=None):
    """Create a table and register it under ``table_name``.

    The table is built from ``obj`` when given; otherwise an empty frame
    with the schema's column names is created and the schema is applied
    to it.

    Raises ``com.IbisError`` when neither ``obj`` nor ``schema`` is passed.
    """
    if obj is None and schema is None:
        raise com.IbisError('Must pass expr or schema')

    if obj is None:
        dtypes = ibis_schema_to_pandas(schema)
        column_names = [toolz.first(pair) for pair in dtypes]
        df = schema.apply_to(pd.DataFrame(columns=column_names))
    else:
        df = pd.DataFrame(obj)

    self.dictionary[table_name] = df
Example #15
Source File: umis.py From umis with MIT License | 5 votes |
def subset_bamfile(sam, barcodes):
    """
    Subset a SAM/BAM file, keeping only alignments from given
    cellular barcodes

    ``sam`` is opened with ``open_bamfile``. ``barcodes`` is an iterable of
    barcode strings; surrounding whitespace is stripped from each. Matching
    alignments are written as SAM with header to ``"-"``.

    An alignment is written only when the detected annotations include
    ``"cellular"`` and its ``CB`` group is one of ``barcodes``.
    """
    from pysam import AlignmentFile

    sam_file = open_bamfile(sam)
    out_file = AlignmentFile("-", "wh", template=sam_file)
    track = sam_file.fetch(until_eof=True)

    # peek at first alignment to determine the annotations
    # The builtin next() works on both Python 2.6+ and Python 3, whereas the
    # ``.next()`` method only exists on Python 2 iterators.
    queryalignment = next(track)
    annotations = detect_alignment_annotations(queryalignment)
    # Put the peeked alignment back so it is processed like the rest.
    track = itertools.chain([queryalignment], track)

    re_string = construct_transformed_regex(annotations)
    parser_re = re.compile(re_string)
    barcodes = set(barcode.strip() for barcode in barcodes)

    for count, aln in enumerate(track, start=1):
        if count and not count % 1000000:
            logger.info("Processed %d alignments." % count)

        match = parser_re.match(aln.qname)

        if "cellular" in annotations:
            cb = match.group('CB')
            if cb in barcodes:
                out_file.write(aln)
Example #16
Source File: asset_writer.py From catalyst with Apache License 2.0 | 4 votes |
def _split_symbol_mappings(df):
    """Split out the symbol: sid mappings from the raw data.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe with multiple rows for each symbol: sid pair.

    Returns
    -------
    asset_info : pd.DataFrame
        The asset info with one row per asset.
    symbol_mappings : pd.DataFrame
        The dataframe of just symbol: sid mappings. The index will be
        the sid, then there will be three columns: symbol, start_date, and
        end_date.

    Raises
    ------
    ValueError
        If, for any symbol, the ``(start_date, end_date)`` ranges of its
        rows intersect. The message lists every such symbol with its
        intersections and date ranges.
    """
    mappings = df[list(mapping_columns)]
    ambigious = {}
    for symbol in mappings.symbol.unique():
        persymbol = mappings[mappings.symbol == symbol]
        intersections = list(intersecting_ranges(map(
            from_tuple,
            zip(persymbol.start_date, persymbol.end_date),
        )))
        if intersections:
            ambigious[symbol] = (
                intersections,
                persymbol[['start_date', 'end_date']].astype('datetime64[ns]'),
            )

    if ambigious:
        # The generator expression is the sole argument of join() with no
        # trailing comma: an unparenthesized generator followed by a comma is
        # a SyntaxError on Python 3.7+. The loop variable is named
        # ``date_ranges`` so it does not shadow the ``df`` parameter.
        details = '\n'.join(
            '%s:\n intersections: %s\n %s' % (
                symbol,
                tuple(map(_format_range, intersections)),
                # indent the dataframe string
                '\n '.join(str(date_ranges).splitlines()),
            )
            for symbol, (intersections, date_ranges) in sorted(
                ambigious.items(),
                key=first,
            )
        )
        raise ValueError(
            'Ambiguous ownership for %d symbol%s, multiple assets held the'
            ' following symbols:\n%s' % (
                len(ambigious),
                '' if len(ambigious) == 1 else 's',
                details,
            )
        )

    return (
        df.groupby(level=0).apply(_check_asset_group),
        df[list(mapping_columns)],
    )
Example #17
Source File: core.py From dask-lightgbm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train(client, data, label, params, model_factory, weight=None, **kwargs):
    """Train a model on the workers that already hold the data parts.

    ``data``, ``label`` and (optionally) ``weight`` are split into parts and
    zipped into tuples, which are computed on the cluster. Each worker is
    then asked to run ``_train_part`` on the parts assigned to it; only the
    first worker in the worker map is asked to return a model.

    If any computed part has status ``'error'`` that part is returned
    instead. ``params['tree_learner']`` is reset to ``'data'`` (with a
    warning) when it is missing or not one of ``data``/``feature``/``voting``.

    Returns the first truthy result gathered from the workers.
    """
    # Split arrays/dataframes into parts. Arrange parts into tuples to enforce co-locality
    data_parts = _split_to_parts(data, is_matrix=True)
    label_parts = _split_to_parts(label, is_matrix=False)
    if weight is None:
        grouped = zip(data_parts, label_parts)
    else:
        weight_parts = _split_to_parts(weight, is_matrix=False)
        grouped = zip(data_parts, label_parts, weight_parts)
    parts = [delayed(group) for group in grouped]

    # Start computation in the background
    parts = client.compute(parts)
    wait(parts)

    for part in parts:
        if part.status == 'error':
            return part  # trigger error locally

    # Find locations of all parts and map them to particular Dask workers
    part_by_key = {part.key: part for part in parts}
    who_has = client.who_has(parts)
    worker_map = defaultdict(list)
    for key, workers in who_has.items():
        # Each part is assigned to the first worker reported as holding it.
        worker_map[first(workers)].append(part_by_key[key])

    master_worker = first(worker_map)
    worker_ncores = client.ncores()

    learner_is_valid = (
        'tree_learner' in params
        and params['tree_learner'].lower() in {'data', 'feature', 'voting'}
    )
    if not learner_is_valid:
        logger.warning('Parameter tree_learner not set or set to incorrect value '
                       f'({params.get("tree_learner", None)}), using "data" as default')
        params['tree_learner'] = 'data'

    # Tell each worker to train on the parts that it has locally
    futures_classifiers = []
    for worker, list_of_parts in worker_map.items():
        future = client.submit(
            _train_part,
            model_factory=model_factory,
            params=assoc(params, 'num_threads', worker_ncores[worker]),
            list_of_parts=list_of_parts,
            worker_addresses=list(worker_map.keys()),
            local_listen_port=params.get('local_listen_port', 12400),
            time_out=params.get('time_out', 120),
            return_model=(worker == master_worker),
            **kwargs
        )
        futures_classifiers.append(future)

    results = client.gather(futures_classifiers)
    trained = [model for model in results if model]
    return trained[0]
Example #18
Source File: umis.py From umis with MIT License | 4 votes |
def bamtag(sam):
    ''' Convert a BAM/SAM with fastqtransformed read names to have UMI and
    cellular barcode tags

    The first alignment is inspected to detect which annotations are encoded
    in the read names. For every alignment the matching regex groups are
    then appended as tags (``XC`` from ``CB``, ``RX`` from ``MB``, ``XS``
    from ``SB``) and the alignment is written as SAM with header to ``"-"``.
    '''
    from pysam import AlignmentFile

    start_time = time.time()

    sam_file = open_bamfile(sam)
    out_file = AlignmentFile("-", "wh", template=sam_file)
    track = sam_file.fetch(until_eof=True)

    # peek at first alignment to determine the annotations
    queryalignment = next(track) if is_python3() else track.next()
    annotations = detect_alignment_annotations(queryalignment)
    # Put the peeked alignment back so it is tagged like the rest.
    track = itertools.chain([queryalignment], track)

    parser_re = re.compile(construct_transformed_regex(annotations))

    # (annotation name, tag to add, regex group), in the order tags are appended
    tag_specs = (
        ("cellular", 'XC', 'CB'),
        ("molecular", 'RX', 'MB'),
        ("sample", 'XS', 'SB'),
    )

    for count, aln in enumerate(track, start=1):
        if count and not count % 1000000:
            logger.info("Processed %d alignments." % count)

        match = parser_re.match(aln.qname)
        tags = aln.tags

        for annotation, tag, group in tag_specs:
            if annotation in annotations:
                aln.tags += [(tag, match.group(group))]

        out_file.write(aln)

    total_time = time.time() - start_time
    alns_per_minute = int(60. * count / total_time)
    logger.info('BAM tag conversion done - {:.3}s, {:,} alns/min'.format(total_time, alns_per_minute))
    logger.info("Processed %d alignments." % count)