Python more_itertools.flatten() Examples

The following are 18 code examples of more_itertools.flatten(), taken from open source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module more_itertools, or try the search function.
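For reference, flatten() takes an iterable of iterables and lazily removes exactly one level of nesting. A minimal sketch:

from more_itertools import flatten

nested = [[0, 1], [2, 3], [4]]
print(list(flatten(nested)))  # [0, 1, 2, 3, 4]

# Only one level is removed; deeper nesting is left intact.
print(list(flatten([[0, [1, 2]], [3]])))  # [0, [1, 2], 3]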
Example #1
Source File: displacements.py    From langchangetrack with BSD 3-Clause "New" or "Revised" License
def calculate_words_displacement(self, column_names, n_jobs=1):
    """ Calculate word displacements for each word in the Pandas data frame. """
    words = self.get_word_list()

    # Create chunks of the words to be processed.
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an int
    chunks = list(more_itertools.chunked(words, chunk_sz))

    # Calculate the displacements for each chunk in parallel.
    chunksL = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, process_word_source, self) for chunk in chunks)
    chunksH = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, process_word_dest, self) for chunk in chunks)

    # Each chunk yields a list of per-word result lists: flatten away the
    # chunk level first, then flatten the per-word lists.
    L = more_itertools.flatten(chunksL)
    H = more_itertools.flatten(chunksH)
    flattenedL = [x for sublist in L for x in sublist]
    flattenedH = [x for sublist in H for x in sublist]

    # Store the results in a nice pandas data frame.
    dfo, dfn = self.create_data_frames(flattenedL, flattenedH, column_names)
    return flattenedL, flattenedH, dfo, dfn
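The chunk-then-flatten pattern above is the usual way to fan work out to workers and gather the results back. A minimal sketch of the roundtrip, with an uppercase transform standing in for the real per-chunk work:

import math
import more_itertools

words = ['a', 'b', 'c', 'd', 'e']
n_jobs = 2
chunk_sz = int(math.ceil(len(words) / float(n_jobs)))  # chunked() expects an int

chunks = list(more_itertools.chunked(words, chunk_sz))  # [['a', 'b', 'c'], ['d', 'e']]
results = [[w.upper() for w in chunk] for chunk in chunks]  # stand-in for Parallel(...)
print(list(more_itertools.flatten(results)))  # ['A', 'B', 'C', 'D', 'E']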
Example #2
Source File: stack_probabilities_for_linear.py    From OpenKiwi with GNU Affero General Public License v3.0
def concat(probabilities, prob_sep='|', token_sep='\n', sentence_sep='\n\n'):
    # Flatten each file's sentences into a single token stream so the
    # token counts can be compared across files.
    flat_probabilities = [list(flatten(probs)) for probs in probabilities]
    if not all_equal([len(p) for p in flat_probabilities]):
        logger.error('Numbers of tokens do not match.')
        return None

    # Regroup as sentences of per-token tuples, one probability per file.
    probs_per_token_sentence_file = [
        list(zip(*parallel_probs)) for parallel_probs in zip(*probabilities)
    ]

    content_str = sentence_sep.join(
        [
            token_sep.join(
                [prob_sep.join(map(str, tokens)) for tokens in sentence]
            )
            for sentence in probs_per_token_sentence_file
        ]
    )
    content_str += sentence_sep  # Add a trailing newline before EOF.

    return content_str 
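As a rough usage sketch of concat (the nested input shape, two files with two sentences of token probabilities each, is an assumption inferred from the code):

# Hypothetical input: probabilities[file][sentence][token]
file_a = [[0.1, 0.9], [0.8]]
file_b = [[0.2, 0.7], [0.5]]

print(concat([file_a, file_b]))
# 0.1|0.2
# 0.9|0.7
#
# 0.8|0.5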
Example #3
Source File: test_recipes.py    From pipenv with MIT License
def test_basic_usage(self):
        """ensure list of lists is flattened one level"""
        f = [[0, 1, 2], [3, 4, 5]]
        self.assertEqual(list(range(6)), list(mi.flatten(f))) 
Example #4
Source File: test_more.py    From Tautulli with GNU General Public License v3.0
def test_collapse_flatten(self):
        l = [[1], [2], [[3], 4], [[[5]]]]
        self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l))) 
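The test above relies on collapse() with levels=1 matching flatten() exactly. A quick sketch of where the two functions differ:

import more_itertools as mi

nested = [[1], [2], [[3], 4], [[[5]]]]

# flatten() and collapse(levels=1) remove exactly one level of nesting.
print(list(mi.flatten(nested)))             # [1, 2, [3], 4, [[5]]]
print(list(mi.collapse(nested, levels=1)))  # [1, 2, [3], 4, [[5]]]

# Without a level limit, collapse() flattens all the way down.
print(list(mi.collapse(nested)))            # [1, 2, 3, 4, 5]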
Example #5
Source File: test_recipes.py    From Tautulli with GNU General Public License v3.0
def test_single_level(self):
        """ensure list of lists is flattened only one level"""
        f = [[0, [1, 2]], [[3, 4], 5]]
        self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f))) 
Example #6
Source File: test_recipes.py    From Tautulli with GNU General Public License v3.0
def test_basic_usage(self):
        """ensure list of lists is flattened one level"""
        f = [[0, 1, 2], [3, 4, 5]]
        self.assertEqual(list(range(6)), list(mi.flatten(f))) 
Example #7
Source File: test_more.py    From pipenv with MIT License
def test_collapse_flatten(self):
        l = [[1], [2], [[3], 4], [[[5]]]]
        self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l))) 
Example #8
Source File: test_recipes.py    From pipenv with MIT License
def test_single_level(self):
        """ensure list of lists is flattened only one level"""
        f = [[0, [1, 2]], [[3, 4], 5]]
        self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f))) 
Example #9
Source File: test_recipes.py    From python-netsurv with MIT License
def test_basic_usage(self):
        """ensure list of lists is flattened one level"""
        f = [[0, 1, 2], [3, 4, 5]]
        self.assertEqual(list(range(6)), list(mi.flatten(f))) 
Example #10
Source File: evaluate.py    From OpenKiwi with GNU Affero General Public License v3.0
def eval_word_level(golds, pred_files, tag_name):
    scores_table = []
    for pred_file, pred in pred_files[tag_name]:
        _check_lengths(golds[tag_name], pred)

        scores = score_word_level(
            list(flatten(golds[tag_name])), list(flatten(pred))
        )

        scores_table.append((pred_file, *scores))
    # If more than one system is provided, compute ensemble score
    if len(pred_files[tag_name]) > 1:
        ensemble_pred = _average(
            [list(flatten(pred)) for _, pred in pred_files[tag_name]]
        )
        ensemble_score = score_word_level(
            list(flatten(golds[tag_name])), ensemble_pred
        )
        scores_table.append(("*ensemble*", *ensemble_score))

    scores = np.array(
        scores_table,
        dtype=[
            ("File", "object"),
            ("F1_{}".format(const.LABELS[0]), float),
            ("F1_{}".format(const.LABELS[1]), float),
            ("F1_mult", float),
        ],
    )
    # Put the main metric in the first column
    scores = scores[
        [
            "File",
            "F1_mult",
            "F1_{}".format(const.LABELS[0]),
            "F1_{}".format(const.LABELS[1]),
        ]
    ]

    return scores 
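The column reshuffle at the end works because indexing a NumPy structured array with a list of field names returns those fields in the given order. A minimal sketch (the OK/BAD label names are hypothetical stand-ins for const.LABELS):

import numpy as np

rows = [('sys1', 0.61, 0.45, 0.27), ('sys2', 0.58, 0.49, 0.28)]
scores = np.array(rows, dtype=[('File', 'object'), ('F1_OK', float),
                               ('F1_BAD', float), ('F1_mult', float)])

# Selecting fields by name yields them in the requested order,
# putting the main metric first.
reordered = scores[['File', 'F1_mult', 'F1_OK', 'F1_BAD']]
print(reordered.dtype.names)  # ('File', 'F1_mult', 'F1_OK', 'F1_BAD')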
Example #11
Source File: evaluate.py    From OpenKiwi with GNU Affero General Public License v3.0
def _average(probs_per_file):
    # flat_probs = [list(flatten(probs)) for probs in probs_per_file]
    probabilities = np.array(probs_per_file, dtype="float32")
    return probabilities.mean(axis=0).tolist() 
Example #12
Source File: utils.py    From OpenKiwi with GNU Affero General Public License v3.0
def unroll(list_of_lists):
    """
    :param list_of_lists: a list that contains lists
    :return: a list flattened by one level, if the input is nested
    """
    # Only flatten when the first element is itself a list/array;
    # otherwise the input is assumed to be flat already.
    if isinstance(first(list_of_lists), (np.ndarray, list)):
        return list(flatten(list_of_lists))
    return list_of_lists
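A quick usage sketch of unroll, assuming the definition above is in scope:

print(unroll([[1, 2], [3, 4]]))  # [1, 2, 3, 4]
print(unroll([1, 2, 3, 4]))      # [1, 2, 3, 4] (already flat, returned as-is)

# Note the check only looks at the first element, so a mixed input
# such as [1, [2, 3]] is returned unchanged.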
Example #13
Source File: test_recipes.py    From python-netsurv with MIT License
def test_single_level(self):
        """ensure list of lists is flattened only one level"""
        f = [[0, [1, 2]], [[3, 4], 5]]
        self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f))) 
Example #14
Source File: test_recipes.py    From python-netsurv with MIT License
def test_basic_usage(self):
        """ensure list of lists is flattened one level"""
        f = [[0, 1, 2], [3, 4, 5]]
        self.assertEqual(list(range(6)), list(mi.flatten(f))) 
Example #15
Source File: test_more.py    From python-netsurv with MIT License
def test_collapse_flatten(self):
        l = [[1], [2], [[3], 4], [[[5]]]]
        self.assertEqual(list(mi.collapse(l, levels=1)), list(mi.flatten(l))) 
Example #16
Source File: test_recipes.py    From python-netsurv with MIT License
def test_single_level(self):
        """ensure list of lists is flattened only one level"""
        f = [[0, [1, 2]], [[3, 4], 5]]
        self.assertEqual([0, [1, 2], [3, 4], 5], list(mi.flatten(f))) 
Example #17
Source File: record_merger.py    From recordexpungPDX with MIT License
def merge(
        ambiguous_record: AmbiguousRecord,
        ambiguous_charge_id_to_time_eligibility_list: List[Dict[str, TimeEligibility]],
        charge_ids_with_question: List[str],
    ) -> Record:
        ambiguous_charge_id_to_time_eligibilities: Dict[str, List[TimeEligibility]] = collections.defaultdict(list)
        for charge_id_to_time_eligibility in ambiguous_charge_id_to_time_eligibility_list:
            for k, v in charge_id_to_time_eligibility.items():
                if v not in ambiguous_charge_id_to_time_eligibilities[k]:
                    ambiguous_charge_id_to_time_eligibilities[k].append(v)
        charges = list(flatten([record.charges for record in ambiguous_record]))
        record = ambiguous_record[0]
        new_case_list: List[Case] = []
        for case in record.cases:
            new_charges = []
            for charge in case.charges:
                time_eligibilities = ambiguous_charge_id_to_time_eligibilities.get(
                    charge.ambiguous_charge_id
                )  # TODO: Review whether this can return None
                sorted_time_eligibility = (
                    sorted(time_eligibilities, key=lambda e: e.date_will_be_eligible) if time_eligibilities else None
                )
                same_charges = list(filter(lambda c: c.ambiguous_charge_id == charge.ambiguous_charge_id, charges))
                romeo_and_juliet_exception = RecordMerger._is_romeo_and_juliet_exception(same_charges)
                merged_type_eligibility = RecordMerger.merge_type_eligibilities(same_charges)
                merged_time_eligibility = RecordMerger.merge_time_eligibilities(sorted_time_eligibility)
                if charge.ambiguous_charge_id in charge_ids_with_question:
                    charge_eligibility = ChargeEligibility(
                        ChargeEligibilityStatus.NEEDS_MORE_ANALYSIS, "Needs More Analysis"
                    )
                else:
                    charge_eligibility = RecordMerger.compute_charge_eligibility(
                        merged_type_eligibility, sorted_time_eligibility, romeo_and_juliet_exception
                    )
                    if "open" in charge_eligibility.label.lower():
                        charge_eligibility = replace(
                            charge_eligibility,
                            label=f"Eligibility Timeframe Dependent On Open Charge: {charge_eligibility.label}",
                        )
                expungement_result = ExpungementResult(
                    type_eligibility=merged_type_eligibility,
                    time_eligibility=merged_time_eligibility,
                    charge_eligibility=charge_eligibility,
                )
                merged_type_name = " ⬥ ".join(
                    list(unique_everseen([charge.charge_type.type_name for charge in same_charges]))
                )
                merged_charge_type = replace(charge.charge_type, type_name=merged_type_name)
                merged_disposition = RecordMerger.merge_dispositions(same_charges)
                new_charge: Charge = replace(
                    charge,
                    charge_type=merged_charge_type,
                    expungement_result=expungement_result,
                    disposition=merged_disposition,
                )
                new_charges.append(new_charge)
            new_case = replace(case, charges=tuple(new_charges))
            new_case_list.append(new_case)
        return replace(record, cases=tuple(new_case_list)) 
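The merged type name above uses more_itertools.unique_everseen, which drops duplicates while preserving first-seen order; for instance (the charge-type names are hypothetical):

from more_itertools import unique_everseen

type_names = ['Felony Class B', 'Misdemeanor', 'Felony Class B']
print(' ⬥ '.join(unique_everseen(type_names)))
# Felony Class B ⬥ Misdemeanor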
Example #18
Source File: dump_timeseries.py    From langchangetrack with BSD 3-Clause "New" or "Revised" License
def main(args):
    # get the arguments
    method = args.method
    win_size = args.win_size
    step = args.step
    metric_name = args.metric_name
    n_jobs = args.workers

    # Load the data.
    L, H, olddf, newdf = pickle.load(open(args.filename, 'rb'))  # pickle files must be opened in binary mode
    words = pd.Series(olddf.word.values.ravel()).unique()
    oldrows = []
    newrows = []
    sourcexrange = np.arange(args.mint, args.maxt, step)
    destxrange = np.arange(args.mint, args.maxt, step)
    if method == 'win':
        sourcexrange = sourcexrange[win_size:]
        destxrange = destxrange[:-win_size]

    if args.interpolate:
        sourcexinter = np.arange(sourcexrange[0], sourcexrange[-1] + 1, 1)
        destxinter = np.arange(destxrange[0], destxrange[-1] + 1, 1)
    else:
        sourcexinter = sourcexrange
        destxinter = destxrange

    # Construct the series
    assert len(sourcexinter) == len(destxinter)
    chunk_sz = int(np.ceil(len(words) / float(n_jobs)))  # chunked() expects an int
    words_chunks = more_itertools.chunked(words, chunk_sz)
    timeseries_chunks = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, create_word_time_series, olddf, newdf,
                                                                               sourcexinter, destxinter,
                                                                               metric_name=metric_name,
                                                                               interpolate=args.interpolate) for chunk in words_chunks)

    timeseries = list(more_itertools.flatten(timeseries_chunks))

    # Dump the data frame
    for orow, newrow in timeseries:
        if orow and newrow:
            oldrows.append(orow)
            newrows.append(newrow)

    header = ['word']
    header.extend(sourcexinter)
    newheader = ['word']
    newheader.extend(destxinter)
    oldtimeseries = pd.DataFrame.from_records(oldrows, columns=header)
    oldtimeseries = oldtimeseries.fillna(method='backfill', axis=1)
    newtimeseries = pd.DataFrame.from_records(newrows, columns=newheader)
    newtimeseries = newtimeseries.fillna(method='backfill', axis=1)
    oldtimeseries.to_csv(args.sourcetimef, encoding='utf-8')
    newtimeseries.to_csv(args.endtimef, encoding='utf-8')
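The backfill at the end fills each missing value from the next non-missing column to its right; a small pandas sketch (recent pandas versions deprecate the method= argument in favor of .bfill(axis=1)):

import numpy as np
import pandas as pd

df = pd.DataFrame([[np.nan, 0.2, 0.3]], columns=[1900, 1910, 1920])
print(df.fillna(method='backfill', axis=1))
#    1900  1910  1920
# 0   0.2   0.2   0.3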