Python rasa_nlu.training_data.load_data() Examples

The following are 30 code examples of rasa_nlu.training_data.load_data(), drawn from open-source projects. The line above each example names the source file and project it was taken from. You may also want to check out all available functions and classes of the rasa_nlu.training_data module.
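In its simplest form, load_data() takes a path to a training data file or directory and returns a TrainingData object. A minimal sketch, assuming a local data/nlu.md file exists:

from rasa_nlu.training_data import load_data

# load_data auto-detects the format (Markdown, Rasa JSON, WIT, LUIS, Dialogflow)
training_data = load_data('data/nlu.md')      # path is illustrative
print(training_data.intents)                  # set of intent names
print(len(training_data.training_examples))   # number of examples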
Example #1
Source File: bot.py    From rasa_core with Apache License 2.0
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    training_data = load_data('data/nlu.md')
    trainer = Trainer(config.load("config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/',
                                      fixed_model_name="current")

    return model_directory 
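The returned directory can then be loaded back for inference; a minimal sketch (the parsed message is illustrative):

from rasa_nlu.model import Interpreter

# Load the persisted model and parse a message.
interpreter = Interpreter.load(train_nlu())
print(interpreter.parse("hello"))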
Example #2
Source File: bot.py    From rasa_core with Apache License 2.0
async def train_dialogue(domain_file="domain.yml",
                         model_path="models/dialogue",
                         training_data_file="data/stories.md"):
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=3),
                            MappingPolicy(),
                            RestaurantPolicy(batch_size=100, epochs=400,
                                             validation_split=0.2)])

    training_data = await agent.load_data(training_data_file)
    agent.train(
        training_data
    )

    agent.persist(model_path)
    return agent 
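Because this version awaits agent.load_data, it is a coroutine and has to be driven by an event loop; a usage sketch:

import asyncio

# Run the coroutine from synchronous code.
agent = asyncio.get_event_loop().run_until_complete(train_dialogue())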
Example #3
Source File: trainer.py    From weather-bot with MIT License
def train_dialogue(
        domain_file="domain.yml",
        model_path="models/dialogue",
        training_data_file="data/stories.md"
        ):
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=3), KerasPolicy()]
        )
    training_data = agent.load_data(training_data_file)
    agent.train(
        training_data,
        epochs=400,
        batch_size=100,
        validation_split=0.2
        )
    agent.persist(model_path)
    return agent 
Example #4
Source File: test_featurizers.py    From Rasa_NLU_Chi with Apache License 2.0
def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lowercasing that the spacy nlp component applies when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                    e.text, e.text.capitalize()) 
Example #5
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_demo_data(filename):
    td = training_data.load_data(filename)
    assert td.intents == {"affirm", "greet", "restaurant_search", "goodbye"}
    assert td.entities == {"location", "cuisine"}
    assert len(td.training_examples) == 42
    assert len(td.intent_examples) == 42
    assert len(td.entity_examples) == 11

    assert td.entity_synonyms == {'Chines': 'chinese',
                                  'Chinese': 'chinese',
                                  'chines': 'chinese',
                                  'vegg': 'vegetarian',
                                  'veggie': 'vegetarian'}

    assert td.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"},
                                 {"name": "zipcode", "pattern": "[0-9]{5}"}] 
Example #6
Source File: test_multitenancy.py    From Rasa_NLU_Chi with Apache License 2.0
def train_models(component_builder, data):
    # Retrain different multitenancy models
    def train(cfg_name, project_name):
        from rasa_nlu import training_data

        cfg = config.load(cfg_name)
        trainer = Trainer(cfg, component_builder)
        # a distinct name avoids shadowing the training_data module
        td = training_data.load_data(data)

        trainer.train(td)
        trainer.persist("test_projects", project_name=project_name)

    train("sample_configs/config_spacy.yml", "test_project_spacy_sklearn")
    train("sample_configs/config_mitie.yml", "test_project_mitie")
    train("sample_configs/config_mitie_sklearn.yml", "test_project_mitie_sklearn") 
Example #7
Source File: test_interpreter.py    From Rasa_NLU_Chi with Apache License 2.0
def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(component_builder,
                                            "data/examples/rasa/demo-rasa.json",
                                            tmpdir.strpath,
                                            _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name']
                or result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result['entities']:
            assert entity['entity'] in td.entities 
Example #8
Source File: test_featurizers.py    From rasa_nlu with Apache License 2.0
def test_spacy_featurizer_casing(spacy_nlp):
    from rasa_nlu.featurizers import spacy_featurizer

    # if this starts failing for the default model, we should think about
    # removing the lowercasing that the spacy nlp component applies when it
    # retrieves vectors. For compressed spacy models (e.g. models
    # ending in _sm) this test will most likely fail.

    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    for e in td.intent_examples:
        doc = spacy_nlp(e.text)
        doc_capitalized = spacy_nlp(e.text.capitalize())

        vecs = spacy_featurizer.features_for_doc(doc)
        vecs_capitalized = spacy_featurizer.features_for_doc(doc_capitalized)

        assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \
            "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize()) 
Example #9
Source File: test_training_data.py    From rasa_nlu with Apache License 2.0
def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v
                            for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"}
    # The order can vary from machine to machine, hence the set comparison
    assert {td.lookup_tables[0]['name'],
            td.lookup_tables[1]['name']} == {'location', 'cuisine'}
    assert {len(td.lookup_tables[0]['elements']),
            len(td.lookup_tables[1]['elements'])} == {4, 6} 
Example #10
Source File: time_train_test.py    From rasa_lookup_demo with Apache License 2.0
def train_model():
    # trains a model and times it
    t = time()
    # training_data = load_data('demo_train.md')
    training_data = load_data("data/company_train_lookup.json")
    td_load_time = time() - t
    trainer = Trainer(config.load("config.yaml"))
    t = time()
    trainer.train(training_data)
    train_time = time() - t
    clear_model_dir()
    t = time()
    model_directory = trainer.persist(
        "./tmp/models"
    )  # Returns the directory the model is stored in
    persist_time = time() - t
    return td_load_time, train_time, persist_time 
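A hypothetical usage of this timing helper:

# Print the three timings returned by train_model().
load_t, train_t, persist_t = train_model()
print("load: {:.2f}s, train: {:.2f}s, persist: {:.2f}s".format(
    load_t, train_t, persist_t))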
Example #11
Source File: test_interpreter.py    From rasa_nlu with Apache License 2.0
def test_interpreter(pipeline_template, component_builder, tmpdir):
    test_data = "data/examples/rasa/demo-rasa.json"
    _conf = utilities.base_test_conf(pipeline_template)
    _conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(component_builder,
                                            "data/examples/rasa/demo-rasa.json",
                                            tmpdir.strpath,
                                            _conf)

    texts = ["good bye", "i am looking for an indian spot"]

    for text in texts:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name'] or
                result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present
        # Models on our test data set are not stable enough to
        # require the exact entities to be found
        for entity in result['entities']:
            assert entity['entity'] in td.entities 
Example #12
Source File: test_training_data.py    From rasa_nlu with Apache License 2.0
def test_luis_data():
    td = training_data.load_data('data/examples/luis/demo-restaurants.json')
    assert len(td.entity_examples) == 8
    assert len(td.intent_examples) == 28
    assert len(td.training_examples) == 28
    assert td.entity_synonyms == {}
    assert td.intents == {"affirm", "goodbye", "greet", "inform"}
    assert td.entities == {"location", "cuisine"} 
Example #13
Source File: convert.py    From Rasa_NLU_Chi with Apache License 2.0
def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output) 
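A hypothetical invocation, converting Rasa JSON to Markdown (paths are illustrative):

convert_training_data('data/examples/rasa/demo-rasa.json',
                      'demo-rasa.md', 'md', 'en')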
Example #14
Source File: evaluate.py    From Rasa_NLU_Chi with Apache License 2.0
def run_evaluation(data_path, model_path,
                   component_builder=None):  # pragma: no cover
    """Evaluate intent classification and entity extraction."""

    # get the metadata config from the package data
    interpreter = Interpreter.load(model_path, component_builder)
    test_data = training_data.load_data(data_path,
                                        interpreter.model_metadata.language)
    extractors = get_entity_extractors(interpreter)
    entity_predictions, tokens = get_entity_predictions(interpreter,
                                                        test_data)
    if duckling_extractors.intersection(extractors):
        entity_predictions = remove_duckling_entities(entity_predictions)
        extractors = remove_duckling_extractors(extractors)

    if is_intent_classifier_present(interpreter):
        intent_targets = get_intent_targets(test_data)
        intent_predictions = get_intent_predictions(interpreter, test_data)
        logger.info("Intent evaluation results:")
        evaluate_intents(intent_targets, intent_predictions)

    if extractors:
        entity_targets = get_entity_targets(test_data)

        logger.info("Entity evaluation results:")
        evaluate_entities(entity_targets, entity_predictions, tokens,
                          extractors) 
Example #15
Source File: train.py    From Rasa_NLU_Chi with Apache License 2.0
def do_train(cfg,  # type: RasaNLUModelConfig
             data,  # type: Text
             path=None,  # type: Optional[Text]
             project=None,  # type: Optional[Text]
             fixed_model_name=None,  # type: Optional[Text]
             storage=None,  # type: Optional[Text]
             component_builder=None,  # type: Optional[ComponentBuilder]
             url=None,  # type: Optional[Text]
             **kwargs  # type: Any
             ):
    # type: (...) -> Tuple[Trainer, Interpreter, Text]
    """Loads the trainer and the data and runs the training of the model."""

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(storage)
    if url is not None:
        training_data = load_data_from_url(url, cfg.language)
    else:
        training_data = load_data(data, cfg.language)
    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(path,
                                         persistor,
                                         project,
                                         fixed_model_name)
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path 
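A minimal sketch of driving do_train directly, assuming the config and data paths exist locally:

from rasa_nlu import config

cfg = config.load("sample_configs/config_spacy.yml")  # assumed config path
trainer, interpreter, persisted_path = do_train(
    cfg, "data/examples/rasa/demo-rasa.json", path="models")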
Example #16
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"} 
Example #17
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_dialogflow_data():
    td = training_data.load_data('data/examples/dialogflow/')
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {k: v for k, v in td.entity_synonyms.items() if k != v}
    assert non_trivial_synonyms == {"mexico": "mexican",
                                    "china": "chinese",
                                    "india": "indian"} 
Example #18
Source File: convert.py    From rasa_nlu with Apache License 2.0
def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output) 
Example #19
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_data_merging(files):
    td_reference = training_data.load_data(files[0])
    td = training_data.load_data(files[1])
    assert len(td.entity_examples) == len(td_reference.entity_examples)
    assert len(td.intent_examples) == len(td_reference.intent_examples)
    assert len(td.training_examples) == len(td_reference.training_examples)
    assert td.intents == td_reference.intents
    assert td.entities == td_reference.entities
    assert td.entity_synonyms == td_reference.entity_synonyms
    assert td.regex_features == td_reference.regex_features 
Example #20
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_markdown_single_sections():
    td_regex_only = training_data.load_data('data/test/markdown_single_sections/regex_only.md')
    assert td_regex_only.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"}]

    td_syn_only = training_data.load_data('data/test/markdown_single_sections/synonyms_only.md')
    assert td_syn_only.entity_synonyms == {'Chines': 'chinese',
                                           'Chinese': 'chinese'} 
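For reference, a sketch of what these single-section Markdown files plausibly contain, inferred from the assertions above (the actual file contents are not shown here):

regex_only.md:

## regex:greet
- hey[^\s]*

synonyms_only.md:

## synonym:chinese
- Chines
- Chinese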
Example #21
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_multiword_entities():
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "New York City"
          }
        ]
      }
    ]
  }
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        tokens = WhitespaceTokenizer().tokenize(example.text)
        start, end = MitieEntityExtractor.find_entity(entities[0],
                                                      example.text,
                                                      tokens)
        assert start == 4
        assert end == 7 
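The asserted indices follow from whitespace tokenization, with the end index being exclusive; a quick sanity check:

# "New York City" occupies tokens 4..6 of the whitespace-split text,
# so find_entity reports start=4 and an exclusive end=7.
tokens = "show me flights to New York City".split()
assert tokens[4:7] == ["New", "York", "City"]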
Example #22
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_nonascii_entities():
    data = """
{
  "luis_schema_version": "2.0",
  "utterances" : [
    {
      "text": "I am looking for a ßäæ ?€ö) item",
      "intent": "unk",
      "entities": [
        {
          "entity": "description",
          "startPos": 19,
          "endPos": 26
        }
      ]
    }
  ]
}"""
    with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f:
        f.write(data.encode("utf-8"))
        f.flush()
        td = training_data.load_data(f.name)
        assert len(td.entity_examples) == 1
        example = td.entity_examples[0]
        entities = example.get("entities")
        assert len(entities) == 1
        entity = entities[0]
        assert entity["value"] == "ßäæ ?€ö)"
        assert entity["start"] == 19
        assert entity["end"] == 27
        assert entity["entity"] == "description" 
Example #23
Source File: test_training_data.py    From Rasa_NLU_Chi with Apache License 2.0
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format, language)
    td = training_data.load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = training_data.load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, 'json', language)
    rto = training_data.load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms

    # If the above assert fails, this can be used to dump the output
    # to the gold standard file and diff it using git:
    # with io.open(gold_standard_file, "w", encoding="utf-8") as f:
    #     f.write(td.as_json(indent=2))
Example #24
Source File: test_evaluation.py    From Rasa_NLU_Chi with Apache License 2.0
def test_drop_intents_below_freq():
    td = training_data.load_data('data/examples/rasa/demo-rasa.json')
    clean_td = drop_intents_below_freq(td, 0)
    assert clean_td.intents == {'affirm', 'goodbye', 'greet',
                                'restaurant_search'}

    clean_td = drop_intents_below_freq(td, 10)
    assert clean_td.intents == {'affirm', 'restaurant_search'} 
Example #25
Source File: visualize.py    From rasa_core with Apache License 2.0
async def visualize(config_path: Text, domain_path: Text, stories_path: Text,
                    nlu_data_path: Text, output_path: Text, max_history: int):
    from rasa.core.agent import Agent
    from rasa.core import config

    policies = config.load(config_path)

    agent = Agent(domain_path, policies=policies)

    # this is optional, only needed if the `/greet` type of
    # messages in the stories should be replaced with actual
    # messages (e.g. `hello`)
    if nlu_data_path is not None:
        from rasa_nlu.training_data import load_data

        nlu_training_data = load_data(nlu_data_path)
    else:
        nlu_training_data = None

    logger.info("Starting to visualize stories...")
    await agent.visualize(stories_path, output_path,
                          max_history,
                          nlu_training_data=nlu_training_data)

    full_output_path = "file://{}".format(os.path.abspath(output_path))
    logger.info("Finished graph creation. Saved into {}".format(
        full_output_path))

    import webbrowser
    webbrowser.open(full_output_path) 
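As with the other coroutines above, a usage sketch with an event loop (all paths are illustrative):

import asyncio

asyncio.get_event_loop().run_until_complete(
    visualize("config.yml", "domain.yml", "data/stories.md",
              "data/nlu.md", "graph.html", max_history=2))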
Example #26
Source File: trainer.py    From weather-bot with MIT License
def train_nlu():
    training_data = load_data('data/nlu-data.md')
    trainer = Trainer(config.load("nlu-config.yml"))
    trainer.train(training_data)
    model_directory = trainer.persist('models/nlu/', fixed_model_name="current")
    return model_directory 
Example #27
Source File: bot.py    From rasa_bot with Apache License 2.0
def train_nlu():
    from rasa_nlu.training_data import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu import config

    training_data = load_data("data/nlu.json")
    trainer = Trainer(config.load("data/nlu_model_config.json"))
    trainer.train(training_data)
    model_directory = trainer.persist("models/", project_name="ivr", fixed_model_name="demo")

    return model_directory 
Example #28
Source File: test_training_data.py    From rasa_nlu with Apache License 2.0
def test_wit_data():
    td = training_data.load_data('data/examples/wit/demo-flights.json')
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"} 
Example #29
Source File: test_training_data.py    From rasa_nlu with Apache License 2.0
def test_lookup_table_json():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.json')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate'] 
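A hypothetical sketch of lookup_table.json contents consistent with these assertions (note that "elements" may be either a path to a newline-separated file or an inline list):

{
  "rasa_nlu_data": {
    "lookup_tables": [
      {"name": "plates", "elements": "data/test/lookup_tables/plates.txt"},
      {"name": "drinks", "elements": ["mojito", "lemonade",
                                      "sweet berry wine", "tea", "club mate"]}
    ]
  }
}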
Example #30
Source File: test_training_data.py    From rasa_nlu with Apache License 2.0
def test_lookup_table_md():
    lookup_fname = 'data/test/lookup_tables/plates.txt'
    td_lookup = training_data.load_data(
        'data/test/lookup_tables/lookup_table.md')
    assert td_lookup.lookup_tables[0]['name'] == 'plates'
    assert td_lookup.lookup_tables[0]['elements'] == lookup_fname
    assert td_lookup.lookup_tables[1]['name'] == 'drinks'
    assert td_lookup.lookup_tables[1]['elements'] == [
        'mojito', 'lemonade', 'sweet berry wine', 'tea', 'club mate']