Question Answering Automated Dialog System

change_log_config[source]

change_log_config()

Change Deeppavlov configuration files to ERROR mode

run_shell_installs[source]

run_shell_installs()

Run install commands

action_over_list_f[source]

action_over_list_f(arr, v)

v[0] and v[1] are dictionaries; arr is an array of dictionaries.

replacement_f[source]

replacement_f(model_config, **args)

Replaces the model config dictionary with new values provided in **args

# test action_over_list_f
from random import randint


def gen_list_keys_for_tests():
    '''Build random fixtures for the list-replacement tests.

    Returns a tuple ``(pipe_list, rand_id, rand_key, args, new_rand_val)``:

    * ``pipe_list``    -> 3 to 10 dictionaries with keys id/key1/key2/key3
    * ``rand_id``      -> the ``id`` of one randomly picked element
    * ``rand_key``     -> one of ``key1``, ``key2``, ``key3``
    * ``args``         -> ``{'chains': {'pipe': [{'id': rand_id},
                          {rand_key: new_rand_val}]}}``
    * ``new_rand_val`` -> a fresh ``new_<n>`` value
    '''

    def _suffixed(prefix):
        # Append a random integer between 1 and 10 to the prefix.
        return f'{prefix}_{randint(1,10):1}'

    def _random_element():
        return {
            'id': _suffixed('id'),
            'key1': _suffixed('v1'),
            'key2': _suffixed('v2'),
            'key3': _suffixed('v3')
        }

    n_elements = randint(3, 10)
    pipe_list = [_random_element() for _ in range(n_elements)]

    picked_index = randint(0, len(pipe_list) - 1)
    rand_id = pipe_list[picked_index]['id']
    rand_key = f'key{randint(1, 3)}'
    new_rand_val = _suffixed('new')

    selector = {'id': rand_id}
    replacement = {rand_key: new_rand_val}
    args = {'chains': {'pipe': [selector, replacement]}}

    return pipe_list, rand_id, rand_key, args, new_rand_val


def test_action_over_list_f():
    '''Check that action_over_list_f writes the new value into the list in place.'''
    pipe_list, _, rand_key, args, new_rand_val = gen_list_keys_for_tests()

    # Sanity check: the replacement value is not in the list yet.
    for pipe_elem in pipe_list:
        assert new_rand_val not in pipe_elem.values()

    selector_and_update = args['chains']['pipe']
    action_over_list_f(pipe_list, selector_and_update)

    # At least one element must now carry the new value.
    updated = [
        pipe_elem for pipe_elem in pipe_list
        if rand_key in pipe_elem and new_rand_val in pipe_elem.values()
    ]
    assert updated


def test_replacement_f_list():
    '''Check that replacement_f updates a list nested inside the model config.'''
    pipe_list, _, rand_key, args, new_rand_val = gen_list_keys_for_tests()
    mod_conf = {'chains': {'pipe': pipe_list}}

    def _holds_new_value(pipe_elem):
        return new_rand_val in pipe_elem.values()

    # The replacement value must be absent before the call ...
    assert not any(
        _holds_new_value(elem) for elem in mod_conf['chains']['pipe']
    )

    replacement_f(model_config=mod_conf, **args)

    # ... and present, in an element that has the targeted key, afterwards.
    assert any(
        rand_key in elem and _holds_new_value(elem)
        for elem in mod_conf['chains']['pipe']
    )


def test_replacement_f_val():
    '''Check that replacement_f overwrites a top-level value in place.

    Each expected value is asserted under its own key, so the test also
    fails when the new value ends up stored under a different key.
    '''
    args = {'key3': 'newvalue'}
    mod_conf = {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}
    replacement_f(model_config=mod_conf, **args)
    assert all(
        arg_k in mod_conf and mod_conf[arg_k] == arg_v
        for arg_k, arg_v in args.items()
    )


def test_replacement_f_dict():
    '''Check that replacement_f updates a value inside a nested dictionary.'''
    mod_conf = {'1_key_3': {'2_key_2': 'oldvalue'}, '0_key_': '0_val'}
    nested_update = {'1_key_3': {'2_key_2': 'newvalue'}}

    replacement_f(model_config=mod_conf, **nested_update)

    inner = mod_conf['1_key_3']
    assert inner['2_key_2'] == 'newvalue'


# Run the action_over_list_f / replacement_f tests defined above.
test_action_over_list_f()
test_replacement_f_list()
test_replacement_f_val()
test_replacement_f_dict()

updates_faq_config_file[source]

updates_faq_config_file(configs_path, **args)

Updates a DeepPavlov JSON config file

#test updates_faq_config_file
import tempfile
from shutil import copyfile


def gen_list_keys_for_tests():
    '''Generate random fixtures for the config-file update tests.

    Returns ``(pipe_list, rand_id, rand_key, args, new_rand_val)`` where
    ``args`` is ``{'chainer': {'pipe': [{'id': rand_id},
    {rand_key: new_rand_val}]}}``.
    '''

    def _tagged(label):
        # Label followed by a random integer between 1 and 10.
        return f'{label}_{randint(1,10):1}'

    pipe_list = []
    for _ in range(randint(3, 10)):
        pipe_list.append({
            'id': _tagged('id'),
            'key1': _tagged('v1'),
            'key2': _tagged('v2'),
            'key3': _tagged('v3')
        })

    chosen = pipe_list[randint(0, len(pipe_list) - 1)]
    rand_id = chosen['id']
    rand_key = f'key{randint(1, 3)}'
    new_rand_val = _tagged('new')

    args = {
        'chainer': {
            'pipe': [{'id': rand_id}, {rand_key: new_rand_val}]
        }
    }

    return pipe_list, rand_id, rand_key, args, new_rand_val


def test_updates_faq_config_file_update_string():
    '''Check that updates_faq_config_file rewrites a string value on disk.

    A copy of the default tfidf FAQ config is updated with a new
    ``dataset_reader.data_path`` and then read back from the file.
    '''

    with tempfile.TemporaryDirectory() as tmpdirname:

        tmp_config_file = path.join(tmpdirname, 'tmp_file.json')

        copyfile(configs.faq.tfidf_logreg_en_faq, tmp_config_file)

        assert path.isfile(tmp_config_file)

        updates_faq_config_file(
            configs_path=tmp_config_file,
            dataset_reader={'data_path': 'fictional_csv_file.csv'}
        )

        # Close the handle before the temporary directory is removed.
        with open(tmp_config_file) as config_file:
            config_json = json.load(config_file)

        assert 'data_path' in config_json['dataset_reader']
        # Presence alone does not prove that the value was updated.
        assert (
            config_json['dataset_reader']['data_path'] ==
            'fictional_csv_file.csv'
        )


def test_updates_faq_config_file_update_list():
    '''Check that updates_faq_config_file rewrites a list element on disk.

    A minimal config with a random ``chainer.pipe`` list is written to a
    temporary file, updated, and read back.
    '''

    with tempfile.TemporaryDirectory() as tmpdirname:

        tmp_config_file = path.join(tmpdirname, 'tmp_file.json')

        pipe_list, rand_id, rand_key, args, new_rand_val = gen_list_keys_for_tests(
        )
        mod_conf = {
            'chainer': {
                'pipe': pipe_list
            },
            'dataset_reader': 'dataset_reader_dictionary'
        }

        # The context manager flushes and closes the file, so the config is
        # fully on disk before updates_faq_config_file reads it.
        with open(tmp_config_file, 'w') as config_file:
            json.dump(mod_conf, config_file)

        assert path.isfile(tmp_config_file)

        updates_faq_config_file(configs_path=tmp_config_file, **args)

        with open(tmp_config_file) as config_file:
            config_json = json.load(config_file)

        assert any(
            rand_key in pipe_elem.keys() and new_rand_val in pipe_elem.values()
            for pipe_elem in config_json['chainer']['pipe']
        )


# Run the updates_faq_config_file tests defined above.
test_updates_faq_config_file_update_string()
test_updates_faq_config_file_update_list()

select_faq_responses[source]

select_faq_responses(faq_model, question)

Calls Deeppavlov FAQ model

#test faq responses
import tempfile
from shutil import copyfile


def gen_mock_csv_file(tmpdirname, faqs):
    '''Write ``faqs`` to ``tmp_faq.csv`` inside ``tmpdirname``.

    ``faqs`` is anything ``pd.DataFrame`` accepts; the CSV is written
    without the index column. Returns the path of the CSV file.
    '''
    faq_frame = pd.DataFrame(faqs)
    csv_path = path.join(tmpdirname, 'tmp_faq.csv')
    faq_frame.to_csv(csv_path, index=False)
    return csv_path


def gen_mock_vocab_answers(tmpdirname, vocabs):
    '''Write a mock answers-vocabulary file inside ``tmpdirname``.

    ``vocabs`` holds two parallel sequences, ``vocabs['text']`` and
    ``vocabs['freq']``. Each pair becomes one ``<text>\\t<freq>`` line of
    ``temp_vocab_answers.dict``; lines are joined with newlines and there
    is no trailing newline. Empty sequences produce an empty file.

    Returns the path of the written file.
    '''

    temp_dict_file = path.join(tmpdirname, 'temp_vocab_answers.dict')
    vocabs_text = '\n'.join(
        f'{text}\t{freq}'
        for text, freq in zip(vocabs['text'], vocabs['freq'])
    )

    # The context manager closes the handle even when the write fails.
    with open(temp_dict_file, 'w') as vocab_file:
        vocab_file.write(vocabs_text)

    return temp_dict_file


def gen_faq_config(tmpdirname, vocab_file, faq_file):
    '''Create a FAQ config in ``tmpdirname`` that points at the mock files.

    The default tfidf FAQ config is copied to ``temp_config_faq.json`` and
    passed to updates_faq_config_file together with ``vocab_file`` (as both
    save and load path for the ``answers_vocab`` id) and ``faq_file`` (as
    the dataset reader data path). Returns the path of the copied config.
    '''
    config_copy = path.join(tmpdirname, 'temp_config_faq.json')
    copyfile(configs.faq.tfidf_logreg_en_faq, config_copy)

    pipe_update = [
        {'id': 'answers_vocab'},
        {'save_path': vocab_file, 'load_path': vocab_file},
    ]
    reader_update = {'data_path': faq_file}

    updates_faq_config_file(
        configs_path=config_copy,
        chainer={'pipe': pipe_update},
        dataset_reader=reader_update
    )

    return config_copy


def test_faq_response_with_minimum_faqs_in_dataframe_fail_case():
    '''Expect a ValueError when the FAQ data holds a single question.'''
    single_faq = {
        'Question': ['Is Covid erradicated?'],
        'Answer': ['Definitely not!']
    }
    vocab_entries = {'text': ['This is a vocab example'], 'freq': [1]}

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_csv = gen_mock_csv_file(tmpdirname, single_faq)
        vocab_dict = gen_mock_vocab_answers(tmpdirname, vocab_entries)
        faq_config = gen_faq_config(tmpdirname, vocab_dict, faq_csv)

        try:
            # Both training and querying happen inside the guarded region.
            select_faq_responses(
                question='Is Enrique the prettiest person in town?',
                faq_model=train_model(faq_config, download=True)
            )
        except ValueError:
            pass
        else:
            # No exception was raised, so the test fails.
            assert False


def test_faq_response_with_minimum_faqs_in_dataframe_success_case():
    '''Check that a FAQ model trained on two questions returns an answer.'''
    two_faqs = {
        'Question': ['Is Covid erradicated?', 'Who is the current POTUS?'],
        'Answer': ['Definitely not!', 'Donald Trump']
    }
    vocab_entries = {'text': ['This is a vocab example'], 'freq': [1]}

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_csv = gen_mock_csv_file(tmpdirname, two_faqs)
        vocab_dict = gen_mock_vocab_answers(tmpdirname, vocab_entries)
        faq_config = gen_faq_config(tmpdirname, vocab_dict, faq_csv)

        answers = select_faq_responses(
            question='Is Enrique the prettiest person in town?',
            faq_model=train_model(faq_config, download=True)
        )
        assert answers == ['Donald Trump']

        
        
def test_faq_response_with_minimum_answers_vocab_success_case():
    '''Check the FAQ answer when the answers-vocabulary file starts empty.

    Same FAQ data as the dataframe success case, but the mock vocabulary
    file contains no entries.
    '''
    with tempfile.TemporaryDirectory() as tmpdirname:

        faqs = {
            'Question': ['Is Covid erradicated?', 'Who is the current POTUS?'],
            'Answer': ['Definitely not!', 'Donald Trump']
        }

        vocabs = {'text': [], 'freq': []}

        faq_file = gen_mock_csv_file(tmpdirname, faqs)
        vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)

        configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)

        # The comparison must be asserted; as a bare expression its result
        # was discarded and the test could never fail on a wrong answer.
        assert select_faq_responses(
            question='Is Enrique the prettiest person in town?',
            faq_model=train_model(configs_file, download=True)
        ) == ['Donald Trump']

# Run the FAQ response tests defined above.
test_faq_response_with_minimum_faqs_in_dataframe_fail_case()
test_faq_response_with_minimum_faqs_in_dataframe_success_case()
test_faq_response_with_minimum_answers_vocab_success_case()
09:56:59 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:56:59 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:56:59 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:00 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz HTTP/1.1" 200 12276
100%|██████████| 12.3k/12.3k [00:00<00:00, 7.71MB/s]
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package perluniprops is already up-to-date!
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package nonbreaking_prefixes is already up-to-date!
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
09:57:02 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:02 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:57:02 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:02 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz HTTP/1.1" 200 12276
100%|██████████| 12.3k/12.3k [00:00<00:00, 9.63MB/s]
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
09:57:07 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:07 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz.md5 HTTP/1.1" 200 189
09:57:07 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:08 DEBUG:http://files.deeppavlov.ai:80 "GET /faq/mipt/en_mipt_faq_v4.tar.gz HTTP/1.1" 200 12276
100%|██████████| 12.3k/12.3k [00:00<00:00, 6.32MB/s]
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)

select_squad_responses[source]

select_squad_responses(contexts, squad_model, question, best_results=1)

Calls Deeppavlov BERT and RNET Context Question Answering

#test select_squad_responses
import tempfile
from shutil import copyfile

# Context fixtures shared by the SQuAD tests below; each one is turned into
# a DataFrame by assert_squad_model.

# No contexts at all.
empty = {'topic': [], 'context': []}
# A single multi-sentence context.
spacex = {
    'topic': ['SpaceX'],
    'context':
        [
            '''Space Exploration Technologies Corp., trading as SpaceX, is an American aerospace manufacturer and space transportation
services company headquartered in Hawthorne, California. It was founded in 2002 by Elon Musk with the goal of reducing space 
transportation costs to enable the colonization of Mars. SpaceX has developed several launch vehicles, the Starlink satellite
constellation, and the Dragon spacecraft. It is widely considered among the most successful private spaceflight companies.'''
        ]
}

# Two short contexts, each under its own topic.
intekglobal = {
    'topic': ['Intekglobal', 'InG'],
    'context':
        [
            'Intekglobal has its headquarters located in TJ',
            'Intekglobal is in the north of mexico'
        ]
}


def assert_squad_model(
    contexts, squad_model, question, expected_responses, **args
):
    '''Assert that select_squad_responses yields the expected top responses.

    ``contexts`` is converted to a DataFrame before the call; any extra
    keyword arguments are forwarded to select_squad_responses unchanged.
    '''
    context_frame = pd.DataFrame(contexts)
    # Only the second element of the returned pair is checked here.
    _, top_responses = select_squad_responses(
        contexts=context_frame,
        squad_model=squad_model,
        question=question,
        **args
    )
    assert top_responses == expected_responses


def test_squad_bert():
    '''Run the shared SQuAD scenarios against the squad_bert config.'''
    model = build_model(configs.squad.squad_bert, download=True)

    # (contexts, question, expected top responses, extra keyword arguments)
    scenarios = [
        (empty, 'Is an empty response expected?', [], {'best_results': 2}),
        (spacex, 'Who founded SpaceX?', ['Elon Musk'], {}),
        (
            intekglobal, 'Where is Intekglobal located?',
            ['north of mexico', 'TJ'], {'best_results': 2}
        ),
    ]

    for contexts, question, expected, extra in scenarios:
        assert_squad_model(
            contexts, model, question, expected_responses=expected, **extra
        )


def test_squad_rnet():
    '''Run the shared SQuAD scenarios against the squad (R-Net) config.'''
    rnet = build_model(configs.squad.squad, download=True)

    # (contexts, question, expected top responses, extra keyword arguments)
    scenarios = [
        (empty, 'Is an empty response expected?', [], {'best_results': 5}),
        (spacex, 'Who founded SpaceX?', ['Elon Musk'], {}),
        (
            intekglobal, 'Where is Intekglobal located?',
            ['north of mexico', 'TJ'], {'best_results': 2}
        ),
    ]

    for contexts, question, expected, extra in scenarios:
        assert_squad_model(
            contexts, rnet, question, expected_responses=expected, **extra
        )

# Run the SQuAD tests, then remove the module-level context fixtures.
test_squad_bert()
test_squad_rnet()
del spacex, empty, intekglobal
09:57:11 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:11 DEBUG:http://files.deeppavlov.ai:80 "GET /deeppavlov_data/bert/cased_L-12_H-768_A-12.zip.md5 HTTP/1.1" 200 386
09:57:13 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:13 DEBUG:http://files.deeppavlov.ai:80 "GET /deeppavlov_data/squad_bert.tar.gz.md5 HTTP/1.1" 200 184
09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/tokenization.py:125: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:37: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:222: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:222: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:193: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:81: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:178: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:178: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:418: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

09:57:16 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:499: The name tf.assert_less_equal is deprecated. Please use tf.compat.v1.assert_less_equal instead.

09:57:16 WARNING:
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

09:57:17 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:366: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
09:57:17 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:680: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.
09:57:17 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/python/layers/core.py:187: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.
09:57:17 WARNING:From /opt/conda/lib/python3.7/site-packages/bert_dp/modeling.py:283: The name tf.erf is deprecated. Please use tf.math.erf instead.

09:57:19 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:154: The name tf.matrix_band_part is deprecated. Please use tf.linalg.band_part instead.

09:57:19 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:166: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

09:57:19 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:234: The name tf.train.AdadeltaOptimizer is deprecated. Please use tf.compat.v1.train.AdadeltaOptimizer instead.

09:57:19 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:127: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.

09:57:19 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:127: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.

09:57:28 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:89: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.

09:57:32 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/bert/bert_squad.py:94: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
09:57:32 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/models/tf_model.py:54: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.

09:57:32 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
09:57:34 DEBUG:Responses: []
09:57:34 DEBUG:Top Responses: []
09:57:36 DEBUG:Responses: [list([['Elon Musk'], [203], [50257280.0]])]
09:57:36 DEBUG:Top Responses: ['Elon Musk']
09:57:38 DEBUG:Responses: [list([['TJ'], [44], [6978.86279296875]])
 list([['north of mexico'], [22], [81567.328125]])]
09:57:38 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:57:38 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:38 DEBUG:http://files.deeppavlov.ai:80 "GET /embeddings/wiki-news-300d-1M-char.vec.md5 HTTP/1.1" 200 61
09:57:38 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:39 DEBUG:http://files.deeppavlov.ai:80 "GET /embeddings/wiki-news-300d-1M.vec.md5 HTTP/1.1" 200 56
09:57:43 DEBUG:Starting new HTTP connection (1): files.deeppavlov.ai:80
09:57:43 DEBUG:http://files.deeppavlov.ai:80 "GET /deeppavlov_data/squad_model_1.4_cpu_compatible.tar.gz.md5 HTTP/1.1" 200 389
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:122: GRUCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/layers/tf_layers.py:595: MultiRNNCell.__init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/layers/tf_layers.py:600: dynamic_rnn (from tensorflow.python.ops.rnn) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:133: Layer.add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.add_weight` method instead.
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:139: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py:155: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/core/layers/tf_layers.py:812: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
09:57:45 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py:507: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
09:57:46 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/squad/utils.py:101: The name tf.AUTO_REUSE is deprecated. Please use tf.compat.v1.AUTO_REUSE instead.

09:57:48 WARNING:From /opt/conda/lib/python3.7/site-packages/deeppavlov/models/squad/utils.py:139: The name tf.get_variable_scope is deprecated. Please use tf.compat.v1.get_variable_scope instead.

09:57:59 WARNING:From /opt/conda/lib/python3.7/site-packages/tensorflow_core/python/ops/clip_ops.py:172: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
09:58:07 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:58:09 DEBUG:Responses: []
09:58:09 DEBUG:Top Responses: []
09:58:11 DEBUG:Responses: [list([['Elon Musk'], [203], [36056324.0]])]
09:58:11 DEBUG:Top Responses: ['Elon Musk']
09:58:11 DEBUG:Responses: [list([['TJ'], [44], [564.4996948242188]])
 list([['north of mexico'], [22], [138151.90625]])]
09:58:11 DEBUG:Top Responses: ['north of mexico', 'TJ']

load_qa_models[source]

load_qa_models(config_rnet=PosixPath('/opt/conda/lib/python3.7/site-packages/deeppavlov/configs/squad/squad.json'), config_bert=PosixPath('/opt/conda/lib/python3.7/site-packages/deeppavlov/configs/squad/squad_bert.json'), config_tfidf=PosixPath('/opt/conda/lib/python3.7/site-packages/deeppavlov/configs/faq/tfidf_logreg_en_faq.json'), download=True)

Load the SQuAD and FAQ models.

INPUT:

  • config_rnet -> path to json config file
  • config_bert -> path to json config file
  • config_tfidf -> path to json config file
  • download -> download files (True/False)

The defaults for the config files are the DeepPavlov config files. The default for download is True.

get_responses[source]

get_responses(contexts, question, qa_models, nb_squad_results=1)

Generates responses from a question

INPUT:

  • question -> question string

  • contexts -> list of contexts

  • qa_models -> dictionary of available models (see load_qa_models)
# test get_responses
import tempfile
from shutil import copyfile

# Context fixture for the get_responses tests (converted to a DataFrame there).
intekglobal_context = {
    'topic': ['Intekglobal', 'InG'],
    'context':
        [
            'Intekglobal has its headquarters located in TJ',
            'Intekglobal is in the north of mexico'
        ]
}

# FAQ fixture written to a CSV file by mock_faq_files.
intekglobal_faqs = {
    'Question': ['Is Intekglobal an IT company?', 'Where can I apply?'],
    'Answer':
        ['Yes it is!', 'Please refer the our website for further information']
}


def mock_faq_files(tmpdirname, faqs):
    '''Create a mock FAQ CSV and a matching config file in ``tmpdirname``.

    Returns a dictionary with the paths under ``'data'`` (the CSV) and
    ``'config'`` (a copy of the default tfidf FAQ config that is passed to
    updates_faq_config_file with the CSV as data path).
    '''
    data_path = path.join(tmpdirname, 'temp_faq.csv')
    config_path = path.join(tmpdirname, 'temp_config_faq.json')

    faq_frame = pd.DataFrame(faqs)
    faq_frame.to_csv(data_path, index=False)
    copyfile(configs.faq.tfidf_logreg_en_faq, config_path)

    updates_faq_config_file(
        configs_path=config_path,
        dataset_reader={'data_path': data_path}
    )

    return {'data': data_path, 'config': config_path}


def test_get_intekglobal_responses():
    '''Check that every SQuAD response for the Intekglobal question is a known answer.'''
    accepted = ('north of mexico', 'TJ', 'Yes it is!')

    with tempfile.TemporaryDirectory() as tmpdirname:

        faq_files = mock_faq_files(tmpdirname, intekglobal_faqs)
        qa_models = load_qa_models(
            config_tfidf=faq_files['config'], download=False
        )

        context_frame = pd.DataFrame(intekglobal_context)
        question, responses = get_responses(
            context_frame,
            'Where is Intekglobal?',
            qa_models,
            nb_squad_results=2
        )

        logging.debug(f' Question: {question}')
        logging.debug(f" Responses: {responses}")

        # Only the 'squad' group of responses is inspected.
        for model_responses in responses['squad'].values():
            for response in model_responses:
                assert response in accepted


def test_get_responses_with_empty_context():
    '''Check the tfidf FAQ answer when no contexts are supplied.'''
    min_faqs = {
        'Question':
            ['Minimum number of questions?', 'This is the other question?'],
        'Answer': ['Two', 'yes']
    }
    empty_context = {'topic': [], 'context': []}

    with tempfile.TemporaryDirectory() as tmpdirname:
        faq_files = mock_faq_files(tmpdirname, min_faqs)
        qa_models = load_qa_models(
            config_tfidf=faq_files['config'], download=False
        )

        context_frame = pd.DataFrame(empty_context)
        question, responses = get_responses(
            context_frame,
            'What is the minimun number of FAQ questions',
            qa_models,
            nb_squad_results=2
        )

        logging.debug(f' Question: {question}')
        logging.debug(f' Responses: {responses}')

        tfidf_answers = responses['faq']['tfidf']
        assert tfidf_answers == ['Two']


# Run the get_responses tests, then remove the module-level context fixture.
test_get_intekglobal_responses()
test_get_responses_with_empty_context()

del intekglobal_context
09:58:39 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
09:59:08 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
09:59:28 DEBUG:Responses: [list([['TJ'], [44], [120.95974731445312]])
 list([['north of mexico'], [22], [174602.40625]])]
09:59:28 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:59:31 DEBUG:Responses: [list([['TJ'], [44], [22507.34375]])
 list([['north of mexico'], [22], [269778.3125]])]
09:59:31 DEBUG:Top Responses: ['north of mexico', 'TJ']
09:59:31 DEBUG: Question: Where is Intekglobal?
09:59:31 DEBUG: Responses: {'squad': defaultdict(<class 'list'>, {'rnet': ['north of mexico', 'TJ'], 'bert': ['north of mexico', 'TJ']}), 'faq': defaultdict(<class 'list'>, {'tfidf': ['Yes it is!']})}
09:59:53 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
10:00:21 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
10:00:27 DEBUG:Responses: []
10:00:27 DEBUG:Top Responses: []
10:00:27 DEBUG:Responses: []
10:00:27 DEBUG:Top Responses: []
10:00:27 DEBUG: Question: What is the minimun number of FAQ questions
10:00:27 DEBUG: Responses: {'squad': defaultdict(<class 'list'>, {'rnet': [], 'bert': []}), 'faq': defaultdict(<class 'list'>, {'tfidf': ['Two']})}

format_responses[source]

format_responses(dict_responses)

Format question-response pair

INPUT:

  • dictionary of responses

OUTPUT:

  • list of flattened responses

  • response as string

#test
def test_format_responses():
    '''Check the flattened list and the formatted string from format_responses.'''
    squad_part = {'1': ['sq_11', 'sq_12'], '2': ['sq_21']}
    faq_part = {'3': ['fq_11'], '4': ['fq_21', 'fq_22']}
    dict_responses = {'sq': squad_part, 'fq': faq_part}

    flatten_responses, formatted_response = format_responses(
        dict_responses=dict_responses
    )

    expected_arr = ['sq_11', 'sq_12', 'sq_21', 'fq_11', 'fq_21', 'fq_22']
    assert flatten_responses == expected_arr
    # Every response must appear somewhere in the formatted output.
    for res in expected_arr:
        assert res in formatted_response
# Run the format_responses test defined above.
test_format_responses()
10:00:28 DEBUG:['sq_11', 'sq_12', 'sq_21', 'fq_11', 'fq_21', 'fq_22']
10:00:28 DEBUG:
 Answers:

1: fq_21
2: sq_21
3: fq_11
4: sq_11
5: sq_12
6: fq_22

get_input[source]

get_input(text)

This redundancy is needed for testing

question_response[source]

question_response(data, qa_models, num_returned_values_per_squad_model=1)

Receive the user's question and call get_responses()

##Test FAQ dialog system's part
import tempfile
from unittest.mock import patch
from shutil import copyfile
from collections import defaultdict

def mock_faq_files(tmpdirname, faqs, faq_dic):
    '''Create mock FAQ files in ``tmpdirname`` and record them in ``faq_dic``.

    ``faq_dic`` is filled in place with ``'path'`` (CSV path), ``'config'``
    (path of the copied tfidf FAQ config) and ``'df'`` (the FAQ DataFrame).
    The copied config is passed to updates_faq_config_file with the CSV as
    data path. Nothing is returned.
    '''
    csv_path = path.join(tmpdirname, 'temp_faq.csv')
    config_path = path.join(tmpdirname, 'temp_config_faq.json')
    faq_frame = pd.DataFrame(faqs)

    faq_dic['path'] = csv_path
    faq_dic['config'] = config_path
    faq_dic['df'] = faq_frame

    faq_frame.to_csv(csv_path, index=False)
    copyfile(configs.faq.tfidf_logreg_en_faq, config_path)

    updates_faq_config_file(
        configs_path=config_path,
        dataset_reader={'data_path': csv_path}
    )


def mock_context_file(tmpdirname, contexts, context_dic):
    '''Write ``contexts`` to ``temp_context.csv`` and record it in ``context_dic``.

    ``context_dic`` is filled in place with ``'path'`` (the CSV path) and
    ``'df'`` (the contexts as a DataFrame). Nothing is returned.
    '''
    csv_path = path.join(tmpdirname, 'temp_context.csv')
    context_frame = pd.DataFrame(contexts)

    context_dic['path'] = csv_path
    context_dic['df'] = context_frame

    context_frame.to_csv(csv_path, index=False)


@patch('__main__.get_input')
def test_context_response_with_no_updates(mock_input):
    '''Check question_response end to end with a mocked user question.

    ``get_input`` is patched so that it returns the question; the formatted
    responses must contain the answer found in the biography context.
    '''
    mock_input.side_effect = ['Who is Enrique Jimenez?']

    data = {'context': defaultdict(str), 'faq': defaultdict(str)}

    contexts = {
        'context':
            [
                'Intekglobal has its headquarters located in TJ',
                'In Intekglobal we care about you',
                '''Enrique Jimenez is one of the smartest minds on the planet, 
                   he currently works as Intekglobal employee'''
            ],
        'topic': ['headquarters', 'mission', 'Enrique\'s biography']
    }
    faqs = {
        'Question':
            ['Minimum number of questions?', 'This is the other question?'],
        'Answer': ['Two', 'yes']
    }

    with tempfile.TemporaryDirectory() as tmpdirname:

        # Both helpers fill the matching sub-dictionary of ``data`` in place.
        mock_faq_files(tmpdirname, faqs, data['faq'])
        mock_context_file(tmpdirname, contexts, data['context'])

        faq_config = data['faq']['config']
        qa_models = load_qa_models(config_tfidf=faq_config, download=False)

        question, responses = question_response(data, qa_models)

        logging.debug(f'  {question}')
        logging.debug(f'  {responses}')

        assert 'Who is Enrique Jimenez?' == question
        assert 'one of the smartest minds on the planet' in responses

test_context_response_with_no_updates()
10:00:46 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
10:01:02 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
10:01:09 DEBUG:Responses: [list([['Intekglobal'], [0], [160.03579711914062]])
 list([['Intekglobal'], [3], [244.86245727539062]])
 list([['one of the smartest minds on the planet'], [19], [1664652.0]])]
10:01:09 DEBUG:Top Responses: ['one of the smartest minds on the planet']
10:01:13 DEBUG:Responses: [list([[''], [-1], [0.0011892060283571482]])
 list([[''], [-1], [0.01691678911447525]])
 list([['one of the smartest minds on the planet, \n                   he currently works as Intekglobal employee'], [19], [18812.87109375]])]
10:01:13 DEBUG:Top Responses: ['one of the smartest minds on the planet, \n                   he currently works as Intekglobal employee']
10:01:13 DEBUG:['one of the smartest minds on the planet', 'one of the smartest minds on the planet, \n                   he currently works as Intekglobal employee', 'yes']
10:01:13 DEBUG:
 Answers:

1: one of the smartest minds on the planet, 
                   he currently works as Intekglobal employee
2: one of the smartest minds on the planet
3: yes

10:01:13 DEBUG:  Who is Enrique Jimenez?
10:01:13 DEBUG:  
 Answers:

1: one of the smartest minds on the planet, 
                   he currently works as Intekglobal employee
2: one of the smartest minds on the planet
3: yes

new_question_answer[source]

new_question_answer(data, qa_models)

Asks for a new question-answer pair, stores the result in the faq dataframe, and retrains the faq model

INPUT:

  • dictionary of data
  • dictionary of question-answer models

OUTPUT:

  • None: Updates the dictionaries of data and models

#tests


@patch('__main__.get_input')
def test_new_question_answer(mock_input):
    '''Check that new_question_answer stores the scripted answer once.

    get_input is patched to return a question and then its answer. After
    new_question_answer runs, the faq csv is read back from disk and must
    contain exactly one row whose 'Answer' equals the new answer.
    '''
    asked = 'What is Intekglobal?'
    taught_answer = 'Intekglobal is one of the best companies in the world'
    mock_input.side_effect = [asked, taught_answer]

    data = {'context': defaultdict(str), 'faq': defaultdict(str)}

    seed_faqs = {
        'Question': ['Who  owns Tesla Company?', 'Is this is heaven?'],
        'Answer': [
            'Elon Musk is the owner of Tesla',
            'No, it is life on earth',
        ],
    }

    with tempfile.TemporaryDirectory() as workdir:
        mock_faq_files(workdir, seed_faqs, data['faq'])

        faq_config = data['faq']['config']
        qa_models = load_qa_models(config_tfidf=faq_config, download=False)

        new_question_answer(data, qa_models)

        stored = pd.read_csv(data['faq']['path'])
        matching_rows = stored[stored['Answer'] == taught_answer]

        assert len(matching_rows) == 1


test_new_question_answer()
10:01:41 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_model/model
10:02:09 INFO:Restoring parameters from /home/jovyan/.deeppavlov/models/squad_bert/model
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
/opt/conda/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.
  "this warning.", FutureWarning)
10:02:20 INFO:FAQ dataset and model updated..

new_context[source]

new_context(data)

Stores the new context in the context dataframe

INPUT:

  • dictionary of data

OUTPUT:

  • None: Updates the dictionary of data

@patch('__main__.get_input')
def test_new_context(mock_input):
    '''Check that new_context stores the scripted topic exactly once.

    get_input is patched to return a topic and then a context text. After
    new_context runs, the context csv is read back from disk and must
    contain exactly one row whose 'topic' equals the new topic.
    '''
    data = {'context': defaultdict(str), 'faq': defaultdict(str)}

    new_topic = 'AI Tool & Chatbot Development'
    # Same value as the original multi-line literal: two leading and two
    # trailing line breaks, and a trailing space after 'Messenger,'.
    new_context_str = (
        '\n\n'
        'A chatbot is an important tool for simulating intelligent '
        'conversations with humans.\n'
        'Intekglobal chatbots efficiently live message on platforms such as '
        'Facebook Messenger, \n'
        'Slack, and Telegram. But chatbots are more than just a cool '
        'technology advancement.\n'
        '\n'
    )

    # Same value as the original literal, whose continuation lines carry a
    # 16-space indent.
    ramones_text = (
        'One of the greatest punk rock bands from all the time\n'
        + ' ' * 16 + 'is the Ramones.\n'
        + ' ' * 16
    )
    contexts = {'context': [ramones_text], 'topic': ['Ramones']}

    mock_input.side_effect = [new_topic, new_context_str]

    with tempfile.TemporaryDirectory() as workdir:
        function_repr = str(new_context)
        logging.debug(function_repr)

        mock_context_file(workdir, contexts, data['context'])
        new_context(data)

        stored = pd.read_csv(data['context']['path'])
        matching_rows = stored[stored.topic == new_topic]

        assert len(matching_rows) == 1


test_new_context()
10:02:20 DEBUG:<function new_context at 0x7f60e148c170>
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py:7138: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  sort=sort,
10:02:20 INFO:contexts dataset updated..

set_minimal_faq_questions[source]

set_minimal_faq_questions(data)

Sets the faq configurations that assure a proper operation.

If it does not exist, a non-empty dataframe for faq is created with 'Question' and 'Answer' as columns

set_minimal_contexts[source]

set_minimal_contexts(data)

Sets the context configurations that assure a proper operation.

If it does not exist, an empty dataframe is created with 'topic' and 'context' as columns

set_data_dict[source]

set_data_dict(file, data, question_type, data_dir)

Creates nonexistent files

load_and_prepare_data[source]

load_and_prepare_data(context_data_file, faq_data_file, data, configs_faq)

Calls the context and faq configuration routines.

If the dataframe files are missing, it will create them in the data directory.

If the data frames are provided they must have the following columns for proper functioning:

  • context: 'topic', 'context'
  • faq: 'Question', 'Answer'
#tests
import tempfile,logging
import pandas as pd
from collections import defaultdict
from shutil import rmtree
from os import path,popen
from unittest.mock import patch


def test_set_minimal_faqs_with_more_than_one_question():
    '''A faq table that already has two rows must still have two rows
    after set_minimal_faq_questions is applied.
    '''
    seed = pd.DataFrame({'Question': ['a?', 'b?'], 'Answer': ['a', 'b']})

    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, 'tmp_data.csv')
        seed.to_csv(csv_path, index=False)

        data = {'df': seed, 'path': csv_path}
        set_minimal_faq_questions(data)

        assert len(data['df']) == 2


def test_set_minimal_faqs_with_less_than_two_questions():
    '''A faq table with a single row must have three rows after
    set_minimal_faq_questions is applied, one of them being the
    'Is this the Intekglobal Dialog System?' question.
    '''
    seed = pd.DataFrame({'Question': ['a?'], 'Answer': ['a']})

    with tempfile.TemporaryDirectory() as workdir:
        csv_path = path.join(workdir, 'tmp_data.csv')
        seed.to_csv(csv_path, index=False)
        data = {'df': seed, 'path': csv_path}

        # Precondition: only the single seed row is present.
        assert len(data['df']) == 1

        set_minimal_faq_questions(data)

        assert len(data['df']) == 3
        default_question = 'Is this the Intekglobal Dialog System?'
        assert (data['df'].Question == default_question).any()


def test_set_minimal_contexts():
    '''Starting from an empty dataframe and a path with no file behind it,
    set_minimal_contexts must create the file and leave a dataframe whose
    columns are exactly 'topic' and 'context', in that order.
    '''
    with tempfile.TemporaryDirectory() as tmpdirname:
        data_file = path.join(tmpdirname, 'tmp_data.csv')
        data = {'df': pd.DataFrame(), 'path': data_file}
        set_minimal_contexts(data)
        assert path.isfile(data['path'])
        # Compare as a plain list: an element-wise `Index == list` raises
        # ValueError when the number of columns differs, which would hide
        # the real failure behind an unrelated error.
        assert list(data['df'].columns) == ['topic', 'context']


def test_set_data_dict_no_file():
    '''With no input file given, set_data_dict must fill the 'path' entry
    of the context dictionary with a file that exists on disk.
    '''
    data = {'context': defaultdict(str)}

    with tempfile.TemporaryDirectory() as workdir:
        set_data_dict(
            file=None,
            data=data['context'],
            data_dir=workdir,
            question_type='context',
        )
        logging.debug(data)

        created_path = data['context']['path']
        assert path.isfile(created_path)


@patch('__main__.popen')
def test_load_and_prepare_data(mock_popen):
    '''With no data files given, load_and_prepare_data must create a
    'data' directory under the directory reported by the patched popen.
    '''
    with tempfile.TemporaryDirectory() as workdir:
        # Script popen("$PWD").read().strip() to return the temp directory.
        # NOTE(review): presumably this is how load_and_prepare_data finds
        # its working directory - confirm against its implementation.
        pwd_output = mock_popen("$PWD").read()
        pwd_output.strip.side_effect = [workdir]

        data = {'context': defaultdict(str), 'faq': defaultdict(str)}

        load_and_prepare_data(
            context_data_file=None,
            faq_data_file=None,
            data=data,
            configs_faq=None,
        )

        expected_dir = path.join(workdir, 'data')
        assert path.isdir(expected_dir)


# Run the data-preparation tests; a failing assertion raises and stops the run.
test_set_minimal_faqs_with_more_than_one_question()
test_set_minimal_faqs_with_less_than_two_questions()
test_set_minimal_contexts()
test_set_data_dict_no_file()
test_load_and_prepare_data()
10:02:20 INFO: File created at /tmp/tmptuu9vt86/tmp_data.csv
10:02:20 INFO: File created at /tmp/tmphqp6ty6p/tmp_data.csv
10:02:20 INFO: File created at /tmp/tmpaj9wgtv5/context_data.csv
10:02:20 DEBUG:{'context': defaultdict(<class 'str'>, {'path': '/tmp/tmpaj9wgtv5/context_data.csv', 'df': Empty DataFrame
Columns: [topic, context]
Index: []})}
10:02:20 INFO:Data directory created at /tmp/tmp8srz9dtl/data
10:02:20 INFO: File created at /tmp/tmp8srz9dtl/data/faq_data.csv
10:02:20 INFO: File created at /tmp/tmp8srz9dtl/data/context_data.csv