Question Answering Automated Dialog System
# test action_over_list_f
from random import randint
def gen_list_keys_for_tests():
    '''Generate a random pipe list plus the id, key, args, and new value used by the tests.'''
str_n = lambda x: f'{x}_{randint(1,10):1}'
gen_dict_list = lambda: {
'id': str_n('id'),
'key1': str_n('v1'),
'key2': str_n('v2'),
'key3': str_n('v3')
}
pipe_list = [gen_dict_list() for _ in range(randint(3, 10))]
rand_id = pipe_list[randint(0, len(pipe_list) - 1)]['id']
rand_key = f'key{randint(1, 3)}'
new_rand_val = str_n('new')
args = {
'chains': {
'pipe': [{
'id': rand_id
}, {
                rand_key: new_rand_val
}]
}
}
return pipe_list, rand_id, rand_key, args, new_rand_val
def test_action_over_list_f():
pipe_list, rand_id, rand_key, args, new_rand_val = gen_list_keys_for_tests()
assert all(
new_rand_val not in pipe_elem.values() for pipe_elem in pipe_list
)
action_over_list_f(pipe_list, args['chains']['pipe'])
assert any(
rand_key in pipe_elem.keys() and
new_rand_val in pipe_elem.values() for pipe_elem in pipe_list
)
def test_replacement_f_list():
pipe_list, rand_id, rand_key, args, new_rand_val = gen_list_keys_for_tests()
mod_conf = {'chains': {'pipe': pipe_list}}
assert all(
new_rand_val not in pipe_elem.values()
for pipe_elem in mod_conf['chains']['pipe']
)
replacement_f(model_config=mod_conf, **args)
assert any(
rand_key in pipe_elem.keys() and
new_rand_val in pipe_elem.values()
for pipe_elem in mod_conf['chains']['pipe']
)
def test_replacement_f_val():
args = {'key3': 'newvalue'}
mod_conf = {'key1': 'val1', 'key2': 'val2', 'key3': 'val3'}
replacement_f(model_config=mod_conf, **args)
assert all(
arg_k in mod_conf.keys() and arg_v in mod_conf.values()
for arg_k, arg_v in args.items()
)
def test_replacement_f_dict():
args = {'1_key_3': {'2_key_2': 'newvalue'}}
mod_conf = {'1_key_3': {'2_key_2': 'oldvalue'}, '0_key_': '0_val'}
replacement_f(model_config=mod_conf, **args)
assert mod_conf['1_key_3']['2_key_2'] == 'newvalue'
test_action_over_list_f()
test_replacement_f_list()
test_replacement_f_val()
test_replacement_f_dict()
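# The two helpers exercised above are assumed to be defined earlier in this
# document; the minimal sketches below only restate the behaviour the
# assertions rely on and are not the project's actual implementation.
def action_over_list_f(pipe_list, pipe_args):
    # pipe_args is expected to look like [{'id': <target id>}, {<key>: <new value>}]:
    # find the list element whose 'id' matches and update it in place.
    target_id, update = pipe_args[0]['id'], pipe_args[1]
    for pipe_elem in pipe_list:
        if pipe_elem.get('id') == target_id:
            pipe_elem.update(update)
def replacement_f(model_config, **args):
    # Recursively merge the keyword arguments into model_config: list values
    # are delegated to action_over_list_f, nested dicts are merged key by key,
    # and plain values overwrite the existing entry.
    for key, value in args.items():
        if isinstance(model_config.get(key), list):
            action_over_list_f(model_config[key], value)
        elif isinstance(value, dict):
            replacement_f(model_config=model_config.setdefault(key, {}), **value)
        else:
            model_config[key] = value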
# test updates_faq_config_file
import tempfile
from shutil import copyfile
def gen_list_keys_for_tests():
    # Redefined for this cell: the update args now target the 'chainer' section of a config.
str_n = lambda x: f'{x}_{randint(1,10):1}'
gen_dict_list = lambda: {
'id': str_n('id'),
'key1': str_n('v1'),
'key2': str_n('v2'),
'key3': str_n('v3')
}
pipe_list = [gen_dict_list() for _ in range(randint(3, 10))]
rand_id = pipe_list[randint(0, len(pipe_list) - 1)]['id']
rand_key = f'key{randint(1, 3)}'
new_rand_val = str_n('new')
pipe_dict = {'pipe': [{'id': rand_id}, {rand_key: new_rand_val}]}
args = {'chainer': pipe_dict}
return pipe_list, rand_id, rand_key, args, new_rand_val
def test_updates_faq_config_file_update_string():
with tempfile.TemporaryDirectory() as tmpdirname:
tmp_config_file = path.join(tmpdirname, 'tmp_file.json')
copyfile(configs.faq.tfidf_logreg_en_faq, tmp_config_file)
assert path.isfile(tmp_config_file)
updates_faq_config_file(
configs_path=tmp_config_file,
dataset_reader={'data_path': 'fictional_csv_file.csv'}
)
        with open(tmp_config_file) as config_f:
            config_json = json.load(config_f)
assert 'data_path' in config_json['dataset_reader']
def test_updates_faq_config_file_update_list():
with tempfile.TemporaryDirectory() as tmpdirname:
tmp_config_file = path.join(tmpdirname, 'tmp_file.json')
        pipe_list, rand_id, rand_key, args, new_rand_val = gen_list_keys_for_tests()
mod_conf = {
'chainer': {
'pipe': pipe_list
},
'dataset_reader': 'dataset_reader_dictionary'
}
        with open(tmp_config_file, 'w') as config_f:
            json.dump(mod_conf, config_f)
assert path.isfile(tmp_config_file)
updates_faq_config_file(configs_path=tmp_config_file, **args)
        with open(tmp_config_file) as config_f:
            config_json = json.load(config_f)
assert any(
rand_key in pipe_elem.keys() and new_rand_val in pipe_elem.values()
for pipe_elem in config_json['chainer']['pipe']
)
test_updates_faq_config_file_update_string()
test_updates_faq_config_file_update_list()
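# For reference, a minimal sketch of what updates_faq_config_file is assumed
# to do (the actual implementation is defined earlier in this document): load
# the JSON config, merge the keyword arguments into it with replacement_f,
# and write the result back to the same path.
def updates_faq_config_file(configs_path, **args):
    with open(configs_path) as config_f:
        model_config = json.load(config_f)
    replacement_f(model_config=model_config, **args)
    with open(configs_path, 'w') as config_f:
        json.dump(model_config, config_f, indent=2)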
# test select_faq_responses
import tempfile
from shutil import copyfile
def gen_mock_csv_file(tmpdirname, faqs):
temp_faq_csv = path.join(tmpdirname, 'tmp_faq.csv')
pd.DataFrame(faqs).to_csv(temp_faq_csv, index=False)
return temp_faq_csv
def gen_mock_vocab_answers(tmpdirname, vocabs):
temp_dict_file = path.join(tmpdirname, 'temp_vocab_answers.dict')
vocabs_text = '\n'.join(
t + '\t' + str(f) for t, f in zip(vocabs['text'], vocabs['freq'])
)
    with open(temp_dict_file, 'w') as vocab_f:
        vocab_f.write(vocabs_text)
return temp_dict_file
def gen_faq_config(tmpdirname, vocab_file, faq_file):
temp_configs_faq = path.join(tmpdirname, 'temp_config_faq.json')
copyfile(configs.faq.tfidf_logreg_en_faq, temp_configs_faq)
changes_dict = {'save_path': vocab_file, 'load_path': vocab_file}
id_dict = {'id': 'answers_vocab'}
updates_faq_config_file(
configs_path=temp_configs_faq,
chainer={'pipe': [id_dict, changes_dict]},
dataset_reader={'data_path': faq_file}
)
return temp_configs_faq
def test_faq_response_with_minimum_faqs_in_dataframe_fail_case():
with tempfile.TemporaryDirectory() as tmpdirname:
faqs = {
            'Question': ['Is Covid eradicated?'],
'Answer': ['Definitely not!']
}
vocabs = {'text': ['This is a vocab example'], 'freq': [1]}
faq_file = gen_mock_csv_file(tmpdirname, faqs)
vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)
configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)
        try:
            select_faq_responses(
                question='Is Enrique the prettiest person in town?',
                faq_model=train_model(configs_file, download=True)
            )
            assert False, 'ValueError expected for a single-entry FAQ file'
        except ValueError:
            pass
def test_faq_response_with_minimum_faqs_in_dataframe_success_case():
with tempfile.TemporaryDirectory() as tmpdirname:
faqs = {
            'Question': ['Is Covid eradicated?', 'Who is the current POTUS?'],
'Answer': ['Definitely not!', 'Donald Trump']
}
vocabs = {'text': ['This is a vocab example'], 'freq': [1]}
faq_file = gen_mock_csv_file(tmpdirname, faqs)
vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)
configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)
assert select_faq_responses(
question='Is Enrique the prettiest person in town?',
faq_model=train_model(configs_file, download=True)
) == ['Donald Trump']
def test_faq_response_with_minimum_answers_vocab_success_case():
with tempfile.TemporaryDirectory() as tmpdirname:
faqs = {
            'Question': ['Is Covid eradicated?', 'Who is the current POTUS?'],
'Answer': ['Definitely not!', 'Donald Trump']
}
vocabs = {'text': [], 'freq': []}
faq_file = gen_mock_csv_file(tmpdirname, faqs)
vocab_file = gen_mock_vocab_answers(tmpdirname, vocabs)
configs_file = gen_faq_config(tmpdirname, vocab_file, faq_file)
        assert select_faq_responses(
            question='Is Enrique the prettiest person in town?',
            faq_model=train_model(configs_file, download=True)
        ) == ['Donald Trump']
test_faq_response_with_minimum_faqs_in_dataframe_fail_case()
test_faq_response_with_minimum_faqs_in_dataframe_success_case()
test_faq_response_with_minimum_answers_vocab_success_case()
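# Why the fail case above expects a ValueError (assumption): the
# tfidf_logreg_en_faq pipeline fits a scikit-learn logistic regression over
# the FAQ answers, and that estimator requires at least two distinct classes,
# so training on a single-row FAQ file fails inside train_model. The
# underlying constraint in isolation:
from sklearn.linear_model import LogisticRegression
try:
    LogisticRegression().fit([[0.0], [1.0]], ['single_class', 'single_class'])
except ValueError as err:
    logging.debug(f' single-class fit rejected: {err}')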
# test select_squad_responses
import tempfile
from shutil import copyfile
empty = {'topic': [], 'context': []}
spacex = {
'topic': ['SpaceX'],
'context':
[
'''Space Exploration Technologies Corp., trading as SpaceX, is an American aerospace manufacturer and space transportation
services company headquartered in Hawthorne, California. It was founded in 2002 by Elon Musk with the goal of reducing space
transportation costs to enable the colonization of Mars. SpaceX has developed several launch vehicles, the Starlink satellite
constellation, and the Dragon spacecraft. It is widely considered among the most successful private spaceflight companies.'''
]
}
intekglobal = {
'topic': ['Intekglobal', 'InG'],
'context':
[
'Intekglobal has its headquarters located in TJ',
'Intekglobal is in the north of mexico'
]
}
def assert_squad_model(
contexts, squad_model, question, expected_responses, **args
):
responses, top_responses = select_squad_responses(
contexts=pd.DataFrame(contexts),
squad_model=squad_model,
question=question,
**args
)
assert top_responses == expected_responses
def test_squad_bert():
bert = build_model(configs.squad.squad_bert, download=True)
assert_squad_model(
empty,
bert,
'Is an empty response expected?',
expected_responses=[],
best_results=2
)
assert_squad_model(
spacex, bert, 'Who founded SpaceX?', expected_responses=['Elon Musk']
)
assert_squad_model(
intekglobal,
bert,
'Where is Intekglobal located?',
        expected_responses=['north of mexico', 'TJ'],
best_results=2
)
def test_squad_rnet():
    rnet = build_model(configs.squad.squad, download=True)
    assert_squad_model(
        empty,
        rnet,
        'Is an empty response expected?',
        expected_responses=[],
        best_results=5
    )
    assert_squad_model(
        spacex, rnet, 'Who founded SpaceX?', expected_responses=['Elon Musk']
    )
    assert_squad_model(
        intekglobal,
        rnet,
        'Where is Intekglobal located?',
        expected_responses=['north of mexico', 'TJ'],
        best_results=2
    )
test_squad_bert()
test_squad_rnet()
del spacex, empty, intekglobal
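# Illustrative sketch only (the real select_squad_responses is defined
# earlier in this document): each stored context is passed to the SQuAD model
# together with the question, and the highest-scoring extracted answers are
# returned. The [[answer], [start], [score]] output layout is an assumption
# about the model's chainer configuration.
def select_squad_responses(contexts, squad_model, question, best_results=1):
    predictions = [
        squad_model([context], [question]) for context in contexts['context']
    ]
    ranked = sorted(predictions, key=lambda pred: pred[2][0], reverse=True)
    top_responses = [pred[0][0] for pred in ranked[:best_results]]
    return predictions, top_responses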
# test get_responses
import tempfile
from shutil import copyfile
intekglobal_context = {
'topic': ['Intekglobal', 'InG'],
'context':
[
'Intekglobal has its headquarters located in TJ',
'Intekglobal is in the north of mexico'
]
}
intekglobal_faqs = {
'Question': ['Is Intekglobal an IT company?', 'Where can I apply?'],
'Answer':
        ['Yes it is!', 'Please refer to our website for further information']
}
def mock_faq_files(tmpdirname, faqs):
faq_files = {
'data': path.join(tmpdirname, 'temp_faq.csv'),
'config': path.join(tmpdirname, 'temp_config_faq.json')
}
pd.DataFrame(faqs).to_csv(faq_files['data'], index=False)
copyfile(configs.faq.tfidf_logreg_en_faq, faq_files['config'])
updates_faq_config_file(
configs_path=faq_files['config'],
dataset_reader={'data_path': faq_files['data']}
)
return faq_files
def test_get_intekglobal_responses():
with tempfile.TemporaryDirectory() as tmpdirname:
faq_files = mock_faq_files(tmpdirname, intekglobal_faqs)
qa_models = load_qa_models(
config_tfidf=faq_files['config'], download=False
)
question, responses = get_responses(
pd.DataFrame(intekglobal_context),
'Where is Intekglobal?',
qa_models,
nb_squad_results=2
)
logging.debug(f' Question: {question}')
        logging.debug(f' Responses: {responses}')
assert all(
response in ('north of mexico', 'TJ', 'Yes it is!')
for model_responses in responses['squad'].values()
for response in model_responses
)
def test_get_responses_with_empty_context():
with tempfile.TemporaryDirectory() as tmpdirname:
min_faqs = {
'Question':
['Minimum number of questions?', 'This is the other question?'],
'Answer': ['Two', 'yes']
}
faq_files = mock_faq_files(tmpdirname, min_faqs)
qa_models = load_qa_models(
config_tfidf=faq_files['config'], download=False
)
empty_context = {'topic': [], 'context': []}
question, responses = get_responses(
pd.DataFrame(empty_context),
            'What is the minimum number of FAQ questions?',
qa_models,
nb_squad_results=2
)
logging.debug(f' Question: {question}')
logging.debug(f' Responses: {responses}')
assert responses['faq']['tfidf'] == ['Two']
test_get_intekglobal_responses()
test_get_responses_with_empty_context()
del intekglobal_context
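# A compact sketch of get_responses (illustrative only; the real function is
# defined earlier in this document). qa_models is assumed to be a dict of the
# form {'squad': {name: model}, 'faq': {name: model}}, matching the keys that
# the assertions above read back.
def get_responses(contexts, question, qa_models, nb_squad_results=1):
    responses = {'squad': {}, 'faq': {}}
    for name, squad_model in qa_models['squad'].items():
        _, top_responses = select_squad_responses(
            contexts, squad_model, question, best_results=nb_squad_results
        )
        responses['squad'][name] = top_responses
    for name, faq_model in qa_models['faq'].items():
        responses['faq'][name] = select_faq_responses(
            question=question, faq_model=faq_model
        )
    return question, responses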
# test format_responses
def test_format_responses():
dict_responses = {
'sq': {
'1': ['sq_11', 'sq_12'],
'2': ['sq_21']
},
'fq': {
'3': ['fq_11'],
'4': ['fq_21', 'fq_22']
}
}
flatten_responses, formatted_response = format_responses(
dict_responses=dict_responses
)
    expected_arr = ['sq_11', 'sq_12', 'sq_21', 'fq_11', 'fq_21', 'fq_22']
assert flatten_responses == expected_arr
assert all(res in formatted_response for res in expected_arr)
test_format_responses()
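# A minimal sketch consistent with the assertions above (the project's
# format_responses is defined earlier in this document): flatten the nested
# {family: {model: [responses]}} dict and build an enumerated answer string.
def format_responses(dict_responses):
    flatten_responses = [
        response
        for family in dict_responses.values()
        for model_responses in family.values()
        for response in model_responses
    ]
    formatted_response = '\n'.join(
        f'{num}) {response}'
        for num, response in enumerate(flatten_responses, start=1)
    )
    return flatten_responses, formatted_response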
## Test the dialog system's interactive FAQ and context helpers
import tempfile
from unittest.mock import patch
from shutil import copyfile
from collections import defaultdict
def mock_faq_files(tmpdirname, faqs, faq_dic):
faq_dic['path'] = path.join(tmpdirname, 'temp_faq.csv')
faq_dic['config'] = path.join(tmpdirname, 'temp_config_faq.json')
faq_dic['df'] = pd.DataFrame(faqs)
faq_dic['df'].to_csv(faq_dic['path'], index=False)
copyfile(configs.faq.tfidf_logreg_en_faq, faq_dic['config'])
updates_faq_config_file(
configs_path=faq_dic['config'],
dataset_reader={'data_path': faq_dic['path']}
)
def mock_context_file(tmpdirname, contexts, context_dic):
context_dic['path'] = path.join(tmpdirname, 'temp_context.csv')
context_dic['df'] = pd.DataFrame(contexts)
context_dic['df'].to_csv(context_dic['path'], index=False)
@patch('__main__.get_input')
def test_context_response_with_no_updates(mock_input):
mock_input.side_effect = ['Who is Enrique Jimenez?']
data = {'context': defaultdict(str), 'faq': defaultdict(str)}
contexts = {
'context':
[
'Intekglobal has its headquarters located in TJ',
'In Intekglobal we care about you',
'''Enrique Jimenez is one of the smartest minds on the planet,
he currently works as Intekglobal employee'''
],
'topic': ['headquarters', 'mission', 'Enrique\'s biography']
}
faqs = {
'Question':
['Minimum number of questions?', 'This is the other question?'],
'Answer': ['Two', 'yes']
}
with tempfile.TemporaryDirectory() as tmpdirname:
mock_faq_files(tmpdirname, faqs, data['faq'])
mock_context_file(tmpdirname, contexts, data['context'])
qa_models = load_qa_models(
config_tfidf=data['faq']['config'], download=False
)
        question, responses = question_response(data, qa_models)
logging.debug(f' {question}')
logging.debug(f' {responses}')
assert 'Who is Enrique Jimenez?' == question
assert 'one of the smartest minds on the planet' in responses
test_context_response_with_no_updates()
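# Sketch of the interaction tested above (the real question_response is
# defined earlier in this document): read a question from the user, query all
# models against the stored contexts and FAQs, and return the question
# together with a formatted answer string.
def question_response(data, qa_models):
    question = get_input()  # prompt text omitted in this sketch
    _, responses = get_responses(data['context']['df'], question, qa_models)
    _, formatted_response = format_responses(dict_responses=responses)
    return question, formatted_response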
# test new_question_answer
@patch('__main__.get_input')
def test_new_question_answer(mock_input):
question = 'What is Intekglobal?'
new_answer = 'Intekglobal is one of the best companies in the world'
mock_input.side_effect = [question, new_answer]
data = {'context': defaultdict(str), 'faq': defaultdict(str)}
faqs = {
        'Question': ['Who owns Tesla Company?', 'Is this heaven?'],
'Answer': [
'Elon Musk is the owner of Tesla', 'No, it is life on earth'
]
}
with tempfile.TemporaryDirectory() as tmpdirname:
mock_faq_files(tmpdirname, faqs, data['faq'])
qa_models = load_qa_models(
config_tfidf=data['faq']['config'], download=False
)
new_question_answer(data, qa_models)
updated_faq = pd.read_csv(data['faq']['path'])
assert updated_faq[updated_faq['Answer'] == new_answer].shape[0] == 1
test_new_question_answer()
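# Sketch of new_question_answer (illustrative; the real implementation is
# defined earlier in this document): collect a question/answer pair from the
# user, append it to the FAQ dataframe, and persist the CSV; retraining of
# qa_models on the updated file is omitted here.
def new_question_answer(data, qa_models):
    question, answer = get_input(), get_input()
    new_row = pd.DataFrame({'Question': [question], 'Answer': [answer]})
    data['faq']['df'] = pd.concat([data['faq']['df'], new_row], ignore_index=True)
    data['faq']['df'].to_csv(data['faq']['path'], index=False)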
@patch('__main__.get_input')
def test_new_context(mock_input):
data = {'context': defaultdict(str), 'faq': defaultdict(str)}
new_topic = 'AI Tool & Chatbot Development'
new_context_str = '''
A chatbot is an important tool for simulating intelligent conversations with humans.
Intekglobal chatbots efficiently live message on platforms such as Facebook Messenger,
Slack, and Telegram. But chatbots are more than just a cool technology advancement.
'''
contexts = {
'context':
[
            '''One of the greatest punk rock bands of all time
is the Ramones.
'''
],
'topic': ['Ramones']
}
mock_input.side_effect = [new_topic, new_context_str]
with tempfile.TemporaryDirectory() as tmpdirname:
        logging.debug(new_context_str)
mock_context_file(tmpdirname, contexts, data['context'])
new_context(data)
        updated_contexts = pd.read_csv(data['context']['path'])
        assert updated_contexts[updated_contexts.topic == new_topic].shape[0] == 1
test_new_context()
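# new_context is assumed to mirror new_question_answer above, but for the
# topic/context table; a minimal sketch:
def new_context(data):
    topic, context = get_input(), get_input()
    new_row = pd.DataFrame({'topic': [topic], 'context': [context]})
    data['context']['df'] = pd.concat(
        [data['context']['df'], new_row], ignore_index=True
    )
    data['context']['df'].to_csv(data['context']['path'], index=False)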
# test data preparation helpers
import tempfile, logging
import pandas as pd
from collections import defaultdict
from shutil import rmtree
from os import path, popen
from unittest.mock import patch
def test_set_minimal_faqs_with_more_than_one_question():
with tempfile.TemporaryDirectory() as tmpdirname:
data_file = path.join(tmpdirname, 'tmp_data.csv')
questions = ['a?', 'b?']
answers = ['a', 'b']
df = pd.DataFrame({'Question': questions, 'Answer': answers})
df.to_csv(data_file, index=False)
data = {'df': df, 'path': data_file}
set_minimal_faq_questions(data)
assert data['df'].shape[0] == 2
def test_set_minimal_faqs_with_less_than_two_questions():
with tempfile.TemporaryDirectory() as tmpdirname:
data_file = path.join(tmpdirname, 'tmp_data.csv')
questions = ['a?']
answers = ['a']
df = pd.DataFrame({'Question': questions, 'Answer': answers})
df.to_csv(data_file, index=False)
data = {'df': df, 'path': data_file}
assert data['df'].shape[0] == 1
set_minimal_faq_questions(data)
assert data['df'].shape[0] == 3
assert any(
data['df'].Question == 'Is this the Intekglobal Dialog System?'
)
def test_set_minimal_contexts():
with tempfile.TemporaryDirectory() as tmpdirname:
data_file = path.join(tmpdirname, 'tmp_data.csv')
data = {'df': pd.DataFrame(), 'path': data_file}
set_minimal_contexts(data)
assert path.isfile(data['path'])
assert all(data['df'].columns == ['topic', 'context'])
def test_set_data_dict_no_file():
with tempfile.TemporaryDirectory() as tmpdirname:
data = {'context': defaultdict(str)}
set_data_dict(
file=None,
data=data['context'],
data_dir=tmpdirname,
question_type='context'
)
logging.debug(data)
assert path.isfile(data['context']['path'])
@patch('__main__.popen')
def test_load_and_prepare_data(mock_popen):
with tempfile.TemporaryDirectory() as tmpdirname:
mock_popen("$PWD").read().strip.side_effect = [tmpdirname]
data = {'context': defaultdict(str), 'faq': defaultdict(str)}
load_and_prepare_data(
context_data_file=None,
faq_data_file=None,
data=data,
configs_faq=None
)
data_dir = path.join(tmpdirname, 'data')
assert path.isdir(data_dir)
test_set_minimal_faqs_with_more_than_one_question()
test_set_minimal_faqs_with_less_than_two_questions()
test_set_minimal_contexts()
test_set_data_dict_no_file()
test_load_and_prepare_data()
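# Reference sketches for two of the data-preparation helpers tested above
# (the real implementations are defined earlier in this document). The default
# entries below are assumptions, except for the Intekglobal question that the
# assertion checks for.
MINIMAL_FAQS = {
    'Question': ['Is this the Intekglobal Dialog System?', 'Can it answer FAQs?'],
    'Answer': ['Yes, it is!', 'Yes, it can!']
}
def set_minimal_faq_questions(data):
    # The FAQ classifier needs at least two answer classes, so short FAQ
    # files are padded with default entries and written back to disk.
    if data['df'].shape[0] < 2:
        data['df'] = pd.concat(
            [data['df'], pd.DataFrame(MINIMAL_FAQS)], ignore_index=True
        )
        data['df'].to_csv(data['path'], index=False)
def set_minimal_contexts(data):
    # Guarantee that a context table with the expected columns exists on disk.
    if data['df'].empty:
        data['df'] = pd.DataFrame(
            {'topic': ['greeting'], 'context': ['Hello, welcome to Intekglobal!']}
        )
    data['df'].to_csv(data['path'], index=False)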