import logging
from random import randint
from nltk.corpus import stopwords
import nltk
from sugaroid.brain.utils import LanguageProcessor


def sigmaSimilarity(src, dest):
    """
    Measures how much of ``src`` overlaps with ``dest``.

    :param src: a list of literals
    :param dest: a list of final literals
    :return: the ratio of matching items to the length of ``src``
    """
    total = len(src)
    matches = 0
    for i in src:
        for j in dest:
            if i == j:
                matches += 1
    return matches / total
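

# Example (illustrative): two of the three source tokens also occur in the
# destination list, so the score works out to 2 / 3.
#
#   sigmaSimilarity(["i", "like", "tea"], ["i", "like", "coffee"])  # ~0.67
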
def difference(lst1, lst2):
    """Returns the items of ``lst1`` that do not appear in ``lst2``."""
    lst3 = [value for value in lst1 if value not in lst2]
    return lst3
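

# Example (illustrative):
#
#   difference(["a", "b", "c"], ["b"])  # -> ["a", "c"]
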
def reverse(token):
    """
    Swaps first-person pronouns with second-person pronouns and vice versa.

    :param token: a nltk.word_tokenize type list
    :return: a list similar to nltk.word_tokenize
    """
    processed = []
    has_am = "am" in token  # noted, but not used below
    has_is = "are" in token  # noted, but not used below
    logging.info("Reverse: Received {}".format(token))
    interrogation = False
    # First pass: decide whether the sentence is a question, either from a
    # trailing "?" or from a WH-word tag (what, who, where, ...).
    for i in token:
        lps = LanguageProcessor().tokenize(i)[0]
        if lps.tag_ == "." and lps.lower_ == "?":
            interrogation = True
        elif str(lps.tag_).startswith("W"):
            interrogation = True
    # Second pass: swap the pronouns and choose a matching "am"/"are".
    for i in token:
        tagged = nltk.pos_tag([i])
        if tagged[0][1] == "PRP":
            if i == "you":
                if interrogation:
                    processed.append("I")
                else:
                    # insert "I" just before the last processed word
                    processed = processed[:-1] + ["I"] + processed[-1:]
            elif i.lower() == "i":
                processed.append("you")
            # other pronouns are left out of the reversed sentence
        elif tagged[0][1] == "VBP":
            if i == "are":
                if "I" in processed:
                    processed.append("am")
                else:
                    processed.append("are")
            elif i == "am":
                if "I" in processed:
                    processed.append("am")
                else:
                    processed.append("are")
            else:
                # any other present-tense verb defaults to "are"
                processed.append("are")
        else:
            processed.append(i)
    # Clean-up sweeps to restore subject-verb agreement after the swap.
    for j in range(0, len(processed) - 2):
        if processed[j] == "I" and processed[j + 1] == "are":
            processed[j + 1] = "am"
        elif processed[j] == "you" and processed[j + 1] == "am":
            processed[j + 1] = "are"
        else:
            continue
    for j in range(0, len(processed) - 2):
        if processed[j] == "I" and processed[j + 1] == "are":
            processed[j] = "you"
        elif processed[j] == "you" and processed[j + 1] == "am":
            processed[j] = "I"
        else:
            continue
    for j in range(0, len(processed) - 2):
        if processed[j].lower() == "are" and processed[j + 1] == "I":
            processed[j] = "am"
        elif processed[j] == "am" and processed[j + 1] == "you":
            processed[j] = "are"
        else:
            continue
    logging.info("Reverse: Pushing {}".format(processed))
    return processed
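

# Examples (illustrative; the exact output depends on the installed spaCy and
# NLTK taggers, which these traces assume behave as usual on single words):
#
#   reverse(["you", "are", "happy"])  # -> ["I", "am", "happy"]
#   reverse(["I", "am", "happy"])     # -> ["you", "are", "happy"]
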
def random_response(iterable=()):
    """
    Selects a random response from the given iterable of candidates.

    :param iterable: a non-empty indexable sequence of responses
    :return: a randomly selected value of the iterable
    """
    return iterable[randint(0, len(iterable) - 1)]
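

# Example (illustrative; the sequence must be non-empty and support indexing):
#
#   random_response(["Hi!", "Hello!", "Hey there!"])  # -> one of the three strings
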
def cosine_similarity(X_list, Y_list):
    """
    Measures the similarity between two tokenized sentences using cosine
    similarity over their non-stopword vocabulary.

    :param X_list: first sentence as a list of tokens
    :param Y_list: second sentence as a list of tokens
    :return: cosine similarity score between 0 and 1
    """
    # sw contains the list of stopwords
    sw = stopwords.words("english")
    l1 = []
    l2 = []
    # remove stop words from the token lists
    X_set = {w for w in X_list if w not in sw}
    Y_set = {w for w in Y_list if w not in sw}
    # form a set containing keywords of both sentences
    rvector = X_set.union(Y_set)
    for w in rvector:
        if w in X_set:
            l1.append(1)  # build binary occurrence vectors
        else:
            l1.append(0)
        if w in Y_set:
            l2.append(1)
        else:
            l2.append(0)
    c = 0
    # cosine formula: dot product divided by the product of the magnitudes
    for i in range(len(rvector)):
        c += l1[i] * l2[i]
    cosine = c / float((sum(l1) * sum(l2)) ** 0.5)
    return cosine
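

# Example (illustrative; assumes the NLTK stopword corpus is available and
# that neither token list reduces to an empty set, since an empty set would
# make the final division raise ZeroDivisionError):
#
#   cosine_similarity(["I", "love", "tea"], ["I", "love", "coffee"])  # ~0.67
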
def any_in(arg1: list, string1: list):
    """
    Advanced ``in`` operator.
    Checks whether any item of ``arg1`` exists in ``string1``.

    :param arg1: list of items to look for
    :param string1: the container (list or string) searched for those items
    :return: True if at least one item is found, otherwise False
    """
    for i in arg1:
        if i in string1:
            return True
    return False
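

# Examples (illustrative; ``string1`` can be any container that supports
# the ``in`` operator, including a plain string):
#
#   any_in(["cat", "dog"], "I have a dog at home")  # -> True
#   any_in(["cat", "dog"], ["fish", "bird"])        # -> False
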
def raw_in(arg, spacy_tokenized):
    """Checks whether any token in ``spacy_tokenized`` has the exact text ``arg``."""
    for i in spacy_tokenized:
        if i.text == arg:
            return True
    return False


def raw_lower_in(arg, spacy_tokenized):
    """Checks whether any token in ``spacy_tokenized`` has the lowercase form ``arg``."""
    for i in spacy_tokenized:
        if i.lower_ == arg:
            return True
    return False


def lemma_in(arg, spacy_tokenized):
    """Checks whether any token in ``spacy_tokenized`` has the lemma ``arg``."""
    for i in spacy_tokenized:
        if i.lemma_ == arg:
            return True
    return False


def pos_in(arg, spacy_tokenized):
    """Checks whether any token in ``spacy_tokenized`` has the part-of-speech tag ``arg``."""
    for i in spacy_tokenized:
        if i.pos_ == arg:
            return True
    return False
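

# Example (illustrative; assumes LanguageProcessor().tokenize returns a spaCy
# Doc, as its use in reverse() above suggests, so that each token exposes
# ``text``, ``lower_``, ``lemma_`` and ``pos_``):
#
#   doc = LanguageProcessor().tokenize("Are you a bot?")
#   raw_lower_in("bot", doc)  # -> True
#   lemma_in("be", doc)       # -> True ("Are" lemmatizes to "be")
#   pos_in("PRON", doc)       # -> True ("you" is tagged as a pronoun)
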
def text2int(textnum, numwords={}):
    """
    Converts number words to an integer.
    from https://stackoverflow.com/questions/493174/is-there-a-way-to-convert-number-words-to-integers

    :param textnum: a string of number words, e.g. "one hundred and five"
    :param numwords: optional pre-built word-to-value lookup; filled on first use
    :return: the integer value of the phrase
    """
    if not numwords:
        units = [
            "zero", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
            "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
        ]
        tens = [
            "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
            "eighty", "ninety",
        ]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        numwords["and"] = (1, 0)
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # "hundred" -> 10**2, "thousand" -> 10**3, "million" -> 10**6, ...
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0
    for word in textnum.split():
        if word not in numwords:
            raise Exception("Illegal word: " + word)
        scale, increment = numwords[word]
        current = current * scale + increment
        if scale > 100:
            result += current
            current = 0
    return result + current
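

# Examples (illustrative):
#
#   text2int("one hundred and twenty three")  # -> 123
#   text2int("two thousand and nineteen")     # -> 2019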