# Source code for sugaroid.brain.news

"""
News headlines provided by NewsAPI
"""
import logging
import os

from chatterbot.logic import LogicAdapter
from newsapi import NewsApiClient
from dotenv import load_dotenv
from sugaroid.brain.ooo import Emotion
from sugaroid.brain.preprocessors import spac_token

from sugaroid.core.statement import SugaroidStatement

# Maps a lower-cased country name (or a common alias of it) to the
# two-letter country code accepted by NewsAPI's top-headlines endpoint.
# Lookups are expected to be done with ``str(name).lower()``.
NEWSAPI_COUNTRIES = {
    "argentina": "ar",
    "australia": "au",
    "austria": "at",
    "belgium": "be",
    "brazil": "br",
    "bulgaria": "bg",
    "canada": "ca",
    "china": "cn",
    "colombia": "co",
    "cuba": "cu",
    "czech republic": "cz",
    "egypt": "eg",
    "france": "fr",
    "germany": "de",
    "greece": "gr",
    "hong kong": "hk",
    "hungary": "hu",
    "india": "in",
    "indonesia": "id",
    "ireland": "ie",
    "israel": "il",
    "italy": "it",
    "japan": "jp",
    "latvia": "lv",
    "lithuania": "lt",
    "malaysia": "my",
    "mexico": "mx",
    "morocco": "ma",
    "netherlands": "nl",
    "new zealand": "nz",
    "nigeria": "ng",
    "norway": "no",
    "philippines": "ph",
    "poland": "pl",
    "portugal": "pt",
    "romania": "ro",
    "russia": "ru",
    "saudi arabia": "sa",
    "serbia": "rs",
    "singapore": "sg",
    "slovakia": "sk",
    "slovenia": "si",
    "south africa": "za",
    "south korea": "kr",
    "sweden": "se",
    "switzerland": "ch",
    "taiwan": "tw",
    "thailand": "th",
    "turkey": "tr",
    "uae": "ae",
    "ukraine": "ua",
    "united kingdom": "gb",
    "united states": "us",
    "venezuela": "ve",
    # Misspelled key kept so that existing lookups keep working.
    "venuzuela": "ve",
    # Aliases. The United Kingdom's code is "gb" ("uk" is not a country
    # code the API accepts), so every UK alias must resolve to "gb".
    "us": "us",
    "uk": "gb",
    "u.k.": "gb",
    "u.s.": "us",
    "u.s": "us",
    "u.k": "gb",
    "u s": "us",
    "u k": "gb",
    "the united states": "us",
    "the united kingdom": "gb",
}


class SugaroidNews:
    """
    A dedicated class for processing news from the API

    Every request records its outcome in ``chatbot.globals["last_news"]``:
    the raw API response on success, ``False`` on any failure. When no
    chatbot was supplied, nothing is recorded.
    """

    def __init__(self, chatbot=None):
        """
        Create the NewsAPI client.

        The API key is read from the ``NEWSAPI_TOKEN`` environment variable
        after ``load_dotenv()`` has been called.

        :param chatbot: object exposing a ``globals`` mapping, or None
        """
        load_dotenv()
        self.chatbot = chatbot
        token = os.getenv("NEWSAPI_TOKEN")
        self.api = NewsApiClient(api_key=token)

    def _remember(self, value):
        """Store ``value`` as the chatbot's ``last_news``; no-op without a chatbot."""
        if self.chatbot is not None:
            self.chatbot.globals["last_news"] = value

    def get_top_headlines(self, country=None):
        """
        Get the top news items of the country if provided.
        Otherwise give international news

        :param country: country code passed through to the API, or None
        :return: list of headline strings, or a single-item list holding an
            error message when the request failed
        """
        try:
            if country is None:
                response = self.api.get_top_headlines()
            else:
                response = self.api.get_top_headlines(country=country)
            return self.process(response)
        except Exception as e:
            # Top-level boundary: any failure is turned into a user-facing
            # message instead of being propagated to the caller.
            self._remember(False)
            if str(e) == "invalid country":
                logging.info(
                    f"SugaroidNews: Failed to get the news. country=[{type(country)}, {country}]"
                )
                return [
                    "The country you provided does not exist in my database. I am sorry."
                ]
            return [
                "Failed to establish connection with the server. Error: {}".format(e)
            ]

    def get_news_keyword(self, keyword):
        """
        Search all articles for ``keyword``.

        :param keyword: search query passed to the API as ``q``
        :return: list of headline strings, or a single-item list holding an
            error message when the request failed
        """
        try:
            response = self.api.get_everything(q=keyword)
            return self.process(response)
        except Exception as e:
            self._remember(False)
            return [
                "Failed to establish connection with the server. Error: {}".format(e)
            ]

    def process(self, response):
        """
        Convert the raw JSON Response to Human Readable News bullets

        :param response: mapping with an ``"articles"`` list; each article is
            expected to carry ``"title"`` and ``"source" -> "name"``
        :return: array of news headlines
        """
        news_headlines = []
        for article in response["articles"]:
            try:
                headline = "{} from {}".format(
                    article["title"], article["source"]["name"]
                )
            except (KeyError, IndexError, TypeError) as e:
                # Best effort: an article without a title or source name is
                # skipped rather than failing the whole batch.
                logging.debug("SugaroidNews: skipping malformed article: %r", e)
                continue
            news_headlines.append(headline)

        if not news_headlines:
            self._remember(False)
            return [
                "There was an error collecting the latest news headlines. Please try again later."
            ]
        self._remember(response)
        return news_headlines
class NewsAdapter(LogicAdapter):
    """
    Ports the SugaroidNews Wrapper for easier access by the SugaroidChatbot Class

    Two kinds of requests are handled: a request for headlines (optionally
    for a country or about a keyword), and a follow-up that names the number
    of a previously listed headline in order to see its details.
    """

    def __init__(self, chatbot, **kwargs):
        super().__init__(chatbot, **kwargs)
        # Best similarity score computed by the last can_process() call.
        self.cos = None
        # Number token picked from the statement by can_process(), or False.
        self.integer = False

    def _find_number(self, statement):
        """
        Look for a number-like token (tag ``LS`` or ``CD``) in the statement.

        The lower-cased text of the first match is stored in ``self.integer``.

        :param statement: SugaroidStatement Class Object
        :return: bool, True when such a token was found
        """
        for token in spac_token(statement, self.chatbot):
            if token.tag_ in ["LS", "CD"]:
                self.integer = token.lower_
                return True
        return False

    def can_process(self, statement):
        """
        Checks if the statement has a cosine similarity of any
        of the words mentioned here, or is a follow-up to earlier news

        :param statement: SugaroidStatement Class Object
        :return: bool,
        """
        similarity = self.chatbot.lp.similarity
        # The adapter instance is reused between statements; forget the
        # number selected in an earlier turn so that it cannot hijack a
        # fresh headline request.
        self.integer = False
        try:
            last_type = self.chatbot.globals["history"]["types"][-1]
        except IndexError:
            last_type = False
        logging.info("NewsAdapter: can_process() last_adapter was {}".format(last_type))

        processed = str(statement).replace("?", "").replace(".", "")
        phrases = (
            "Whats happening",
            "Get me the latest headlines",
            "Show me today's news",
            "Can you tell me todays news",
            "Can you show me today's headlines",
            "What is happening today",
            "What top stories running right now",
        )
        self.cos = max(similarity(processed, phrase) for phrase in phrases)
        logging.info("NewsAdapter received a cosine similarity of {}".format(self.cos))

        lowered = processed.lower()
        if self.cos > 0.8 or "news" in lowered or "headlines" in lowered:
            return True

        # Everything below is a follow-up and needs earlier news to refer to.
        if not self.chatbot.globals["last_news"]:
            return False
        if "show me more" in str(statement).lower() or last_type == "NewsAdapter":
            return self._find_number(statement)
        return False

    def _describe_article(self, last_news):
        """
        Build the detail text of the article selected by ``self.integer``.

        :param last_news: stored API response holding an ``"articles"`` list
        :return: str, the article details or an apology when the number
            cannot be parsed or does not match a listed article
        """
        try:
            self.integer = int(self.integer)
        except ValueError:
            return (
                "Sorry, I couldn't make out what number it actually is. I might understand it better if "
                "it is in a numerical form? "
            )

        not_found = "I am sorry. I couldn't find the item you actually asked for."
        # Headlines are listed starting from 1; without this guard 0 and
        # negative numbers would wrap around to the end of the list.
        if self.integer < 1:
            return not_found
        try:
            data = last_news["articles"][self.integer - 1]
        except IndexError:
            return not_found

        provider = data["source"]["name"]
        author = data["author"]
        last_updated = data["publishedAt"]
        title = data["title"]
        description = data["description"]
        url = data["url"]
        img_url = data["urlToImage"]
        return (
            f"From {provider}\nWritten by {author} on {last_updated}\n\n{title}\n\n{description}"
            f"\nRead more at {url}\n{img_url} "
        )

    def _fetch_headlines(self, normalize):
        """
        Fetch headlines for the keyword or country mentioned in the statement.

        A keyword takes precedence over a country; when neither is found the
        top headlines are requested without a country.

        :param normalize: tokenized statement as returned by ``spac_token``
        :return: str, the numbered list of headlines
        """
        country = None
        keyword = False
        for ent in normalize.ents:
            if ent.label_ == "GPE":
                country = ent
            elif ent.label_ in [
                "PERSON",
                "NORP",
                "FAC",
                "ORG",
                "LOC",
                "PRODUCT",
                "EVENT",
                "WORK_OF_ART",
                "LAW",
            ]:
                keyword = ent
        logging.info(
            "NewsAdapter found as Natural Entity Recognizer words, country : {}, keyword: {}".format(
                country, keyword
            )
        )

        # When no entity supplied a keyword, use the words that follow
        # "about" / "on". The last token is not inspected because nothing
        # can follow it.
        for i in range(len(normalize) - 1):
            if normalize[i].lower_ in ("about", "on"):
                logging.info("NewsAdapter: found determiner about or on")
                if not keyword:
                    keyword = " ".join(
                        normalize[j].text
                        for j in range(i + 1, len(normalize))
                        if normalize[j].pos_ != "DET"
                    )

        sg_news = SugaroidNews(self.chatbot)
        if keyword:
            logging.info("NewsAdapter: Getting news of keyword {}".format(keyword))
            news = sg_news.get_news_keyword(str(keyword))
        else:
            logging.info("NewsAdapter: Getting news of country {}".format(country))
            # Names missing from the table (including "none" when no country
            # was mentioned) resolve to None.
            news = sg_news.get_top_headlines(
                country=NEWSAPI_COUNTRIES.get(str(country).lower(), None)
            )

        return " ".join(
            "\n ❇️ [{}] {}".format(position + 1, str(headline))
            for position, headline in enumerate(news)
        )

    def process(self, statement, additional_response_selection_parameters=None):
        """
        Answer a news request accepted by can_process().

        :param statement: SugaroidStatement Class Object
        :param additional_response_selection_parameters: unused
        :return: SugaroidStatement holding the response text
        """
        confidence = self.cos
        normalize = spac_token(statement, self.chatbot)
        # Override the other answer when asked for
        if len(normalize) <= 2:
            confidence = confidence + 0.5

        # The stored response lives in chatbot.globals["last_news"]: that is
        # where SugaroidNews writes it and where can_process() checks it.
        if self.integer and self.chatbot.globals["last_news"]:
            response = self._describe_article(self.chatbot.globals["last_news"])
        else:
            response = self._fetch_headlines(normalize)

        selected_statement = SugaroidStatement(response, chatbot=True)
        selected_statement.confidence = confidence
        emotion = Emotion.neutral
        selected_statement.adapter = "NewsAdapter"
        selected_statement.emotion = emotion
        return selected_statement