2024-02-08 12:52:28 +01:00
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from typing import Union
|
|
|
|
|
|
|
|
class DuckDuckGoGateway:
    """Gateway to DuckDuckGo search engine.

    Fetches the HTML results page with ``requests`` and extracts the
    individual hits from it with BeautifulSoup.
    """

    # Endpoint that is requested and scraped by ``search``.
    url = "https://duckduckgo.com/html/"
    # Headers sent with every request (a browser-like User-Agent).
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
    }
    # Seconds to wait for the server; without a timeout ``requests`` can
    # block forever on a stalled connection. Override on a subclass or
    # instance to tune it.
    timeout = 10

    def search(self, query: str) -> Union[dict, list]:
        """Search DuckDuckGo for a query.

        Args:
            query (str): The query to search for. An empty or
                whitespace-only query returns ``[]`` without any network
                request.

        Returns:
            Union[dict, list]: A list of search results, one dict per hit
            with the keys ``title``, ``link``, ``body`` and ``icon``.
            ``icon`` is ``False`` when the hit has no icon image. Result
            containers that lack a title or URL element are skipped.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not complete within ``timeout`` seconds.

        Note:
            The HTTP status code is not checked; an error page simply
            yields whatever result containers it contains (normally none).

        Example:
            [
                {
                    "title": "DuckDuckGo — Privacy, simplified.",
                    "link": "https://duckduckgo.com/",
                    "body": "The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.",
                    "icon": "/assets/meta/DDG-icon_256x256.png",
                },
                ...
            ]
        """
        # Nothing to search for: skip the round trip entirely.
        if not query or not query.strip():
            return []

        response = requests.get(
            self.url,
            headers=self.headers,
            params={"q": query},
            timeout=self.timeout,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        data_output = []
        for result in soup.find_all("div", class_="result"):
            parsed = self._parse_result(result)
            if parsed is not None:
                data_output.append(parsed)
        return data_output

    @staticmethod
    def _parse_result(result):
        """Convert one ``div.result`` element into a result dict.

        Returns ``None`` when the element has no title link or no URL
        text, so the caller can skip it instead of crashing on a
        ``None`` lookup.
        """
        title_tag = result.find("h2", class_="result__title")
        anchor = title_tag.find("a") if title_tag is not None else None
        url_tag = result.find("div", class_="result__extras__url")
        if anchor is None or url_tag is None:
            return None

        # Only the first line of the URL element's text is used as the link.
        url_lines = url_tag.text.strip().splitlines()
        if not url_lines:
            return None

        snippet_tag = result.find("a", class_="result__snippet")
        body = snippet_tag.text.strip() if snippet_tag is not None else ""

        icon_tag = result.find("img", class_="result__icon__img")
        if icon_tag is None:
            icon = False
        else:
            src = icon_tag.get("src", "")
            # Drop the leading "//" of a protocol-relative source, but
            # leave any other form of source untouched.
            icon = src[2:] if src.startswith("//") else src

        return {
            "title": anchor.text.strip(),
            "link": url_lines[0],
            "body": body,
            "icon": icon,
        }
|