# simple-api-duckduckgo/gateways/externals.py

import requests
from bs4 import BeautifulSoup
from typing import Union

class DuckDuckGoGateway:
    """Gateway that fetches DuckDuckGo's HTML results page and scrapes it.

    Attributes are class-level so they can be overridden on a subclass or an
    instance without changing ``search``.
    """

    # Endpoint that is requested with the query passed as the ``q`` parameter.
    url = "https://duckduckgo.com/html/"
    # Headers sent with every request (a desktop Firefox User-Agent).
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
    }
    # Seconds to wait for the server. requests applies no timeout by default,
    # so without this a stalled connection would block the caller forever.
    timeout = 10

    def search(self, query: str) -> Union[dict, list]:
        """Search DuckDuckGo for a query.

        Args:
            query (str): The query to search for.

        Returns:
            Union[dict, list]: A list of search results (this method always
            returns a list; it is empty when no result element is found).
            Result elements without a title link or a displayed URL are
            skipped instead of aborting the whole search.
            Example:
            [
                {
                    "title": "DuckDuckGo — Privacy, simplified.",
                    "link": "https://duckduckgo.com/",
                    "body": "The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.",
                    "icon": "duckduckgo.com/assets/meta/DDG-icon_256x256.png",
                },
                ...
            ]
            ``body`` is "" when the result has no snippet; ``icon`` is
            False when the result has no icon image (or the image has no
            ``src``).

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails
                or exceeds ``self.timeout``.
        """
        # NOTE: the HTTP status is not checked; whatever page comes back is
        # parsed, so an error page normally just yields an empty list.
        response = requests.get(
            self.url,
            headers=self.headers,
            params={"q": query},
            timeout=self.timeout,
        )
        soup = BeautifulSoup(response.text, "html.parser")

        data_output = []
        for result in soup.find_all("div", class_="result"):
            entry = self._parse_result(result)
            if entry is not None:
                data_output.append(entry)
        return data_output

    @classmethod
    def _parse_result(cls, result):
        """Convert one ``div.result`` element to a dict, or None if incomplete."""
        title_box = result.find("h2", class_="result__title")
        title_anchor = title_box.find("a") if title_box is not None else None
        url_box = result.find("div", class_="result__extras__url")
        if title_anchor is None or url_box is None:
            return None

        # Only the first line of the URL element's text is the link itself.
        link_lines = url_box.text.strip().splitlines()
        if not link_lines:
            return None

        snippet = result.find("a", class_="result__snippet")
        icon_obj = result.find("img", class_="result__icon__img")
        icon_src = icon_obj.get("src") if icon_obj is not None else None

        return {
            "title": title_anchor.text.strip(),
            "link": link_lines[0],
            "body": snippet.text.strip() if snippet is not None else "",
            "icon": False if icon_src is None else cls._strip_protocol_relative(icon_src),
        }

    @staticmethod
    def _strip_protocol_relative(src):
        """Drop a leading ``//`` from *src*; return any other value unchanged."""
        return src[2:] if src.startswith("//") else src
# Update 2024-02-08 12:52:28 +01:00
#
#     import requests
#     from bs4 import BeautifulSoup
#     from typing import Union
#
#     class DuckDuckGoGateway:
#         url = "https://duckduckgo.com/html/"
#         headers = {
#             "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:84.0) Gecko/20100101 Firefox/84.0",
#         }
#
#         def search(self, query: str) -> Union[dict, list]:
#             """ Search DuckDuckGo for a query
#             Args:
#                 query (str): The query to search for
#             Returns:
#                 Union[dict, list]: A list of search results
#                 Example:
#                 [
#                     {
#                         "title": "DuckDuckGo — Privacy, simplified.",
#                         "link": "https://duckduckgo.com/",
#                         "body": "The Internet privacy company that empowers you to seamlessly take control of your personal information online, without any tradeoffs.",
#                         "icon": "/assets/meta/DDG-icon_256x256.png",
#                     },
#                     ...
#                 ]
#             """
#             data_output = []
#             response = requests.get(self.url, headers=self.headers, params={"q": query})
#             soup = BeautifulSoup(response.text, "html.parser")
#             results = soup.find_all("div", class_="result")
#             for result in results:
#                 title = result.find("h2", class_="result__title").find("a").text.strip()
#                 link = result.find("div", class_="result__extras__url").text.strip().splitlines()[0]
#                 body = result.find("a", class_="result__snippet").text.strip()
#                 icon_obj = result.find("img", class_="result__icon__img")
#                 icon = icon_obj["src"][2:] if icon_obj else False
#
#                 data_output.append(
#                     {
#                         "title": title,
#                         "link": link,
#                         "body": body,
#                         "icon": icon,
#                     }
#                 )
#             return data_output