diff --git a/googlesearch/__init__.py b/googlesearch/__init__.py index 74e6564..46d03bf 100644 --- a/googlesearch/__init__.py +++ b/googlesearch/__init__.py @@ -1,4 +1,4 @@ -"""googlesearch is a Python library for searching Google, easily.""" +"""googlesearch is a Python library for searching Google and Bing, easily.""" from time import sleep from bs4 import BeautifulSoup from requests import get @@ -6,9 +6,13 @@ import urllib -def _req(term, results, lang, start, proxies, timeout): +def _req(term, results, lang, start, proxies, timeout, engine): + search_url = { + "google": "https://www.google.com/search", + "bing": "https://www.bing.com/search", + } resp = get( - url="https://www.google.com/search", + url=search_url[engine], headers={ "User-Agent": get_useragent() }, @@ -35,8 +39,8 @@ def __repr__(self): return f"SearchResult(url={self.url}, title={self.title}, description={self.description})" -def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5): - """Search the Google search engine""" +def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5, engine="google"): + """Search the Google or Bing search engine""" escaped_term = urllib.parse.quote_plus(term) # make 'site:xxx.xxx.xxx ' works. @@ -53,22 +57,35 @@ def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_in while start < num_results: # Send request resp = _req(escaped_term, num_results - start, - lang, start, proxies, timeout) + lang, start, proxies, timeout, engine) # Parse soup = BeautifulSoup(resp.text, "html.parser") - result_block = soup.find_all("div", attrs={"class": "g"}) - if len(result_block) ==0: + if engine == "bing": + result_block = soup.find_all("li", attrs={"class": "b_algo"}) + title_tag = "h2" + + else: + result_block = soup.find_all("div", attrs={"class": "g"}) + title_tag = "h3" + + # print(len(result_block)) + # print(result_block[0]) + if len(result_block) == 0: start += 1 - for result in result_block: - # Find link, title, description - link = result.find("a", href=True) - title = result.find("h3") - description_box = result.find( - "div", {"style": "-webkit-line-clamp:2"}) - if description_box: - description = description_box.text - if link and title and description: + else: + for result in result_block: + # Find link, title, description + link = result.find("a", href=True) + title = result.find(title_tag) + if engine == "bing": + description_box = result.find("p") + else: + description_box = result.find("div", {"style": "-webkit-line-clamp:2"}) + + description = description_box.text if description_box else "" + + if link and title: start += 1 if advanced: yield SearchResult(link["href"], title.text, description) diff --git a/setup.py b/setup.py index b226a12..039adc9 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name="googlesearch-python", - version="1.2.3", + version="1.2.4", author="Nishant Vikramaditya", author_email="junk4Nv7@gmail.com", description="A Python library for scraping the Google search engine.",