Add links to search results

This commit is contained in:
Cohee
2023-12-11 23:52:25 +02:00
parent 1363fd242d
commit 98535e6fa3
3 changed files with 47 additions and 21 deletions

View File

@@ -4,6 +4,8 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from modules.utils import is_colab
import atexit
@@ -33,41 +35,65 @@ def get_driver():
return webdriver.Firefox(service=firefoxService, options=options)
def search_google(query: str) -> tuple[str, list[str]]:
    """Search Google for *query* and scrape text and links from the results page.

    Args:
        query: Raw (not URL-encoded) search text; it is percent-encoded here
            before being placed in the URL.

    Returns:
        A ``(text, links)`` tuple: the newline-separated text collected from
        the result selectors below, and the hrefs of the result title links.
    """
    # Local import keeps this block self-contained; encoding prevents
    # characters such as '&', '#' or '+' from corrupting the query string.
    from urllib.parse import quote_plus

    print(f"Searching Google for {query}...")
    driver.get("https://google.com/search?hl=en&q=" + quote_plus(query))
    # Best-effort wait for the results container, then dump the page for debugging.
    wait_for_id('res')
    save_debug()
    text = ''
    # Answer box
    text += get_from_selector('.wDYxhc')
    # Knowledge panel
    text += get_from_selector('.hgKElc')
    # Page snippets
    text += get_from_selector('.r025kc.lVm3ye')
    # Old selectors (for compatibility)
    text += get_from_selector('.yDYNvb.lyLwlc')
    # Links
    links = get_links_from_selector('.yuRUbf a')
    print("Found: " + text, links)
    return (text, links)
def search_duckduckgo(query: str) -> tuple[str, list[str]]:
    """Search DuckDuckGo for *query* and scrape snippets and links from the results.

    Args:
        query: Raw (not URL-encoded) search text; it is percent-encoded here
            before being placed in the URL.

    Returns:
        A ``(text, links)`` tuple: the newline-separated snippet text and the
        hrefs of the result title links.
    """
    # Local import keeps this block self-contained; encoding prevents
    # characters such as '&', '#' or '+' from corrupting the query string.
    from urllib.parse import quote_plus

    print(f"Searching DuckDuckGo for {query}...")
    driver.get("https://duckduckgo.com/?kp=-2&kl=wt-wt&q=" + quote_plus(query))
    # Best-effort wait for the results wrapper, then dump the page for debugging.
    wait_for_id('web_content_wrapper')
    save_debug()
    text = get_from_selector('[data-result="snippet"]')
    links = get_links_from_selector('[data-testid="result-title-a"]')
    print("Found: " + text, links)
    return (text, links)
# Shared browser instance, created when this module is imported; the
# functions in this module read it as a module-level global.
driver = get_driver()
def quit_driver():
    """Shut down the module-level browser driver by calling its ``quit()``."""
    driver.quit()
def save_debug():
    """Dump the driver's current page source to ``data/tmp/debug.html``.

    The file is opened in write mode, so any previous dump is overwritten.
    """
    with open("data/tmp/debug.html", mode="w", encoding='utf-8') as debug_file:
        html = driver.page_source
        debug_file.write(html)
def wait_for_id(id: str, delay: int = 5) -> None:
    """Wait up to *delay* seconds for an element with the given id to be present.

    This is best-effort: if the wait fails for any ordinary reason, a message
    is printed and execution continues without the element.

    Args:
        id: The DOM id to wait for.
        delay: Maximum time to wait, passed to ``WebDriverWait``.
    """
    try:
        WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.ID, id)))
    except Exception:
        # Deliberately broad so scraping proceeds on any wait failure, but not
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        print(f"Element with id {id} not found, proceeding without.")
def get_from_selector(selector: str) -> str:
    """Collect the visible text of every element matching a CSS selector.

    Args:
        selector: CSS selector passed to ``driver.find_elements``.

    Returns:
        The non-empty element texts, each followed by a newline, concatenated
        in document order; an empty string when nothing matches.
    """
    parts = []
    for el in driver.find_elements(By.CSS_SELECTOR, selector):
        # Read ``text`` once: each access queries the browser, so reading it
        # twice is slower and the two reads could disagree.
        content = el.text if el else ''
        if content:
            parts.append(content + '\n')
    return ''.join(parts)
def get_links_from_selector(selector: str) -> list[str]:
    """Collect the ``href`` of every element with text that matches a CSS selector.

    Args:
        selector: CSS selector passed to ``driver.find_elements``.

    Returns:
        The href values of matching elements that have non-empty text, in
        document order. Elements without an href are skipped so the list
        never contains ``None``.
    """
    links = []
    for el in driver.find_elements(By.CSS_SELECTOR, selector):
        if not el or not el.text:
            continue
        href = el.get_attribute('href')
        # get_attribute returns None when the attribute is absent.
        if href:
            links.append(href)
    return links
# Register quit_driver with atexit so the browser driver is quit when the
# interpreter exits.
atexit.register(quit_driver)