Files
SillyTavern-extras/modules/websearch/script.py
2023-12-11 23:52:25 +02:00

100 lines
3.3 KiB
Python

import atexit
import os
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from modules.utils import is_colab
def get_driver():
    """Create a headless Selenium WebDriver, preferring Chrome over Firefox.

    Chrome is tried first. When ``is_colab()`` is true the driver is built
    from the ``'chromedriver'`` executable name; otherwise a default
    ``ChromeService`` is used. If any step of the Chrome setup raises, a
    headless Firefox driver is created instead.

    Returns:
        A ``webdriver.Chrome`` instance, or a ``webdriver.Firefox`` instance
        when Chrome could not be started.

    Raises:
        Whatever ``webdriver.Firefox`` raises if the fallback fails as well.
    """
    try:
        print("Initializing Chrome driver...")
        options = ChromeOptions()
        for flag in (
            '--disable-infobars',
            "--headless",
            "--disable-gpu",
            "--no-sandbox",
            '--disable-dev-shm-usage',
            "--lang=en-GB",
        ):
            options.add_argument(flag)

        if is_colab():
            return webdriver.Chrome('chromedriver', options=options)
        chrome_service = ChromeService()
        return webdriver.Chrome(service=chrome_service, options=options)
    except Exception as err:
        # `except Exception` (not a bare `except:`) so that Ctrl+C and
        # SystemExit still propagate instead of silently triggering the
        # Firefox fallback. The underlying error is printed because the
        # failure is not necessarily a missing Chrome install.
        print(f"Chrome driver error: {err}")
        print("Chrome not found, using Firefox instead.")
        firefox_service = FirefoxService()
        options = FirefoxOptions()
        options.add_argument("--headless")
        options.set_preference("intl.accept_languages", "en,en_US")
        return webdriver.Firefox(service=firefox_service, options=options)
def search_google(query: str) -> tuple[str, list[str]]:
    """Run a Google search in the shared driver and scrape text and links.

    Args:
        query: Raw (unescaped) search text. It is URL-encoded here, so
            characters such as ``&``, ``#``, ``+`` or ``%`` are searched for
            literally instead of corrupting the request URL.

    Returns:
        A ``(text, links)`` tuple: ``text`` is the concatenated text of the
        scraped result elements, ``links`` the hrefs of the result anchors.
    """
    print(f"Searching Google for {query}...")
    driver.get("https://google.com/search?hl=en&q=" + quote_plus(query))
    wait_for_id('res')
    save_debug()

    # Scraped in this order; each selector contributes zero or more lines.
    selectors = (
        '.wDYxhc',         # Answer box
        '.hgKElc',         # Knowledge panel
        '.r025kc.lVm3ye',  # Page snippets
        '.yDYNvb.lyLwlc',  # Old selectors (for compatibility)
    )
    text = ''.join(get_from_selector(selector) for selector in selectors)

    # Links
    links = get_links_from_selector('.yuRUbf a')
    print("Found: " + text, links)
    return (text, links)
def search_duckduckgo(query: str) -> tuple[str, list[str]]:
    """Run a DuckDuckGo search in the shared driver and scrape text and links.

    Args:
        query: Raw (unescaped) search text. It is URL-encoded here, so
            characters such as ``&``, ``#``, ``+`` or ``%`` are searched for
            literally instead of corrupting the request URL.

    Returns:
        A ``(text, links)`` tuple: ``text`` is the concatenated text of the
        result snippets, ``links`` the hrefs of the result title anchors.
    """
    print(f"Searching DuckDuckGo for {query}...")
    driver.get("https://duckduckgo.com/?kp=-2&kl=wt-wt&q=" + quote_plus(query))
    wait_for_id('web_content_wrapper')
    save_debug()
    text = get_from_selector('[data-result="snippet"]')
    links = get_links_from_selector('[data-testid="result-title-a"]')
    print("Found: " + text, links)
    return (text, links)
# Module-level WebDriver shared by the search and scraping helpers in this
# file. It is created as soon as the module is executed/imported.
driver = get_driver()
def quit_driver() -> None:
    """Quit the module-level ``driver`` session."""
    driver.quit()
def save_debug() -> None:
    """Dump the current page source to ``data/tmp/debug.html`` for debugging.

    The target directory is created when missing, so a fresh checkout does
    not fail every search with ``FileNotFoundError`` over a debug aid.
    """
    # Fetch the page before opening the file: if the WebDriver call fails,
    # the previous dump is left intact instead of being truncated.
    page_source = driver.page_source
    os.makedirs("data/tmp", exist_ok=True)
    with open("data/tmp/debug.html", "w", encoding='utf-8') as f:
        f.write(page_source)
def wait_for_id(id: str, delay: int = 5) -> None:
    """Wait until an element with the given id is present, best-effort.

    Args:
        id: DOM id to wait for. (The name shadows the builtin but is kept
            for caller compatibility.)
        delay: Timeout passed to ``WebDriverWait``.

    Any error raised while waiting (including the timeout) is reported on
    stdout and then ignored, so the caller proceeds with whatever page
    content is available.
    """
    try:
        WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.ID, id)))
    except Exception:
        # Deliberately broad, but not a bare `except:` - KeyboardInterrupt
        # and SystemExit must still be able to stop the process mid-wait.
        print(f"Element with id {id} not found, proceeding without.")
def get_from_selector(selector: str) -> str:
    """Collect the text of every element matching a CSS selector.

    Args:
        selector: CSS selector evaluated against the current page of the
            module-level ``driver``.

    Returns:
        The non-empty element texts, each followed by a newline, joined in
        the order the elements were found; ``''`` when nothing matches.
    """
    elements = driver.find_elements(By.CSS_SELECTOR, selector)
    # Read `.text` once per element (the original read it twice: once for
    # the check and once for the value), then join instead of `+=`.
    texts = (el.text for el in elements if el)
    return ''.join(f"{text}\n" for text in texts if text)
def get_links_from_selector(selector: str) -> list[str]:
    """Collect the ``href`` of every matching element that has visible text.

    Args:
        selector: CSS selector evaluated against the current page of the
            module-level ``driver``.

    Returns:
        The hrefs, in the order the elements were found. Elements with empty
        text are skipped (as before), and so are elements whose ``href`` is
        missing or empty, so the result contains only usable strings.
    """
    links = []
    for el in driver.find_elements(By.CSS_SELECTOR, selector):
        if not (el and el.text):
            continue
        href = el.get_attribute('href')
        # get_attribute may return None for an element without an href;
        # don't let that leak into the returned list.
        if href:
            links.append(href)
    return links
# Register quit_driver to run at interpreter exit so driver.quit() is called.
atexit.register(quit_driver)