# Mirror of https://github.com/SillyTavern/SillyTavern-Extras.git
# (synced 2026-02-08 07:20:13 +00:00; 100 lines, 3.3 KiB, Python)
from selenium import webdriver
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
|
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
from selenium.webdriver.firefox.service import Service as FirefoxService
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from modules.utils import is_colab
|
|
import atexit
|
|
|
|
|
|
def get_driver():
    """Create a headless Selenium WebDriver, preferring Chrome over Firefox.

    Chrome is tried first. If starting it fails with any ordinary exception,
    a headless Firefox session is started instead; a failure to start Firefox
    propagates to the caller.

    Returns:
        A started ``webdriver.Chrome`` or ``webdriver.Firefox`` instance.
    """
    import shutil

    try:
        print("Initializing Chrome driver...")
        options = ChromeOptions()
        options.add_argument('--disable-infobars')
        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument("--lang=en-GB")

        if is_colab():
            # The driver location must go through Service: passing it as the
            # first positional argument of webdriver.Chrome is rejected by
            # newer Selenium 4 releases. Resolve it to a real path so the
            # lookup does not depend on the current working directory.
            chromedriver_path = shutil.which('chromedriver')
            if chromedriver_path:
                chromeService = ChromeService(executable_path=chromedriver_path)
            else:
                chromeService = ChromeService()
        else:
            chromeService = ChromeService()
        return webdriver.Chrome(service=chromeService, options=options)
    except Exception:
        # `Exception` rather than a bare `except:` so Ctrl-C and SystemExit
        # are not mistaken for "Chrome is unavailable".
        print("Chrome not found, using Firefox instead.")
        firefoxService = FirefoxService()
        options = FirefoxOptions()
        options.add_argument("--headless")
        options.set_preference("intl.accept_languages", "en,en_US")
        return webdriver.Firefox(service=firefoxService, options=options)
|
|
|
|
|
|
def search_google(query: str) -> tuple[str, list[str]]:
    """Run a Google search in the shared browser and scrape the result page.

    The page is loaded, the function waits (best-effort) for the element with
    id ``res``, dumps the page via ``save_debug()``, and then collects text
    and links with a fixed set of CSS selectors.

    Args:
        query: Raw search text. It is URL-encoded here, so callers should pass
            it unescaped.

    Returns:
        ``(text, links)``: the concatenated text of all matched elements and
        the list of ``href`` values of the matched result links.
    """
    from urllib.parse import quote_plus

    print(f"Searching Google for {query}...")
    # Escape the query so characters such as '&', '#' or '+' stay part of the
    # search text instead of being parsed as URL syntax.
    driver.get("https://google.com/search?hl=en&q=" + quote_plus(query))
    wait_for_id('res')
    save_debug()

    text_selectors = (
        '.wDYxhc',         # Answer box
        '.hgKElc',         # Knowledge panel
        '.r025kc.lVm3ye',  # Page snippets
        '.yDYNvb.lyLwlc',  # Old selectors (for compatibility)
    )
    text = ''.join(get_from_selector(selector) for selector in text_selectors)
    # Links
    links = get_links_from_selector('.yuRUbf a')
    print("Found: " + text, links)
    return (text, links)
|
|
|
|
|
|
def search_duckduckgo(query: str) -> tuple[str, list[str]]:
    """Run a DuckDuckGo search in the shared browser and scrape the results.

    The page is loaded, the function waits (best-effort) for the element with
    id ``web_content_wrapper``, dumps the page via ``save_debug()``, and then
    collects result snippets and result-title links.

    Args:
        query: Raw search text. It is URL-encoded here, so callers should pass
            it unescaped.

    Returns:
        ``(text, links)``: the concatenated snippet text and the list of
        ``href`` values of the matched result titles.
    """
    from urllib.parse import quote_plus

    print(f"Searching DuckDuckGo for {query}...")
    # Escape the query so characters such as '&', '#' or '+' stay part of the
    # search text instead of being parsed as URL syntax.
    driver.get("https://duckduckgo.com/?kp=-2&kl=wt-wt&q=" + quote_plus(query))
    wait_for_id('web_content_wrapper')
    save_debug()
    text = get_from_selector('[data-result="snippet"]')
    links = get_links_from_selector('[data-testid="result-title-a"]')
    print("Found: " + text, links)
    return (text, links)
|
|
|
|
# Shared WebDriver session. It is created once, when this module is imported,
# and is used by the search and scraping helpers in this file.
driver = get_driver()
|
|
|
|
def quit_driver():
    """Shut down the browser session held in the module-level ``driver``."""
    driver.quit()
|
|
|
|
def save_debug():
    """Write the current page's HTML source to ``data/tmp/debug.html``.

    The file is overwritten on every call. The parent directory is created
    first if it is missing, so a fresh checkout does not fail the search with
    ``FileNotFoundError``.
    """
    import os

    debug_path = "data/tmp/debug.html"
    os.makedirs(os.path.dirname(debug_path), exist_ok=True)
    with open(debug_path, "w", encoding='utf-8') as f:
        f.write(driver.page_source)
|
|
|
|
def wait_for_id(id: str, delay: int = 5):
    """Wait for an element with the given DOM id to be present on the page.

    This is best-effort: if the wait fails (for example because it times
    out), a message is printed and the function returns normally so the
    caller can continue scraping whatever has loaded.

    Args:
        id: The element id to wait for.
        delay: Timeout handed to ``WebDriverWait``.
    """
    try:
        WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.ID, id)))
    except Exception:
        # `Exception` rather than a bare `except:` so KeyboardInterrupt and
        # SystemExit still stop the program.
        print(f"Element with id {id} not found, proceeding without.")
|
|
|
|
def get_from_selector(selector: str) -> str:
    """Return the text of every element matching a CSS selector.

    Args:
        selector: CSS selector evaluated against the current page.

    Returns:
        The non-empty ``.text`` of each matching element, each followed by a
        newline, in document order. An empty string if nothing matched.
    """
    chunks = []
    for el in driver.find_elements(By.CSS_SELECTOR, selector):
        # Read `.text` once per element: each access is a separate request to
        # the browser.
        el_text = el.text if el else ''
        if el_text:
            chunks.append(el_text + '\n')
    return ''.join(chunks)
|
|
|
|
def get_links_from_selector(selector: str):
    """Collect the ``href`` of each element matching a CSS selector.

    Only elements whose ``.text`` is non-empty contribute a link; the value
    appended is whatever ``get_attribute('href')`` returns for that element.
    """
    matches = driver.find_elements(By.CSS_SELECTOR, selector)
    return [node.get_attribute('href') for node in matches if node and node.text]
|
|
|
|
# Close the shared browser session when the interpreter exits.
atexit.register(quit_driver)
|