mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-04-30 19:31:20 +00:00
Add links to search results
This commit is contained in:
@@ -4,6 +4,8 @@ from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
||||
from selenium.webdriver.chrome.service import Service as ChromeService
|
||||
from selenium.webdriver.firefox.service import Service as FirefoxService
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from modules.utils import is_colab
|
||||
import atexit
|
||||
|
||||
@@ -33,41 +35,65 @@ def get_driver():
|
||||
return webdriver.Firefox(service=firefoxService, options=options)
|
||||
|
||||
|
||||
def search_google(query: str) -> tuple[str, list[str]]:
    """Search Google for *query* and scrape the results page.

    Args:
        query: Raw (not URL-encoded) search terms.

    Returns:
        A ``(text, links)`` tuple. ``text`` is the concatenation of the text
        found under each result selector listed below (one line per matching
        element); ``links`` holds the hrefs collected from ``.yuRUbf a``.
    """
    # Imported locally so the fix does not depend on the module's import block.
    from urllib.parse import quote_plus

    print(f"Searching Google for {query}...")
    # Encode the query so characters such as '&', '#' or '+' remain part of
    # the search terms instead of changing the structure of the URL.
    driver.get("https://google.com/search?hl=en&q=" + quote_plus(query))
    wait_for_id('res')
    save_debug()
    # Selectors are scraped in this order, which fixes the order of the output.
    selectors = (
        '.wDYxhc',         # Answer box
        '.hgKElc',         # Knowledge panel
        '.r025kc.lVm3ye',  # Page snippets
        '.yDYNvb.lyLwlc',  # Old selectors (for compatibility)
    )
    text = ''.join(get_from_selector(selector) for selector in selectors)
    # Links
    links = get_links_from_selector('.yuRUbf a')
    print("Found: " + text, links)
    return (text, links)
|
||||
|
||||
|
||||
def search_duckduckgo(query: str) -> tuple[str, list[str]]:
    """Search DuckDuckGo for *query* and scrape the results page.

    Args:
        query: Raw (not URL-encoded) search terms.

    Returns:
        A ``(text, links)`` tuple. ``text`` is the text of every
        ``[data-result="snippet"]`` element (one line per element); ``links``
        holds the hrefs of the ``[data-testid="result-title-a"]`` anchors.
    """
    # Imported locally so the fix does not depend on the module's import block.
    from urllib.parse import quote_plus

    print(f"Searching DuckDuckGo for {query}...")
    # Encode the query so characters such as '&', '#' or '+' remain part of
    # the search terms instead of changing the structure of the URL.
    driver.get("https://duckduckgo.com/?kp=-2&kl=wt-wt&q=" + quote_plus(query))
    wait_for_id('web_content_wrapper')
    save_debug()
    text = get_from_selector('[data-result="snippet"]')
    links = get_links_from_selector('[data-testid="result-title-a"]')
    print("Found: " + text, links)
    return (text, links)
|
||||
|
||||
# Browser driver shared by the search and helper functions in this file.
driver = get_driver()
|
||||
|
||||
def quit_driver():
    """Quit the shared browser driver."""
    driver.quit()
|
||||
|
||||
def save_debug() -> None:
    """Dump the current page source to ``data/tmp/debug.html``.

    The target directory is created when missing so that writing the debug
    dump cannot abort a search with ``FileNotFoundError``.
    """
    # Imported locally so the fix does not depend on the module's import block.
    import os

    debug_path = "data/tmp/debug.html"
    os.makedirs(os.path.dirname(debug_path), exist_ok=True)
    with open(debug_path, "w", encoding='utf-8') as f:
        f.write(driver.page_source)
|
||||
|
||||
def wait_for_id(id: str, delay: int = 5) -> None:
    """Wait until an element with the given id is present on the page.

    Best-effort: when the wait fails, a message is printed and the caller
    continues with whatever the page currently contains.

    Args:
        id: Value of the element's ``id`` attribute to wait for.
        delay: Timeout passed to ``WebDriverWait``.
    """
    try:
        WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.ID, id)))
    except Exception:
        # Deliberately broad to keep the best-effort behaviour, but unlike a
        # bare ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        print(f"Element with id {id} not found, proceeding without.")
|
||||
|
||||
def get_from_selector(selector: str) -> str:
    """Collect the text of every element matching a CSS selector.

    Args:
        selector: CSS selector looked up on the current page.

    Returns:
        The non-empty element texts, each followed by a newline, concatenated
        in document order; an empty string when nothing matches.
    """
    elements = driver.find_elements(By.CSS_SELECTOR, selector)
    # Read ``.text`` once per element: each access is a request to the browser.
    texts = (el.text for el in elements if el)
    return ''.join(content + '\n' for content in texts if content)
|
||||
|
||||
def get_links_from_selector(selector: str) -> list[str]:
    """Collect the ``href`` of every matching element that has text.

    Args:
        selector: CSS selector looked up on the current page.

    Returns:
        The href values in document order. Elements without text, or without
        an href, are skipped so the list only ever contains strings.
    """
    links = []
    for el in driver.find_elements(By.CSS_SELECTOR, selector):
        if not (el and el.text):
            continue
        href = el.get_attribute('href')
        # get_attribute returns None when the attribute is absent; do not
        # hand None entries to callers expecting URLs.
        if href:
            links.append(href)
    return links
|
||||
|
||||
# Register quit_driver to run when the interpreter exits.
atexit.register(quit_driver)
|
||||
|
||||
Reference in New Issue
Block a user