[mod] stackoverflow & yandex: detect CAPTCHA response

This commit is contained in:
Alexandre Flament 2020-12-03 13:23:19 +01:00
parent 7905d41487
commit fa909c7c02
2 changed files with 12 additions and 2 deletions

View File

@ -10,9 +10,10 @@
@parse url, title, content @parse url, title, content
""" """
from urllib.parse import urlencode, urljoin from urllib.parse import urlencode, urljoin, urlparse
from lxml import html from lxml import html
from searx.utils import extract_text from searx.utils import extract_text
from searx.exceptions import SearxEngineCaptchaException
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']
@ -37,6 +38,10 @@ def request(query, params):
# get response from search-request # get response from search-request
def response(resp): def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/nocaptcha'):
raise SearxEngineCaptchaException()
results = [] results = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)

View File

@ -9,9 +9,10 @@
@parse url, title, content @parse url, title, content
""" """
from urllib.parse import urlencode from urllib.parse import urlencode, urlparse
from lxml import html from lxml import html
from searx import logger from searx import logger
from searx.exceptions import SearxEngineCaptchaException
logger = logger.getChild('yandex engine') logger = logger.getChild('yandex engine')
@ -47,6 +48,10 @@ def request(query, params):
# get response from search-request # get response from search-request
def response(resp): def response(resp):
resp_url = urlparse(resp.url)
if resp_url.path.startswith('/showcaptcha'):
raise SearxEngineCaptchaException()
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
results = [] results = []