Using Selenium to solve a captcha

Anna Plym picture Anna Plym · Nov 20, 2019 · Viewed 10.3k times · Source

My code below keeps solving a different captcha! Please correct my mistake, as I don't know what's causing that.

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
import time
import requests

# Launch the Firefox session that Selenium drives for the rest of the script.
browser = webdriver.Firefox()

url = 'https://urlmased.com/'
browser.get(url)
# Fixed 10-second pause after navigation, before the form fields are looked up.
time.sleep(10)
# Fill in the login form (the element ids are masked in this post).
username = browser.find_element_by_id("masked")
username.send_keys("testuser")
password = browser.find_element_by_id("masked")
password.send_keys("testpass")

# Read the `src` attribute of the element assumed to be the captcha image.
image_link = browser.find_element_by_xpath(
    '//*[@id="masked"]').get_attribute('src')
# NOTE(review): this is a second, separate HTTP request issued by `requests`,
# not by the browser. The answer below reports that this URL generates captcha
# images dynamically, so the image saved here can differ from the one the
# browser is displaying.
pic = requests.get(image_link)
if pic.status_code == 200:
    with open("image.png", 'wb') as f:
        f.write(pic.content)
ANTICAPTCHA_KEY = 'masked'
captcha_file = "image.png"
# Hand the saved image file to the anti-captcha client and keep its result.
result = ImageToTextTask.ImageToTextTask(
    anticaptcha_key=ANTICAPTCHA_KEY).captcha_handler(captcha_file=captcha_file)

# Type the text found at result['solution']['text'] into the captcha field.
captcha = browser.find_element_by_id("masked")
captcha.send_keys(result['solution']['text'])
# The element with id "yw2" is looked up, but nothing is done with it in this
# snippet (no click is shown).
login = browser.find_element_by_id("yw2")

Be informed that the API is active, so you can use it until you reach a solution; I will change it afterwards.

Also, the solving accuracy is 100%.

Answer

Ahmed Soliman picture Ahmed Soliman · Nov 22, 2019

The issue here is that the captcha URL in the page source is not an actual image URL. It's a script that dynamically generates captcha images; therefore, when you use the captcha solver API, you are solving a different image than the one the browser loaded. To solve this, we have to save the same image the browser loaded. I traced the image request and found that it uses unique cookies that were generated when the browser loaded the page.

Using Selenium:

from selenium import webdriver
from python3_anticaptcha import ImageToTextTask, CallbackClient
from time import sleep
import requests



def GetImageCookies():
    """Build the ``Cookie`` header value for the captcha image request.

    Reads the cookies of the module-level ``browser`` and keeps only the ones
    named ``ssc`` and ``ghsdfkjlksssalk35bbr``, in the order the browser
    reports them.

    Returns:
        A string of ``name=value;`` pairs, or ``''`` when neither cookie is
        present.
    """
    print('Extracting Browser Cookies')
    # Output template per wanted cookie; any cookie not listed is ignored.
    templates = {
        'ssc': 'ssc={};',
        'ghsdfkjlksssalk35bbr': 'ghsdfkjlksssalk35bbr={};',
    }
    return ''.join(
        templates[entry['name']].format(entry['value'])
        for entry in browser.get_cookies()
        if entry['name'] in templates
    )

def SaveImage(captcha_file = "master.jpg"):
    """Download the captcha image for the current browser session and save it.

    The request sends the cookies returned by ``GetImageCookies()`` in its
    ``Cookie`` header so that the server is asked for the captcha belonging to
    the browser's session rather than a fresh one.

    Args:
        captcha_file: Path the downloaded image bytes are written to.

    Returns:
        True when the image was downloaded and written; False when the server
        answered with a status other than 200 (nothing is written in that
        case, so an older file at ``captcha_file`` is left untouched).

    Raises:
        requests.exceptions.RequestException: On connection errors or when the
            server does not respond within the timeout.
    """
    print('Saving the captcha image')
    header = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Cookie': GetImageCookies(),
        'Host': 'masked',
        'Referer': 'masked',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }

    # NOTE(review): verify=False turns off TLS certificate verification; it is
    # kept because the original relies on it, but confirm it is really needed.
    # The timeout stops the script from hanging forever on a stalled server.
    pic = requests.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=header,
        timeout=30,
    )
    if pic.status_code != 200:
        # Report the failure instead of silently leaving a stale or missing
        # file for the solver to pick up.
        print('Could not download the captcha image, HTTP status:', pic.status_code)
        return False
    with open(captcha_file, 'wb') as f:
        f.write(pic.content)
    return True

def SolveCapcha(captcha_file = "master.jpg"):
    """Send a saved captcha image to the anti-captcha client and return its text.

    Args:
        captcha_file: Path of the image file handed to the solver.

    Returns:
        The value stored at ``['solution']['text']`` in the solver's result.
    """
    print('Solving the captcha image')
    api_key = 'masked'
    solver = ImageToTextTask.ImageToTextTask(anticaptcha_key=api_key)
    response = solver.captcha_handler(captcha_file=captcha_file)
    solved_text = response['solution']['text']
    print('Captcha text is :', solved_text)
    return solved_text


# Start the Firefox session and open the site. `browser` is a module-level
# name that GetImageCookies() and Login() read directly.
browser = webdriver.Firefox()
url = 'https://masked/'
browser.get(url)
def Login():
    """Fill in and submit the login form once, solving the captcha on the way.

    Uses the module-level ``browser`` together with ``SaveImage`` and
    ``SolveCapcha``.

    Returns:
        False when, after submitting, the first matched error element reads
        'The verification code is incorrect.'; True otherwise.
    """
    SaveImage()
    sleep(5)

    username_box = browser.find_element_by_id("masked_username")
    username_box.clear()
    username_box.send_keys("testuser")

    password_box = browser.find_element_by_id("masked")
    password_box.clear()
    password_box.send_keys("testpass")

    # The captcha field is cleared before the solver runs, then filled in.
    captcha_box = browser.find_element_by_id("masked")
    captcha_box.clear()
    captcha_box.send_keys(SolveCapcha())

    browser.find_element_by_id("masked").click()
    sleep(5)

    # The lookup result is falsy when nothing matched, so the first match is
    # only inspected when there is one.
    errors = browser.find_elements_by_id('masked')
    wrong_captcha = bool(errors) and errors[0].text == 'The verification code is incorrect.'
    if not wrong_captcha:
        return True
    print(errors[0].text)
    return False


# The captcha image is downloaded using the browser's cookies, but sometimes
# the letters are too hard to solve. Each time the image is downloaded with the
# same cookies its content (the text) is the same while the way it is drawn
# differs, so keep trying until an attempt gets it right.
#
# Login() is called only in the loop condition so that every attempt's result
# is checked; calling it again in the body would discard that attempt's result
# and trigger one more login attempt even after a success.
while not Login():
    pass

Using Requests and Beautiful Soup: the following is the idea; I'm not sure it works, so you will have to test it yourself:

from bs4 import BeautifulSoup
def SaveImage(captcha_file = "master.jpg"):
    """Download the captcha image through the shared session and save it.

    The request goes through the module-level ``session`` so that it is made
    with whatever cookies that session already holds.

    Args:
        captcha_file: Path the downloaded image bytes are written to.

    Returns:
        True when the image was downloaded and written; False when the server
        answered with a status other than 200 (nothing is written in that
        case, so an older file at ``captcha_file`` is left untouched).

    Raises:
        requests.exceptions.RequestException: On connection errors or when the
            server does not respond within the timeout.
    """
    print('Saving the captcha image')
    header = {
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en,en-US;q=0.9,ar;q=0.8',
        'Host': 'masked',
        'Referer': 'https://masked/',
        'Sec-Fetch-Mode': 'no-cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    }

    # NOTE(review): verify=False turns off TLS certificate verification; it is
    # kept because the original relies on it, but confirm it is really needed.
    # The timeout stops the script from hanging forever on a stalled server.
    pic = session.get(
        'https://masked/site/captcha/v/',
        verify=False,
        headers=header,
        timeout=30,
    )
    if pic.status_code != 200:
        # Report the failure instead of silently leaving a stale or missing
        # file for the solver to pick up.
        print('Could not download the captcha image, HTTP status:', pic.status_code)
        return False
    with open(captcha_file, 'wb') as f:
        f.write(pic.content)
    return True

# A single Session is shared by every request in this snippet; SaveImage()
# reads the module-level `session` created here.
with requests.Session() as session:
    source      = session.get(url = 'https://masked/',verify=False) # To get the initial cookies
    soup        = BeautifulSoup(source.text, 'html.parser')
    # Read the value of the <input> whose name is "masked" (the real field name
    # is masked in this post). soup.find() returns None when there is no match,
    # in which case .get() raises AttributeError.
    token       = soup.find('input', {'name': 'masked'}).get('value')
    SaveImage()
    # NOTE(review): SolveCapcha is not defined in this snippet; presumably the
    # function from the Selenium example above is meant.
    captcha_text = SolveCapcha()
    # NOTE(review): because the field names are masked, the first and last keys
    # below are the same string ("masked"), so as written the last entry
    # overwrites the token. Substitute the site's real field names before use.
    post_data={"masked": token,
                'masked[username]': 'testuser',
                'masked[password]': 'testpass',
                'masked[captcha]': captcha_text,
                'masked':''}
    # The response of the login POST is not inspected here.
    session.post('https://masked/', data=post_data,verify=False)