How can I programmatically upload captcha images to this solving service in Python (HTML given)?

clj picture clj · Apr 29, 2015 · Viewed 7.4k times · Source

I am writing a python program that needs to be able to solve captchas on a web site. I would like to use 2captcha. I have written a python script using selenium that will do everything I need to do except solve the captchas. When I click on the 2captcha.com "API" tab, this (along with other parameters) is what is shown:

You can upload your CAPTCHAs in two available formats:

Multipart and Base64:

Multipart sample:

<form method="post" action="http://2captcha.com/in.php" enctype="multipart/form-data">
<input type="hidden" name="method" value="post">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file:
<input type="file" name="file">
<input type="submit" value="download and get the ID">
</form>

YOUR_APIKEY - is your key of 32 symbols length.

Base64 Sample:

<form method="post" action="http://2captcha.com/in.php">
<input type="hidden" name="method" value="base64">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file body in base64 format:
<textarea name="body">BASE64_FILE</textarea>
<input type="submit" value="download and get the ID">
</form>

YOUR_APIKEY - is your key of 32 symbols length.

BASE64_FILE - is the base 64 encoded image body.

I know Python and most of its scientific and mathematical modules well, but I am a bit new to web-related programming. The code above looks like HTML. How would I make a Python program carry out the HTML instructions above?

Answer

Zaliko Panjakidze picture Zaliko Panjakidze · Oct 28, 2015

I'm from the 2captcha team, and we have Python samples. You can use this code:

"""

This is a sample showing how to pass Google reCAPTCHA v2. I have used Python + Selenium + PhantomJS to do this. PhantomJS is a headless browser used in automated web testing. I'm using it to scrape pages and bypass Google captchas. To use this you should have PhantomJS installed (built from source on a Linux system) and the selenium Python module installed.

Google captchas appear on a page in iframes. You should scrape their elements, click on the images, and check for error messages after clicking OK.

"""

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import *
import re
import os
import sys
import time
import random
import requests
from PIL import Image

class capcha_resolver:
    """Drive a PhantomJS browser through the Google reCAPTCHA v2 demo page.

    The image challenge is screenshotted, uploaded to the 2captcha service,
    and the returned answer is used to click the matching image tiles.

    Methods report their outcome the way the calling script expects:
    ``0`` on success and ``-1`` on failure.
    """

    DEMO_URL = 'https://www.google.com/recaptcha/api2/demo'
    SUBMIT_URL = 'http://2captcha.com/in.php'
    RESULT_URL = 'http://2captcha.com/res.php'
    POLL_ATTEMPTS = 40  # how many times the answer is requested before giving up
    POLL_INTERVAL = 3   # seconds to wait between two answer requests

    def __init__(self, captchakey, proxy=None, phantomjs_path='/bin/phantomjs'):
        """Start the PhantomJS driver.

        :param captchakey: 2captcha API key.
        :param proxy: optional proxy address passed to PhantomJS as ``--proxy``.
        :param phantomjs_path: location of the PhantomJS executable; defaults
            to ``/bin/phantomjs`` as in the original sample.
        """
        self.TWOCAPTCHA_API_KEY = captchakey
        # Always defined, so reading the attribute never raises AttributeError.
        self.PROXY = proxy
        phantom_args = []
        if proxy:
            # NOTE(review): '--proxy-type' is passed twice, as in the original
            # sample; confirm against the PhantomJS CLI docs which one applies.
            phantom_args = ['--proxy=' + proxy, '--proxy-type=http', '--proxy-type=https']
        self.driver = webdriver.PhantomJS(phantomjs_path, service_args=phantom_args)
        self.driver.set_page_load_timeout(20)

    def close(self):
        """Shut down the browser started by ``__init__``."""
        self.driver.quit()

    def fail(self, msg):
        """Print an error message and save a screenshot to ``error.png``."""
        print("[!] Error: " + msg)
        self.driver.save_screenshot('error.png')

    def get_page(self):
        """Open the reCAPTCHA demo page and save a screenshot to ``page.png``.

        :return: ``0`` when the page was loaded, ``-1`` when the driver raised
            (for example because the page load timeout expired).
        """
        try:
            self.driver.get(self.DEMO_URL)
        except WebDriverException:
            # The caller tests for -1, so report the failure instead of crashing.
            return -1
        self.driver.save_screenshot('page.png')
        return 0

    def send_capcha(self, filename):
        """Upload an image to 2captcha and wait for the answer.

        :param filename: path of the image file to upload.
        :return: list of digit characters taken from the answer, or ``[]``
            when the upload is rejected, the service reports an error, or no
            answer arrives within ``POLL_ATTEMPTS`` requests.
        """
        data = {'key': self.TWOCAPTCHA_API_KEY, 'method': 'post'}
        # The context manager closes the file even if the upload raises.
        with open(filename, 'rb') as image_file:
            r = requests.post(self.SUBMIT_URL, files={'file': image_file}, data=data)

        # Match the 'OK|<id>' prefix exactly: a plain substring search for
        # 'OK' also matches error codes such as 'ERROR_TOKEN_EXPIRED'.
        status, _sep, reqid = r.text.strip().partition('|')
        if not r.ok or status != 'OK' or not reqid:
            return []
        print("[+] Capcha id: " + reqid)

        params = {'key': self.TWOCAPTCHA_API_KEY, 'action': 'get', 'id': reqid}
        for _attempt in range(self.POLL_ATTEMPTS):
            r = requests.get(self.RESULT_URL, params=params)
            status, _sep, answer = r.text.strip().partition('|')
            if status == 'OK':
                # Keep digits only so separators or stray whitespace in the
                # answer cannot break the int() conversion done by the caller.
                return [char for char in answer if char.isdigit()]
            if 'ERROR' in r.text:
                return []
            if 'CAPCHA_NOT_READY' in r.text:
                print(r.text)
            # Wait before every retry, including after an unexpected reply.
            time.sleep(self.POLL_INTERVAL)
        return []

    def bypass_captcha(self):
        """Tick the reCAPTCHA checkbox and solve the image challenge.

        Google recaptcha could be found by id. Frame with checkbox has id
        which starts with I0, recapcha frame has id with I1.

        :return: ``0`` when the checkbox ends up checked, ``-1`` otherwise.
        """
        try:
            return self._attempt_challenge()
        finally:
            # Several failure paths return while an iframe is selected; always
            # hand the driver back focused on the top-level document.
            self.driver.switch_to.default_content()

    def _attempt_challenge(self):
        """Run one checkbox-click / solve / verify cycle; return 0 or -1."""
        checkbox_frame = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I0")]')
        self.driver.switch_to.frame(checkbox_frame)
        time.sleep(1)
        self.driver.find_element_by_id('recaptcha-anchor').click()
        print("[*] Clicked on checkbox")
        time.sleep(2)
        self.driver.switch_to.default_content()

        challenge_frame = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I1")]')
        # use jpeg because png images can exceed 2capcha file size limit
        imgname = 'capcha.jpeg'
        self._capture_element(challenge_frame, imgname)

        numbers = self.send_capcha(imgname)
        if not numbers:
            return -1

        self.driver.switch_to.frame(challenge_frame)
        images = self._challenge_images()
        if not images:
            self.fail("Found no captcha images")
            return -1

        print("[*] Got answer : " + str(numbers))
        indexes = [int(number) - 1 for number in numbers]
        # Answers are 1-based. Without this check a '0' would become index -1
        # and click the last image, and a too-large digit would raise.
        if any(index < 0 or index >= len(images) for index in indexes):
            self.fail("Answer %s does not match the %d images found" % (numbers, len(images)))
            return -1
        for index in indexes:
            images[index].click()
            print('[+] clicked on image ' + str(index))
        self.driver.save_screenshot('res.png')
        self.driver.find_element_by_id('recaptcha-verify-button').click()
        print("[*] Clicked verify button")
        time.sleep(2)

        error_selectors = (
            '.rc-imageselect-incorrect-response',
            '.rc-imageselect-error-select-one',
            '.rc-imageselect-error-select-more',
        )
        if any(self.driver.find_element_by_css_selector(selector).is_displayed()
               for selector in error_selectors):
            self.fail("Incorrect answer from 2captcha")
            return -1
        self.driver.switch_to.default_content()

        self.driver.switch_to.frame(checkbox_frame)
        checkbox = self.driver.find_element_by_css_selector('.recaptcha-checkbox')
        if checkbox.get_attribute('aria-checked') == 'false':
            self.fail("Capctha not passed")
            return -1
        self.driver.switch_to.default_content()
        self.driver.save_screenshot('passed.png')
        return 0

    def _capture_element(self, element, imgname):
        """Screenshot the page and crop the saved file down to *element*."""
        location = element.location
        size = element.size
        bounding_box = (
            location['x'],                   # left
            location['y'],                   # upper
            location['x'] + size['width'],   # right
            location['y'] + size['height'],  # bottom
        )
        time.sleep(2)
        self.driver.save_screenshot(imgname)
        screenshot = Image.open(imgname)
        # JPEG has no alpha channel and current Pillow refuses to save RGBA as
        # JPEG. convert() also loads the pixels fully, so overwriting the file
        # that was just opened is safe.
        screenshot.convert('RGB').crop(bounding_box).save(imgname)

    def _challenge_images(self):
        """Return the <img> elements of the challenge table in row order."""
        table = self.driver.find_element_by_css_selector('.rc-imageselect-table-3')
        images = []
        for row in table.find_elements_by_tag_name('tr'):
            for cell in row.find_elements_by_tag_name('td'):
                images.append(cell.find_element_by_tag_name('img'))
        return images

# Command-line entry point: python resolver.py 2CAPCHA_API_KEY [PROXY]
if len(sys.argv) < 2:
    print("Usage: python resolver.py 2CAPCHA_API_KEY [PROXY]")
    # Stop here: continuing would raise IndexError on sys.argv[1] below.
    sys.exit(1)
proxy = sys.argv[2] if len(sys.argv) > 2 else None
resolver = capcha_resolver(sys.argv[1], proxy)

try:
    if resolver.get_page() == -1:
        print("[!] Error while getting page")
        # There is no page to solve a captcha on, so do not go any further.
        sys.exit(1)
    print("[+] Opened URL")

    if resolver.bypass_captcha() == -1:
        print("[!] Error on captcha resolving")
        sys.exit(1)
    print("[+] Resolved captcha")
finally:
    # Shut the browser down on every exit path so no PhantomJS process is
    # left running in the background.
    resolver.driver.quit()

Good luck!