"""Client for a CAPTCHA implementation which allows multiple variants
of the same CAPTCHA word to be fetched by the same client."""
import urllib, re, os, string, operator
# Port on localhost used to build every URL this client requests
# (the CAPTCHA page, the CAPTCHA images and the answer submission).
PORT = 8003
def solve(img_url):
    """Naive OCR-based solving attempt.

    Downloads the image at ``img_url`` to /tmp/captcha.jpg, converts it to a
    grayscale, level-adjusted, depth-1 BMP with ``convert``, turns that into
    an uncompressed TIFF with ``bmp2tiff`` and runs ``tesseract`` on the
    result.

    Returns up to the first 16 characters of /tmp/captcha.txt, stripped of
    surrounding whitespace and lower-cased.

    NOTE: the intermediate files live at fixed paths under /tmp, so
    concurrent runs would overwrite each other's files. The exit statuses of
    the external tools are not checked; if /tmp/captcha.txt cannot be opened
    the error from ``open`` propagates to the caller.
    """
    webf = urllib.urlopen(img_url)
    try:
        image_data = webf.read()
    finally:
        # Always release the HTTP connection, even if the read fails.
        webf.close()

    # The image is binary data, so the file must be opened in binary mode;
    # text mode can mangle the bytes on some platforms.
    with open('/tmp/captcha.jpg', 'wb') as outf:
        outf.write(image_data)

    os.system("convert /tmp/captcha.jpg -colorspace Gray -level 0,20%,0.1 -depth 1 /tmp/captcha.bmp")
    os.system("bmp2tiff -c none /tmp/captcha.bmp /tmp/captcha.tif")
    os.system("tesseract /tmp/captcha.tif /tmp/captcha -l eng > /dev/null 2>&1")

    # tesseract is given the output base /tmp/captcha; the recognised text is
    # read back from /tmp/captcha.txt.
    with open('/tmp/captcha.txt') as solf:
        return solf.read(16).strip().lower()
def solve_voting(img_url):
    """Solve a CAPTCHA by voting between repeated OCR attempts.

    Calls ``solve(img_url)`` up to 20 times and counts how often each result
    appears. As soon as one result has been seen 3 times it is returned
    immediately; otherwise the result with the highest count after all
    attempts is returned.
    """
    tally = {}
    for _ in range(20):
        guess = solve(img_url)
        seen = tally.get(guess, 0) + 1
        tally[guess] = seen
        if seen >= 3:
            # Enough agreement: stop early.
            return guess
    # No early winner: fall back to the most frequent result.
    return max(tally.items(), key=operator.itemgetter(1))[0]
for i in range(20):
webf = urllib.urlopen('http://localhost:%d' % PORT)
# Look for the CAPTCHA id.
test = re.search('<img src="(.*).jpg"/>', webf.read())
if test != None:
captcha_id = test.groups()[0]
# Solve the CAPTCHA with voting between potential results.
solution = solve_voting('http://localhost:%d/%s.jpg' % (PORT, captcha_id))
params = {
'id': captcha_id,
'word': solution
}
webf = urllib.urlopen('http://localhost:%d' % PORT, urllib.urlencode(params))
print i, webf.read(),