I want to log in to Reddit.com, navigate to a particular page, and submit a comment. I don't see what's wrong with this code, but it is not working: no change is reflected on the Reddit site.
import mechanize
import cookielib
def main():
    """Log in to Reddit with mechanize, open a post, and submit a comment form."""
    browser = mechanize.Browser()

    # Keep the cookies the site sets in a jar attached to the browser.
    jar = cookielib.LWPCookieJar()
    browser.set_cookiejar(jar)

    # Switch on the equiv, gzip, redirect and referer handlers.
    for enable in (
        browser.set_handle_equiv,
        browser.set_handle_gzip,
        browser.set_handle_redirect,
        browser.set_handle_referer,
    ):
        enable(True)
    # robots.txt handling is switched off.
    browser.set_handle_robots(False)

    # Refresh handling capped with max_time=1 (original note: follows
    # refresh 0 but does not hang on refresh > 0).
    refresh_processor = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_processor, max_time=1)

    # Load the front page; the body is read but not used afterwards.
    front_page = browser.open('http://www.reddit.com')
    front_page_html = front_page.read()

    # The second form on the page (index one) receives the credentials.
    browser.select_form(nr=1)
    login_form = browser.form
    login_form['user'] = 'DUMMYUSERNAME'
    login_form['passwd'] = 'DUMMYPASSWORD'
    browser.submit()

    # Load the comment page; again the body is read but not used.
    comment_page = browser.open(
        'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
    )
    comment_page_html = comment_page.read()

    # The eighth form (index seven) is believed to hold the comment text area.
    browser.select_form(nr=7)
    browser.form['text'] = "this is an automated test"

    # Send the form as-is.
    browser.submit()
What's wrong with this?
I would definitely suggest trying to use the API if possible, but this works for me (not for your example post, which has been deleted, but for any active one):
#!/usr/bin/env python
import mechanize
import cookielib
import urllib
import logging
import sys
def main():
    """Log in to Reddit and post a comment via a direct POST to /api/comment.

    The comment form on the page is not submitted. Instead, the values the
    endpoint needs are read from the page's forms and sent in a hand-built
    request to http://www.reddit.com/api/comment.

    Returns:
        The response object produced by ``mechanize.urlopen`` for the comment
        POST, so the caller can inspect what the site answered.
    """
    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    br.open('http://www.reddit.com')
    # Select the second (index one) form
    br.select_form(nr=1)
    # User credentials
    br.form['user'] = 'user'
    br.form['passwd'] = 'passwd'
    # Login
    br.submit()

    # Open up comment page
    posting = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
    # you can get the rval in other ways, but this will work for testing
    rval = 'PoopSandwiches'
    br.open(posting)

    # You need the 'uh' value from the first form
    br.select_form(nr=0)
    uh = br.form['uh']

    br.select_form(nr=7)
    thing_id = br.form['thing_id']
    # The id that gets posted is the form id with a '#' prepended.
    form_id = '#' + br.form.attrs['id']

    # A list of pairs (rather than a dict) keeps the fields in a fixed order.
    fields = [
        ('thing_id', thing_id),
        ('text', "Your text here!"),
        ('id', form_id),
        ('r', rval),
        ('uh', uh),
        ('renderstyle', 'html'),
    ]
    # urlencode percent-escapes each value and writes spaces as '+', so no
    # manual quoting or '%20' replacement is needed.
    new_data = urllib.urlencode(fields)

    # not sure which of these headers are really needed, but it works with all
    # of them, so why not just include them.
    req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
    req.add_header('Referer', posting)
    req.add_header('Accept', ' application/json, text/javascript, */*')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    # The request goes through mechanize.urlopen rather than through br, so
    # the session cookies from the jar are attached to it explicitly.
    cj.add_cookie_header(req)
    return mechanize.urlopen(req)
# Run the login-and-comment flow only when executed as a script, so that
# importing this module does not trigger network requests.
if __name__ == '__main__':
    main()
It would be interesting to turn JavaScript off and see how the Reddit comments are handled then. Right now there is a bunch of magic that happens in an onsubmit function called when making your post. This is where the uh and id values get added.