当然, 只是原型, 但基本功能有了.
目前在刷回帖的时候会有乱码, 编码问题需要解决.
成果展示:
---------------------------------------------------------------
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Prototype auto-poster for the Baidu Tieba "python" board (Python 2.6+).

The script logs in through passport.baidu.com -- the verification image is
saved to ``pic.jpg``, displayed, and its code typed in by hand -- and then
submits a series of numbered test threads, pausing between submissions.

The pages are parsed as GBK and scraped form values are re-encoded to GBK
before being submitted, so every piece of text this script adds to a form
(user name, password, thread title) is encoded to GBK as well instead of
being sent as the UTF-8 bytes of this source file.
"""
import socket  # not used below; kept because the original script imported it
import time
import urllib

# Encoding used both to parse the pages and to encode submitted form values.
FORM_ENCODING = 'gbk'

LOGIN_URL = ("http://passport.baidu.com/?"
             "login&tpl=tb&u=http%3A//tieba.baidu.com/f%3Fkw%3Dpython")
VERIFY_PIC_URL = "https://passport.baidu.com/?verifypic"
BOARD_URL = "http://tieba.baidu.com/f?kw=python&t=1"
POST_URL = "http://tieba.baidu.com/f"

# Account settings: replace these placeholders with real credentials.
USERNAME = u'用户名'
PASSWORD = u'密码'

# Title and body prefixes of the test threads; the loop index is appended.
TITLE_PREFIX = u"Fledna 自爆专用~!!!!!!!!!!!!!!"
CONTENT_PREFIX = "For Test only. "

POST_COUNT = 20     # number of threads to submit
POST_INTERVAL = 20  # seconds to sleep after each submission


def encodeFormText(text, encoding=FORM_ENCODING):
    """Return *text* as a byte string suitable for a form field.

    Byte strings are returned unchanged (they are assumed to be encoded
    already); unicode text is encoded with *encoding*, silently dropping
    characters that the encoding cannot represent.
    """
    if isinstance(text, bytes):
        return text
    return text.encode(encoding, 'ignore')


def domToQueryDict(doc, formIndex=0):
    """Collect the submittable fields of one ``<form>`` into a dict.

    Args:
        doc: DOM document exposing ``getElementsByTagName``.
        formIndex: position of the wanted form among the document's forms.

    Returns:
        A dict mapping each field name to its value.  ``<input>`` and
        ``<textarea>`` elements are considered; elements without a ``name``
        and elements whose ``type`` is ``submit`` are skipped.  Values are
        GBK-encoded byte strings; a field without a ``value`` attribute maps
        to ``""``.

    Raises:
        IndexError: if the document has no form at *formIndex*.
    """
    form = doc.getElementsByTagName('form')[formIndex]
    # Build a fresh list rather than extending the one the DOM handed back.
    nodes = (list(form.getElementsByTagName('input')) +
             list(form.getElementsByTagName('textarea')))

    data = {}
    for node in nodes:
        if not node.hasAttribute('name'):
            continue
        if node.getAttribute('type') == u"submit":
            continue
        if node.hasAttribute('value'):
            data[node.getAttribute('name')] = \
                encodeFormText(node.getAttribute('value'))
        else:
            data[node.getAttribute('name')] = ""
    return data


def fetchDom(opener, url):
    """Download *url* through *opener* and parse it as GBK-encoded HTML."""
    # Third-party module: imported lazily so the form helpers above can be
    # imported without the DOM stack being installed.
    import libxml2dom

    return libxml2dom.parseString(opener.open(url).read(),
                                  html=1, htmlencoding=FORM_ENCODING)


def login(opener):
    """Submit the login form and return the server's response object.

    The verification image is written to ``pic.jpg`` in the current
    directory and displayed; the code is then read from standard input.
    The response is returned as-is: whether the login actually succeeded
    is not checked.
    """
    import Image  # PIL, imported lazily (see fetchDom)

    # Fetch the login page and start from the fields it pre-fills.
    data = domToQueryDict(fetchDom(opener, LOGIN_URL))

    # User name / password.
    data[u'username'] = encodeFormText(USERNAME)
    data[u'password'] = encodeFormText(PASSWORD)

    # Verification image handling.
    verifyPic = opener.open(VERIFY_PIC_URL)
    with open("pic.jpg", 'wb') as picfile:
        picfile.write(verifyPic.read())
    Image.open("pic.jpg").show()
    data["verifycode"] = raw_input("What you see?").strip()

    # Submit the login form.
    return opener.open(LOGIN_URL, urllib.urlencode(data))


def postThreads(opener, count=POST_COUNT, interval=POST_INTERVAL):
    """Submit *count* numbered test threads, sleeping *interval* seconds
    after each one.

    The board page is re-fetched before every submission and the second
    form on it (index 1) supplies the base fields of the request.
    """
    for i in xrange(count):
        # Analyse the board front page to obtain the posting form's fields.
        data = domToQueryDict(fetchDom(opener, BOARD_URL), 1)

        # Thread title and content.
        title = TITLE_PREFIX + str(i)
        data[u'ti'] = encodeFormText(title)
        data[u'co'] = CONTENT_PREFIX + str(i)

        # Progress line; written as UTF-8 bytes, as the original script did.
        print("* " + title.encode('utf-8'))

        opener.open(POST_URL, urllib.urlencode(data))
        time.sleep(interval)


def main():
    """Log in, dump the login response, then post the test threads."""
    import urllib2  # Python 2 only, imported lazily (see fetchDom)

    # A cookie-aware opener keeps the login session for later requests.
    cookie_handler = urllib2.HTTPCookieProcessor()
    opener = urllib2.build_opener(cookie_handler)
    urllib2.install_opener(opener)

    response = login(opener)
    print(response.read())
    print('-' * 20)

    postThreads(opener)


if __name__ == "__main__":
    main()
1 条评论:
找到这来了
发表评论