当然, 只是原型, 但基本功能有了.
目前在刷回帖的时候会有乱码, 编码问题需要解决.
成果展示:
---------------------------------------------------------------
---------------------------------------------------------------
#!/usr/bin/python
# -*- coding:utf-8 -*-
import urllib2
import urllib
import socket
import libxml2dom
import Image
import time
def domToQueryDict(doc, formIndex=0, encoding='gbk'):
    """Collect the submittable fields of one HTML form into a dict.

    Looks up the ``formIndex``-th ``<form>`` element of ``doc`` and gathers
    every ``<input>`` and ``<textarea>`` element inside it that carries a
    ``name`` attribute, skipping submit buttons.

    Args:
        doc: DOM document (or node) exposing ``getElementsByTagName``.
        formIndex: zero-based position of the wanted form in the document.
        encoding: codec used to encode each field value into bytes.
            Characters the codec cannot represent are silently dropped.

    Returns:
        A dict mapping each field name to its encoded ``value`` attribute,
        or to ``""`` when the element has no ``value`` attribute.

    Raises:
        IndexError: if the document has no form at ``formIndex``.
    """
    form = doc.getElementsByTagName('form')[formIndex]

    # Build a fresh list rather than extending the one handed back by the
    # DOM call, so the caller's/DOM's node list is never mutated.
    nodes = list(form.getElementsByTagName('input'))
    nodes += form.getElementsByTagName('textarea')

    data = {}
    for node in nodes:
        if not node.hasAttribute('name'):
            continue
        # HTML "type" values are case-insensitive, so "Submit"/"SUBMIT"
        # must be skipped as well; a missing type is treated as non-submit.
        if (node.getAttribute('type') or u"").lower() == u"submit":
            continue
        name = node.getAttribute('name')
        if node.hasAttribute('value'):
            data[name] = node.getAttribute('value').encode(encoding, 'ignore')
        else:
            data[name] = ""
    return data
cookie_handler = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)
# 获得登陆数据
url = "http://passport.baidu.com/?" + \
"login&tpl=tb&u=http%3A//tieba.baidu.com/f%3Fkw%3Dpython"
doc = libxml2dom.parseString(opener.open(url).read(), html=1,
htmlencoding='gbk')
data = domToQueryDict(doc)
# 用户名/密码设置
data[u'username'] = '用户名'
data[u'password'] = '密码'
# 验证图片处理
veryfyPic = opener.open("https://passport.baidu.com/?verifypic")
picfile = file("pic.jpg", 'wb')
picfile.write(veryfyPic.read())
picfile.close()
im = Image.open("pic.jpg")
im.show()
data["verifycode"] = raw_input("What you see?").strip()
# 登陆页面
data = urllib.urlencode(data)
b = opener.open(url, data) # here login OK
print b.read()
print '-'*20
for i in xrange(20):
# 分析主页数据
# url = "http://tieba.baidu.com/f?kz=543530949"
b = opener.open("http://tieba.baidu.com/f?kw=python&t=1")
frontpage = b.read()
doc = libxml2dom.parseString(frontpage, html=1, htmlencoding='gbk')
data = domToQueryDict(doc, 1)
print "*",
# 发贴标题及内容
data[u'ti'] = "Fledna 自爆专用~!!!!!!!!!!!!!!"+str(i)
data[u'co'] = "For Test only. " + str(i)
print data[u'ti']
data = urllib.urlencode(data)
post_url = "http://tieba.baidu.com/f"
b = opener.open(post_url, data)
time.sleep(20)

1 条评论:
找到这来了
发表评论