andelf fledna Feather

2009年2月22日星期日

用 Python 爆百度贴吧

当然，这只是一个原型，但基本功能已经有了。目前在刷回帖的时候会出现乱码，编码问题还需要解决。成果展示： ---------------------------------------------------------------


#!/usr/bin/python
# -*- coding:utf-8 -*-

import urllib2
import urllib
import socket
import libxml2dom
import Image
import time



def domToQueryDict(doc, formIndex=0):
    """Collect the fields of one HTML form into a dict.

    doc is a DOM-like document exposing getElementsByTagName; formIndex
    picks which of the <form> elements it returns is read (default: the
    first one).

    Every <input> and <textarea> inside that form which has a "name"
    attribute and whose "type" attribute is not "submit" contributes one
    entry keyed by its name:

    * if the element has a "value" attribute, the entry is that value
      encoded as GBK, with unencodable characters silently dropped;
    * otherwise the entry is an empty string.

    When several elements share a name, the one visited last wins
    (inputs are visited before textareas). Returns the resulting dict.
    """
    target_form = doc.getElementsByTagName('form')[formIndex]

    # Inputs first, then textareas, in one flat list.
    elements = []
    for tag in ('input', 'textarea'):
        elements.extend(target_form.getElementsByTagName(tag))

    query = {}
    for element in elements:
        # Unnamed controls cannot be submitted; submit buttons are skipped.
        if not element.hasAttribute('name'):
            continue
        if element.getAttribute('type') == u"submit":
            continue

        key = element.getAttribute('name')
        if element.hasAttribute('value'):
            raw_value = element.getAttribute('value')
            query[key] = raw_value.encode('gbk', 'ignore')
        else:
            query[key] = ""
    return query

             
# Build an opener that carries cookies across requests, and install it as
# the urllib2 default so the login session is reused by every later call.
cookie_handler = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)

# Fetch the login page and read its first form's fields (the page is
# parsed as GBK-encoded HTML).
url = "http://passport.baidu.com/?" + \
   "login&tpl=tb&u=http%3A//tieba.baidu.com/f%3Fkw%3Dpython"
doc = libxml2dom.parseString(opener.open(url).read(), html=1,
                            htmlencoding='gbk')
data = domToQueryDict(doc)

# Set the username/password fields. The literals below look like
# placeholders to be replaced with real credentials before running.
# NOTE(review): this file is declared utf-8, so these are UTF-8 byte
# strings, while the scraped form values are GBK-encoded by
# domToQueryDict -- possibly related to the garbled text; confirm.
data[u'username'] = '用户名'
data[u'password'] = '密码'

# Handle the verification (CAPTCHA) image: download it to pic.jpg, show it
# to the user, and read the code they type in.
veryfyPic = opener.open("https://passport.baidu.com/?verifypic")
picfile = file("pic.jpg", 'wb')
picfile.write(veryfyPic.read())
picfile.close()
im = Image.open("pic.jpg")
im.show()
data["verifycode"] = raw_input("What you see?").strip()

# Submit the login form. Passing a data argument makes urllib2 send a POST.
data = urllib.urlencode(data)
b = opener.open(url, data)  # here login OK
# Dump the response body followed by a separator line.
print b.read()
print '-'*20

# Submit 20 posts, one per iteration, pausing 20 seconds after each.
# NOTE(review): this automates repeated posting to a public forum; check
# the site's terms of use before running it.
for i in xrange(20):
   # Fetch and parse the forum front page (as GBK-encoded HTML).
   # url = "http://tieba.baidu.com/f?kz=543530949"
   b = opener.open("http://tieba.baidu.com/f?kw=python&t=1")
   frontpage = b.read()
   doc = libxml2dom.parseString(frontpage, html=1, htmlencoding='gbk')
             
   # Read the fields of the page's second form (index 1) -- presumably the
   # new-post form; confirm against the page markup.
   data = domToQueryDict(doc, 1)
   # Progress marker; the trailing comma suppresses the newline.
   print "*",
   # Post title ('ti') and content ('co'), each suffixed with the loop index.
   # NOTE(review): the title literal is a UTF-8 byte string (file coding is
   # utf-8) while the other form values are GBK-encoded -- confirm whether
   # this causes the garbled text.
   data[u'ti'] = "Fledna 自爆专用~!!!!!!!!!!!!!!"+str(i)
   data[u'co'] = "For Test only. " + str(i)
   print data[u'ti']
   # URL-encode the fields and POST them (data argument => POST request).
   data = urllib.urlencode(data)
   post_url = "http://tieba.baidu.com/f"
   b = opener.open(post_url, data)
   # Wait 20 seconds before the next post.
   time.sleep(20)

1 条评论:

mocihan 说...

找到这来了