andelf fledna Feather

2009年2月28日星期六

百度谷歌造[有图有真相][baidu made from google]

有图有真相... 上图:
真相:
#!win32 only. no unix magic chars
# coding=utf-8

# need pywin32 module
import win32com.client

bro = win32com.client.Dispatch("InternetExplorer.Application")
# show window
bro.Visible = True
bro.Navigate("http://www.baidu.com")

while bro.Busy:
    pass

print bro.LocationName
# MSDN: bro.Document is a DOM object
doc = bro.Document
logo = doc.getElementsByTagName('img')[0]
logo.setAttribute('src', 'http://www.google.cn/intl/zh-CN/images/logo_cn.gif')
logo.setAttribute('width', '286')
logo.setAttribute('height', '110')

2009年2月24日星期二

HTTP Basic 验证: HTTPBasicAuthHandler

以 fanfou API 为例说明. 有兴趣的可以做个功能完整的工具.

#!/usr/bin/python
# coding=utf-8

import urllib2
import xml.dom.minidom
import winsound
import time
import thread
import urllib

# Common prefix shared by every fanfou REST endpoint used in this script.
_api_root = "http://api.fanfou.com"

# timelines and status posting
public_timeline_url = _api_root + "/statuses/public_timeline.xml"
user_timeline_url = _api_root + "/statuses/user_timeline.xml"
friends_timeline_url = _api_root + "/statuses/friends_timeline.xml"
statuses_update_url = _api_root + "/statuses/update.xml"

# friendship related
friendships_create_url = _api_root + "/friendships/create.xml"
friendships_destroy_url = _api_root + "/friendships/destroy.xml"

# block list
blocks_create_url = _api_root + "/blocks/create.xml"
blocks_destroy_url = _api_root + "/blocks/destroy.xml"

# HTTP Basic authentication: register one credential pair for the whole
# API host, then build an opener that uses it for requests.
passwd_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
# Arguments are (realm, uri, user, passwd); a realm of None means the
# credentials apply to any realm.  Replace the placeholders with the real
# user name and password.
passwd_mgr.add_password(None, 'http://api.fanfou.com/',
                        'xxxxxxx', 'xxxxxxxxx')

auth_handler = urllib2.HTTPBasicAuthHandler(passwd_mgr)
opener = urllib2.build_opener(auth_handler)

# Id of the newest status already shown; empty until the first refresh.
last_id = ''


def parseDom(doc):
    """Extract the statuses from a parsed timeline document.

    ``doc`` is a DOM object (for example from ``xml.dom.minidom``) that
    contains ``<status>`` elements.

    Returns a list of ``(username, text, created_at, id)`` tuples in the
    reverse of document order.  Each field is the text of the first
    descendant element with the matching tag name (``name``, ``text``,
    ``created_at``, ``id``); a field whose element is missing or empty is
    returned as an empty string instead of raising ``IndexError``.
    """
    def text_of(node, tag):
        # First matching descendant in document order wins.
        matches = node.getElementsByTagName(tag)
        if not matches or not matches[0].childNodes:
            # e.g. <text></text> has no child text node at all
            return u''
        return matches[0].childNodes[0].data

    rows = [(text_of(status, 'name'),
             text_of(status, 'text'),
             text_of(status, 'created_at'),
             text_of(status, 'id'))
            for status in doc.getElementsByTagName('status')]
    rows.reverse()
    return rows

def refresh():
    global last_id
    print last_id
    res = opener.open(friends_timeline_url)
    doc = xml.dom.minidom.parseString(res.read())
    posts = parseDom(doc)
    id_list = [ids for (_, _, _, ids) in posts]
    if last_id in id_list:
        id_list = id_list[id_list.index(last_id)+1:]
        winsound.MessageBeep(winsound.MB_ICONASTERISK)
    print 
    for (user, txt, tm, ids) in posts:
        print "{%s}: %s [%s]" % (user, txt, tm.split()[3])
    last_id = posts[-1][-1]
    print 'MyMsg:',

def refresh_thread(times=0, timeout=60):
    """Call ``refresh()`` repeatedly, sleeping between calls.

    times   -- number of refreshes to perform; 0 (the default) or a
               negative value means keep refreshing forever.
    timeout -- seconds to sleep between two refreshes.

    ``refresh()`` is always called at least once.
    """
    forever = times <= 0
    remaining = times
    while True:
        refresh()
        if not forever:
            # Count down so that a finite ``times`` actually terminates.
            remaining -= 1
            if remaining <= 0:
                break
        time.sleep(timeout)
    
def post_statue(msg, reply=''):
    """Post ``msg`` as a new status through the authenticated opener.

    msg   -- status text; a false value (empty string, None) is ignored.
    reply -- id of the status being replied to, empty for none.

    Returns None.
    """
    if msg:
        form = urllib.urlencode({'status' : msg,
                                 'in_reply_to_status_id' : reply,
                                 'source' : 'ffsh'})
        # Passing a body makes urllib2 issue a POST request.
        opener.open(statuses_update_url, form)
  

# Poll the friends timeline every 30 seconds in a background thread while
# the main thread reads messages from the console and posts them.
thread.start_new_thread(refresh_thread, (0, 30))

while True:
    try:
        mymsg = raw_input('MyMsg: ')
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C: leave the loop instead of dying with a
        # traceback.
        break
    post_statue(mymsg)
    

2009年2月22日星期日

用 Python 爆百度贴吧

当然, 只是原型, 但基本功能有了. 目前在刷回帖的时候会有乱码, 编码问题需要解决. 成果展示: ---------------------------------------------------------------


#!/usr/bin/python
# -*- coding:utf-8 -*-

import urllib2
import urllib
import socket
import libxml2dom
import Image
import time



def domToQueryDict(doc, formIndex=0):
    """Collect the fields of one HTML form into a dict ready for urlencode.

    doc       -- parsed document exposing the DOM API.
    formIndex -- index of the <form> element to read (default: the first).

    Every named <input> and <textarea> of the form is included, except
    inputs whose type is "submit".  A field with a ``value`` attribute
    maps to that value encoded as GBK (unencodable characters dropped);
    a field without one maps to an empty string.
    """
    form = doc.getElementsByTagName('form')[formIndex]
    fields = list(form.getElementsByTagName('input'))
    fields += form.getElementsByTagName('textarea')

    query = {}
    for field in fields:
        if not field.hasAttribute('name'):
            continue
        if field.getAttribute('type') == u"submit":
            continue
        key = field.getAttribute('name')
        if field.hasAttribute('value'):
            query[key] = field.getAttribute('value').encode('gbk', 'ignore')
        else:
            query[key] = ""
    return query

             
cookie_handler = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookie_handler)
urllib2.install_opener(opener)

# 获得登陆数据
url = "http://passport.baidu.com/?" + \
   "login&tpl=tb&u=http%3A//tieba.baidu.com/f%3Fkw%3Dpython"
doc = libxml2dom.parseString(opener.open(url).read(), html=1,
                            htmlencoding='gbk')
data = domToQueryDict(doc)

# 用户名/密码设置
data[u'username'] = '用户名'
data[u'password'] = '密码'

# 验证图片处理
veryfyPic = opener.open("https://passport.baidu.com/?verifypic")
picfile = file("pic.jpg", 'wb')
picfile.write(veryfyPic.read())
picfile.close()
im = Image.open("pic.jpg")
im.show()
data["verifycode"] = raw_input("What you see?").strip()

# 登陆页面
data = urllib.urlencode(data)
b = opener.open(url, data)  # here login OK
print b.read()
print '-'*20

for i in xrange(20):
   # 分析主页数据
   # url = "http://tieba.baidu.com/f?kz=543530949"
   b = opener.open("http://tieba.baidu.com/f?kw=python&t=1")
   frontpage = b.read()
   doc = libxml2dom.parseString(frontpage, html=1, htmlencoding='gbk')
             
   data = domToQueryDict(doc, 1)
   print "*",
   # 发贴标题及内容
   data[u'ti'] = "Fledna 自爆专用~!!!!!!!!!!!!!!"+str(i)
   data[u'co'] = "For Test only. " + str(i)
   print data[u'ti']
   data = urllib.urlencode(data)
   post_url = "http://tieba.baidu.com/f"
   b = opener.open(post_url, data)
   time.sleep(20)