Some Python Libraries

requests

https://github.com/kennethreitz/requests

下载文件:

@echo(cfg['retry_times'])
def download(url, d_path):
    """Download ``url`` into the directory ``d_path``.

    The local file name is the last piece of ``url`` after splitting on
    ``/`` or ``=``. The response body is streamed to disk in 1024-byte
    blocks.

    Returns:
        True when the body was written to disk; False when the server
        answered with a non-OK status, or when ``socket.error`` /
        ``urllib2.HTTPError`` was raised.

    Any other exception (including those raised by ``requests`` itself)
    is not caught here and propagates to the caller.
    """
    file_name = re.split(r"/|=", url)[-1]
    try:
        print('Downloading %s ...' % url)
        # If the URL redirects, do not send custom headers. The variant with
        # headers would additionally pass headers=settings.request_headers.
        request = requests.get(url, stream=True, timeout=cfg['timeout'])
        try:
            if not request.ok:
                # Do not save an error page as if it were the requested file.
                print(request.reason)
                print('Download %s Failed ' % url)
                return False

            # Open the target only after the request succeeded, so a failed
            # request does not leave an empty file behind.
            with open(os.path.join(d_path, file_name), 'wb') as fn:
                for block in request.iter_content(1024):
                    if not block:
                        break
                    fn.write(block)
        finally:
            # Always close the streamed response, on success and on failure.
            request.close()
        print('Download %s Success' % url)
        return True
    except (socket.error, urllib2.HTTPError) as msg:
        print('Download %s Failed ' % url)
        print(msg)
        return False

echo

https://github.com/itsnauman/echo

一个微型库, 只做一件事儿: 对失败的操作重试!

import requests
from pyecho import echo

@echo(10)  # Retry the function up to 10 times
def fetch():
    """Request http://google.com and return the response body as text."""
    response = requests.get('http://google.com')
    body = response.text
    return body

# Call the decorated function; the returned text is discarded here.
fetch()

pyquery

https://github.com/gawel/pyquery

和BeautifulSoup一样,分析html和xml页面

from pyquery import PyQuery as pq

# Parse an HTML string, then print the text of the elements matched by 'p'.
doc = pq('<html><body><p>Hello World</p></body></html>')
# Parenthesised so the line is valid on both Python 2 and Python 3.
print(doc('p').text())