urllib や BeautifulSoup を使ったスクレイピングの練習を兼ねて、Web ページから画像をダウンロードするスクリプトを作ってみた。何に使うかは想像にお任せする ;p
# coding: utf-8
"""Download the images embedded in, or linked from, a web page.

The page is fetched with urllib and parsed with BeautifulSoup.  Every image
whose URL matches ``re_image`` is saved under its own file name, either in
the current directory or in the directory given with ``-d``.
"""
import argparse
import os
import re
import sys
from contextlib import closing

try:  # Python 2 module layout
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin
except ImportError:  # Python 3 module layout
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve

script_version = 'v0.0.1'

# Used with ``match``: anchored at the start only, so an image extension
# followed by a query string (``a.jpg?size=1``) is still accepted.
re_image = re.compile(r".+\.(jpg|png|gif)")

# Parsed command-line options.  Stays None until main() runs, which keeps the
# helpers below usable when this file is imported instead of executed.
args = None


def url_to_filename(url, directory=None):
    """Return the local path under which the resource at *url* is saved.

    The file name is the last path segment of *url* with any query string
    or fragment removed.  *directory* is prepended when given; when it is
    None the ``--dir`` command-line option (if parsed) is used instead.
    """
    # Strip the query/fragment first: they may contain '/' themselves and
    # would otherwise be mistaken for the last path segment.
    path = re.split(r'[?#]', url, maxsplit=1)[0]
    filename = path.split('/')[-1]
    if directory is None and args is not None:
        directory = args.dir
    if directory:
        filename = os.path.join(directory, filename)
    return filename


def _download(image, base_url=None):
    """Fetch one image; a failed download is reported, not raised."""
    # Relative references ("/img/a.png") only make sense against the page URL.
    source = urljoin(base_url, image) if base_url else image
    target = url_to_filename(source)
    print(source)
    try:
        urlretrieve(source, target)
    except (IOError, ValueError) as err:
        # Best effort: one broken image must not stop the remaining ones.
        sys.stderr.write("skipped %s: %s\n" % (source, err))


def get_linked_images(soup, base_url=None):
    """Download the images that thumbnail links point to.

    For every ``<a>`` in *soup* that contains at least one ``<img>`` and
    whose ``href`` matches ``re_image``, the ``href`` target is downloaded
    once.  *base_url*, when given, is used to resolve relative links.
    """
    for anchor in soup("a"):
        href = anchor.get("href")
        # Anchors without href (named anchors) or without an image are skipped.
        if href and anchor("img") and re_image.match(href):
            _download(href, base_url)


def get_embeded_images(soup, base_url=None):
    """Download every ``<img>`` in *soup* whose ``src`` matches ``re_image``.

    Tags without a ``src`` attribute are ignored.  *base_url*, when given,
    is used to resolve relative ``src`` values.
    """
    for img in soup("img"):
        src = img.get("src")
        if src and re_image.match(src):
            _download(src, base_url)


def _build_parser():
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(description="Download images from web page.")
    parser.add_argument('url', metavar='URL', nargs='?', action='store',
                        help='specify URL.')
    parser.add_argument('-v', '--version', dest='version', action='store_true',
                        help='show version and exit')
    parser.add_argument('-e', '--embeded-image', dest='embeded', action='store_true',
                        help='download embeded images(default)')
    parser.add_argument('-l', '--linked-image', dest='linked', action='store_true',
                        help='download linked images')
    parser.add_argument('-d', '--dir', dest='dir', metavar='DIR', action='store',
                        help='download into DIR')
    return parser


def main():
    """Parse the command line and download the requested images."""
    global args
    parser = _build_parser()
    args = parser.parse_args()

    if args.version:
        print(script_version)
        return

    # URL is optional for argparse so that "-v" works alone; it is required
    # for everything else.
    if not args.url:
        parser.error("URL is required")
    url = args.url

    # Imported here so the helpers above stay importable without the package.
    from BeautifulSoup import BeautifulSoup

    # Re-using an existing download directory is fine.
    if args.dir and not os.path.isdir(args.dir):
        os.makedirs(args.dir)

    print("Download images from " + url + "\n")
    with closing(urlopen(url)) as response:
        soup = BeautifulSoup(response.read())

    if args.linked:
        get_linked_images(soup, url)
    # Embedded images are the default, and are also fetched when -e is given
    # together with -l.
    if args.embeded or not args.linked:
        get_embeded_images(soup, url)


if __name__ == "__main__":
    main()