Python抓取网站图片

脚本内容如下:

	import urllib.request, os, re, urllib

	def getHtml(url):
		'''Fetch the resource at *url* and return its body as text.

		The response body is read in full and decoded as UTF-8.

		Args:
			url: Any URL that ``urllib.request.urlopen`` accepts.

		Returns:
			The decoded response body as a ``str``.

		Raises:
			urllib.error.URLError: If the resource cannot be opened.
			UnicodeDecodeError: If the body is not valid UTF-8.
		'''
		# The with-block closes the response (and its socket) even when
		# read() raises; the original left it open.
		with urllib.request.urlopen(url) as page:
			html = page.read()
		return html.decode('UTF-8')

	def getImg(html, path='images-2', base_url=None):
		'''Download every ``.png`` image referenced by a ``src="..."`` attribute.

		Images are saved as ``1.png``, ``2.png``, ... inside *path*, numbered in
		the order their URLs appear in *html*.

		Args:
			html: Page markup, as text, to scan for image URLs.
			path: Directory to save the images in; created if it does not exist.
			base_url: Optional URL of the page *html* came from. When given,
				relative ``src`` values are resolved against it before
				downloading. When omitted, every ``src`` value is passed to
				``urlretrieve`` unchanged.

		Returns:
			The list of ``src`` values found, exactly as written in *html*.

		Raises:
			ValueError: If a ``src`` value is not a usable URL (for example a
				relative path when *base_url* is not given).
			urllib.error.URLError: If a download fails.
		'''
		from urllib.parse import urljoin

		# [^"] keeps each match inside one attribute value. With '.' the lazy
		# match could run past the closing quote of a non-png src and swallow
		# markup up to the next '.png"'.
		imgre = re.compile(r'src="([^"]+?\.png)"')
		imglist = imgre.findall(html)

		# exist_ok avoids the check-then-create race of isdir() + makedirs().
		os.makedirs(path, exist_ok=True)

		for x, imgurl in enumerate(imglist, start=1):
			# urljoin leaves absolute URLs untouched and resolves relative ones.
			target = urljoin(base_url, imgurl) if base_url else imgurl
			urllib.request.urlretrieve(target, os.path.join(path, '{0}.png'.format(x)))
			print("It's start %s" % x)  # progress message, printed after each download
		return imglist

	# Page whose images are downloaded. An earlier run of this script
	# targeted http://tieba.baidu.com/p/3840085725 instead.
	PAGE_URL = 'https://findicons.com/pack/2787/beautiful_flat_icons'

	# Fetch the page markup, then download the images it references.
	html = getHtml(PAGE_URL)
	getImg(html)

最后更新于