#Importing the modules
#import urllib.request
from urllib.request import urlopen
from urllib.request import urlretrieve
from urllib.request import URLError
from urllib import request
import urllib
import json
import re
def extract(text: str, sub1: str, sub2: str) -> str:
    """
    Return the part of ``text`` that lies between the first occurrence of
    ``sub1`` and the first occurrence of ``sub2`` after it.

    Edge cases (kept deliberately lenient):
      * ``sub1`` not found -> the search for ``sub2`` starts at the
        beginning of ``text``.
      * ``sub2`` not found -> everything after ``sub1`` is returned.

    Raises ``ValueError`` if ``sub1`` or ``sub2`` is an empty string
    (``str.split`` rejects an empty separator).
    """
    # [-1] keeps the whole text when sub1 is absent (split yields one item).
    after_start = text.split(sub1, 1)[-1]
    # [0] keeps the whole remainder when sub2 is absent.
    return after_start.split(sub2, 1)[0]
# --- Build the Baidu image-search URL for the user's keyword -------------
word = input("Input key word: ")
# Percent-encode the keyword so non-ASCII text is safe inside the query string.
word = urllib.parse.quote(word)
url = ''.join(["https://images.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=",word,"&ct=201326592&ic=0&lm=-1&width=&height=&v=flip"])

# The header value must be the agent string only; the header name is the key.
headers = {'User-Agent': 'Mozilla/5.0'}
req = request.Request(url, headers=headers)

# --- Fetch the result page -------------------------------------------------
# The context manager closes the HTTP response even if reading fails.
with urlopen(req) as data:
    charset = data.info().get_content_charset()
    html = data.read()

# --- Work out the page encoding ---------------------------------------------
# Prefer the charset declared in the Content-Type response header; only when
# the server does not send one, sniff it from the page body (charset=...").
encoding = charset or extract(str(html).lower(), 'charset=', '"')
try:
    html = html.decode(encoding)
except LookupError:
    # The sniffed name was empty or not a known codec (e.g. the page uses
    # <meta charset="...">). Fall back to UTF-8 and tolerate stray bytes,
    # since the text is only scanned for URLs below.
    encoding = 'utf-8'
    html = html.decode(encoding, errors='replace')
print('-'*50)
print( "Encoding type = %s" % encoding )
print('-'*50)

# --- Download every image referenced by an "objURL" entry -------------------
imglist = re.findall('"objURL":"(.*?)",',html,re.S)
for x, imgurl in enumerate(imglist):
    print(imgurl)
    try:
        # Files are saved in the current directory as 0.jpg, 1.jpg, ...
        urlretrieve(imgurl, "%s.jpg" % x)
    except URLError as e:
        # Only HTTPError (a URLError subclass) has a ``code`` attribute;
        # plain URLError (DNS failure, refused connection, ...) does not,
        # so read it defensively instead of raising AttributeError here.
        if getattr(e, 'code', None) == 404:
            print("4 0 4")
        else:
            print("%s" % e)
# Your tips are my greatest encouragement!