import
urllib
import
urllib2
import
os
import
re
import
sys
def schedule(a, b, c):
    """Print download progress; used as the report hook for urlretrieve.

    Args:
        a: Number of blocks transferred so far.
        b: Size of a single block, in bytes.
        c: Total size of the download in bytes. urlretrieve passes a
            non-positive value when the server does not report a size.

    Prints the progress as a percentage capped at 100. When the total size
    is unknown (c <= 0) a percentage cannot be computed, so the number of
    bytes transferred so far is printed instead.
    """
    if c <= 0:
        # Avoids ZeroDivisionError (c == 0) and negative percentages (c == -1).
        print('%d bytes' % (a * b))
        return
    # The last block is usually only partly filled, so a * b can overshoot c.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)
def getHtml(url):
    """Fetch url and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever read() on the opened response yields (the page content).
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even if read() fails part-way through.
        page.close()
def downloadImg(html, num, foldername):
    """Save the main image of one gallery page as <foldername>/<num>.jpg.

    Args:
        html: Source of the gallery page to scan for the image link.
        num: Page number; used as the file name (without extension).
        foldername: Target directory; created if it does not exist yet.

    The first image matching the gallery markup is downloaded, with
    schedule() reporting progress. If the page contains no such image a
    message is printed and nothing is downloaded.
    """
    picpath = str(foldername)
    if not os.path.isdir(picpath):
        os.makedirs(picpath)
    target = os.path.join(picpath, '%s.jpg' % num)

    # Non-greedy groups so each attribute stops at its own closing quote.
    imagePattern = (r'<p><a href="http://www\.mzitu\.com/.*?">'
                    r'<img src="(.*?)" alt=".*?"></a></p>')
    found = re.search(imagePattern, html, re.S)
    if found is None:
        print('No image found for page %s; skipping' % num)
        return

    print('Downloading image to location: ' + target)
    urllib.urlretrieve(found.group(1), target, schedule)
def findPage(html):
    """Return the last numeric <span> value found in html, as a string.

    Callers convert the result with int() and use it as a page count.

    Args:
        html: Page source to scan.

    Returns:
        The digits inside the last <span>N</span> element.

    Raises:
        IndexError: If html contains no <span> holding only digits.
    """
    # \d+ (not \d*) so an empty <span></span> is never returned as ''.
    pageNumbers = re.findall(r'<span>(\d+)</span>', html)
    if not pageNumbers:
        raise IndexError('no <span>N</span> page number found in html')
    return pageNumbers[-1]
def findList(html):
    """Extract the album links from a listing page.

    Args:
        html: Source of the listing page.

    Returns:
        A list of (album_id, title) string tuples, one per
        <h2><a href="http://www.mzitu.com/<id>" title="<title>" ...> entry,
        in document order. Empty when nothing matches.
    """
    # Non-greedy groups keep each match inside a single <h2> entry.
    albumPattern = (r'<h2><a href="http://www\.mzitu\.com/(\d+)" '
                    r'title="(.*?)" target="_blank">.*?</a></h2>')
    return re.findall(albumPattern, html, re.S)
def totalDownload(modelUrl):
    """Download every image of every album linked from one listing page.

    Args:
        modelUrl: URL of a listing page; its albums are found with findList().

    For each (album id, title) pair the album's pages are fetched one by one
    and the image of each page is stored by downloadImg() in a folder named
    after the album title, using the page number as the file name.
    """
    # Album links are matched by findList() as this root followed by the id.
    siteRoot = 'http://www.mzitu.com/'

    listHtml = getHtml(modelUrl)
    for albumId, title in findList(listHtml):
        albumUrl = siteRoot + albumId
        firstPage = getHtml(albumUrl)
        totalNum = int(findPage(firstPage))
        for num in range(1, totalNum + 1):
            if num == 1:
                # Page 1 is the album URL itself; reuse the copy already fetched.
                html5 = firstPage
            else:
                # NOTE(review): '<album url>/<page number>' is an assumed URL
                # shape for the following pages - confirm against the site.
                html5 = getHtml(albumUrl + '/' + str(num))
            downloadImg(html5, str(num), str(title))
if __name__ == '__main__':
    # NOTE(review): the start URL is not recorded anywhere in this script.
    # The site root (taken from the link patterns used by the helpers above)
    # is used as the default and can be overridden by the first command-line
    # argument - confirm which listing page was intended.
    startUrl = sys.argv[1] if len(sys.argv) > 1 else 'http://www.mzitu.com/'

    listHtml = getHtml(startUrl)
    for model in range(1, int(findPage(listHtml)) + 1):
        if model == 1:
            modelUrl = startUrl
        else:
            # NOTE(review): '<start url>/page/<n>' is an assumed URL shape for
            # the later listing pages - confirm against the site.
            modelUrl = startUrl.rstrip('/') + '/page/' + str(model)
        totalDownload(modelUrl)
    print("Download has finished.")