python豆瓣电影爬虫课程设计,python爬取网页视频的代码

本文实例讲述了Python实现的爬取电影下载链接功能，分享给大家供大家参考，具体如下：

#!/usr/bin/python

#coding=utf-8

import os
import re
import sys
import urllib2

from bs4 import BeautifulSoup
# NOTE(review): one further import line in the original listing is garbled
# beyond recovery (possibly `import chardet`); nothing in the visible code
# uses it, so it is not reproduced here -- confirm against the original post.

# Python 2 only: make UTF-8 the interpreter-wide default codec, so unicode text
# is implicitly encoded as UTF-8 (instead of ASCII) when written to a file
# opened with the built-in open().  site.py removes sys.setdefaultencoding at
# start-up; reload(sys) brings it back.  Python 3 needs none of this.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

#从电影html页面获取视频下载地址

def get_movie_download_url(html):
    """Extract the video download address from a movie detail page.

    Args:
        html: Markup of the movie detail page.

    Returns:
        The ``.string`` of the first ``<a>`` inside the first ``<td>`` whose
        ``style`` attribute contains ``word-wrap: break-word``.  This is None
        when that anchor does not hold exactly one text child.

    Raises:
        AttributeError: If the page has no such ``<td>``, or that cell
            contains no ``<a>``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # The style value is matched case-insensitively and with optional
    # whitespace after the colon, so 'WORD-WRAP: break-word' and
    # 'word-wrap:break-word' both select the download cell.
    style_pattern = re.compile(r'word-wrap:\s*break-word', re.I)
    td = soup.find('td', attrs={'style': style_pattern})
    return td.find('a').string

#从电影html页面获取电影标题

def get_movie_title(html):
    """Extract the movie title from a movie detail page.

    Args:
        html: Markup of the movie detail page.

    Returns:
        The ``.string`` of the first ``<h1>`` element.  This is None when the
        heading does not hold exactly one text child.

    Raises:
        AttributeError: If the page contains no ``<h1>`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return soup.find('h1').string

#访问url并返回html页面

def get_html(url):
    """Download ``url`` and return the raw response body.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The undecoded body exactly as returned by ``response.read()``.

    Raises:
        urllib2.URLError: If the request cannot be completed.
    """
    req = urllib2.Request(url)
    # Identify as a browser instead of urllib2's default User-Agent.
    req.add_header('User-Agent', 'Mozilla/5.0')
    # Open the prepared request rather than the bare URL; passing the URL
    # would silently discard the header set above.
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        response.close()

#从电影列表页面获取电影的url，连接起来，保存到列表中后返回

def get_movie_list(url):
    """Collect the absolute URLs of the movies linked from a listing page.

    Args:
        url: URL of a movie listing page.

    Returns:
        A list with one absolute URL per ``<a class="ulink">`` element that
        carries an ``href``, in document order.
    """
    soup = BeautifulSoup(get_html(url), 'html.parser')
    # Each href is appended to the host as-is.
    # NOTE(review): this assumes every href is site-relative -- confirm.
    host = 'http://www.ygdy8.net'
    m_list = []
    for anchor in soup.find_all('a', attrs={'class': 'ulink'}):
        href = anchor.get('href')
        # An anchor without an href points to no page; concatenating None
        # would raise TypeError, so skip it.
        if href:
            m_list.append(host + href)
    return m_list

#保存到txt文件

def file_edit(wr_str, path=r'e:\down_load_url.txt'):
    """Append ``wr_str`` to the output text file.

    Args:
        wr_str: Text to append; it is written exactly as given.
        path: File to append to.  Defaults to the location the script has
            always used, so existing one-argument calls are unaffected.

    Raises:
        IOError: If the file cannot be opened or written.
    """
    # The with-block closes the file even when write() raises.
    with open(path, 'a') as out:
        out.write(wr_str)

#传入电影链接列表，获取下载地址，写入文件

def write_to_txt(a_urls):
    """Fetch every movie page and append its title and download link to the file.

    For each page the output receives the title, the download address and a
    blank separator line.

    Args:
        a_urls: Iterable of movie detail page URLs.
    """
    for a_url in a_urls:
        # Pages are decoded as GBK; 'replace' keeps a single stray byte from
        # aborting the whole crawl with UnicodeDecodeError.
        html = get_html(a_url).decode('gbk', 'replace')
        title = get_movie_title(html)
        download_url = get_movie_download_url(html)
        # BeautifulSoup's .string is None when the tag has several children;
        # there is nothing meaningful to save then, and concatenating None
        # would raise TypeError.
        if title is None or download_url is None:
            continue
        # One write per movie instead of reopening the file three times.
        file_edit(title + '\n' + download_url + '\n' + '\n')

#传递页数并返回这些页面的url列表

def get_pages_url(num):
    """Build the URLs of the first ``num`` movie listing pages.

    Args:
        num: How many listing pages to generate; pages are numbered from 1.

    Returns:
        A list of ``num`` URLs in page order; empty when ``num`` is less
        than 1.
    """
    url = 'http://www.ygdy8.net/html/gndy/dyzz/list_23_'
    return [url + str(n) + '.html' for n in range(1, num + 1)]

if __name__ == '__main__':
    pages = 2  # number of listing pages to crawl
    for page_url in get_pages_url(pages):
        # Gather the movie links on this listing page and save each movie's
        # title and download address.
        write_to_txt(get_movie_list(page_url))
    # The parenthesised form prints the same text under Python 2 and 3.
    print('done')

希望本文对大家的Python编程有所帮助。