# Fetches information about the Douban Top 100 movies.
import requests
from bs4 import BeautifulSoup
def get_urls(url, total=100, page_size=25):
    """Build the list of paginated listing URLs to scrape.

    The first page is ``url`` itself; every following page is ``url`` with a
    ``?start=<offset>&filter=`` query string appended, where the offset
    advances by ``page_size`` until ``total`` entries are covered.

    Args:
        url: Address of the first listing page, without a query string.
        total: Number of entries to cover (defaults to 100).
        page_size: Number of entries per page (defaults to 25).

    Returns:
        A list of page URLs, in page order.

    Raises:
        ValueError: If ``page_size`` is 0 (raised by ``range``).
    """
    # Derive later pages from the caller's url instead of a hard-coded
    # address, so the function honours whatever base URL it is given.
    return [
        url if start == 0 else '{}?start={}&filter='.format(url, start)
        for start in range(0, total, page_size)
    ]
def get_details(url, timeout=10):
    """Download one listing page and extract each movie's title, rating and link.

    Every element matching the ``.info`` selector is treated as one movie.
    Its first ``.title`` text, first ``.rating_num`` text and the ``href`` of
    its first ``<a>`` are printed and collected.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(name, rating, link)`` tuples in page order; empty when
        the page contains no ``.info`` elements.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
        IndexError: If an ``.info`` element lacks a title, link or rating.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of silently parsing an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    movies = []
    for content in soup.select('.info'):
        name = content.select('.title')[0].text
        link = content.select('a')[0]['href']
        rating = content.select('.rating_num')[0].text
        print(name, rating, link)
        movies.append((name, rating, link))
    return movies
def get_movie(main_url):
    """Scrape every listing page reachable from ``main_url``.

    The page URLs come from ``get_urls`` and are printed once; each page is
    then processed by ``get_details``.

    Args:
        main_url: Address of the first listing page.

    Returns:
        A list with whatever records ``get_details`` returned for each page,
        concatenated in page order. A page for which ``get_details`` returns
        nothing contributes no records.
    """
    all_urls = get_urls(main_url)
    print(all_urls)

    movies = []
    for url in all_urls:
        page_movies = get_details(url)
        # Treat a None/empty result as an empty page rather than printing it.
        if page_movies:
            movies.extend(page_movies)
    return movies
# Address of the first listing page handed to get_movie.
main_url='https://movie.douban.com/top250'
# Runs the scrape as soon as this module is executed or imported and keeps
# get_movie's result in `info`.
info = get_movie(main_url)


import requests
from bs4 import BeautifulSoup
def get_urls(url, total=100, page_size=25):
    """Build the list of paginated listing URLs to scrape.

    The first page is ``url`` itself; every following page is ``url`` with a
    ``?start=<offset>&filter=`` query string appended, where the offset
    advances by ``page_size`` until ``total`` entries are covered.

    Args:
        url: Address of the first listing page, without a query string.
        total: Number of entries to cover (defaults to 100).
        page_size: Number of entries per page (defaults to 25).

    Returns:
        A list of page URLs, in page order.

    Raises:
        ValueError: If ``page_size`` is 0 (raised by ``range``).
    """
    # Derive later pages from the caller's url instead of a hard-coded
    # address, so the function honours whatever base URL it is given.
    return [
        url if start == 0 else '{}?start={}&filter='.format(url, start)
        for start in range(0, total, page_size)
    ]
def get_details(url, timeout=10):
    """Download one listing page and extract each movie's title, rating and link.

    Every element matching the ``.info`` selector is treated as one movie.
    Its first ``.title`` text, first ``.rating_num`` text and the ``href`` of
    its first ``<a>`` are printed and collected.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(name, rating, link)`` tuples in page order; empty when
        the page contains no ``.info`` elements.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status.
        IndexError: If an ``.info`` element lacks a title, link or rating.
    """
    # A timeout keeps a stalled connection from hanging the whole scrape.
    res = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of silently parsing an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    movies = []
    for content in soup.select('.info'):
        name = content.select('.title')[0].text
        link = content.select('a')[0]['href']
        rating = content.select('.rating_num')[0].text
        print(name, rating, link)
        movies.append((name, rating, link))
    return movies
def get_movie(main_url):
    """Scrape every listing page reachable from ``main_url``.

    The page URLs come from ``get_urls`` and are printed once; each page is
    then processed by ``get_details``.

    Args:
        main_url: Address of the first listing page.

    Returns:
        A list with whatever records ``get_details`` returned for each page,
        concatenated in page order. A page for which ``get_details`` returns
        nothing contributes no records.
    """
    all_urls = get_urls(main_url)
    print(all_urls)

    movies = []
    for url in all_urls:
        page_movies = get_details(url)
        # Treat a None/empty result as an empty page rather than printing it.
        if page_movies:
            movies.extend(page_movies)
    return movies
# Address of the first listing page handed to get_movie.
# NOTE(review): the same two statements also appear earlier in this file, so
# the scrape is triggered a second time here — confirm whether that is intended.
main_url='https://movie.douban.com/top250'
# Runs the scrape as soon as this module is executed or imported and keeps
# get_movie's result in `info`.
info = get_movie(main_url)


