import requests

# Fetch the Baidu home page and write it out as an HTML file.
# A timeout is passed so an unresponsive server cannot hang the script forever.
response = requests.get(url='http://www.baidu.com', timeout=10)

# Decode the body as UTF-8 regardless of the charset requests inferred.
response.encoding = 'UTF-8'
print(response.text)

# NOTE(review): 'badui.html' looks like a typo for 'baidu.html'; the path is
# kept as-is because other tooling may already rely on it - confirm.
with open('badui.html', 'w', encoding='UTF-8') as f:
    f.write(response.text)
# Douban Top 250 scraper.
#
# The listing is paginated through the ``start`` query parameter, which
# advances in steps of 25:
#   https://movie.douban.com/top250?start=0&filter=
#   https://movie.douban.com/top250?start=25&filter=
#   https://movie.douban.com/top250?start=50&filter=
#
# Each page goes through three steps:
#   1. send the request
#   2. parse the data
#   3. save the data
import re

PAGE_SIZE = 25   # step of the ``start`` query parameter between pages
PAGE_COUNT = 10  # number of pages requested by main()

# NOTE(review): Douban reportedly rejects the default python-requests
# User-Agent, hence a browser-like one - confirm against the live site.
DEFAULT_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/120.0 Safari/537.36',
}

# Marker that opens every film entry; the page is cut on it so that one
# entry's fields can never be matched against the following entry.
# NOTE(review): the markup below is reconstructed from the tag-stripped
# pattern in the original paste - confirm against the live page.
_ITEM_MARKER = '<div class="item">'

# Rank, detail URL, title, director/cast line, score and vote count.
_ITEM_PATTERN = re.compile(
    r'<em class="">(.*?)</em>'
    r'.*?<a href="(.*?)">'
    r'.*?<span class="title">(.*?)</span>'
    r'.*?导演:(.*?)</p>'
    r'.*?<span class="rating_num".*?>(.*?)</span>'
    r'.*?<span>(.*?)人评价</span>',
    re.S,
)

# One-line description; searched separately because an entry may lack it.
_QUOTE_PATTERN = re.compile(r'<span class="inq">(.*?)</span>', re.S)


# 1. Send the request
def get_page(base_url, headers=None, timeout=10):
    """Send a GET request to *base_url* and return the response object.

    Args:
        base_url: URL of the page to fetch.
        headers: Optional request headers; ``DEFAULT_HEADERS`` when omitted.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` for the request. The HTTP status is not
        checked here; that is left to the caller.
    """
    # Imported here so the parsing and saving helpers stay importable
    # without the third-party HTTP client installed.
    import requests

    if headers is None:
        headers = DEFAULT_HEADERS
    return requests.get(base_url, headers=headers, timeout=timeout)


# 2. Parse the text
def parse_index(text):
    """Extract the film entries from one listing page.

    Args:
        text: HTML source of a listing page.

    Returns:
        A list of 7-tuples of strings: rank, detail URL, title,
        director/cast line, score, vote count and one-line description.
        The description is ``''`` for an entry that has none. Entries whose
        mandatory fields cannot be matched are skipped; text without any
        entry yields an empty list.
    """
    movies = []
    # Everything before the first marker is page chrome, hence the [1:].
    for chunk in text.split(_ITEM_MARKER)[1:]:
        match = _ITEM_PATTERN.search(chunk)
        if match is None:
            continue
        quote = _QUOTE_PATTERN.search(chunk, match.end())
        description = quote.group(1) if quote else ''
        movies.append(match.groups() + (description,))
    return movies


def format_movie(movie):
    """Render one parsed film tuple as a text record.

    Args:
        movie: 7-tuple as produced by ``parse_index``.

    Returns:
        The record with one labelled field per line, followed by a blank
        line that separates it from the next record.
    """
    v_top, v_url, v_name, v_daoyan, v_point, v_num, v_desc = movie
    return (
        f'电影排名: {v_top}\n'
        f'电影url: {v_url}\n'
        f'电影名称: {v_name}\n'
        f'导演主演: {v_daoyan}\n'
        f'电影评分: {v_point}\n'
        f'评价人数: {v_num}\n'
        f'电影简介: {v_desc}\n'
        '\n'
    )


# 3. Save the data
def save_data(data, path='douban.txt'):
    """Append *data* to the UTF-8 text file at *path*.

    Args:
        data: Text to append.
        path: Destination file; defaults to ``douban.txt``.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.write(data)


def main():
    """Fetch every listing page, then print and save each film record."""
    for start in range(0, PAGE_SIZE * PAGE_COUNT, PAGE_SIZE):
        base_url = f'https://movie.douban.com/top250?start={start}&filter='
        print(base_url)

        # 1. Send the request
        response = get_page(base_url)
        if response.status_code != 200:
            # An error page holds no film entries; report it and move on.
            print(f'skipped {base_url}: HTTP {response.status_code}')
            continue

        # 2. Parse the text, 3. save the data
        for movie in parse_index(response.text):
            movie_content = format_movie(movie)
            print(movie_content)
            save_data(movie_content)


if __name__ == '__main__':
    main()