需求:用 Python 采集头条热搜词,自动去重后保存为 txt 文件。在服务器上为该脚本配置定时任务后,即可持续采集头条热搜词。
# Collect Toutiao hot-search titles and append the ones not seen before to a
# text file. Intended to be run repeatedly (e.g. from a scheduled job): each
# run only adds titles that are not already present in the output file.
import json

HOT_BOARD_URL = "https://api.toutiaoapi.com/hot-event/hot-board/?origin=hot_board"
OUTPUT_PATH = "toutiaoresou.txt"
# Seconds. Without a timeout a stalled connection would hang the job forever.
REQUEST_TIMEOUT = 10


def extract_titles(payload):
    """Return the ``Title`` strings found under ``payload["data"]``.

    Args:
        payload: Decoded JSON response; expected to be a dict whose ``"data"``
            entry is a list of dicts carrying a ``"Title"`` key.

    Returns:
        A list of the non-empty string titles, in response order. Entries
        that are not dicts, or whose ``Title`` is missing or not a string,
        are skipped; a missing or empty ``"data"`` entry yields ``[]``.
    """
    if not isinstance(payload, dict):
        return []
    entries = payload.get("data") or []
    titles = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        title = entry.get("Title")
        if isinstance(title, str) and title:
            titles.append(title)
    return titles


def fetch_titles(url=HOT_BOARD_URL, timeout=REQUEST_TIMEOUT):
    """Download the hot board from ``url`` and return its titles.

    Args:
        url: Endpoint returning the hot-board JSON document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list produced by :func:`extract_titles` for the response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx status code.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    # Imported here so the file-handling helpers stay importable (and
    # testable) on machines where the HTTP dependency is not installed.
    import requests

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # Passing bytes lets json detect the UTF-8/16/32 encoding itself rather
    # than depending on the charset requests guessed for ``response.text``.
    return extract_titles(json.loads(response.content))


def append_new_titles(titles, path=OUTPUT_PATH):
    """Append the titles not yet stored in ``path``, one per line.

    Titles are compared after stripping surrounding whitespace, which is the
    same normalisation applied to the lines already in the file. Duplicates
    inside ``titles`` itself are written only once.

    Args:
        titles: Iterable of title strings.
        path: Text file to read existing titles from and append to; created
            if it does not exist.

    Returns:
        The list of titles actually appended, in the order written.
    """
    appended = []
    # "a+" creates the file when missing and makes every write land at the end.
    with open(path, "a+", encoding="utf-8") as f:
        f.seek(0)  # append mode opens positioned at the end; rewind to read
        seen = set()
        ends_without_newline = False
        for line in f:
            seen.add(line.strip())
            ends_without_newline = not line.endswith("\n")

        for raw_title in titles:
            title = raw_title.strip()
            if not title or title in seen:
                continue
            # Record it immediately so a repeat later in this batch is skipped.
            seen.add(title)
            appended.append(title)

        if appended:
            if ends_without_newline:
                # Keep the first new title from being glued to the last line.
                f.write("\n")
            f.writelines(f"{title}\n" for title in appended)
    return appended


def main():
    """Fetch the current hot-search titles and store the unseen ones."""
    append_new_titles(fetch_titles())


if __name__ == "__main__":
    main()