百度搜索引擎取真实地址-python代码
def parseBaidu(keyword, pagenum, timeout=10):
    """Yield the final (post-redirect) URL of every Baidu result for *keyword*.

    Baidu result links point at a redirector rather than at the target site,
    so each link is requested and the URL the response finally lands on is
    yielded.

    Relies on names provided by the enclosing module: ``requests``,
    ``BeautifulSoup``, ``quote`` and a ``headers`` mapping that is sent with
    every request.

    Args:
        keyword: Search term; it is URL-quoted before being put in the query.
        pagenum: Highest zero-based result-page index to fetch (anything
            ``int()`` accepts). Pages ``0..pagenum`` inclusive are requested,
            i.e. ``pagenum + 1`` pages, using ``pn = page * 10`` as offset.
        timeout: Seconds passed to ``requests.get`` for each request, so a
            stalled server cannot block the generator forever.

    Yields:
        The resolved URL string for each result link, or ``None`` in place of
        a result page that could not be fetched/parsed or a link that could
        not be resolved.
    """
    query = quote(keyword)
    search_url = ('https://www.baidu.com/s?wd=' + query + '&oq=' + query
                  + '&ie=utf-8' + '&pn=')

    for page in range(int(pagenum) + 1):
        # Keep the try body free of ``yield``: an except clause wrapped around
        # a yield would intercept GeneratorExit when the consumer closes the
        # generator early and then fail with "generator ignored
        # GeneratorExit".
        try:
            response = requests.get(search_url + str(page * 10),
                                    headers=headers, timeout=timeout)
            soup = BeautifulSoup(response.text, "html.parser")
            anchors = soup.select('div.c-container > h3 > a')
        except Exception:
            # Best effort: report the failed page and move on to the next one.
            yield None
            continue

        for anchor in anchors:
            # Resolve each link on its own so one dead redirect does not throw
            # away the remaining results of the page.
            try:
                real_url = requests.get(anchor['href'], headers=headers,
                                        timeout=timeout).url
            except Exception:
                real_url = None
            yield real_url