1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding:utf-8 -*-
import json
import socket
import urllib.error
import requests
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
import re
import datetime
import time
import os
import xlwings as xw
import pandas as pd
from pandas import Series, DataFrame
from icecream import ic

# HTTP request headers; answer() passes this dict to urllib.request.Request.
# NOTE(review): the 'cookie' value looks like a real logged-in session
# credential committed to source -- confirm, and consider rotating it and
# loading it from outside the repository.
headers = {
'cookie': 'SESSIONID=NbZdSDM9vxjCm49QloVUek1p4SESon111VN5lO28kIn; JOID=UFsVAkiKe3CkNfAXfIk7aNWUKxBp0hE3kWC-TR7bCwudQoEkFfollsI18hV_tVbo9R13y8QFnN59RSz0mIJJkc8=; osd=V1kVB0uNeXChNvcVfIw4b9eULhNu0BEykme8TRvYDAmdR4IjF_oglcU38hB8slTo8B5wycQAn9l_RSn3n4BJlMw=; __snaker__id=ShIwuWTxIbqCt5mH; _xsrf=MRidZTLvBDfDoBlr8SxykMmNOIownvTz; _zap=5ab57ada-5fad-4c43-bad7-98ba7cfd8b70; d_c0=AaDYTqHYBxePThzIvRSgV3wVIYN8gFqkMO0=|1688443541; YD00517437729195%3AWM_TID=viqubj%2ByzcNEAQQBFQKQ10KuEzBFBq%2Fw; YD00517437729195%3AWM_NI=JwTUymyovorPwgUZoHdFS7xEKPwSVWvE2kGkegE2%2FXs%2BbaNEXzkGw%2BMTLU1Hnh0InX6PLo3JAh%2Fp4ipfCa29n8rZqShNVWeX92copvzUSz0mQJJ%2FrSn762KrYimXIdfHVVI%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6ee8fe564f6889b93c565a2bc8ab6d85a939a9badc56db4bf97b0e246f7b88eb2c82af0fea7c3b92a8aa99cb6fc67bbb1ba90f15cb5b99bbbee44a89a9dd8fb4f8990b9b4d55db69bfe8cc661a7b98bd3ae54b1f5ae82cd4f9ca6a5a8f6218f8c84b6b444aeb800aaed54f6908490b24283b8e191b763b4ba8d82b133aee8bebbf459ac929ab8e854fbeead90f37eba8b9d9bb367939686d0f159aee9fad3ca7a9bb7a3b9cc70a6989ab7cc37e2a3; __snaker__id=kU2jSIVgLwdBiosC; q_c1=07f28f63c8c341e38b8e77b1a3baadee|1709526862000|1709526862000; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1709175682,1709470462,1709526865; tst=r; SESSIONID=YiF7TOYcPqx8up2ja7VWg1rFOpIQJ4C0kxOFXpgAffB; JOID=VFkVCkpWcMxjjOUZYVkw0BgvMh5wCBiDXN-tQQsBBrVS8ZIsCgE3KAmM4xFgFBlzF9xHSAC72FpVUGsPRDJTT44=; osd=V10QAUxVdMloiuYdZFI20xwqORhzDB2IWtypRAAHBbFX-pQvDgQ8LgqI5hpmFx12HNpETAWw3llRVWAJRzZWRIg=; z_c0=2|1:0|10:1709526878|4:z_c0|80:MS4xY0RqOFNnQUFBQUFtQUFBQVlBSlZUWUZCeldiVDBweFVSTUppZ2t3cnN0ZWFEcHozbk9ZZTlRPT0=|a6c65f3e3c7bef8e605f054bb77d1b2a73ced4b9e47dc5c656ce5cc08975d3bc; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1709530226; unlock_ticket=AUASYgdHahcmAAAAYAJVTXlj5WVi2jfvOou3I0SLnFLDR_-HFDvhTA==; KLBRSID=d017ffedd50a8c265f0e648afe355952|1709530226|1709526862',# use your own cookie
'referer':'https://www.zhihu.com/question/327436952/answer/1210845801',# the URL of the question/answer page itself
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
}
#

def answer(answer_id='1210845801', *, timeout=20, retry_delay=20,
           max_retries=3, request_headers=None, opener=None):
    """Fetch and print every root comment of one Zhihu answer.

    Pages through ``/api/v4/answers/<answer_id>/root_comments`` 20 comments
    at a time.  For each page the ``is_end`` flag is printed, followed by
    one ``id content author`` line per comment.

    A page that fails with a network error is retried at the *same* offset
    (the offset only advances after a page has been parsed), so transient
    failures no longer drop a page of comments.  After ``max_retries``
    consecutive failures on one page the crawl stops and whatever has been
    collected so far is returned.

    Args:
        answer_id: Id of the answer whose comments are fetched; this is the
            number that appears in the answer's URL.
        timeout: Per-request timeout in seconds, passed to the opener.
        retry_delay: Seconds to wait before retrying a failed page.
        max_retries: Consecutive failed attempts tolerated for one page.
        request_headers: Headers for each request; defaults to the
            module-level ``headers`` dict.
        opener: Callable ``opener(request, timeout=...)`` returning a
            context manager with ``read()``; defaults to
            ``urllib.request.urlopen``.

    Returns:
        A list of ``(id, content, author_name)`` tuples in fetch order.
    """
    if request_headers is None:
        request_headers = headers
    if opener is None:
        opener = urllib.request.urlopen

    page_size = 20
    base_url = 'https://www.zhihu.com/api/v4/answers/{}/root_comments'.format(
        answer_id)
    comments = []
    offset = 0
    failures = 0

    while True:
        query = urllib.parse.urlencode({
            'limit': page_size,
            'offset': offset,
            'order': 'normal',
            'status': 'open',
        })
        req = urllib.request.Request(url=base_url + '?' + query,
                                     headers=request_headers)
        try:
            # ``with`` closes the response even when read()/parsing raises.
            with opener(req, timeout=timeout) as response:
                payload = json.loads(response.read())
        except (urllib.error.URLError, socket.timeout) as exc:
            # A timeout during read() is raised as socket.timeout rather
            # than URLError and has no ``reason`` attribute.
            print(getattr(exc, 'reason', exc))
            failures += 1
            if failures >= max_retries:
                break  # give up instead of looping forever
            time.sleep(retry_delay)
            continue  # retry the same offset

        failures = 0
        is_end = payload['paging']['is_end']  # True on the last page
        print(is_end)
        page = payload['data']
        for item in page:
            record = (item['id'], item['content'],
                      item['author']['member']['name'])
            print(*record)
            comments.append(record)

        # An empty page that is not flagged as the end would otherwise be
        # requested at ever-growing offsets without terminating.
        if is_end or not page:
            break
        offset += page_size

    return comments


# Module-level call: the crawl starts as soon as this file is run or imported.
answer()