Python3 爬取某乎“有一双大长腿是一种怎么样的体验?”的图片

import os

import requests

# HTTP headers sent with every request made by this script: a desktop Chrome
# user-agent plus the cookie of a logged-in browser session.
# NOTE(review): the cookie is an account-specific session captured from a
# browser and hard-coded here; it is likely stale and has to be replaced with a
# fresh one before running. Avoid committing real session cookies.
headers = {
    "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36",
    "sec-ch-ua":'"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    # One logical header value written as two adjacent string literals (joined
    # at compile time): a quoted string cannot contain a raw line break.
    "cookie": (
        'zap=3670ff93-c941-43ef-8ace-21ad83571b78; d_c0="AKAb6PEScxKPTkbBuXCZX69mxv3JTCEBo-o=|1609740130"; _xsrf=ZX3RAatNtTVmSwddO8FaFNobeO27gPqd; __snaker__id=KVBSWmR2uta6A5Rb; _9755xjdesxxd_=32; YD00517437729195%3AWM_NI=HPXZShcTzhYSGF3IJijRVStJZ7Xu8fC7bmisS4dgsaVmZkFYeeFxQGa%2F9g4vKBcU6pyTxtirJuMm%2Bxgal5wl4HH0C4adbReYb8Zhy1%2B2PsLg9XDUbPq6UB8B%2F3lV3jvcT0o%3D; YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eea6ca6bfca6a688d35f95928fa2d55a969a9aafaa649c8aaab0aa54ae8786ccd72af0fea7c3b92a8c9098bbd264a18cb689dc59859afdaac963aa8e9ab4f7429ae99896bb6e81e9ae8eec7991ecac88f26b96b3968cc553a7bb8a99d44da3f1f788b77e899eadb6b57cbb8cb895ec7a8aab838eaa74a5bfa4d6f668f89888d1c240a3bcaeb3e853a2e89689fc3be9eea094cf6dfb888cb6e15cb3ea9ba3bb4aededa4b7b262b6bd9cd1ee37e2a3; YD00517437729195%3AWM_TID=8NGjvT1dKpZFVEEVVQd%2BxGFOLvXncc%2FS; l_n_c=1; r_cap_id="YzJjMTkzYjgxODQ5NDIzNDk2MWRlNTQxZjBjMzI1MDk=|1616039964|3f2841310d1068c4ce9b87295c21d01814df1b5b"; cap_id="N2VmYjkwYTFkMmFmNDUyMWFmNWJhYmU3NWQwNDgxYzc=|1616039964|29231f10e2e3248b3f4152177a6c8635544d9603"; l_cap_id="N2M5ZDI0OWRlYjUwNDE0YzgxZjViZjU0NGE2N2QzNWQ=|1616039964|2ee6600999064ae976f067d038d87c00d96eafb0"; n_c=1; auth_type=cXFjb25u|1616039967|898d4d871fc9dcdbd74169e0dcd00b79fb8aa844; token="MjMzNENDRUNCOEZCRjI3MzcyMDVBMjJCNzNFRTIyMTU=|1616039967|c5796ad5f1d193ea50ceda306a127d63b815ffba"; client_id="MjUwNjlCNTVCQjk2RDY1NEJDNDM3NzZDOEQzMDFBQzE=|1616039967|3bbf39532b12b9928a0474b84ff3174767af787a"; capsion_ticket="2|1:0|10:1616039967|14:capsion_ticket|44:MDE5NzkyMzc0YTU1NDZmZDgyMjI4YjQyZWQyMTIwNDU=|fd4926a9409090d6e257da1c9e107b9d63ca4ab2fc4db598af9ceda407d264c9"; captcha_session_v2="2|1:0|10:1616046393|18:captcha_session_v2|88:djZYWVBISTdJdWVlbTQ3elRKZFB2cU5rR3BuWE9IQ2RMUVhXUDVrZndCMHNWNW55MnFsdHlaRlpBSkplSkZvcw==|c2cb990c6c001613247a84d08dc9623ac738c323abd981fe8d8aacd1005f6cd3"; SESSIONID=tLOvP4xWfoye3lxadCWh2lFH6rmI2D7SJyd0qbSJ3EV; '
        'JOID=W1oRBkhukwxMXHCrF2IyV3h8b9kIEdlCPAZP1UEC93MoaRbYcFl9WSBdfawQXkvH23gIAc6RdAFy8FMOVbBpbFw=; osd=UFgVBk1lkQhMWXupE2I3XHp4b9wDE91COQ1N0UEH_HEsaRPTcl19XCtfeawVVUnD230DA8qRcQpw9FMLXrJtbFk=; gdxidpyhxdE=2G%2Bb6wbUXZ4qiexpHrPHb3plG4AXM2AlVJNq0%5C%2BQTDJ2tMtXeMgRV9OBB%5CW3Pxm4TwqgHVgXUIkUpuoSwPhLwd%2B0xxjS2zDvD1P83SjCuObcsIYbUtjaJEVGdd0UgpxQ7aRkt0aU2DcbPKsO%5CUfXpiWEXWpC7bXKXgn9nJEGU8yWjuDO%3A1616047294185; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1616047376; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1616046415,1616046440,1616046470,1616047376; KLBRSID=5430ad6ccb1a51f38ac194049bce5dfe|1616050531|1616046393'
    ),
}

def download(url):
    """Download one image and save it inside the local ``test`` directory.

    The file is named after the last ``/``-separated segment of ``url``. The
    ``test`` directory is created on demand. A response with an HTTP error
    status is reported and skipped instead of being written to disk as if it
    were an image.

    Args:
        url: Absolute URL of the image to fetch.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within the timeout.
    """
    file_name = str(url).split('/')[-1]  # last path segment is the file name
    os.makedirs('test', exist_ok=True)  # open() does not create directories
    # The whole body is read via ``r.content`` below, so streaming brings no
    # benefit; the timeout prevents a stalled server from hanging the crawl.
    r = requests.get(url, headers=headers, timeout=30)
    if not r.ok:
        print("%s 下载失败: HTTP %s" % (url, r.status_code))
        return
    # os.path.join keeps the path portable; the former 'test\%s' literal relied
    # on an invalid escape sequence and only named a sub-path on Windows.
    with open(os.path.join('test', file_name), 'wb') as f:
        f.write(r.content)
    print("%s 下载完成..." % url)

def _extract_image_urls(content):
    """Return the image URLs embedded in one answer's HTML, in document order."""
    marker = '<noscript><img src="'
    # Everything after each marker starts with the URL; cut it at the closing
    # quote of the attribute first, then drop any query string.
    return [piece.split('"', 1)[0].split("?", 1)[0]
            for piece in content.split(marker)[1:]]


def spider(questions, offset):
    """Fetch one page of answers for a question and download their images.

    Requests up to 20 answers (the ``limit`` fixed in the URL) starting at
    ``offset`` and passes every image URL found in each answer's ``content``
    to ``download``.

    Args:
        questions: The question id inserted into the API URL.
        offset: Index of the first answer of the requested page.

    Raises:
        KeyError: If the JSON reply has no ``data`` list or an answer has no
            ``content`` field.
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    url = "https://www.zhihu.com/api/v4/questions/{}/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cattachment%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Cis_labeled%2Cpaid_info%2Cpaid_info_content%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_recognized%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics%3Bdata%5B%2A%5D.settings.table_of_content.enabled&limit=20&offset={}&platform=desktop&sort_by=default".format(questions, offset)
    payload = requests.get(url, headers=headers, timeout=30).json()
    # Iterate over the answers actually returned: a page may hold fewer than
    # 20 entries, so indexing a fixed range(20) would raise IndexError.
    for answer in payload['data']:
        for img in _extract_image_urls(str(answer['content'])):
            download(img)

# Script entry point: ask for a question link, extract its id and crawl the
# images of the first 100 answers.
if __name__ == '__main__':
    raw_link = input("输入链接: ")
    # The id is whatever follows "question/", cut at the next path separator,
    # query string or fragment.
    _, found, tail = str(raw_link).partition("question/")
    question_id = tail.split("/")[0].split("?")[0].split("#")[0].strip()
    if not found or not question_id:
        # Exit with a readable message instead of an IndexError traceback.
        raise SystemExit("无法从链接中解析问题 id: %s" % raw_link)
    print(question_id)
    # Five pages of 20 answers each (offsets 0, 20, ..., 80); the number of
    # images downloaded depends on what those answers contain.
    for i in range(0, 100, 20):
        spider(question_id, i)

图片[1]-python3 爬取某乎”有一双大长腿是一种怎么样的体验?”的图片-百分数

运行结果

图片[2]-python3 爬取某乎”有一双大长腿是一种怎么样的体验?”的图片-百分数

图片[3]-python3 爬取某乎”有一双大长腿是一种怎么样的体验?”的图片-百分数

© 版权声明
THE END
喜欢就支持一下吧
点赞10
分享
评论 抢沙发