账号密码登录
微信安全登录
微信扫描二维码登录

登录后绑定QQ、微信即可实现信息互通

手机验证码登录
找回密码返回
邮箱找回 手机找回
注册账号返回
其他登录方式
分享
  • 收藏
    X
    pythpn多线程爬虫queue队列无法返回数据
    20
    0

    import requests
    from lxml import html
    from requests.exceptions import RequestException
    import time
    import queue
    import threading

    class MyThread(threading.Thread):

    def __init__(self, func):
        threading.Thread.__init__(self)
        self.func = func
    
    def run(self):
        self.func()
    

    def worker():

    while not q.empty():
        page = q.get()  # 获得任务
        print('成功获取 : 第' + str(page) + '页url列表')
        headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}
        main_page_url = 'https://www.qiushibaike.com/hot/page/' + str(page)
        url_list = []
        try:
            rep = requests.get(main_page_url, headers = headers)
            time.sleep(1)
            if rep.status_code == 200:
                print("第" +str(page) + "页链接成功")
                con = rep.content
                sel = html.fromstring(con)
                urls = sel.xpath('//a[@class="contentHerf"]/@href')
                for url in urls:                    
                    message_url = 'https://www.qiushibaike.com' + url
                    url_list.append(message_url)
                print(url_list)
                **#return url_list**
                                
        except RequestException:
            print("链接失败")
            return None
        time.sleep(1)
    

    def main():

    threads = []
    #all_url = []
    #url_list = worker()
    for page in range(1, 7):    #爬前6页
        q.put(page)
    for i in range(threadNum): #开启2个线程
        thread = MyThread(worker)
        thread.start()
        #all_url.append(url_list)
        threads.append(thread)
    for thread in threads:
        thread.join()   #运行2个线程后再运行2个线程
    

    if name == '__main__':

    q = queue.Queue()
    threadNum = 2   #线程数量 
    main()
    

    worker里面每次return就只运行前两个线程然后结束 正常来说 是运行两个线程之后继续运行两个线程。还是我返回数据的方法不对 望解答

    0
    打赏
    收藏
    点击回答
        全部回答
    • 0
    • 放ジ荡〆 普通会员 1楼
      502 Bad Gateway

      502 Bad Gateway


      nginx
    更多回答
    扫一扫访问手机版
    • 回到顶部
    • 回到顶部