This article gives a detailed introduction to Python multithreading, with code examples. It should serve as a useful reference for anyone who needs it.
The Global Interpreter Lock (GIL) in CPython
At any given moment only one thread is executing Python bytecode, on a single CPU (multiple threads cannot be mapped onto multiple CPUs to run bytecode in parallel).
import dis


def add(a):
    """Return ``a`` incremented by one."""
    a = a + 1
    return a


# dis.dis() writes the disassembly to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
dis.dis(add)
The result is different on every run: this is a thread-safety problem.
When the GIL is released: a thread gives up the GIL after it has executed a certain number of bytecode instructions or has used up its time slice.
The GIL is also released voluntarily when a thread performs an I/O operation.
total = 0


def add():
    """Add one to the module-level ``total`` a million times, without any lock."""
    # Placeholder outline kept from the original snippet:
    #   1. do something
    #   2. I/O operation
    #   3. do something else
    global total
    for _ in range(1000000):
        total += 1


def desc():
    """Subtract one from the module-level ``total`` a million times, without any lock."""
    global total
    for _ in range(1000000):
        total -= 1


import threading

thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)

# Start both workers first, then wait for both of them.
for worker in (thread1, thread2):
    worker.start()
for worker in (thread1, thread2):
    worker.join()

# The two threads update ``total`` without synchronisation, so the printed
# value is not guaranteed to be 0.
print(total)
Originally, the smallest unit that the operating system could schedule was the process. Because a process consumes a large amount of system resources, threads were introduced later. Threads live inside processes (what we see in the task manager are actually processes). Today the smallest unit that the operating system can schedule is the thread.
For programs that are dominated by I/O operations, the performance difference between multi-processing and multi-threading is small. Multi-threading can even outperform multi-processing, because threads are more lightweight.
import time
from threading import Thread


def get_detail_html(url, delay=2):
    """Simulate downloading an article detail page.

    Args:
        url: Address of the page; not used by this simulation.
        delay: Seconds to sleep in place of the real request.
    """
    print("get detail html started")
    time.sleep(delay)
    print("get detail html end")


def get_detail_url(url, delay=4):
    """Simulate downloading an article list page.

    Args:
        url: Address of the page; not used by this simulation.
        delay: Seconds to sleep in place of the real request.
    """
    print("get detail url started")
    time.sleep(delay)
    print("get detail url end")


if __name__ == '__main__':
    # daemon=True makes the workers daemon threads: they are killed as soon as
    # the main thread finishes. (Thread.setDaemon() is deprecated in favour of
    # the ``daemon`` constructor argument / attribute.)
    thread1 = Thread(target=get_detail_html, args=("",), daemon=True)
    thread2 = Thread(target=get_detail_url, args=("",), daemon=True)

    start_time = time.time()
    thread1.start()
    thread2.start()

    # join() blocks the main thread until the worker has finished, so the
    # daemon threads are not cut short here.
    thread1.join()
    thread2.join()

    # For comparison: by default (non-daemon) child threads are NOT killed
    # when the main thread exits.
    print("last time: {}".format(time.time() - start_time))
import time
import threading


def get_detail_html(url, delay=2):
    """Simulate downloading an article detail page.

    Args:
        url: Address of the page; not used by this simulation.
        delay: Seconds to sleep in place of the real request.
    """
    print("get detail html started")
    time.sleep(delay)
    print("get detail html end")


def get_detail_url(url, delay=4):
    """Simulate downloading an article list page.

    Args:
        url: Address of the page; not used by this simulation.
        delay: Seconds to sleep in place of the real request.
    """
    print("get detail url started")
    time.sleep(delay)
    print("get detail url end")


# 2. Implementing multithreading by subclassing Thread
class GetDetailHtml(threading.Thread):
    """Thread that simulates downloading an article detail page."""

    def __init__(self, name, delay=2):
        """Create the thread.

        Args:
            name: Thread name, forwarded to ``threading.Thread``.
            delay: Seconds ``run()`` sleeps in place of the real request.
        """
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Body executed in the new thread after ``start()``."""
        print("get detail html started")
        time.sleep(self.delay)
        print("get detail html end")


class GetDetailUrl(threading.Thread):
    """Thread that simulates downloading an article list page."""

    def __init__(self, name, delay=4):
        """Create the thread.

        Args:
            name: Thread name, forwarded to ``threading.Thread``.
            delay: Seconds ``run()`` sleeps in place of the real request.
        """
        super().__init__(name=name)
        self.delay = delay

    def run(self):
        """Body executed in the new thread after ``start()``."""
        print("get detail url started")
        time.sleep(self.delay)
        print("get detail url end")


if __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    start_time = time.time()
    thread1.start()
    thread2.start()

    # Block until both workers are done. These are ordinary (non-daemon)
    # threads, so they are not killed when the main thread exits; only daemon
    # threads are.
    thread1.join()
    thread2.join()

    print("last time: {}".format(time.time() - start_time))
Using a queue to communicate between threads
# filename: thread_queue_test.py
# Synchronise threads by means of a queue
from queue import Queue
import time
import threading


def get_detail_html(queue, delay=2):
    """Consume URLs from ``queue`` forever, simulating a detail-page crawl.

    Args:
        queue: Queue of detail-page URLs filled by the producer.
        delay: Seconds to sleep in place of the real download.
    """
    while True:
        url = queue.get()  # blocks until a URL is available
        try:
            print("get detail html started")
            time.sleep(delay)
            print("get detail html end")
        finally:
            # Each get() must be matched by exactly one task_done();
            # otherwise queue.join() would never return.
            queue.task_done()


def get_detail_url(queue, delay=4, rounds=None):
    """Produce detail-page URLs, simulating a crawl of the article list page.

    Args:
        queue: Queue that receives the generated URLs.
        delay: Seconds to sleep in place of the real download.
        rounds: Number of 20-URL batches to produce; ``None`` (the default)
            keeps producing forever.
    """
    produced = 0
    while rounds is None or produced < rounds:
        print("get detail url started")
        time.sleep(delay)
        for i in range(20):
            # put() waits until a free slot is available;
            # put_nowait() is the non-blocking variant.
            queue.put("http://projectsedu.com/{id}".format(id=i))
        print("get detail url end")
        produced += 1


# 1. Thread communication - shared variable (here: the shared queue)
if __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)
    start_time = time.time()

    # Produce a single batch so that the demo terminates.
    thread_detail_url = threading.Thread(
        target=get_detail_url, args=(detail_url_queue,), kwargs={"rounds": 1}
    )
    thread_detail_url.start()

    for i in range(10):
        # The consumers loop forever, so they are daemon threads: they must
        # not keep the process alive once the work is done.
        html_thread = threading.Thread(
            target=get_detail_html, args=(detail_url_queue,), daemon=True
        )
        html_thread.start()

    # First wait until every URL has been queued, then block on the queue
    # itself until every queued URL has been marked done by a consumer.
    thread_detail_url.join()
    detail_url_queue.join()
    print("last time: {}".format(time.time() - start_time))
Problems that must be faced in multi-threaded programming
# Without a lock
import dis


def add1(a):
    """Increment the local ``a``; exists only so its bytecode can be inspected."""
    a += 1


def desc1(a):
    """Decrement the local ``a``; exists only so its bytecode can be inspected."""
    a -= 1


# Steps performed by add1 (the update is several steps, not a single one):
#   1. load a          a = 0
#   2. load 1          1
#   3. +               1
#   4. assign to a     a = 1
#
# Steps performed by desc1:
#   1. load a          a = 0
#   2. load 1          1
#   3. -               1
#   4. assign to a     a = -1

# dis.dis() prints the disassembly itself and returns None, so it is called
# directly instead of being wrapped in print().
dis.dis(add1)
dis.dis(desc1)
Using locks costs performance, and locks can cause deadlocks: acquiring the same lock twice, failing to release a lock after acquiring it, or two threads waiting for each other (A needs a resource held by B while B needs a resource held by A).
import threading
from threading import Lock

total = 0
# One lock that guards every update of ``total``
lock = Lock()


def add(count=1000000):
    """Increment the shared ``total`` ``count`` times, one locked update at a time.

    Args:
        count: Number of increments to perform.
    """
    global total
    for _ in range(count):
        # ``with`` acquires the lock and always releases it again, even if
        # the body raises.
        with lock:
            total += 1


def desc(count=1000000):
    """Decrement the shared ``total`` ``count`` times, one locked update at a time.

    Args:
        count: Number of decrements to perform.
    """
    global total
    for _ in range(count):
        with lock:
            total -= 1


if __name__ == "__main__":
    thread1 = threading.Thread(target=add)
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    # Every update happened under the lock, so the result is always 0.
    print(total)
""" A(a、b) acquire (a) acquire (b) B(a、b) acquire (b) acquire (a) # 解决办法 B(a、b) acquire (a) acquire (b) """
import threading
from threading import RLock

total = 0
# Re-entrant lock: the same thread may acquire it several times, as long as
# it releases it the same number of times.
lock = RLock()


def add(lock, count=1000000):
    """Increment the shared ``total`` ``count`` times while holding ``lock`` re-entrantly.

    Args:
        lock: Re-entrant lock guarding ``total``.
        count: Number of increments to perform.
    """
    global total
    for _ in range(count):
        # The nested acquisition by the same thread only works because the
        # lock is an RLock; each ``with`` releases one level on exit.
        with lock:
            with lock:
                total += 1
                do_something(lock)


def desc(count=1000000):
    """Decrement the shared ``total`` ``count`` times under the module-level lock.

    Args:
        count: Number of decrements to perform.
    """
    global total
    for _ in range(count):
        with lock:
            total -= 1


def do_something(lock):
    """Acquire ``lock`` once more (third level when called from ``add``) and release it."""
    with lock:
        # do something
        pass


if __name__ == "__main__":
    # add() takes the lock as a parameter, so it has to be supplied via
    # ``args``; without it the thread would die with a TypeError.
    thread1 = threading.Thread(target=add, args=(lock,))
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    print(total)
Condition variables are used for more complex inter-thread synchronization.
# Without a condition variable a real dialogue cannot be implemented
import threading


class XiaoAi(threading.Thread):
    """Speaker that prints its two lines, taking the shared lock for each one."""

    def __init__(self, lock):
        """Create the thread.

        Args:
            lock: Lock shared with the other speaker.
        """
        super().__init__(name="小爱")
        self.lock = lock

    def run(self):
        """Print both lines; the lock is released between them."""
        with self.lock:
            print("{} : 在 ".format(self.name))

        with self.lock:
            print("{} : 好啊 ".format(self.name))


class TianMao(threading.Thread):
    """Speaker that prints its two lines, taking the shared lock for each one."""

    def __init__(self, lock):
        """Create the thread.

        Args:
            lock: Lock shared with the other speaker.
        """
        super().__init__(name="天猫精灵")
        self.lock = lock

    def run(self):
        """Print both lines; the lock is released between them."""
        with self.lock:
            print("{} : 小爱同学 ".format(self.name))

        with self.lock:
            print("{} : 我们来对古诗吧 ".format(self.name))


if __name__ == "__main__":
    # A plain lock only gives mutual exclusion; it cannot make the two
    # threads take turns, so the printed lines do not form a proper dialogue.
    lock = threading.Lock()
    xiaoai = XiaoAi(lock)
    tianmao = TianMao(lock)

    xiaoai.start()
    tianmao.start()
# Condition variable
import threading


class XiaoAi(threading.Thread):
    """Second speaker: waits to be notified, answers with one line, notifies back."""

    # Lines spoken by this thread, in order.
    REPLIES = ("在", "好啊", "君住长江尾", "共饮长江水", "此恨何时已", "定不负相思意")

    def __init__(self, cond):
        """Create the thread.

        Args:
            cond: ``threading.Condition`` shared with the other speaker.
        """
        super().__init__(name="小爱")
        self.cond = cond

    def run(self):
        """For every reply: wait for the other speaker, print, then notify."""
        with self.cond:
            for reply in self.REPLIES:
                self.cond.wait()
                print("{} : {} ".format(self.name, reply))
                self.cond.notify()


class TianMao(threading.Thread):
    """First speaker: prints one line, notifies, then waits for the answer."""

    # Lines spoken by this thread, in order.
    LINES = ("小爱同学", "我们来对古诗吧", "我住长江头", "日日思君不见君", "此水几时休", "只愿君心似我心")

    def __init__(self, cond):
        """Create the thread.

        Args:
            cond: ``threading.Condition`` shared with the other speaker.
        """
        super().__init__(name="天猫精灵")
        self.cond = cond

    def run(self):
        """For every line: print, notify the other speaker, wait for the answer."""
        with self.cond:
            for line in self.LINES:
                print("{} : {} ".format(self.name, line))
                self.cond.notify()
                self.cond.wait()


if __name__ == "__main__":
    cond = threading.Condition()
    xiaoai = XiaoAi(cond)
    tianmao = TianMao(cond)

    # The start order matters: XiaoAi must already be waiting when TianMao
    # sends the first notify(), otherwise that notification is lost and both
    # threads block forever.
    # wait() and notify() may only be called after entering ``with cond``.
    # A Condition has two layers of locks: the underlying lock is released
    # when a thread calls wait(); on top of that, every wait() call allocates
    # a fresh lock and puts it into the condition's waiting queue, where it
    # stays until a notify() wakes it up.
    xiaoai.start()
    tianmao.start()
The above is the detailed content of Detailed introduction to python multithreading (code example). For more information, please follow other related articles on the PHP Chinese website!