问题背景
最近项目中遇到个需求, 主动杀死多个正在运行或睡眠的线程, 便于明确管理回收资源;
首先想到的是用 pthread_cancel() 杀死指定线程;
1. 获取 linux 线程 ID
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
//#include <pthread.h>
// Thread entry point: prints the value returned by pthread_self() and returns NULL.
// The argument p is not used.
// NOTE(review): <pthread.h> is commented out in this listing, so pthread_self()
// is called here without a visible prototype -- presumably the reason the ID
// printed by this function differs from the `tid` that main() prints; confirm.
void* thread(void* p)
{printf("child thread id=%lu\n",pthread_self());// get the current thread's ID
//sleep(100);
return NULL;
}
// Prints the main thread's ID, starts one child thread running thread(), prints
// the pthread_t that pthread_create() stored in `tid`, then sleeps for 100 s.
// The child is never joined; the sleep is the only thing keeping the process alive.
// NOTE(review): <pthread.h> is commented out in this listing, so the pthread_*
// calls below have no visible prototypes. This looks like the deliberately
// "broken" variant whose output the text after the listing discusses -- confirm
// before changing it.
int main(int argc,char* argv[])
{
pthread_t tid;
printf("main thread id=%lu\n",pthread_self());// get the current thread's ID
pthread_create(&tid,NULL,thread,NULL);
printf("child's tid=%lu\n",tid);
sleep(100); //wait child
return 0;
}
执行结果:
(base) leon@sys:~/work/2022$ ./a.out
main thread id=4241463104
child's tid=140329412953856
child thread id=4241458944
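发现主线程打印的子线程 ID (child's tid) 与子线程自己打印的 ID 并不相等。原因是没有包含 pthread.h 时,pthread_self() 没有函数原型,被隐式声明为返回 int,64 位的线程 ID 被截断成了低 32 位 (140329412953856 的低 32 位正好是 4241458944)。加上头文件 pthread.h: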
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
/*
 * Thread entry point: reports the calling thread's ID on stdout and finishes.
 *
 * p is not used. The thread's exit value is always NULL.
 */
void *thread(void *p)
{
    pthread_t self = pthread_self();   /* ID of the thread running this function */

    printf("child thread id=%lu\n", self);
    return NULL;
}
/*
 * Demo driver: prints the main thread's ID, starts one child running thread(),
 * prints the ID pthread_create() reported for it, then waits for the child.
 *
 * Returns 0 on success, EXIT_FAILURE if the child could not be created.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    /* pthread_t is an opaque type; the cast spells out the assumption behind
     * %lu (it is an unsigned long on Linux/glibc). */
    printf("main thread id=%lu\n", (unsigned long)pthread_self());

    pthread_t tid;
    int rc = pthread_create(&tid, NULL, thread, NULL);
    if (rc != 0) {
        /* pthread functions return the error number rather than setting errno;
         * tid is not valid in this case, so do not print or join it. */
        fprintf(stderr, "pthread_create failed: error %d\n", rc);
        return EXIT_FAILURE;
    }
    printf("child's tid=%lu\n", (unsigned long)tid);

    /* Join instead of sleeping a fixed 100 s: this returns as soon as the child
     * has finished and also releases the child's resources. */
    pthread_join(tid, NULL);
    return 0;
}
结果显示 ID 相同了:
(base) leon@sys:~/work/2022$ ./a.out
main thread id=140524112881472
child's tid=140524112877312
child thread id=140524112877312
2. 指定线程名字
为便于调试,可以给线程命名。给当前线程命名可执行如下代码 (需要 #include <sys/prctl.h>):
// Build a label from the process ID and the calling thread's ID, print it, and
// pass it to prctl(PR_SET_NAME) to name the calling thread.
// NOTE(review): prctl() is declared in <sys/prctl.h>. Per prctl(2) the kernel
// keeps only the first 15 characters of the name -- verify. If so, the long
// fixed prefix used here would leave every thread with the same truncated name,
// and the trailing "\n\n" never reaches the kernel.
char buf[256];
snprintf(buf,256,"album_thread_pid%d__tid%lu\n\n",getpid(), pthread_self());
printf("%s",buf);
prctl(PR_SET_NAME,buf);
3. 对指定线程调用 pthread_cancel()
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>
/*
 * Thread entry point for the cancellation demo.
 *
 * Prints its own ID, names itself via prctl(PR_SET_NAME), then spins forever.
 * The loop body is intentionally empty: it contains no cancellation point, so
 * this listing can show what happens to a pthread_cancel() request aimed at
 * such a thread.
 *
 * p is not used. The function never returns normally.
 */
void *thread(void *p)
{
    (void)p;

    /* pthread_t is opaque; the cast spells out the assumption behind %lu. */
    unsigned long self = (unsigned long)pthread_self();
    printf("child thread id=%lu\n", self);

    /* Exactly one argument for the single %lu conversion. A stray getpid()
     * argument used to sit in front of the thread ID, so %lu consumed the
     * wrong value and the thread ID was never printed. */
    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", self);
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    for (;;) {
        /* busy-wait; deliberately no cancellation point in here */
    }

    return (void *)0;
}
/*
 * Demo driver: starts the child, requests its cancellation after one second,
 * joins it and reports the child's exit value.
 *
 * pthread_join() only returns once the child has actually terminated, so with
 * a child that never acts on the cancellation request this call blocks.
 *
 * Returns 0 on success, EXIT_FAILURE if the child could not be created.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    pthread_t tid;
    void *ret = NULL;

    printf("main thread id=%lu\n", (unsigned long)pthread_self());

    int rc = pthread_create(&tid, NULL, thread, NULL);
    if (rc != 0) {
        /* pthread functions return the error number rather than setting errno. */
        fprintf(stderr, "pthread_create failed: error %d\n", rc);
        return EXIT_FAILURE;
    }
    printf("child's tid=%lu\n", (unsigned long)tid);

    sleep(1);                 /* give the child time to start */
    pthread_cancel(tid);      /* posts a request only; does not wait */
    pthread_join(tid, &ret);

    /* Two conversions need two arguments: the thread ID and the exit value.
     * The exit value is a pointer (PTHREAD_CANCELED for a cancelled thread,
     * which shows up as -1 on glibc), so print it at full width. */
    printf("thread %lu exit code %ld\n", (unsigned long)tid, (long)ret);

    sleep(100);               /* presumably keeps the process around for inspection */
    return 0;
}
执行结果:
(base) leon@sys:~/work/2022$ ./a.out
main thread id=140480980535104
child's tid=140480980530944
child thread id=140480980530944
thread_pid139972__tid140480980530944
top 查看线程信息,thread_tidxx 并没有被杀死:
MiB Mem : 15942.9 total, 478.4 free, 2259.4 used, 13205.1 buff/cache
MiB Swap: 2048.0 total, 2041.5 free, 6.5 used. 13248.4 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
139990 leon 20 0 10952 1764 1640 R 99.9 0.0 0:38.44 thread_tid13980
421 root 20 0 8020 4792 3012 S 0.7 0.0 35:04.44 plymouthd
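原因:pthread_cancel 调用并不等待线程终止,它只是提出取消请求。线程默认的取消类型是延迟取消 (PTHREAD_CANCEL_DEFERRED),只有当被取消的线程运行到下一个取消点 (cancellation point,例如 sleep、read、write、pthread_cond_wait 等可能阻塞的函数) 时,才会真正结束线程。上面的 while(1) {;} 里没有任何取消点,所以请求一直得不到处理。
如果线程里没有调用任何取消点函数,可以主动调用 pthread_testcancel() 插入一个取消点来解决。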
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>
/*
 * Thread entry point for the pthread_testcancel() variant of the demo.
 *
 * Prints its own ID, names itself via prctl(PR_SET_NAME), then loops forever
 * calling pthread_testcancel() so that a pending cancellation request can take
 * effect.
 *
 * p is not used. The function has no normal return path out of the loop.
 */
void *thread(void *p)
{
    char label[256];

    printf("child thread id=%lu\n", pthread_self());

    snprintf(label, sizeof label, "thread_tid%lu\n\n", pthread_self());
    printf("%s", label);
    prctl(PR_SET_NAME, label);

    /* Offer a cancellation point on every iteration. */
    for (;;) {
        pthread_testcancel();
    }

    return (void *)0;
}
/*
 * Demo driver: starts the child, requests its cancellation after one second,
 * joins it and reports the child's exit value.
 *
 * Returns 0 on success, EXIT_FAILURE if the child could not be created.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    pthread_t tid;
    void *ret = NULL;

    printf("main thread id=%lu\n", (unsigned long)pthread_self());

    int rc = pthread_create(&tid, NULL, thread, NULL);
    if (rc != 0) {
        /* pthread functions return the error number rather than setting errno. */
        fprintf(stderr, "pthread_create failed: error %d\n", rc);
        return EXIT_FAILURE;
    }
    printf("child's tid=%lu\n", (unsigned long)tid);

    sleep(1);                 /* give the child time to start */
    pthread_cancel(tid);      /* posts a request only; does not wait */
    pthread_join(tid, &ret);  /* returns once the child has terminated */

    /* Two conversions need two arguments: the thread ID and the exit value.
     * The exit value is a pointer (PTHREAD_CANCELED for a cancelled thread,
     * which shows up as -1 on glibc), so print it at full width. */
    printf("thread %lu exit code %ld\n", (unsigned long)tid, (long)ret);

    sleep(100);               /* presumably keeps the process around for inspection */
    return 0;
}
执行结果:
(base) leon@sys:~/work/2022$ ./a.out
main thread id=139980168910656
child's tid=139980168906496
child thread id=139980168906496
thread_tid139980168906496
thread 4294967295 exit code -1403026720
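可见线程被成功杀死 (pthread_join 已经返回)。注意:输出里的两个数值并不是真实的线程 ID 和退出码——产生这段输出的 printf 有两个占位符却只传了一个参数:4294967295 其实是被截断成 int 的返回值 -1 (即 PTHREAD_CANCELED) 按 %lu 打印的结果,-1403026720 则是多出来的 %d 读到的无意义值。参数正确时应打印线程 ID 和 -1。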
4. 多线程涉及同步锁时
场景延伸到多个线程,并且线程之间有同步机制,此时要杀线程,更复杂
#include <stdio.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
// Number of worker threads the demo creates.
#define MAX_REGISTER_THREAD (4)
// Thread handles of the workers, indexed in creation order by main().
pthread_t register_pid[MAX_REGISTER_THREAD];
// Mutex paired with register_cond; workers hold it around pthread_cond_wait().
pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
// Condition variable the workers block on; main() broadcasts on it.
pthread_cond_t register_cond= PTHREAD_COND_INITIALIZER;
/*
 * Worker for the "cancel a thread that waits on a condition variable" demo.
 *
 * Names itself, then loops: take register_lock, wait on register_cond, release
 * the lock, print a line, and poll for cancellation.
 *
 * The signature is the one pthread_create() requires for a start routine.
 *
 * Deliberately NOT cancellation-safe: no cleanup handler releases
 * register_lock, so a cancellation acted on inside pthread_cond_wait() leaves
 * the mutex locked. That hazard is what this listing is meant to exhibit, so
 * only the compile-level errors are corrected here.
 *
 * arg is not used. The loop has no normal exit; the thread ends only by being
 * cancelled.
 */
void *thread_Register(void *arg)
{
    (void)arg;

    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", (unsigned long)pthread_self());
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    for (;;) {
        /* The pthread calls take the ADDRESS of the mutex / condition variable. */
        pthread_mutex_lock(&register_lock);
        //...
        pthread_cond_wait(&register_cond, &register_lock);
        //...
        pthread_mutex_unlock(&register_lock);

        printf("thread_pid%d__tid%lu\n\n", (int)getpid(), (unsigned long)pthread_self());
        pthread_testcancel();
    }
}
/*
 * Demo driver: starts MAX_REGISTER_THREAD workers, wakes them once, then
 * cancels and joins them one at a time.
 *
 * NOTE: this cancel-while-waiting sequence is the problematic approach the
 * listing is meant to exhibit (it can hang in pthread_join()); only the
 * argument errors and the unchecked thread creation are corrected here.
 *
 * Returns 0 on success, EXIT_FAILURE if a worker could not be created.
 */
int main(void)
{
    for (int i = 0; i < MAX_REGISTER_THREAD; i++) {
        /* pthread_create() stores the new thread's ID through a pointer. */
        int rc = pthread_create(&register_pid[i], NULL, thread_Register, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed for worker %d: error %d\n", i, rc);
            return EXIT_FAILURE;
        }
        printf("---id[%d],Create ppid:%lu\n", i, (unsigned long)register_pid[i]);
    }

    sleep(3);
    pthread_cond_broadcast(&register_cond);

    for (int i = 0; i < MAX_REGISTER_THREAD; i++) {
        printf("---cancel thread[%d],pid:%lu\n", i, (unsigned long)register_pid[i]);
        pthread_cond_broadcast(&register_cond);
        pthread_cancel(register_pid[i]);
        pthread_join(register_pid[i], NULL);
    }

    sleep(10);
    return 0;
}
此时涉及两个问题:
1. 线程被唤醒后被杀死,持有的锁没释放,导致其他线程无法唤醒 (拿不到锁)。pthread_cond_wait 本身就是取消点,线程在其中被取消时会先重新获得互斥锁再终止;由于没有用 pthread_cleanup_push 注册解锁的清理函数,这把锁就再也不会被释放;
2. 循环调用 pthread_cancel 杀线程,但要杀的线程未必被唤醒;
似乎将问题愈加复杂化了,难怪 Android 的 bionic libc 干脆不提供 pthread_cancel;
换个思路,将异步杀死线程的方法,换成线程自己退出;
(1) 可以解决上述循环调用 pthread_cancel 问题;
(2) 可以更好的管理同步锁等资源;
5. 用 pthread_exit() 退出线程
用一个全局变量控制逻辑,每个线程内部合理释放锁等资源,退出线程;
主线程,调用 pthread_join 回收线程资源 (否则线程栈等不会释放)
#include <stdio.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
// Number of worker threads the demo creates.
#define MAX_REGISTER_THREAD (4)
// Thread handles of the workers, indexed in creation order by main().
pthread_t register_pid[MAX_REGISTER_THREAD];
// Mutex paired with register_cond; workers hold it around pthread_cond_wait().
pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
// Condition variable the workers block on; main() broadcasts on it.
pthread_cond_t register_cond= PTHREAD_COND_INITIALIZER;
// Shutdown request flag: main() sets it to 1, and each worker tests it after
// waking from pthread_cond_wait() and exits when it is non-zero.
int flag_destory=0;
/*
 * Worker for the cooperative-shutdown demo.
 *
 * Names itself, then loops: take register_lock, wait on register_cond, and
 * either exit (when flag_destory is set) or release the lock and print a line.
 * flag_destory is only read while register_lock is held.
 *
 * The signature is the one pthread_create() requires for a start routine.
 *
 * arg is not used. The thread ends through pthread_exit(NULL) once shutdown
 * has been requested; the lock is always released before that.
 */
void *thread_Register(void *arg)
{
    (void)arg;

    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", (unsigned long)pthread_self());
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    for (;;) {
        /* The pthread calls take the ADDRESS of the mutex / condition variable. */
        pthread_mutex_lock(&register_lock);
        //...

        /* Skip the wait when shutdown was already requested: a broadcast sent
         * before this thread blocks is not remembered, so waiting
         * unconditionally could put the thread to sleep forever. */
        if (!flag_destory) {
            pthread_cond_wait(&register_cond, &register_lock);
        }

        if (flag_destory) {
            /* Release the lock before leaving so the other workers can run. */
            pthread_mutex_unlock(&register_lock);
            pthread_exit(NULL);
        }
        //...
        pthread_mutex_unlock(&register_lock);

        printf("thread_pid%d__tid%lu\n\n", (int)getpid(), (unsigned long)pthread_self());
    }
}
int main()
{for (int i=0; i< MAX_REGISTER_THREAD;i++) {pthread_create(register_pid[i], NULL, thread_Register,NULL);
printf("---id[%d],Create ppid:%lu\n",i,register_pid[i]);
}
sleep(3);
flag_destory = 1;
pthread_cond_broadcast(register_cond);
for (int i=0; i < MAX_REGISTER_THREAD;i++) {pthread_join(register_pid[i],NULL);
printf("phthread_join pid=%lu\n",register_pid[i]);
}
sleep(10);
}
执行结果:
(base) leon@sys:~/work/2022$ ./a.out
---id[0],Create ppid:140648202745600
thread_tid140648202745600
---id[1],Create ppid:140648194352896
thread_tid140648194352896
---id[2],Create ppid:140648185960192
thread_tid140648185960192
thread_tid140648177567488
---id[3],Create ppid:140648177567488
phthread_join pid=140648202745600
phthread_join pid=140648194352896
phthread_join pid=140648185960192
phthread_join pid=140648177567488
(base) leon@sys:~/work/2022$
可见,既正确释放了锁资源,也释放了线程资源,问题解决。