做人呢,最紧要就系开心啦

pthread_cancel无法取消线程引起的若干问题思考

846次阅读
没有评论

问题背景

最近项目中遇到个需求, 主动杀死多个正在运行或睡眠的线程, 便于明确管理回收资源;

首先想到的是用 pthread_cancel() 杀死指定线程;

1. 获取 linux 线程 ID

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
//#include <pthread.h>

/*
 * Entry point of the child thread: prints the ID that pthread_self()
 * reports for the calling thread, then finishes right away.
 */
void *thread(void *p)
{
    (void)p; /* the start argument is not used */

    printf("child thread id=%lu\n", pthread_self());
    return NULL;
}

 /*
  * Starts one child thread and prints both the main thread's ID and the
  * handle that pthread_create() stored for the child, so the latter can be
  * compared with what the child prints about itself.
  */
 int main(int argc, char *argv[])
 {
     pthread_t child;

     printf("main thread id=%lu\n", pthread_self());
     pthread_create(&child, NULL, thread, NULL);
     printf("child's tid=%lu\n", child);

     /* Crude wait: keep the process alive long enough for the child to run. */
     sleep(100);
     return 0;
 }

执行结果:

(base) leon@sys:~/work/2022$ ./a.out
main thread id=4241463104
child's tid=140329412953856
child thread id=4241458944

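发现主线程打印的子线程 ID (child's tid) 与子线程自己打印的 ID 并不相等。原因是没有包含 pthread.h 时, 编译器把 pthread_self() 当作返回 int 的隐式声明函数, 64 位的 pthread_t 被截断成了 32 位。加上头文件 pthread.h: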

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>

/*
 * Entry point of the child thread: prints the ID that pthread_self()
 * reports for the calling thread, then finishes right away.
 */
void *thread(void *p)
{
    (void)p; /* the start argument is not used */

    printf("child thread id=%lu\n", pthread_self());
    return NULL;
}

 /*
  * Starts one child thread and prints both the main thread's ID and the
  * handle that pthread_create() stored for the child, so the latter can be
  * compared with what the child prints about itself.
  */
 int main(int argc, char *argv[])
 {
     pthread_t child;

     printf("main thread id=%lu\n", pthread_self());
     pthread_create(&child, NULL, thread, NULL);
     printf("child's tid=%lu\n", child);

     /* Crude wait: keep the process alive long enough for the child to run. */
     sleep(100);
     return 0;
 }

结果显示 ID 相同了:

(base) leon@sys:~/work/2022$ ./a.out
main thread id=140524112881472
child's tid=140524112877312
child thread id=140524112877312

2. 指定线程名字

为便于调试, 可以给当前线程命名, 执行如下代码 (注意: PR_SET_NAME 设置的名字最长 16 字节, 含结尾的 '\0', 超出部分会被截断):

char buf[256];
snprintf(buf,256,"album_thread_pid%d__tid%lu\n\n",getpid(), pthread_self());
printf("%s",buf);
prctl(PR_SET_NAME,buf);

3. 对指定线程调用 pthread_cancel()

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>

/*
 * Child thread for the "cancel request is never honoured" experiment:
 * prints its ID, names itself after that ID, then spins forever.
 *
 * p: unused start argument.
 * Returns: never returns in practice; the trailing return only satisfies
 * the start-routine signature.
 */
void *thread(void *p)
{
    (void)p;

    printf("child thread id=%lu\n", pthread_self()); /* report our own thread ID */

    /*
     * The format contains a single %lu, so exactly one matching argument is
     * passed. The original also passed getpid() first, which made %lu read
     * an int and dropped the real thread ID.
     */
    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", pthread_self());
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    /*
     * Empty busy loop: it calls nothing, so the thread never reaches a
     * cancellation point and a pending pthread_cancel() request is never
     * acted upon. This is the behaviour the experiment demonstrates.
     */
    while (1) {
        ;
    }

    return (void *)0; /* not reached */
}

 /*
  * Starts the child thread, requests its cancellation after one second and
  * then joins it, printing the value the child terminated with.
  *
  * Returns 0 on success, 1 if the child thread could not be created.
  */
 int main(int argc, char *argv[])
 {
     pthread_t tid;
     void *ret = NULL;

     printf("main thread id=%lu\n", pthread_self()); /* ID of the main thread */

     /* Without this check a failed create would leave tid uninitialised. */
     int rc = pthread_create(&tid, NULL, thread, NULL);
     if (rc != 0) {
         fprintf(stderr, "pthread_create failed: %d\n", rc);
         return 1;
     }
     printf("child's tid=%lu\n", tid);
     sleep(1);

     /*
      * pthread_cancel() only posts a request; pthread_join() blocks until
      * the target thread has actually terminated.
      */
     pthread_cancel(tid);
     pthread_join(tid, &ret);

     /*
      * Both conversions now receive an argument (the original passed only
      * one value for two conversions). A cancelled thread yields
      * PTHREAD_CANCELED, which glibc defines as (void *)-1, so it shows up
      * as -1 here.
      */
     printf("thread %lu exit code %d\n", tid, (int)(long)ret);

     sleep(100);
     return 0;
 }

执行结果:

(base) leon@sys:~/work/2022$ ./a.out 
main thread id=140480980535104
child's tid=140480980530944
child thread id=140480980530944
thread_pid139972__tid140480980530944

top 查看线程信息,thread_tidxx 并没有被杀死:

MiB Mem :  15942.9 total,    478.4 free,   2259.4 used,  13205.1 buff/cache
MiB Swap:   2048.0 total,   2041.5 free,      6.5 used.  13248.4 avail Mem 

    PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND                                                              
 139990 leon      20   0   10952   1764   1640 R  99.9   0.0   0:38.44 thread_tid13980                                                      
    421 root      20   0    8020   4792   3012 S   0.7   0.0  35:04.44 plymouthd       

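原因: pthread_cancel 调用并不等待线程终止, 它只是提出取消请求。在默认的延迟取消 (deferred) 模式下, 只有当被取消的线程执行到取消点 (cancellation point, 例如 sleep、read、pthread_cond_wait 等函数) 时, 才会真正结束线程。
如果线程里没有任何取消点 (比如上面的空循环), 可以调用 pthread_testcancel() 手动创建一个取消点来解决。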

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>

/*
 * Child thread body: announces its ID, names itself after that ID, then
 * spins while polling for a pending cancellation request.
 */
void *thread(void *p)
{
    char label[256];

    printf("child thread id=%lu\n", pthread_self());

    snprintf(label, sizeof label, "thread_tid%lu\n\n", pthread_self());
    printf("%s", label);
    prctl(PR_SET_NAME, label);

    /* The loop does nothing except give pthread_cancel() a point to act on. */
    for (;;) {
        pthread_testcancel();
    }

    return (void *)0; /* not reached */
}

 /*
  * Starts the child thread, requests its cancellation after one second and
  * then joins it, printing the value the child terminated with.
  *
  * Returns 0 on success, 1 if the child thread could not be created.
  */
 int main(int argc, char *argv[])
 {
     pthread_t tid;
     void *ret = NULL;

     printf("main thread id=%lu\n", pthread_self()); /* ID of the main thread */

     /* Without this check a failed create would leave tid uninitialised. */
     int rc = pthread_create(&tid, NULL, thread, NULL);
     if (rc != 0) {
         fprintf(stderr, "pthread_create failed: %d\n", rc);
         return 1;
     }
     printf("child's tid=%lu\n", tid);
     sleep(1);

     /* Post the cancel request, then wait until the child has really ended. */
     pthread_cancel(tid);
     pthread_join(tid, &ret);

     /*
      * Both conversions now receive an argument (the original passed only
      * one value for two conversions, which printed garbage). A cancelled
      * thread yields PTHREAD_CANCELED, which glibc defines as (void *)-1,
      * so it shows up as -1 here.
      */
     printf("thread %lu exit code %d\n", tid, (int)(long)ret);

     sleep(100);
     return 0;
 }

执行结果:

(base) leon@sys:~/work/2022$ ./a.out 
main thread id=139980168910656
child's tid=139980168906496
child thread id=139980168906496
thread_tid139980168906496

thread 4294967295 exit code -1403026720

可见线程被成功杀死 (pthread_join 已经返回; 线程被取消时, pthread_join 取得的返回值是 PTHREAD_CANCELED)。

4. 多线程涉及同步锁时

场景延伸到多个线程,并且线程之间有同步机制,此时要杀线程,更复杂

#include <stdio.h>
#include <math.h>
#include <sys/types.h>  
#include <sys/stat.h>  
#include <dirent.h>  
#include <sys/prctl.h>

#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

/* Number of worker threads the demo creates. */
#define MAX_REGISTER_THREAD (4)

/* Thread IDs of the workers, one slot per created thread. */
pthread_t register_pid[MAX_REGISTER_THREAD];

/* Mutex and condition variable the workers lock and wait on. */
pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t register_cond= PTHREAD_COND_INITIALIZER;

/*
 * Worker thread for the "cancel threads that use a lock" experiment.
 * Names itself, then repeatedly waits on register_cond under register_lock,
 * prints a line after every wake-up and polls for cancellation.
 *
 * The signature is the one pthread_create() requires for a start routine
 * (void *(*)(void *)); the original void/char * form was not compatible.
 *
 * arg: unused start argument.
 * Returns: NULL (not reached in practice; the loop has no exit).
 */
void *thread_Register(void *arg)
{
    (void)arg;

    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", pthread_self());
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    while (1) {
        /* The pthread calls take pointers to the mutex / condition variable. */
        pthread_mutex_lock(&register_lock);
        /* ... */

        /*
         * NOTE: pthread_cond_wait() is a cancellation point and no cleanup
         * handler is installed here, so a thread cancelled while waiting
         * terminates still owning register_lock. This is left as is on
         * purpose: it is the problem this example demonstrates.
         */
        pthread_cond_wait(&register_cond, &register_lock);
        /* ... */
        pthread_mutex_unlock(&register_lock);

        printf("thread_pid%d__tid%lu\n\n", getpid(), pthread_self());
        pthread_testcancel();
    }

    return NULL; /* not reached */
}
/*
 * Creates MAX_REGISTER_THREAD workers, wakes them once after three seconds,
 * then cancels and joins them one by one.
 *
 * Returns 0 on success, 1 if a worker could not be created.
 */
int main()
{
    for (int i = 0; i < MAX_REGISTER_THREAD; i++) {
        /* pthread_create() needs the address of the slot receiving the ID. */
        int rc = pthread_create(&register_pid[i], NULL, thread_Register, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            return 1;
        }
        printf("---id[%d],Create ppid:%lu\n", i, register_pid[i]);
    }

    sleep(3);
    pthread_cond_broadcast(&register_cond);

    for (int i = 0; i < MAX_REGISTER_THREAD; i++) {
        printf("---cancel thread[%d],pid:%lu\n", i, register_pid[i]);
        /* Wake the workers, then request cancellation of worker i and wait for it. */
        pthread_cond_broadcast(&register_cond);
        pthread_cancel(register_pid[i]);
        pthread_join(register_pid[i], NULL);
    }
    sleep(10);
    return 0;
}

此时涉及两个问题:
1. 线程被唤醒后被杀死 (在 pthread_cond_wait 中被取消时, 线程会先重新拿到互斥锁再退出), 由于没有用 pthread_cleanup_push 注册清理函数, 持有的锁没有释放, 导致其他线程无法被唤醒 (拿不到锁);
2. 循环调用 pthread_cancel 杀线程, 但要杀的线程未必已经被唤醒;

似乎将问题愈加复杂化了, 难怪 Android 的 libc (bionic) 干脆没有提供 pthread_cancel;

换个思路,将异步杀死线程的方法,换成线程自己退出;
(1) 可以解决上述循环调用 pthread_cancel 问题;
(2) 可以更好的管理同步锁等资源;

5. 用 pthread_exit() 退出线程

用一个全局变量控制逻辑,每个线程内部合理释放锁等资源,退出线程;
主线程,调用 pthread_join 回收线程资源 (否则线程栈等不会释放)

#include <stdio.h>
#include <math.h>
#include <sys/types.h>  
#include <sys/stat.h>  
#include <dirent.h>  
#include <sys/prctl.h>

#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

/* Number of worker threads the demo creates. */
#define MAX_REGISTER_THREAD (4)

/* Thread IDs of the workers, one slot per created thread. */
pthread_t register_pid[MAX_REGISTER_THREAD];

/* Mutex and condition variable the workers lock and wait on. */
pthread_mutex_t register_lock = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t register_cond= PTHREAD_COND_INITIALIZER;
/* Shutdown request: main sets it to 1, a worker exits once it sees it non-zero. */
int flag_destory=0;

/*
 * Worker thread that shuts itself down cooperatively.
 * Names itself, then repeatedly waits on register_cond under register_lock.
 * Once flag_destory is non-zero it releases the lock and exits via
 * pthread_exit(); otherwise it prints a line and waits again.
 *
 * The signature is the one pthread_create() requires for a start routine
 * (void *(*)(void *)); the original void/char * form was not compatible.
 *
 * arg: unused start argument.
 * Returns: does not return normally; the thread ends in pthread_exit(NULL).
 */
void *thread_Register(void *arg)
{
    (void)arg;

    char buf[256];
    snprintf(buf, sizeof buf, "thread_tid%lu\n\n", pthread_self());
    printf("%s", buf);
    prctl(PR_SET_NAME, buf);

    while (1) {
        /* The pthread calls take pointers to the mutex / condition variable. */
        pthread_mutex_lock(&register_lock);
        /* ... */

        /*
         * Only wait if shutdown has not been requested yet; otherwise a
         * broadcast sent before this thread got here would be lost and the
         * thread would sleep forever.
         */
        if (!flag_destory) {
            pthread_cond_wait(&register_cond, &register_lock);
        }
        if (flag_destory) {
            /* Release the lock before leaving so the other workers can proceed. */
            pthread_mutex_unlock(&register_lock);
            pthread_exit(NULL);
        }
        /* ... */
        pthread_mutex_unlock(&register_lock);

        printf("thread_pid%d__tid%lu\n\n", getpid(), pthread_self());
    }

    return NULL; /* not reached */
}
int main()
{for (int i=0; i< MAX_REGISTER_THREAD;i++) {pthread_create(register_pid[i], NULL, thread_Register,NULL);
        printf("---id[%d],Create ppid:%lu\n",i,register_pid[i]);
    }

    sleep(3);
    flag_destory = 1;
    pthread_cond_broadcast(register_cond);

    for (int i=0; i < MAX_REGISTER_THREAD;i++) {pthread_join(register_pid[i],NULL);  
        printf("phthread_join pid=%lu\n",register_pid[i]);
    }
    sleep(10);
}

执行结果:

(base) leon@sys:~/work/2022$ ./a.out 
---id[0],Create ppid:140648202745600
thread_tid140648202745600

---id[1],Create ppid:140648194352896
thread_tid140648194352896

---id[2],Create ppid:140648185960192
thread_tid140648185960192

thread_tid140648177567488

---id[3],Create ppid:140648177567488
phthread_join pid=140648202745600
phthread_join pid=140648194352896
phthread_join pid=140648185960192
phthread_join pid=140648177567488
(base) leon@sys:~/work/2022$ 

可见, 既正确释放了锁资源, 也释放了线程资源, 问题解决。

正文完
 
admin
版权声明:本站原创文章,由 admin 2022-02-10发表,共计6325字。
转载说明:除特殊说明外本站文章皆由CC-4.0协议发布,转载请注明出处。
评论(没有评论)