poll分析

    技术2022-05-20  58

    转自: http://hi.baidu.com/rwen2012/blog/item/3140db827b42f3bd6c8119df.html struct pollfd {     int fd;     //当前描述符     short events;     //进程关心的该描述符的事件     short revents;    //返回的事件 }; asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,             long timeout_msecs) {     s64 timeout_jiffies; //超时时间处理     if (timeout_msecs > 0) { #if HZ > 1000         /* We can only overflow if HZ > 1000 */         if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ)             timeout_jiffies = -1;         else #endif             timeout_jiffies = msecs_to_jiffies(timeout_msecs);     } else {         /* Infinite (< 0) or no (0) timeout */         timeout_jiffies = timeout_msecs;     } //实际处理函数     return do_sys_poll(ufds, nfds, &timeout_jiffies); } struct poll_list {     struct poll_list *next;     int len;     struct pollfd entries[0]; }; int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) {     struct poll_wqueues table;      int fdcount, err;      unsigned int i;     struct poll_list *head;      struct poll_list *walk;     /* Allocate small arguments on the stack to save memory and be        faster - use long to make sure the buffer is aligned properly        on 64 bit archs to avoid unaligned access */     long stack_pps[POLL_STACK_ALLOC/sizeof(long)]; //栈的分配会更快     struct poll_list *stack_pp = NULL;     //检查描述符个数是否超过系统的限制     /* Do a sanity check on nfds ... */     if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur)         return -EINVAL;     //首先是一个初始化工作, 主要初始化poll_table这个函数指针     poll_initwait(&table);     head = NULL;     walk = NULL;     i = nfds;     err = -ENOMEM;     //这个循环所作的工作就是将从用户传过来的多个pollfd结构信息拷贝到内核,         //由于可能结构的个数可能超过一页内存所能存储的范围,所以就用了循环来完成,         //每次拷贝一页内存能装载的个数。并且再将它们用链表链起来。     while(i!=0) {         struct poll_list *pp;         int num, size;         if (stack_pp == NULL)             num = N_STACK_PPS;         else             num = POLLFD_PER_PAGE; //这里保证kmalloc分配的空间不会超过一个页面         if (num > i)             num = i;         size = sizeof(struct poll_list) + sizeof(struct pollfd)*num;         //如果描述符的个数比较小时,或在比较大的时候,第一次会使用栈来存储         if (!stack_pp)              stack_pp = pp = (struct poll_list *)stack_pps;         else {             pp = kmalloc(size, GFP_KERNEL);              if (!pp)                 goto out_fds;         }         pp->next=NULL;         pp->len = num;         if (head == NULL)             head = pp;         else             walk->next = pp;         walk = pp;         if (copy_from_user(pp->entries, ufds + nfds-i,                  sizeof(struct pollfd)*num)) {             err = -EFAULT;             goto out_fds;         }         i -= pp->len;     }     //真正的POLL操作,返回的结果在head中     fdcount = do_poll(nfds, head, &table, timeout);     //双重循环,将事件拷贝回给用户空间     /* OK, now copy the revents fields back to user space. */     walk = head;     err = -EFAULT;     while(walk != NULL) {         struct pollfd *fds = walk->entries;         int j;         for (j=0; j < walk->len; j++, ufds++) {             if(__put_user(fds[j].revents, &ufds->revents))                 goto out_fds;         }         walk = walk->next;     }     err = fdcount;     if (!fdcount && signal_pending(current))         err = -EINTR;     //以下是释放空间 out_fds:     walk = head;     while(walk!=NULL) {         struct poll_list *pp = walk->next;         if (walk != stack_pp)             kfree(walk);         walk = pp;     }     poll_freewait(&table);     return err; } //这个函数就是将当前进程加入等待队列,这个等待队列由驱动或文件系统或网络协议栈来提供 //这个函数是由驱动的file->poll中调用poll_wait()来间接调用的。 /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,                 poll_table *p) {     struct poll_table_entry *entry = poll_get_entry(p);     if (!entry)         return;     get_file(filp);     entry->filp = filp;     entry->wait_address = wait_address;     init_waitqueue_entry(&entry->wait, current);     add_wait_queue(wait_address,&entry->wait); } void poll_initwait(struct poll_wqueues *pwq) {     //在poll()中初始化为__pollwait(),注意在epoll中又会不同     init_poll_funcptr(&pwq->pt, __pollwait);      pwq->error = 0;     pwq->table = NULL;     pwq->inline_index = 0; } =========================================== static int do_poll(unsigned int nfds, struct poll_list *list,            struct poll_wqueues *wait, s64 *timeout) {     int count = 0;     poll_table* pt = &wait->pt;     /* Optimise the no-wait case */     if (!(*timeout))   //进程不设超时         pt = NULL;     for (;;) {         struct poll_list *walk;         long __timeout;         也是一个双重循环,处理每个文件描述符事件         set_current_state(TASK_INTERRUPTIBLE);         for (walk = list; walk != NULL; walk = walk->next) {             struct pollfd * pfd, * pfd_end;             pfd = walk->entries;             pfd_end = pfd + walk->len;             for (; pfd != pfd_end; pfd++) {                 /*                 * Fish for events. If we found one, record it                 * and kill the poll_table, so we don't                 * needlessly register any other waiters after                 * this. They'll get immediately deregistered                 * when we break out and return.                 */                 if (do_pollfd(pfd, pt)) { //处理每个文件描述符                     count++;                     pt = NULL;                 }             }         }         //超时处理         /*         * All waiters have already been registered, so don't provide         * a poll_table to them on the next loop iteration.         */         pt = NULL;         if (count || !*timeout || signal_pending(current))             break;         count = wait->error;         if (count)             break;         if (*timeout < 0) {             /* Wait indefinitely */             __timeout = MAX_SCHEDULE_TIMEOUT;         } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) {             /*             * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in             * a loop             */             __timeout = MAX_SCHEDULE_TIMEOUT - 1;             *timeout -= __timeout;         } else {             __timeout = *timeout;             *timeout = 0;         }         //进程切换         __timeout = schedule_timeout(__timeout);         //进程被唤醒, 继续执行         if (*timeout >= 0)             *timeout += __timeout;     }     __set_current_state(TASK_RUNNING);     return count; } /* * Fish for pollable events on the pollfd->fd file descriptor. We're only * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, * if non-NULL. */ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) {     unsigned int mask;     int fd;     mask = 0;     fd = pollfd->fd;     if (fd >= 0) {         int fput_needed;         struct file * file;         file = fget_light(fd, &fput_needed);         mask = POLLNVAL;         if (file != NULL) {             mask = DEFAULT_POLLMASK;             //调用驱动或文件系统的poll函数, 是否将当前进程加入驱动的等待队列,                       //取决是file->poll()第二个参数是否为空.             if (file->f_op && file->f_op->poll)                 mask = file->f_op->poll(file, pwait);              /* Mask out unneeded events. */             mask &= pollfd->events | POLLERR | POLLHUP;             fput_light(file, fput_needed);         }     }     pollfd->revents = mask; //更新参数返回值     return mask; //如果可读/写返回非0值 } ================================= 驱动或文件系统的poll()实现原型: test_poll(struct file *filep, poll_table *wait) {     ...     poll_wait(filep, &dev->wait_queue_head, wait);     ...     if (dev->readable)         mask |= POLLIN | POLLRDNORM;     if (dev->writable)         mask |= POLLOUT | POLLWRNORM;     ... } static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) {     if (p && wait_address)         p->qproc(filp, wait_address, p); //这个函数就是上面又poll_initwait()初始化的__pollwait()了. }


    最新回复(0)