epoll 解析
来源:程序员人生 发布时间:2015-02-06 09:12:47 阅读次数:2976次
概述
epoll 实际上是 poll 的一种改进,它可以处理大批量的句柄。而 poll 又是 select 的一种改进。在 select 中对所打开的文件描述符个数有一定的限制,该限制由 FD_SETSIZE 设置(一般为 1024 或 2048),而且内核中 select 的实现是采用轮询来处理文件描述符集,因此效率低。当文件描述符集中的某个描述符处于可读、可写或异常状态时,select 采用内存拷贝方法通知用户空间。因此,在 select 模型中文件描述符个数受限且效率低的问题就很明显。为了解决 select 对文件描述符个数的限制,采用了 poll 模型,但是 poll 仍然不能解决 select 的效率问题。所以,最终 epoll 模型重新对 poll 模型进行改进。
epoll 的优点如下所示:
- 处理大批量文件句柄:一个进程可以处理大批量的文件句柄,可处理文件描述符的个数远大于 2048;
- 高效率:内核实现中 epoll 是根据每一个描述符上面的回调函数实现的,并且只有处于活动状态的套接字才会主动调用该回调函数,其他不活动的套接字并不会去调用,因此,epoll 不必扫描全部文件描述符集,只需要扫描处于活动状态的文件描述符,所以大大提高了效率。
- 加快内核与用户的消息传递:epoll 是通过内核与用户空间 mmap 同一块内存实现内核与用户之间消息的传递。
- 内核微调:可以根据运行时所需内存动态调整内存大小。
epoll 系统调用
调用函数 epoll_create 创建 epoll 文件描述符,该函数原型如下:
/* epoll system call functions */
#include <sys/epoll.h>
/*
 * Purpose: create an epoll file descriptor.
 * Return value: on success, the newly created file descriptor.
 * Prototype:
 */
int epoll_create(int size);
/*
 * The size parameter is the maximum number of file descriptors for epoll;
 * in newer kernels size is no longer used.
 */
当创建好 epoll 文件描述符以后,接下来对需要监听的相关套接字描述符进行操作,由 epoll 操作函数 epoll_ctl 实现,其原型如下:
/*
 * Purpose: operate on an epoll file descriptor.
 * Return value: 0 on success, -1 on error.
 * Prototype:
 */
int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
/*
 * Parameters:
 * epfd:  the epoll file descriptor created by epoll_create;
 * fd:    the associated file descriptor;
 * op:    the operation to perform, one of the following three:
 *        EPOLL_CTL_ADD  register fd with epfd;
 *        EPOLL_CTL_MOD  modify the events of an fd already registered with epfd;
 *        EPOLL_CTL_DEL  remove fd from epfd;
 *
 * event: points to a struct epoll_event describing which events on fd to monitor;
 */
/* struct epoll_event is defined as follows */
typedef union epoll_data {
void *ptr;
int fd;
uint32_t u32;
uint64_t u64;
} epoll_data_t;
struct epoll_event {
uint32_t events; /* Epoll events */
epoll_data_t data; /* User data variable */
};
/*
 * events can take the following values:
 * EPOLLIN      the associated file descriptor is readable;
 * EPOLLOUT     the associated file descriptor is writable;
 * EPOLLPRI     the associated file descriptor has urgent data to read;
 * EPOLLERR     an error occurred on the associated file descriptor;
 * EPOLLHUP     the associated file descriptor was hung up;
 * EPOLLET      put EPOLL into edge-triggered mode (Edge Triggered);
 * EPOLLONESHOT monitor the event only once; to keep monitoring after that
 *              event has been delivered, the socket descriptor must be
 *              added to the EPOLL queue again;
 */
例如:
struct epoll_event ev;
/* Set the file descriptor associated with the event to be handled */
ev.data.fd=listenfd;
/* Set the type of event to be handled */
ev.events=EPOLLIN|EPOLLET;
/* Register the epoll event */
epoll_ctl(epfd,EPOLL_CTL_ADD,listenfd,&ev);
经过上面的操作以后,等待某些事件的发生由函数 epoll_wait 实现,其原型如下:
/*
 * Purpose: collect the events that have occurred among those monitored by epoll.
 * Return value: on success, the number of events that occurred; -1 on error.
 * Prototype:
 */
int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);
/*
 * Parameters:
 * epfd:      the epoll file descriptor created by epoll_create;
 * events:    points to epoll_event structures used to store the events that occurred;
 * maxevents: the maximum number of events that can be handled per call;
 * timeout:   timeout for waiting on I/O events: -1 means block, i.e. do not
 *            return immediately; 0 means non-blocking, i.e. return immediately;
 */
epoll 工作模式
epoll 有两种工作模式:
- LT(level triggered):水平触发是缺省的工作方式,并且同时支持 block 和 no-block socket。内核告诉你一个文件描述符是否就绪了,然后可以对这个就绪的 fd 进行 IO 操作。若不进行任何操作,内核还是会继续通知。
- ET(edge-triggered):边沿触发是高速工作方式,仅当状态发生变化时才获得通知。用户收到一次通知后必须完整地处理该事件(例如一直读到返回 EAGAIN),内核不再通知这一事件。
具体实例可参考文章:How to use epoll? A complete example in C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/epoll.h>
#include <errno.h>
#define MAXEVENTS 64
/*
 * Put the descriptor `sfd` into non-blocking mode.
 *
 * The current file status flags are fetched with F_GETFL, O_NONBLOCK is
 * OR-ed in, and the result is stored back with F_SETFL, so any flags that
 * were already set are preserved.
 *
 * Returns 0 on success.  If either fcntl() call fails the error is reported
 * with perror() and -1 is returned.
 */
static int
make_socket_non_blocking (int sfd)
{
  int flags, s;

  flags = fcntl (sfd, F_GETFL, 0);
  if (flags == -1)
    {
      perror ("fcntl");
      return -1;
    }

  flags |= O_NONBLOCK;
  s = fcntl (sfd, F_SETFL, flags);
  if (s == -1)
    {
      perror ("fcntl");
      return -1;
    }

  return 0;
}
/*
 * Create a stream (TCP) socket bound to `port` on the wildcard address.
 *
 * getaddrinfo() is asked for passive IPv4/IPv6 candidates; each candidate is
 * tried in order until one can be both created and bound.
 *
 * port: service name or decimal port number, passed to getaddrinfo().
 *
 * Returns the bound socket descriptor on success (the caller owns it and is
 * responsible for close()).  Returns -1 after printing a message to stderr
 * if name resolution fails or no candidate address could be bound.
 */
static int
create_and_bind (char *port)
{
  struct addrinfo hints;
  struct addrinfo *result, *rp;
  int s, sfd = -1;

  memset (&hints, 0, sizeof hints);
  hints.ai_family = AF_UNSPEC;     /* Return IPv4 and IPv6 choices */
  hints.ai_socktype = SOCK_STREAM; /* We want a TCP socket */
  hints.ai_flags = AI_PASSIVE;     /* All interfaces */

  s = getaddrinfo (NULL, port, &hints, &result);
  if (s != 0)
    {
      fprintf (stderr, "getaddrinfo: %s\n", gai_strerror (s));
      return -1;
    }

  for (rp = result; rp != NULL; rp = rp->ai_next)
    {
      sfd = socket (rp->ai_family, rp->ai_socktype, rp->ai_protocol);
      if (sfd == -1)
        continue;

      s = bind (sfd, rp->ai_addr, rp->ai_addrlen);
      if (s == 0)
        {
          /* We managed to bind successfully! */
          break;
        }

      close (sfd);
      /* Mark "no usable socket" so the check below does not depend on rp,
         which must not be inspected once the list has been freed. */
      sfd = -1;
    }

  /* Release the address list on every path, including the failure one. */
  freeaddrinfo (result);

  if (sfd == -1)
    {
      fprintf (stderr, "Could not bind\n");
      return -1;
    }

  return sfd;
}
/*
 * Edge-triggered epoll echo-to-stdout server.
 *
 * Usage: <program> [port]
 *
 * Binds a non-blocking listening socket to the given port, registers it with
 * an epoll instance in edge-triggered mode, then loops forever: new
 * connections are accepted and registered, and whatever connected peers send
 * is copied to standard output.  Setup failures abort(); the loop is left
 * only if epoll_wait() fails with something other than EINTR, in which case
 * resources are released and EXIT_FAILURE is returned.
 */
int
main (int argc, char *argv[])
{
  int sfd, s;
  int efd;
  int status = EXIT_SUCCESS;
  struct epoll_event event;
  struct epoll_event *events;

  if (argc != 2)
    {
      fprintf (stderr, "Usage: %s [port]\n", argv[0]);
      exit (EXIT_FAILURE);
    }

  sfd = create_and_bind (argv[1]);
  if (sfd == -1)
    abort ();

  s = make_socket_non_blocking (sfd);
  if (s == -1)
    abort ();

  s = listen (sfd, SOMAXCONN);
  if (s == -1)
    {
      perror ("listen");
      abort ();
    }

  efd = epoll_create1 (0);
  if (efd == -1)
    {
      perror ("epoll_create");
      abort ();
    }

  /* Zero the whole structure so the unused part of the data union is not
     handed to the kernel uninitialised. */
  memset (&event, 0, sizeof event);
  event.data.fd = sfd;
  event.events = EPOLLIN | EPOLLET;
  s = epoll_ctl (efd, EPOLL_CTL_ADD, sfd, &event);
  if (s == -1)
    {
      perror ("epoll_ctl");
      abort ();
    }

  /* Buffer where events are returned */
  events = calloc (MAXEVENTS, sizeof *events);
  if (events == NULL)
    {
      perror ("calloc");
      abort ();
    }

  /* The event loop */
  while (1)
    {
      int n, i;

      n = epoll_wait (efd, events, MAXEVENTS, -1);
      if (n == -1)
        {
          /* A signal interrupting the wait is not an error; retry. */
          if (errno == EINTR)
            continue;
          perror ("epoll_wait");
          status = EXIT_FAILURE;
          break;
        }

      for (i = 0; i < n; i++)
        {
          if ((events[i].events & EPOLLERR) ||
              (events[i].events & EPOLLHUP) ||
              (!(events[i].events & EPOLLIN)))
            {
              /* An error has occurred on this fd, or the socket is not
                 ready for reading (why were we notified then?) */
              fprintf (stderr, "epoll error\n");
              close (events[i].data.fd);
              continue;
            }
          else if (sfd == events[i].data.fd)
            {
              /* We have a notification on the listening socket, which
                 means one or more incoming connections.  In edge-triggered
                 mode we must accept until the queue is drained. */
              while (1)
                {
                  /* sockaddr_storage is large enough for any address
                     family, including IPv6 peers. */
                  struct sockaddr_storage in_addr;
                  socklen_t in_len;
                  int infd;
                  char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

                  in_len = sizeof in_addr;
                  infd = accept (sfd, (struct sockaddr *) &in_addr, &in_len);
                  if (infd == -1)
                    {
                      if ((errno == EAGAIN) ||
                          (errno == EWOULDBLOCK))
                        {
                          /* We have processed all incoming
                             connections. */
                          break;
                        }
                      else
                        {
                          perror ("accept");
                          break;
                        }
                    }

                  s = getnameinfo ((struct sockaddr *) &in_addr, in_len,
                                   hbuf, sizeof hbuf,
                                   sbuf, sizeof sbuf,
                                   NI_NUMERICHOST | NI_NUMERICSERV);
                  if (s == 0)
                    {
                      printf ("Accepted connection on descriptor %d "
                              "(host=%s, port=%s)\n", infd, hbuf, sbuf);
                    }

                  /* Make the incoming socket non-blocking and add it to the
                     list of fds to monitor. */
                  s = make_socket_non_blocking (infd);
                  if (s == -1)
                    abort ();

                  event.data.fd = infd;
                  event.events = EPOLLIN | EPOLLET;
                  s = epoll_ctl (efd, EPOLL_CTL_ADD, infd, &event);
                  if (s == -1)
                    {
                      perror ("epoll_ctl");
                      abort ();
                    }
                }
              continue;
            }
          else
            {
              /* We have data on the fd waiting to be read. Read and
                 display it. We must read whatever data is available
                 completely, as we are running in edge-triggered mode
                 and won't get a notification again for the same
                 data. */
              int done = 0;

              while (1)
                {
                  ssize_t count;
                  ssize_t written;
                  char buf[512];

                  count = read (events[i].data.fd, buf, sizeof buf);
                  if (count == -1)
                    {
                      /* Interrupted before any data was read: retry. */
                      if (errno == EINTR)
                        continue;
                      /* EAGAIN / EWOULDBLOCK means we have read all
                         data. So go back to the main loop. */
                      if (errno != EAGAIN && errno != EWOULDBLOCK)
                        {
                          perror ("read");
                          done = 1;
                        }
                      break;
                    }
                  else if (count == 0)
                    {
                      /* End of file. The remote has closed the
                         connection. */
                      done = 1;
                      break;
                    }

                  /* Write the buffer to standard output.  write() may
                     accept fewer bytes than requested, so loop until the
                     whole chunk has been written. */
                  written = 0;
                  while (written < count)
                    {
                      ssize_t w = write (STDOUT_FILENO, buf + written,
                                         (size_t) (count - written));
                      if (w == -1)
                        {
                          if (errno == EINTR)
                            continue;
                          perror ("write");
                          abort ();
                        }
                      written += w;
                    }
                }

              if (done)
                {
                  printf ("Closed connection on descriptor %d\n",
                          events[i].data.fd);
                  /* Closing the descriptor will make epoll remove it
                     from the set of descriptors which are monitored. */
                  close (events[i].data.fd);
                }
            }
        }
    }

  free (events);
  close (efd);
  close (sfd);
  return status;
}
参考资料:
《epoll详解》
《Epoll详解及源码分析》
生活不易,码农辛苦
如果您觉得本网站对您的学习有所帮助,可以手机扫描二维码进行捐赠