2 // detail/impl/epoll_reactor.ipp
3 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 // Copyright (c) 2003-2011 Christopher M. Kohlhoff (chris at kohlhoff dot com)
7 // Distributed under the Boost Software License, Version 1.0. (See accompanying
8 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 #ifndef BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP
12 #define BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP
14 #if defined(_MSC_VER) && (_MSC_VER >= 1200)
16 #endif // defined(_MSC_VER) && (_MSC_VER >= 1200)
18 #include <boost/asio/detail/config.hpp>
20 #if defined(BOOST_ASIO_HAS_EPOLL)
23 #include <sys/epoll.h>
24 #include <boost/asio/detail/epoll_reactor.hpp>
25 #include <boost/asio/detail/throw_error.hpp>
26 #include <boost/asio/error.hpp>
28 #if defined(BOOST_ASIO_HAS_TIMERFD)
29 # include <sys/timerfd.h>
30 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
32 #include <boost/asio/detail/push_options.hpp>
// Constructs the reactor as a service of `io_service`. The member
// initialisers obtain the io_service implementation through use_service and
// create the epoll and timer descriptors via do_epoll_create() and
// do_timerfd_create(); the body then registers the interrupter and the timer
// descriptor with epoll.
38 epoll_reactor::epoll_reactor(boost::asio::io_service& io_service)
39 : boost::asio::detail::service_base<epoll_reactor>(io_service),
40 io_service_(use_service<io_service_impl>(io_service)),
43 epoll_fd_(do_epoll_create()),
44 timer_fd_(do_timerfd_create()),
47 // Add the interrupter's descriptor to epoll.
48 epoll_event ev = { 0, { 0 } };
// Edge-triggered (EPOLLET) registration. data.ptr is set to &interrupter_ so
// that run() can recognise interrupter events by pointer comparison.
49 ev.events = EPOLLIN | EPOLLERR | EPOLLET;
50 ev.data.ptr = &interrupter_;
// NOTE(review): the return value of epoll_ctl is not checked here.
51 epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, interrupter_.read_descriptor(), &ev);
// Interrupt once up front; run() documents that the interrupter is left in a
// ready-to-read state and never reset.
52 interrupter_.interrupt();
54 // Add the timer descriptor to epoll.
// Unlike the interrupter, the timer is registered without EPOLLET. The address
// of timer_fd_ is the tag run() compares against.
57 ev.events = EPOLLIN | EPOLLERR;
58 ev.data.ptr = &timer_fd_;
// NOTE(review): the return value of epoll_ctl is not checked here either.
59 epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, timer_fd_, &ev);
// Destructor.
// NOTE(review): only the signature is visible in this excerpt; the body
// (presumably releasing epoll_fd_ and timer_fd_) is not shown - confirm.
63 epoll_reactor::~epoll_reactor()
// Shuts the reactor down: gathers every operation still queued on a
// registered descriptor, plus all pending timer operations, and hands them to
// the io_service via abandon_operations().
71 void epoll_reactor::shutdown_service()
73 mutex::scoped_lock lock(mutex_);
77 op_queue<operation> ops;
// Drain each registered descriptor state in turn: move all of its per-type
// op queues into `ops`, flag it as shut down, then release the state. The
// loop ends when registered_descriptors_.first() returns null.
79 while (descriptor_state* state = registered_descriptors_.first())
81 for (int i = 0; i < max_ops; ++i)
82 ops.push(state->op_queue_[i]);
83 state->shutdown_ = true;
84 registered_descriptors_.free(state);
// Pending timer operations are collected into the same queue.
87 timer_queues_.get_all_timers(ops);
// The collected operations are abandoned rather than posted as completions.
89 io_service_.abandon_operations(ops);
// Fork notification handler. For fork_child it creates a fresh epoll
// descriptor, timer descriptor and interrupter, registers the interrupter and
// timer with the new epoll set, and then re-adds every registered descriptor.
// A failure during re-registration is reported through throw_error with the
// context string "epoll re-registration".
92 void epoll_reactor::fork_service(boost::asio::io_service::fork_event fork_ev)
// Only the fork_child event is tested for here.
94 if (fork_ev == boost::asio::io_service::fork_child)
99 epoll_fd_ = do_epoll_create();
104 timer_fd_ = do_timerfd_create();
106 interrupter_.recreate();
108 // Add the interrupter's descriptor to epoll.
109 epoll_event ev = { 0, { 0 } };
// Same edge-triggered registration and pointer tag as in the constructor.
110 ev.events = EPOLLIN | EPOLLERR | EPOLLET;
111 ev.data.ptr = &interrupter_;
112 epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, interrupter_.read_descriptor(), &ev);
113 interrupter_.interrupt();
115 // Add the timer descriptor to epoll.
118 ev.events = EPOLLIN | EPOLLERR;
119 ev.data.ptr = &timer_fd_;
120 epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, timer_fd_, &ev);
125 // Re-register all descriptors with epoll.
// The descriptor list is walked under registered_descriptors_mutex_ by
// following each state's next_ pointer.
126 mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
127 for (descriptor_state* state = registered_descriptors_.first();
128 state != 0; state = state->next_)
// Same full, edge-triggered event mask that register_descriptor() uses.
130 ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
132 int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, state->descriptor_, &ev);
// NOTE(review): presumably executed only when `result` indicates failure; the
// guarding condition is not visible in this excerpt - confirm.
135 boost::system::error_code ec(errno,
136 boost::asio::error::get_system_category());
137 boost::asio::detail::throw_error(ec, "epoll re-registration");
// Forwards to io_service_.init_task().
143 void epoll_reactor::init_task()
145 io_service_.init_task();
// Registers `descriptor` with the reactor. Allocates a descriptor_state,
// stores it in `descriptor_data` (an out-parameter), initialises it under its
// own mutex and adds the descriptor to the epoll set.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the int result reports success or an errno value - confirm.
148 int epoll_reactor::register_descriptor(socket_type descriptor,
149 epoll_reactor::per_descriptor_data& descriptor_data)
151 descriptor_data = allocate_descriptor_state();
154 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
156 descriptor_data->reactor_ = this;
157 descriptor_data->descriptor_ = descriptor;
158 descriptor_data->shutdown_ = false;
// Cache, per operation type, whether the op queue is currently empty.
// start_op() reads these flags before it takes the descriptor mutex.
160 for (int i = 0; i < max_ops; ++i)
161 descriptor_data->op_queue_is_empty_[i] =
162 descriptor_data->op_queue_[i].empty();
165 epoll_event ev = { 0, { 0 } };
// Subscribe once to every event kind, edge-triggered. data.ptr carries the
// descriptor_state so run() can map an event back to its state.
166 ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
167 ev.data.ptr = descriptor_data;
168 int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev);
// Registers `descriptor` together with an initial operation. Follows the same
// steps as register_descriptor(), but additionally queues `op` on the
// op_type queue before the descriptor is added to the epoll set.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the int result reports success or an errno value - confirm.
175 int epoll_reactor::register_internal_descriptor(
176 int op_type, socket_type descriptor,
177 epoll_reactor::per_descriptor_data& descriptor_data, reactor_op* op)
179 descriptor_data = allocate_descriptor_state();
182 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
184 descriptor_data->reactor_ = this;
185 descriptor_data->descriptor_ = descriptor;
186 descriptor_data->shutdown_ = false;
// The op is queued before the emptiness flags are computed, so the flag for
// op_type is recorded as non-empty below.
187 descriptor_data->op_queue_[op_type].push(op);
189 for (int i = 0; i < max_ops; ++i)
190 descriptor_data->op_queue_is_empty_[i] =
191 descriptor_data->op_queue_[i].empty();
194 epoll_event ev = { 0, { 0 } };
// Full event mask, edge-triggered; data.ptr carries the descriptor_state.
195 ev.events = EPOLLIN | EPOLLERR | EPOLLHUP | EPOLLOUT | EPOLLPRI | EPOLLET;
196 ev.data.ptr = descriptor_data;
197 int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev);
// Transfers the per-descriptor state pointer from `source_descriptor_data` to
// `target_descriptor_data` and nulls the source. The socket_type parameter is
// unnamed and unused, and no epoll call is made here.
204 void epoll_reactor::move_descriptor(socket_type,
205 epoll_reactor::per_descriptor_data& target_descriptor_data,
206 epoll_reactor::per_descriptor_data& source_descriptor_data)
208 target_descriptor_data = source_descriptor_data;
209 source_descriptor_data = 0;
// Starts operation `op` of kind `op_type` on `descriptor`. Depending on the
// path taken, the op is either posted for immediate completion or pushed onto
// the descriptor's op_type queue and counted as outstanding work.
// `allow_speculative` enables the attempt-before-queueing paths.
// NOTE(review): several guarding statements and early returns are not visible
// in this excerpt; comments that depend on them are marked as assumptions.
212 void epoll_reactor::start_op(int op_type, socket_type descriptor,
213 epoll_reactor::per_descriptor_data& descriptor_data,
214 reactor_op* op, bool allow_speculative)
// No per-descriptor state: fail the op with bad_descriptor.
216 if (!descriptor_data)
218 op->ec_ = boost::asio::error::bad_descriptor;
219 post_immediate_completion(op);
// First speculative attempt. The op_queue_is_empty_ flags are read here
// before descriptor_data->mutex_ is acquired further down.
223 bool perform_speculative = allow_speculative;
224 if (perform_speculative)
// Eligible only if nothing is queued for this op type; a read additionally
// requires the except_op queue to be empty.
226 if (descriptor_data->op_queue_is_empty_[op_type]
227 && (op_type != read_op
228 || descriptor_data->op_queue_is_empty_[except_op]))
// NOTE(review): presumably reached only when performing the op succeeded; the
// guarding statement is not visible here - confirm.
232 io_service_.post_immediate_completion(op);
// Clearing the flag disables the second speculative attempt made under the
// lock below.
235 perform_speculative = false;
239 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
// The descriptor has already been shut down: hand the op straight back.
241 if (descriptor_data->shutdown_)
243 post_immediate_completion(op);
// Refresh the cached emptiness flags now that the lock is held.
247 for (int i = 0; i < max_ops; ++i)
248 descriptor_data->op_queue_is_empty_[i] =
249 descriptor_data->op_queue_[i].empty();
251 if (descriptor_data->op_queue_is_empty_[op_type])
253 if (allow_speculative)
255 if (perform_speculative
256 && (op_type != read_op
257 || descriptor_data->op_queue_is_empty_[except_op]))
// Release the descriptor lock before posting the completion.
261 descriptor_lock.unlock();
262 io_service_.post_immediate_completion(op);
// Re-submit the registration with the full event mask via EPOLL_CTL_MOD.
// NOTE(review): presumably the branch taken when speculation is not allowed;
// the `else` is not visible here. The epoll_ctl result is ignored.
269 epoll_event ev = { 0, { 0 } };
270 ev.events = EPOLLIN | EPOLLERR | EPOLLHUP
271 | EPOLLOUT | EPOLLPRI | EPOLLET;
272 ev.data.ptr = descriptor_data;
273 epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev);
// Queue the op, record the queue as non-empty, and tell the io_service that
// work has started.
277 descriptor_data->op_queue_[op_type].push(op);
278 descriptor_data->op_queue_is_empty_[op_type] = false;
279 io_service_.work_started();
// Cancels every operation queued on the descriptor. Each op has ec_ set to
// operation_aborted and is removed from its queue; after the descriptor lock
// is released, the local `ops` queue is posted via
// post_deferred_completions(). The socket_type parameter is unnamed and unused.
282 void epoll_reactor::cancel_ops(socket_type,
283 epoll_reactor::per_descriptor_data& descriptor_data)
// Nothing to cancel when there is no per-descriptor state.
285 if (!descriptor_data)
288 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
290 op_queue<operation> ops;
291 for (int i = 0; i < max_ops; ++i)
// NOTE(review): presumably each aborted op is also pushed onto `ops`; that
// statement is not visible in this excerpt - confirm.
293 while (reactor_op* op = descriptor_data->op_queue_[i].front())
295 op->ec_ = boost::asio::error::operation_aborted;
296 descriptor_data->op_queue_[i].pop();
// Unlock before posting so the completions are not posted under the
// descriptor mutex.
301 descriptor_lock.unlock();
303 io_service_.post_deferred_completions(ops);
// Deregisters `descriptor`. If the state is not already shut down, the
// descriptor is removed from epoll, all queued ops are aborted with
// operation_aborted, the state is marked shut down and freed, and the
// deferred completions are posted.
// NOTE(review): how `closing` selects between the paths below is not visible
// in this excerpt - confirm against the full source.
306 void epoll_reactor::deregister_descriptor(socket_type descriptor,
307 epoll_reactor::per_descriptor_data& descriptor_data, bool closing)
// Nothing to do when there is no per-descriptor state.
309 if (!descriptor_data)
312 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
314 if (!descriptor_data->shutdown_)
318 // The descriptor will be automatically removed from the epoll set when
// (The sentence above is cut off in this excerpt; presumably it continues
// "it is closed", which would be the `closing` case - confirm.)
// Explicit removal from the epoll set. A non-null event pointer is passed
// even though EPOLL_CTL_DEL does not use its contents.
323 epoll_event ev = { 0, { 0 } };
324 epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev);
327 op_queue<operation> ops;
328 for (int i = 0; i < max_ops; ++i)
// NOTE(review): presumably each aborted op is also pushed onto `ops`; that
// statement is not visible in this excerpt - confirm.
330 while (reactor_op* op = descriptor_data->op_queue_[i].front())
332 op->ec_ = boost::asio::error::operation_aborted;
333 descriptor_data->op_queue_[i].pop();
// Mark the state dead: start_op() checks shutdown_ under the same mutex.
338 descriptor_data->descriptor_ = -1;
339 descriptor_data->shutdown_ = true;
// Release the descriptor mutex before returning the state to the pool.
341 descriptor_lock.unlock();
343 free_descriptor_state(descriptor_data);
346 io_service_.post_deferred_completions(ops);
// Deregisters a descriptor that was added with register_internal_descriptor().
// If the state is not already shut down, the descriptor is removed from
// epoll, its queued ops are moved into a local queue, and the state is marked
// shut down and freed. No call that posts or aborts those ops is visible in
// this excerpt.
350 void epoll_reactor::deregister_internal_descriptor(socket_type descriptor,
351 epoll_reactor::per_descriptor_data& descriptor_data)
// Nothing to do when there is no per-descriptor state.
353 if (!descriptor_data)
356 mutex::scoped_lock descriptor_lock(descriptor_data->mutex_);
358 if (!descriptor_data->shutdown_)
360 epoll_event ev = { 0, { 0 } };
361 epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev);
// Move the whole per-type queues into the local `ops` queue; unlike
// deregister_descriptor(), the ops' ec_ is not modified here.
363 op_queue<operation> ops;
364 for (int i = 0; i < max_ops; ++i)
365 ops.push(descriptor_data->op_queue_[i]);
367 descriptor_data->descriptor_ = -1;
368 descriptor_data->shutdown_ = true;
// Release the descriptor mutex before returning the state to the pool.
370 descriptor_lock.unlock();
372 free_descriptor_state(descriptor_data);
// Runs one pass of the reactor. Waits on the epoll descriptor (with a
// non-zero timeout only when `block` is true), then appends the
// descriptor_state objects that have events, followed by any ready timer
// operations, to `ops`.
377 void epoll_reactor::run(bool block, op_queue<operation>& ops)
379 // This code relies on the fact that the task_io_service queues the reactor
380 // task behind all descriptor operations generated by this function. This
381 // means, that by the time we reach this point, any previously returned
382 // descriptor operations have already been dequeued. Therefore it is now safe
383 // for us to reuse and return them for the task_io_service to queue again.
385 // Calculate a timeout only if timerfd is not used.
// NOTE(review): the condition choosing between the two assignments below is
// not visible in this excerpt. Presumably the first applies when a timer
// descriptor exists (wait indefinitely or poll) and the second, read under
// mutex_, applies when it does not - confirm.
388 timeout = block ? -1 : 0;
391 mutex::scoped_lock lock(mutex_);
392 timeout = block ? get_timeout() : 0;
395 // Block on the epoll descriptor.
// At most 128 events are fetched per call.
396 epoll_event events[128];
397 int num_events = epoll_wait(epoll_fd_, events, 128, timeout);
// check_timers starts out true when there is no timer descriptor: either
// timer_fd_ is -1 or timerfd support is compiled out.
399 #if defined(BOOST_ASIO_HAS_TIMERFD)
400 bool check_timers = (timer_fd_ == -1);
401 #else // defined(BOOST_ASIO_HAS_TIMERFD)
402 bool check_timers = true;
403 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
405 // Dispatch the waiting events.
// A negative num_events (epoll_wait failure) simply skips this loop.
406 for (int i = 0; i < num_events; ++i)
// data.ptr identifies the event source: &interrupter_, &timer_fd_, or a
// descriptor_state registered by register_descriptor().
408 void* ptr = events[i].data.ptr;
409 if (ptr == &interrupter_)
411 // No need to reset the interrupter since we're leaving the descriptor
412 // in a ready-to-read state and relying on edge-triggered notifications
413 // to make it so that we only get woken up when the descriptor's epoll
414 // registration is updated.
416 #if defined(BOOST_ASIO_HAS_TIMERFD)
419 #else // defined(BOOST_ASIO_HAS_TIMERFD)
421 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
423 #if defined(BOOST_ASIO_HAS_TIMERFD)
424 else if (ptr == &timer_fd_)
428 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
431 // The descriptor operation doesn't count as work in and of itself, so we
432 // don't call work_started() here. This still allows the io_service to
433 // stop if the only remaining operations are descriptor operations.
// The descriptor_state is itself an operation: record the ready events on it
// and queue it; do_complete() later performs the I/O.
434 descriptor_state* descriptor_data = static_cast<descriptor_state*>(ptr);
435 descriptor_data->set_ready_events(events[i].events);
436 ops.push(descriptor_data);
// Collect ready timer operations under mutex_.
// NOTE(review): presumably guarded by check_timers; the guard is not visible
// in this excerpt - confirm.
442 mutex::scoped_lock common_lock(mutex_);
443 timer_queues_.get_ready_timers(ops);
// With timerfd: re-arm the timer descriptor for the next expiry. old_timeout
// receives the previous setting and is not used afterwards.
445 #if defined(BOOST_ASIO_HAS_TIMERFD)
448 itimerspec new_timeout;
449 itimerspec old_timeout;
450 int flags = get_timeout(new_timeout);
451 timerfd_settime(timer_fd_, flags, &new_timeout, &old_timeout);
453 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
// Wakes a thread blocked in run(). It re-submits the interrupter's
// edge-triggered registration with EPOLL_CTL_MOD; as the comment in run()
// explains, the interrupter stays ready-to-read, so updating its registration
// is what produces a new wake-up.
457 void epoll_reactor::interrupt()
459 epoll_event ev = { 0, { 0 } };
460 ev.events = EPOLLIN | EPOLLERR | EPOLLET;
461 ev.data.ptr = &interrupter_;
// NOTE(review): the return value of epoll_ctl is not checked.
462 epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, interrupter_.read_descriptor(), &ev);
// Creates the epoll descriptor with close-on-exec set and returns it. Failure
// is reported through throw_error with the context string "epoll".
465 int epoll_reactor::do_epoll_create()
// Preferred path: epoll_create1 sets close-on-exec atomically when
// EPOLL_CLOEXEC is available at compile time.
467 #if defined(EPOLL_CLOEXEC)
468 int fd = epoll_create1(EPOLL_CLOEXEC);
469 #else // defined(EPOLL_CLOEXEC)
472 #endif // defined(EPOLL_CLOEXEC)
// Fallback when the first attempt left fd == -1 with errno EINVAL: use plain
// epoll_create and set FD_CLOEXEC separately with fcntl.
// NOTE(review): fcntl appears to be called without a visible check that the
// fallback epoll_create succeeded - confirm against the full source.
474 if (fd == -1 && errno == EINVAL)
476 fd = epoll_create(epoll_size);
478 ::fcntl(fd, F_SETFD, FD_CLOEXEC);
// NOTE(review): presumably executed only when fd is still -1; the guard is
// not visible in this excerpt - confirm.
483 boost::system::error_code ec(errno,
484 boost::asio::error::get_system_category());
485 boost::asio::detail::throw_error(ec, "epoll");
// Creates the CLOCK_MONOTONIC timer descriptor when BOOST_ASIO_HAS_TIMERFD is
// defined. No throw_error call is visible here, unlike do_epoll_create().
// NOTE(review): the return statements, including the value returned when
// timerfd is unavailable, are not visible in this excerpt; run() treats
// timer_fd_ == -1 as "no timer descriptor".
491 int epoll_reactor::do_timerfd_create()
493 #if defined(BOOST_ASIO_HAS_TIMERFD)
// Preferred path: request close-on-exec atomically via TFD_CLOEXEC.
494 # if defined(TFD_CLOEXEC)
495 int fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
496 # else // defined(TFD_CLOEXEC)
499 # endif // defined(TFD_CLOEXEC)
// Fallback when the first attempt left fd == -1 with errno EINVAL: create
// without flags and set FD_CLOEXEC separately with fcntl.
501 if (fd == -1 && errno == EINVAL)
503 fd = timerfd_create(CLOCK_MONOTONIC, 0);
505 ::fcntl(fd, F_SETFD, FD_CLOEXEC);
509 #else // defined(BOOST_ASIO_HAS_TIMERFD)
511 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
// Returns a descriptor_state obtained from registered_descriptors_.alloc(),
// called while holding registered_descriptors_mutex_.
514 epoll_reactor::descriptor_state* epoll_reactor::allocate_descriptor_state()
516 mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
517 return registered_descriptors_.alloc();
// Hands descriptor_state `s` back to registered_descriptors_.free(), called
// while holding registered_descriptors_mutex_.
520 void epoll_reactor::free_descriptor_state(epoll_reactor::descriptor_state* s)
522 mutex::scoped_lock descriptors_lock(registered_descriptors_mutex_);
523 registered_descriptors_.free(s);
// Adds `queue` to timer_queues_ while holding mutex_. The address of `queue`
// is what gets stored.
526 void epoll_reactor::do_add_timer_queue(timer_queue_base& queue)
528 mutex::scoped_lock lock(mutex_);
529 timer_queues_.insert(&queue);
// Removes `queue` (by address) from timer_queues_ while holding mutex_.
532 void epoll_reactor::do_remove_timer_queue(timer_queue_base& queue)
534 mutex::scoped_lock lock(mutex_);
535 timer_queues_.erase(&queue);
// Re-arms the timer descriptor with the timeout computed by
// get_timeout(itimerspec&) when timerfd support is compiled in.
// NOTE(review): any guard on timer_fd_ and any fallback statements are not
// visible in this excerpt - confirm.
538 void epoll_reactor::update_timeout()
540 #if defined(BOOST_ASIO_HAS_TIMERFD)
543 itimerspec new_timeout;
544 itimerspec old_timeout;
// `flags` is 0 or TFD_TIMER_ABSTIME, as chosen by get_timeout(itimerspec&).
// old_timeout receives the previous setting and is not used afterwards.
545 int flags = get_timeout(new_timeout);
546 timerfd_settime(timer_fd_, flags, &new_timeout, &old_timeout);
549 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
// Returns the wait duration reported by the timer queues, obtained from
// wait_duration_msec() with 5 * 60 * 1000 (five minutes in milliseconds) as
// the maximum. The result is used as the epoll_wait timeout in run().
553 int epoll_reactor::get_timeout()
555 // By default we will wait no longer than 5 minutes. This will ensure that
556 // any changes to the system clock are detected after no longer than this.
557 return timer_queues_.wait_duration_msec(5 * 60 * 1000);
560 #if defined(BOOST_ASIO_HAS_TIMERFD)
// Fills `ts` with the next timer expiry for timerfd_settime() and returns the
// flags argument to pass alongside it (0 or TFD_TIMER_ABSTIME).
561 int epoll_reactor::get_timeout(itimerspec& ts)
// A zero interval means the timer does not repeat.
563 ts.it_interval.tv_sec = 0;
564 ts.it_interval.tv_nsec = 0;
// Same five-minute maximum as the millisecond overload, here in microseconds.
566 long usec = timer_queues_.wait_duration_usec(5 * 60 * 1000 * 1000);
567 ts.it_value.tv_sec = usec / 1000000;
// When usec is 0 the value becomes 1ns and the call returns
// TFD_TIMER_ABSTIME. Presumably this makes the timer expire at once (an
// absolute time of 1ns is already past) rather than passing an all-zero
// it_value, which timerfd_settime documents as disarming the timer.
568 ts.it_value.tv_nsec = usec ? (usec % 1000000) * 1000 : 1;
570 return usec ? 0 : TFD_TIMER_ABSTIME;
572 #endif // defined(BOOST_ASIO_HAS_TIMERFD)
// Scope guard used by descriptor_state::perform_io(). perform_io() stores
// completed operations in ops_ and the one it returns in first_op_; the
// destructor then posts whatever is left in ops_ and fixes up the io_service
// work count.
// NOTE(review): the conditions selecting between the two destructor paths
// (presumably a test of first_op_) are not visible in this excerpt.
574 struct epoll_reactor::perform_io_cleanup_on_block_exit
// Starts with no first operation recorded.
576 explicit perform_io_cleanup_on_block_exit(epoll_reactor* r)
577 : reactor_(r), first_op_(0)
581 ~perform_io_cleanup_on_block_exit()
585 // Post the remaining completed operations for invocation.
587 reactor_->io_service_.post_deferred_completions(ops_);
589 // A user-initiated operation has completed, but there's no need to
590 // explicitly call work_finished() here. Instead, we'll take advantage of
591 // the fact that the task_io_service will call work_finished() once we
596 // No user-initiated operations have completed, so we need to compensate
597 // for the work_finished() call that the task_io_service will make once
598 // this operation returns.
599 reactor_->io_service_.work_started();
// reactor_:  reactor whose io_service_ receives the deferred completions.
// ops_:      completed operations still waiting to be posted.
// first_op_: the operation perform_io() returns to its caller, or 0.
603 epoll_reactor* reactor_;
604 op_queue<operation> ops_;
605 operation* first_op_;
// Constructs the descriptor state as an operation whose completion function
// is descriptor_state::do_complete.
608 epoll_reactor::descriptor_state::descriptor_state()
609 : operation(&epoll_reactor::descriptor_state::do_complete)
// Processes the queued operations that the epoll `events` mask makes
// eligible. Completed operations are collected in io_cleanup.ops_; the first
// one is returned to the caller and the rest are left for io_cleanup's
// destructor to post.
613 operation* epoll_reactor::descriptor_state::perform_io(uint32_t events)
// io_cleanup is declared before the lock, so on scope exit the mutex is
// released first and the cleanup destructor runs afterwards.
615 perform_io_cleanup_on_block_exit io_cleanup(reactor_);
616 mutex::scoped_lock descriptor_lock(mutex_);
618 // Exception operations must be processed first to ensure that any
619 // out-of-band data is read before normal data.
// flag[j] is the epoll bit for op queue j; the order EPOLLIN, EPOLLOUT,
// EPOLLPRI presumably matches read_op, write_op, except_op - confirm.
// Iterating downwards visits the EPOLLPRI queue first.
620 static const int flag[max_ops] = { EPOLLIN, EPOLLOUT, EPOLLPRI };
621 for (int j = max_ops - 1; j >= 0; --j)
// EPOLLERR and EPOLLHUP make every op type eligible, not just the one whose
// own bit is set.
623 if (events & (flag[j] | EPOLLERR | EPOLLHUP))
// NOTE(review): presumably each op is performed and popped before being
// pushed; those statements are not visible in this excerpt - confirm.
625 while (reactor_op* op = op_queue_[j].front())
630 io_cleanup.ops_.push(op);
638 // The first operation will be returned for completion now. The others will
639 // be posted for later by the io_cleanup object's destructor.
640 io_cleanup.first_op_ = io_cleanup.ops_.front();
641 io_cleanup.ops_.pop();
642 return io_cleanup.first_op_;
// Completion function installed by the descriptor_state constructor. `base`
// is the descriptor_state itself and `bytes_transferred` carries the epoll
// event mask rather than a byte count. It runs perform_io() and, if that
// returns an operation, completes it with the incoming `ec` and a byte count
// of 0.
// NOTE(review): any guard on `owner` is not visible in this excerpt.
645 void epoll_reactor::descriptor_state::do_complete(
646 io_service_impl* owner, operation* base,
647 const boost::system::error_code& ec, std::size_t bytes_transferred)
651 descriptor_state* descriptor_data = static_cast<descriptor_state*>(base);
// Recover the event mask that was passed in through bytes_transferred.
652 uint32_t events = static_cast<uint32_t>(bytes_transferred);
653 if (operation* op = descriptor_data->perform_io(events))
655 op->complete(*owner, ec, 0);
660 } // namespace detail
664 #include <boost/asio/detail/pop_options.hpp>
666 #endif // defined(BOOST_ASIO_HAS_EPOLL)
668 #endif // BOOST_ASIO_DETAIL_IMPL_EPOLL_REACTOR_IPP