[make-initrd] [PATCH v1 02/11] feature/procacct: Use epoll

Alexey Gladkov gladkov.alexey at gmail.com
Thu Jun 15 20:59:11 MSK 2023


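Replace the blocking netlink recv() loop with an epoll-based event loop
driven by a table of per-descriptor handlers (prepare/handle/finish).
The -m option is removed; the cpumask is now always derived from the
number of online CPUs.

This is preparation for adding more event sources.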

Signed-off-by: Alexey Gladkov <gladkov.alexey at gmail.com>
---
 features/debug-procacct/src/procacct.c | 288 +++++++++++++++++--------
 1 file changed, 200 insertions(+), 88 deletions(-)

diff --git a/features/debug-procacct/src/procacct.c b/features/debug-procacct/src/procacct.c
index 9302edb0..cfb4de1f 100644
--- a/features/debug-procacct/src/procacct.c
+++ b/features/debug-procacct/src/procacct.c
@@ -31,6 +31,7 @@
 #include <sys/stat.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
+#include <sys/epoll.h>
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -43,6 +44,7 @@
 #include <search.h>
 
 #include "rd/logging.h"
+#include "rd/memory.h"
 
 /* Maximum size of response requested or message sent */
 #define MAX_MSG_SIZE 1024
@@ -63,6 +65,7 @@
 static pid_t current_pid;
 static int rcvbufsz;
 static char name[100];
+static int fd_out = 1;
 
 struct msgtemplate {
 	struct nlmsghdr n;
@@ -77,14 +80,44 @@ struct proc_cmdline {
 	char *cmdline;
 };
 
+struct ctx_netlink {
+	uint16_t cpuid;
+	uint16_t cpumask_len;
+	char cpumask[100 + 6 * MAX_CPUS];
+};
+
+enum {
+	FD_NETLINK,
+	FD_MAX,
+};
+
+#define EV_MAX (FD_MAX * 32)
+
+struct fd_handler;
+typedef int (*fdhandler_t)(struct fd_handler *);
+
+struct fd_handler {
+	int fd;
+	fdhandler_t fd_prepare;
+	fdhandler_t fd_handler;
+	fdhandler_t fd_finish;
+	void *data;
+};
+
+static struct fd_handler fd_handler_list[FD_MAX];
+
 static void usage(void)                                                       __attribute__((noreturn));
 static int proc_compare(const void *a, const void *b)                         __attribute__((nonnull(1, 2)));
-static int create_nl_socket(int protocol);
+static int process_netlink_events(struct fd_handler *el);
+static int prepare_netlink(struct fd_handler *el);
+static int finish_netlink(struct fd_handler *el);
+static void setup_netlink_fd(struct fd_handler *el);
 static ssize_t send_cmd(int fd, uint16_t nlmsg_type, uint8_t genl_cmd,
                         uint16_t nla_type, void *nla_data, uint16_t nla_len);
 static uint16_t get_family_id(int fd);
 static void print_procacct(int fd, struct taskstats *t)                       __attribute__((nonnull(2)));
 static void handle_aggr(struct nlattr *na, int fd)                            __attribute__((nonnull(1)));
+static int setup_epoll_fd(struct fd_handler list[FD_MAX]);
 
 int proc_compare(const void *a, const void *b)
 {
@@ -100,44 +133,35 @@ int proc_compare(const void *a, const void *b)
 
 void usage(void)
 {
-	fprintf(stderr, "procacct [-o logfile] [-r bufsize] [-m cpumask]\n");
+	fprintf(stderr, "procacct [-o logfile] [-r bufsize]\n");
 	exit(1);
 }
 
 /*
  * Create a raw netlink socket and bind
  */
-int create_nl_socket(int protocol)
+void setup_netlink_fd(struct fd_handler *el)
 {
-	int fd;
 	struct sockaddr_nl local;
 
-	fd = socket(AF_NETLINK, SOCK_RAW, protocol);
-	if (fd < 0)
-		return -1;
+	el->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, NETLINK_GENERIC);
+	if (el->fd < 0)
+		rd_fatal("error creating Netlink socket: %m");
 
-	if (rcvbufsz) {
-		if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, sizeof(rcvbufsz)) < 0) {
-			rd_err("unable to set socket rcv buf size to %d", rcvbufsz);
-			goto error;
-		}
-	}
+	if (rcvbufsz && setsockopt(el->fd, SOL_SOCKET, SO_RCVBUF, &rcvbufsz, sizeof(rcvbufsz)) < 0)
+		rd_fatal("unable to set socket rcv buf size to %d", rcvbufsz);
 
 	memset(&local, 0, sizeof(local));
 	local.nl_family = AF_NETLINK;
 
-	if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
-		rd_err("unable bind to socket");
-		goto error;
-	}
+	if (bind(el->fd, (struct sockaddr *) &local, sizeof(local)) < 0)
+		rd_fatal("unable bind to socket");
 
-	return fd;
-error:
-	close(fd);
-	return -1;
+	el->fd_handler = process_netlink_events;
+	el->fd_prepare = prepare_netlink;
+	el->fd_finish  = finish_netlink;
 }
 
-
 ssize_t send_cmd(int fd, uint16_t nlmsg_type, uint8_t genl_cmd, uint16_t nla_type, void *nla_data, uint16_t nla_len)
 {
 	struct nlattr *na;
@@ -268,89 +292,80 @@ void handle_aggr(struct nlattr *na, int fd)
 	}
 }
 
-int main(int argc, char *argv[])
+int prepare_netlink(struct fd_handler *el)
 {
+	struct ctx_netlink *ctx;
 	ssize_t ret;
-	uint16_t id;
 
-	char cpumask[100 + 6 * MAX_CPUS];
-	uint16_t cpumask_len;
+	ctx = rd_calloc_or_die(1, sizeof(*ctx));
 
-	int fd_nlink = -1;
-	int fd_out = 1;
-	int write_file = 0;
-	int maskset = 0;
-	char *logfile = NULL;
+	long np = sysconf(_SC_NPROCESSORS_ONLN);
+	if (np > 1)
+		snprintf(ctx->cpumask, sizeof(ctx->cpumask), "0-%ld", np - 1);
+	else
+		snprintf(ctx->cpumask, sizeof(ctx->cpumask), "1");
 
-	while (1) {
-		int c = getopt(argc, argv, "m:o:r:");
-		if (c < 0)
-			break;
-
-		switch (c) {
-			case 'o':
-				logfile = strdup(optarg);
-				write_file = 1;
-				break;
-			case 'r':
-				rcvbufsz = atoi(optarg);
-				if (rcvbufsz < 0)
-					rd_fatal("invalid rcv buf size");
-				break;
-			case 'm':
-				strlcpy(cpumask, optarg, sizeof(cpumask));
-				maskset = 1;
-				break;
-			default:
-				usage();
-		}
-	}
-	if (!maskset) {
-		long np = sysconf(_SC_NPROCESSORS_ONLN);
-		if (np > 1)
-			snprintf(cpumask, sizeof(cpumask), "0-%ld", np - 1);
-		else
-			snprintf(cpumask, sizeof(cpumask), "1");
+	if ((strlen(ctx->cpumask) + 1) > USHRT_MAX) {
+		rd_err("cpumask too long");
+		free(ctx);
+		return -1;
 	}
 
-	if ((strlen(cpumask) + 1) > USHRT_MAX)
-		rd_fatal("cpumask too long");
+	ctx->cpumask_len = (uint16_t) strlen(ctx->cpumask) + 1;
 
-	cpumask_len = (uint16_t) strlen(cpumask) + 1;
+	ctx->cpuid = get_family_id(el->fd);
+	if (!ctx->cpuid) {
+		rd_err("error getting family id, errno=%d", errno);
+		free(ctx);
+		return -1;
+	}
 
-	if (write_file) {
-		fd_out = open(logfile, O_WRONLY | O_CREAT | O_TRUNC | O_SYNC, 0644);
-		if (fd_out < 0)
-			rd_fatal("cannot open output file: %s: %m", logfile);
+	ret = send_cmd(el->fd, ctx->cpuid, TASKSTATS_CMD_GET,
+	               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+		       &ctx->cpumask, ctx->cpumask_len);
+	if (ret < 0) {
+		rd_err("error sending register cpumask");
+		free(ctx);
+		return -1;
 	}
 
-	fd_nlink = create_nl_socket(NETLINK_GENERIC);
-	if (fd_nlink < 0)
-		rd_fatal("error creating Netlink socket: %m");
+	el->data = ctx;
 
-	current_pid = getpid();
+	return 0;
+}
 
-	id = get_family_id(fd_nlink);
-	if (!id) {
-		rd_err("error getting family id, errno=%d", errno);
-		goto err;
-	}
+int finish_netlink(struct fd_handler *el)
+{
+	struct ctx_netlink *ctx = el->data;
+	ssize_t ret;
 
-	ret = send_cmd(fd_nlink, id, TASKSTATS_CMD_GET,
-	               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, &cpumask, cpumask_len);
+	ret = send_cmd(el->fd, ctx->cpuid, TASKSTATS_CMD_GET,
+	               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+		       &ctx->cpumask, ctx->cpumask_len);
 	if (ret < 0) {
-		rd_err("error sending register cpumask");
-		goto err;
+		rd_err("error sending deregister cpumask");
+		return -1;
 	}
+	free(ctx);
+	return 0;
+}
+
+int process_netlink_events(struct fd_handler *el)
+{
+	ssize_t ret;
 
 	while (1) {
 		struct msgtemplate msg;
 		struct nlattr *na;
 
-		ret = recv(fd_nlink, &msg, sizeof(msg), 0);
+		errno = 0;
+		ret = recv(el->fd, &msg, sizeof(msg), 0);
 		if (ret < 0) {
-			rd_err("nonfatal reply error: errno=%d", errno);
-			continue;
+			if (errno == EAGAIN || errno == EWOULDBLOCK)
+				return 0;
+			if (errno == EINTR)
+				continue;
+			rd_fatal("nonfatal reply error: %m");
 		}
 
 		if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), ret)) {
@@ -381,12 +396,109 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	ret = send_cmd(fd_nlink, id, TASKSTATS_CMD_GET,
-	               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, &cpumask, cpumask_len);
-	if (ret < 0)
-		rd_fatal("error sending deregister cpumask");
+	return 0;
+}
+
+int setup_epoll_fd(struct fd_handler list[FD_MAX])
+{
+	int epollfd;
+	struct epoll_event ev = { .events = EPOLLIN };
+
+	if ((epollfd = epoll_create1(EPOLL_CLOEXEC)) < 0)
+		rd_fatal("epoll_create1: %m");
+
+	for (int i = 0; i < FD_MAX; i++) {
+		ev.data.ptr = &list[i];
+
+		if (epoll_ctl(epollfd, EPOLL_CTL_ADD, list[i].fd, &ev) < 0)
+			rd_fatal("epoll_ctl: %m");
+	}
+
+	return epollfd;
+}
+
+int main(int argc, char *argv[])
+{
+	int fd_epoll = -1;
+	int write_file = 0;
+	char *logfile = NULL;
+
+	while (1) {
+		int c = getopt(argc, argv, "o:r:");
+		if (c < 0)
+			break;
+
+		switch (c) {
+			case 'o':
+				logfile = strdup(optarg);
+				write_file = 1;
+				break;
+			case 'r':
+				rcvbufsz = atoi(optarg);
+				if (rcvbufsz < 0)
+					rd_fatal("invalid rcv buf size");
+				break;
+			default:
+				usage();
+		}
+	}
+
+	if (write_file) {
+		fd_out = open(logfile, O_WRONLY | O_CREAT | O_TRUNC | O_SYNC, 0644);
+		if (fd_out < 0)
+			rd_fatal("cannot open output file: %s: %m", logfile);
+	}
+
+	current_pid = getpid();
+
+	setup_netlink_fd(&fd_handler_list[FD_NETLINK]);
+	fd_epoll = setup_epoll_fd(fd_handler_list);
+
+	for (int i = 0; i < FD_MAX; i++) {
+		if (fd_handler_list[i].fd < 0 || !fd_handler_list[i].fd_prepare)
+			continue;
+		if (fd_handler_list[i].fd_prepare(&fd_handler_list[i]) < 0)
+			rd_fatal("unable to prepare file descriptor");
+	}
+
+	while (1) {
+		struct epoll_event ev[EV_MAX];
+		int fdcount;
+
+		errno = 0;
+		fdcount = epoll_wait(fd_epoll, ev, EV_MAX, -1);
+
+		if (fdcount < 0) {
+			if (errno == EINTR)
+				continue;
+			rd_fatal("epoll_wait: %m");
+		}
+
+		for (int i = 0; i < fdcount; i++) {
+			if (!(ev[i].events & EPOLLIN))
+				continue;
+
+			struct fd_handler *fde = ev[i].data.ptr;
+
+			if (fde->fd_handler(fde) != 0)
+				goto err;
+		}
+	}
+
 err:
-	close(fd_nlink);
+	for (int i = 0; i < FD_MAX; i++) {
+		if (fd_handler_list[i].fd < 0)
+			continue;
+
+		if (epoll_ctl(fd_epoll, EPOLL_CTL_DEL, fd_handler_list[i].fd, NULL) < 0)
+			rd_err("epoll_ctl: %m");
+
+		if (fd_handler_list[i].fd_finish)
+			fd_handler_list[i].fd_finish(&fd_handler_list[i]);
+
+		close(fd_handler_list[i].fd);
+	}
+	close(fd_epoll);
 
 	if (fd_out)
 		close(fd_out);
-- 
2.33.8



More information about the Make-initrd mailing list