[CentOS] forwarding packets to service in same host without using loopback network

Wed Apr 25 18:58:22 UTC 2012
Arif Hossain <aftnix at gmail.com>

This question is not about Linux usage, but I still think the user list
is a good crowd of Linux programmers. So here it goes.
I have this libnetfilter_queue application which receives packets from
kernel based on some iptables rule. Before going straight to my
problem, I'm giving a sample workable code and other tools to set up a
test environment so that the problem definition and possible solutions
can be more accurate and robust.

The following code describes the core functionality of the application:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <errno.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <linux/types.h>
    #include <linux/netfilter.h>	/* for NF_ACCEPT */

    #include <libnetfilter_queue/libnetfilter_queue.h>
    #define PREROUTING 0
    #define POSTROUTING 4
    #define OUTPUT 3

    /* returns packet id */
    static u_int32_t
    print_pkt (struct nfq_data *tb)
      int id = 0;
      struct nfqnl_msg_packet_hdr *ph;
      struct nfqnl_msg_packet_hw *hwph;
      u_int32_t mark, ifi;
      int ret;
      unsigned char *data;

      ph = nfq_get_msg_packet_hdr (tb);
      if (ph)
          id = ntohl (ph->packet_id);
          printf ("hw_protocol=0x%04x hook=%u id=%u ",
    	      ntohs (ph->hw_protocol), ph->hook, id);

      hwph = nfq_get_packet_hw (tb);
      if (hwph)
          int i, hlen = ntohs (hwph->hw_addrlen);

          printf ("hw_src_addr=");
          for (i = 0; i < hlen - 1; i++)
    	printf ("%02x:", hwph->hw_addr[i]);
          printf ("%02x ", hwph->hw_addr[hlen - 1]);

      mark = nfq_get_nfmark (tb);
      if (mark)
        printf ("mark=%u ", mark);

      ifi = nfq_get_indev (tb);
      if (ifi)
        printf ("indev=%u ", ifi);

      ifi = nfq_get_outdev (tb);
      if (ifi)
        printf ("outdev=%u ", ifi);
      ifi = nfq_get_physindev (tb);
      if (ifi)
        printf ("physindev=%u ", ifi);

      ifi = nfq_get_physoutdev (tb);
      if (ifi)
        printf ("physoutdev=%u ", ifi);

      ret = nfq_get_payload (tb, &data);
      if (ret >= 0)
        printf ("payload_len=%d ", ret);

      fputc ('\n', stdout);

      return id;

    static int
    cb (struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
        struct nfq_data *nfa, void *data)
      uint32_t ip_src, ip_dst;
      struct in_addr s_ip;
      struct in_addr d_ip;
      uint16_t src_port;
      uint16_t dst_port;
      int verdict;
      int id;
      int ret;
      unsigned char *buffer;
      struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr (nfa);
      if (ph)
          id = ntohl (ph->packet_id);
          printf ("received packet with id %d", id);
      ret = nfq_get_payload (nfa, &buffer);
      ip_src = *((uint32_t *) (buffer + 12));
      ip_dst = *((uint32_t *) (buffer + 16));
      src_port = *((uint16_t *) (buffer + 20));
      dst_port = *((uint16_t *) (buffer + 22));
      s_ip.s_addr = (uint32_t) ip_src;
      d_ip.s_addr = (uint32_t) ip_dst;
      *(buffer + 26) = 0x00;
      *(buffer + 27) = 0x00;
      printf ( "source IP %s", inet_ntoa (s_ip));
      printf ( "destination IP %s", inet_ntoa (d_ip));
      printf ( "source port %d", src_port);
      printf ( "destination port %d", dst_port);
      if (ret)
          switch (ph->hook)
    	case PREROUTING:
    	  printf ( "inbound packet");
    	case OUTPUT:
    	  printf ( "outbound packet");
      verdict = nfq_set_verdict (qh, id, NF_ACCEPT, ret, buffer);
      if (verdict)
        printf ( "verdict ok");
      return verdict;

    main (int argc, char **argv)
      struct nfq_handle *h;
      struct nfq_q_handle *qh;
      struct nfnl_handle *nh;
      int fd;
      int rv;
      char buf[4096] __attribute__ ((aligned));

      printf ("opening library handle\n");
      h = nfq_open ();
      if (!h)
          fprintf (stderr, "error during nfq_open()\n");
          exit (1);

      printf ("unbinding existing nf_queue handler for AF_INET (if any)\n");
      if (nfq_unbind_pf (h, AF_INET) < 0)
          fprintf (stderr, "error during nfq_unbind_pf()\n");
          exit (1);

      printf ("binding nfnetlink_queue as nf_queue handler for AF_INET\n");
      if (nfq_bind_pf (h, AF_INET) < 0)
          fprintf (stderr, "error during nfq_bind_pf()\n");
          exit (1);

      printf ("binding this socket to queue '0'\n");
      qh = nfq_create_queue (h, 0, &cb, NULL);
      if (!qh)
          fprintf (stderr, "error during nfq_create_queue()\n");
          exit (1);

      printf ("setting copy_packet mode\n");
      if (nfq_set_mode (qh, NFQNL_COPY_PACKET, 0xffff) < 0)
          fprintf (stderr, "can't set packet_copy mode\n");
          exit (1);

      fd = nfq_fd (h);

      for (;;)
          if ((rv = recv (fd, buf, sizeof (buf), 0)) >= 0)
    	  printf ("pkt received\n");
    	  nfq_handle_packet (h, buf, rv);
          /* if your application is too slow to digest the packets that
           * are sent from kernel-space, the socket buffer that we use
           * to enqueue packets may fill up returning ENOBUFS. Depending
           * on your application, this error may be ignored. Please, see
           * the doxygen documentation of this library on how to improve
           * this situation.
          if (rv < 0 && errno == ENOBUFS)
    	  printf ("losing packets!\n");
          perror ("recv failed");

      printf ("unbinding from queue 0\n");
      nfq_destroy_queue (qh);

    #ifdef INSANE
      /* normally, applications SHOULD NOT issue this command, since
       * it detaches other programs/sockets from AF_INET, too ! */
      printf ("unbinding from AF_INET\n");
      nfq_unbind_pf (h, AF_INET);

      printf ("closing library handle\n");
      nfq_close (h);

      exit (0);

Notice that in the callback function two calls to my_mangling_fun() are
commented out. This is where I mangle the incoming and outgoing
packets. I think this code is sufficient to describe my case. If
further clarification is needed, please ask and I will post further details.

Lets say accompanying iptables rules are following :

    $iptables -t mangle -A PREROUTING -p udp --dport 5000 -j NFQUEUE
    $iptables -t mangle -A OUTPUT -p udp --sport 5000 -j NFQUEUE

Let's compile and fire up the thing.

    $gcc -g3 nfq_test.c -lnfnetlink -lnetfilter_queue
    $./a.out (should be as root)

now we can feed garbage udp payload to this thing by netcat both
client and server mode

    $nc -ul 5000
    $nc -uvv <IP> 5000

This will print the packet from my netfilter_queue app on stdout. Now
that the development environment is set up, we can move on to the next step.

What we are trying to achieve is following :

Our server is listening on 5000 port. Now all incoming packet destined
to udp port 5000 will be queued by kernel. And the handle to this
queue will be given to user application we listed earlier. This queue
mechanism works like this: When a packet is available, the callback
function(cb() in our code) is called. after processing, the callback
function calls nfq_set_verdict(). after a **verdict** is returned,
next packet will pop from the queue. notice that a packet will not pop
from queue if its preceding packet has not been issued a verdict. This
verdict values are NF_ACCEPT for accepting packet, NF_DROP for
dropping the packet.

Now what if i want to concatenate the udp payloads of the incoming and
outgoing packet without touching client and server side code?

If I want to concatenate UDP payloads from within this very app, then
we need to have multiple packets at hand. But we have seen that a
packet does not pop from the queue before a verdict is issued for its
preceding one.

So how can this be done?

One possible solution is to issue NF_DROP for every packet and save
those packets in an intermediate data structure. Let's say we have
done that. But how can these packets then be delivered to the service
listening on port 5000?

We can't use network stack for delivering the packet, because if we
do, then packets will end up in NFQUEUE again.

Another problem is, the server is totally agnostic about this app.
That means it should not see any difference in the packets. It should
see packets as if it came from the original client.

I have heard that an application can send data to a server on the same
host without using the network layer (IP, port) by writing to some files. I do
not know the validity of this statement. But if anyone knows anything
about it, it would be wonderful.

Another possible solution proposed to me is :

1. Store packets in the application and return verdict NF_DROP.
2. Re-inject packets into the network stack using RAW sockets.
3. Tag the concatenated UDP packets with a DSCP value (see IP packet format).
4. In iptables, add a rule to match on this DSCP (--dscp) and ACCEPT
the packet directly, without it passing through your netfilter queue.
5. If the provider already tags some packets with DSCP, you can add
some iptables rules to clear them, like:

$iptables -t mangle -A INPUT -j DSCP --set-dscp 0