Hello, network developers.

I'm pleased to announce the first pre-alpha version
of the zero-copy sniffer "device".
It acts as a packet socket, i.e. it gets all packets
using prot_hook.func(), but never copies them.

The basic idea behind zero-copy is to remap the
physical pages where skb->data lives into the
userspace process.

According to my tests, which can be found commented out
in the code (packet_mmap()),
remapping one page is from 5 up to 20
times faster than copying the same amount of data
(i.e. PAGE_SIZE).

Since the current VM code requires the PTE to be unmapped
when remapping, but only exports unmap_mapping_range()
and __flush_tlb(), I used them, although they are quite
heavy monsters.
Remapping also requires mm->mmap_sem to be held,
so I placed the main remapping code into a workqueue.

skbs are queued in prot_hook.func() and then the work
is scheduled; there each skb is unlinked and remapped.
The skb is not freed there, as it normally should be, since userspace
would then never find the real data; instead
some smart algorithm should be devised to defer skb freeing,
or freeing could simply be deferred using a timer and a redefined skb destructor.
The work should also remap several skbs at once, so that rescheduling
does not happen very frequently.
The first mapped page is an information page, in which the in-page
offset of skb->data is stored, so that userspace can detect
where the actual data lives on the next page.

Such a scheme is very suitable for applications that
do not require the whole data flow, but only select some data
from the flow, based on packet content.
I'm quite sure it will be slower than copying for small packets,
so these two ideas must be combined to achieve
the maximum sniffer performance.

The current code is basically a proof of concept, so
it has tons of dirty quirks, and I'm not a VM hacker,
so I would gladly listen to your thoughts about the code and the idea itself.

Attached files:
af_tlb.[ch] - kernel side sniffer implementation.
tlb_test.c - userspace "sniffer".
Makefile - build kernel side with "default" target (plain "make") and userspace
with "test" target.

Thank you.

-- 
        Evgeniy Polyakov
# Kernel module object built by kbuild from af_tlb.c.
obj-m           := af_tlb.o

# Kernel build tree; the active line points at the running kernel's tree.
KDIR    := /lib/modules/`uname -r`/build
#KDIR   := /usr/local/src/linux-2.6
PWD     := $(shell pwd)
# Warning flags for the userspace test program.
UCFLAGS := -W -Wall

# None of these targets produces a file of the same name.
.PHONY: default test clean

# Build the kernel module. First target, so a plain "make" runs it.
# Recipe lines must start with a TAB character.
default:
	$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules

# Build the userspace sniffer.
test:
	gcc $(UCFLAGS) tlb_test.c -o tlb_test

# Remove kernel build products and editor backup files.
clean:
	$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) clean
	@rm -f *~
/*
 *      af_tlb.c
 * 
 * 2005 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]>
 * All rights reserved.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

 
#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/workqueue.h>

#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/fs.h>
#include <linux/shm.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/swapops.h>

#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>

#include "af_tlb.h"

/*
 * Single, module-wide work item that runs test_timer_func().
 * It is declared with NULL data here; packet_mmap() later points it at
 * the socket being mapped, and packet_rcv_spkt() schedules it whenever a
 * packet is queued on a mapped socket.
 */
static void test_timer_func(void *data);
static DECLARE_WORK(w, test_timer_func, NULL);

/*
 * Convert a generic socket pointer into the packet_sock it belongs to.
 * This is a plain pointer cast; af_tlb.h requires struct sock to be the
 * first member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
        struct packet_sock *po = (struct packet_sock *)sk;

        return po;
}

/*
 * Socket destructor, installed as sk->sk_destruct by packet_create().
 * Traps if receive or send memory is still accounted to the socket and
 * logs a message if the socket has not been marked SOCK_DEAD.
 */
static void packet_sock_destruct(struct sock *sk)
{
        BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
        BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

        if (sock_flag(sk, SOCK_DEAD))
                return;

        printk("Attempt to release alive packet socket: %p\n", sk);
}


/*
 * Forward declaration: packet_create() installs this ops table, which is
 * defined further down, after the handlers it refers to.
 */
static struct proto_ops packet_ops_spkt;

/*
 * Debug helper: log the sharing state, packet type and length of @skb,
 * followed by the protocol and MAC addresses from its Ethernet header.
 *
 * Each message is emitted with a single printk() so that the KERN_INFO
 * level marker appears only at the start of a log line, and the protocol
 * is converted to host byte order before printing.
 */
static void dump_skb(struct sk_buff *skb)
{
        struct ethhdr *eth = eth_hdr(skb);

        printk(KERN_INFO "shared=%d, cloned=%d, type=%d, len=%d.\n",
                        skb_shared(skb), skb_cloned(skb),
                        skb->pkt_type, skb->len);

        printk(KERN_INFO "MAC: proto=%04x, "
                        "src=%02x:%02x:%02x:%02x:%02x:%02x, "
                        "dst=%02x:%02x:%02x:%02x:%02x:%02x.\n",
                        ntohs(eth->h_proto),
                        eth->h_source[0], eth->h_source[1], eth->h_source[2],
                        eth->h_source[3], eth->h_source[4], eth->h_source[5],
                        eth->h_dest[0], eth->h_dest[1], eth->h_dest[2],
                        eth->h_dest[3], eth->h_dest[4], eth->h_dest[5]);
}

/*
 * Receive hook, installed as prot_hook.func by packet_create().
 *
 * Queues every non-loopback frame on the owning socket and, if the socket
 * is currently mapped (PACKET_MAPPED), schedules the remap work.
 * Always returns 0; frames that cannot be queued are freed here.
 */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
{
        struct sock *sk = pt->af_packet_priv;
        struct packet_sock *po = pkt_sk(sk);
        struct sockaddr_pkt *addr;

        /* Loopback frames are dropped without being queued. */
        if (skb->pkt_type == PACKET_LOOPBACK) {
                kfree_skb(skb);
                return 0;
        }

        skb = skb_share_check(skb, GFP_ATOMIC);
        if (skb == NULL)
                return 0;

        /* drop any routing info */
        dst_release(skb->dst);
        skb->dst = NULL;

        /* The source description lives in the skb control block. */
        addr = (struct sockaddr_pkt *)skb->cb;

        /*
         * Incoming packets have the link-level header pulled, so push it
         * back. For outgoing ones skb->data == skb->mac.raw and this is
         * a no-op.
         */
        skb_push(skb, skb->data - skb->mac.raw);

        /* The SOCK_PACKET socket receives _all_ frames. */
        addr->spkt_family = dev->type;
        strlcpy(addr->spkt_device, dev->name, sizeof(addr->spkt_device));
        addr->spkt_protocol = skb->protocol;

        /*
         * Charge the memory to the socket, so that a socket cannot use
         * all the memory up. On failure the frame is dropped.
         */
        if (sock_queue_rcv_skb(sk, skb) != 0) {
                kfree_skb(skb);
                return 0;
        }

        if (test_bit(PACKET_MAPPED, &po->flags))
                schedule_work(&w);

        return 0;
}


/*
 *      Close a PACKET socket. We immediately go to 'closed' state and
 *      remove our protocol entry in the device list, then wait for the
 *      remap work and release the information page.
 *
 *      Returns 0 in all cases, including when the socket has no sock
 *      attached.
 */

static int packet_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po;

        if (!sk)
                return 0;

        po = pkt_sk(sk);

        sk_del_node_init(sk);

        /* Unhook from the device layer and drop the reference the hook held. */
        if (test_bit(PACKET_RUNNING, &po->flags)) {
                dev_remove_pack(&po->prot_hook);
                clear_bit(PACKET_RUNNING, &po->flags);
                __sock_put(sk);
        }

        sock_orphan(sk);
        sock->sk = NULL;

        skb_queue_purge(&sk->sk_receive_queue);

        /* Make sure the remap work is not running before the page goes away. */
        cancel_delayed_work(&w);
        flush_scheduled_work();

        /*
         * packet_mmap() marked the information page reserved; the mark
         * has to be cleared again before the page is handed back to the
         * allocator. po->page is 0 if the socket was never mapped.
         */
        if (po->page) {
                ClearPageReserved(virt_to_page(po->page));
                free_page(po->page);
                po->page = 0;
        }

        sock_put(sk);
        return 0;
}

/*
 *      Attach a packet hook.
 *
 *      Re-targets the socket's protocol hook to @dev and @protocol.
 *      A currently registered hook is removed first. A new hook is only
 *      registered when @protocol is non-zero and either no device is
 *      given or the device is up; for a device that is down sk_err is set
 *      to ENETDOWN instead. Always returns 0.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
{
        struct packet_sock *po = pkt_sk(sk);
        /*
         *      Detach an existing hook if present.
         */

        lock_sock(sk);

        spin_lock(&po->bind_lock);
        if (test_bit(PACKET_RUNNING, &po->flags)) {
                /* Drop the reference that was taken when the hook was added. */
                __sock_put(sk);
                clear_bit(PACKET_RUNNING, &po->flags);
                po->num = 0;
                /* bind_lock is released around dev_remove_pack() and retaken after. */
                spin_unlock(&po->bind_lock);
                dev_remove_pack(&po->prot_hook);
                spin_lock(&po->bind_lock);
        }

        po->num = protocol;
        po->prot_hook.type = protocol;
        po->prot_hook.dev = dev;

        po->ifindex = dev ? dev->ifindex : 0;

        /* Protocol 0: the hook stays detached. */
        if (protocol == 0)
                goto out_unlock;

        if (dev) {
                if (dev->flags&IFF_UP) {
                        /* Each registered hook holds one socket reference. */
                        dev_add_pack(&po->prot_hook);
                        sock_hold(sk);
                        set_bit(PACKET_RUNNING, &po->flags);
                } else {
                        /* Device is down: record the error on the socket. */
                        sk->sk_err = ENETDOWN;
                        if (!sock_flag(sk, SOCK_DEAD))
                                sk->sk_error_report(sk);
                }
        } else {
                /* No device given: register the hook without one. */
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                set_bit(PACKET_RUNNING, &po->flags);
        }

out_unlock:
        spin_unlock(&po->bind_lock);
        release_sock(sk);
        return 0;
}

/*
 * bind() handler: attach the socket to the network device whose name is
 * carried in uaddr->sa_data.
 *
 * Returns -EINVAL if @addr_len is not sizeof(struct sockaddr), -ENODEV if
 * no device of that name exists, otherwise the result of packet_do_bind().
 */
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        char name[sizeof(uaddr->sa_data) + 1];
        struct net_device *dev;
        int err = -ENODEV;

        /* Validate the address before looking at (or logging) its contents. */
        if (addr_len != sizeof(struct sockaddr))
                return -EINVAL;

        /*
         * sa_data need not be NUL-terminated, so copy exactly its size
         * and terminate the local copy ourselves instead of letting a
         * string function scan past the end of the field.
         */
        memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
        name[sizeof(uaddr->sa_data)] = '\0';
        printk( "%s: name=%s.\n", __func__, name);

        dev = dev_get_by_name(name);
        if (dev) {
                err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
                dev_put(dev);
        }
        return err;
}

/*
 * ioctl() handler. There are no socket-specific commands: every request
 * is handed to dev_ioctl() and its result returned.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        return dev_ioctl(cmd, (void __user *)arg);
}

/*
 * Protocol descriptor: registered in packet_init() and passed to
 * sk_alloc() in packet_create(). obj_size is the size of struct
 * packet_sock.
 */
static struct proto packet_proto = {
        .name     = "PACKET",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct packet_sock),
};

/*
 * socket() handler for this family.
 *
 * Requires CAP_NET_RAW and a socket type of SOCK_DGRAM, SOCK_RAW or
 * SOCK_PACKET. Allocates the sock, initialises the per-socket state and,
 * when @protocol is non-zero, registers the receive hook straight away.
 *
 * Returns 0 on success, -EPERM, -ESOCKTNOSUPPORT or -ENOBUFS on failure.
 */
static int packet_create(struct socket *sock, int protocol)
{
        struct packet_sock *po;
        struct sock *sk;

        if (!capable(CAP_NET_RAW))
                return -EPERM;

        switch (sock->type) {
        case SOCK_DGRAM:
        case SOCK_RAW:
        case SOCK_PACKET:
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        sock->state = SS_UNCONNECTED;

        sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
        if (!sk)
                return -ENOBUFS;

        sock->ops = &packet_ops_spkt;
        sock_init_data(sock, sk);

        sk->sk_family = PF_PACKET;
        sk->sk_destruct = packet_sock_destruct;

        po = pkt_sk(sk);
        po->num = protocol;
        po->flags = 0;
        /* One skb per worker pass until packet_mmap() sets the real budget. */
        po->budget = 1;
        spin_lock_init(&po->bind_lock);

        po->prot_hook.func = packet_rcv_spkt;
        po->prot_hook.af_packet_priv = sk;

        if (protocol) {
                /* The registered hook holds its own reference on the socket. */
                po->prot_hook.type = protocol;
                dev_add_pack(&po->prot_hook);
                sock_hold(sk);
                set_bit(PACKET_RUNNING, &po->flags);
        }

        printk( "%s: protocol=%d.\n", __func__, protocol);

        return 0;
}

/*
 * vm_operations open callback: only logs the socket that backs the
 * mapping's file.
 */
static void packet_mm_open(struct vm_area_struct *vma)
{
        struct socket *sock = SOCKET_I(vma->vm_file->f_dentry->d_inode);

        printk( "%s, sk=%p.\n", __func__, sock->sk);
}

/*
 * vm_operations close callback: unmap the file's mapping range and mark
 * the socket as no longer mapped (clears po->tsk and PACKET_MAPPED).
 *
 * NOTE(review): vma->vm_file is dereferenced to find the socket before
 * it is tested below, so the test cannot protect against a NULL file.
 */
static void packet_mm_close(struct vm_area_struct *vma)
{
        struct socket *sock = SOCKET_I(vma->vm_file->f_dentry->d_inode);
        struct sock *sk = sock->sk;
        struct packet_sock *po;

        printk( "%s, sk=%p.\n", __func__, sk);

        if (vma->vm_file)
                unmap_mapping_range(vma->vm_file->f_mapping, 0, 0, 1);

        if (!sk)
                return;

        po = pkt_sk(sk);
        if (!po || !po->tsk)
                return;

        po->tsk = NULL;
        clear_bit(PACKET_MAPPED, &po->flags);
}

/* VMA callbacks installed on the mapping by packet_mmap(). */
static struct vm_operations_struct packet_mmap_ops = {
        .open           = packet_mm_open,
        .close          = packet_mm_close,
};

/*
 * Allocate one zeroed page (order 0) for use as a PTE page by
 * update_address(). With CONFIG_HIGHPTE the page may come from highmem.
 * Neither @mm nor @address is used. Returns NULL if the allocation fails.
 */
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
#ifdef CONFIG_HIGHPTE
        return alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
#else
        return alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
}
/*
 * Make sure the page-table levels down to the PTE page exist for
 * @address in @mm, and report whether a PTE is already installed there.
 *
 * Experimental helper; its only call site in test_timer_func() is
 * commented out.
 *
 * Returns 0 on success and -1 if @vma is a hugetlb mapping or a
 * page-table allocation fails.
 */
static int update_address(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (is_vm_hugetlb_page(vma)) {
                printk("Is it even possible here?.\n");
                return -1;
        }

        pgd = pgd_offset(mm, address);
        spin_lock(&mm->page_table_lock);

        pud = pud_alloc(mm, pgd, address);
        if (!pud)
                goto oom;

        pmd = pmd_alloc(mm, pud, address);
        if (!pmd)
                goto oom;

        if (!pmd_present(*pmd)) {
                struct page *new;

                printk("PMD for 0x%lx is not presented.\n", address);

                /* The allocation may sleep, so drop the spinlock around it. */
                spin_unlock(&mm->page_table_lock);
                new = pte_alloc_one(mm, address);
                spin_lock(&mm->page_table_lock);
                if (!new) {
                        printk("PTE allocation for 0x%lx failed.\n", address);
                        goto oom;
                }
                /*
                 * Because we dropped the lock, we should re-check the
                 * entry, as somebody else could have populated it..
                 */
                if (pmd_present(*pmd)) {
                        printk("PMD for 0x%lx is presented.\n", address);
                        pte_free(new);
                        goto out;
                }
                mm->nr_ptes++;
                inc_page_state(nr_page_table_pages);
                pmd_populate(mm, pmd, new);
        }

        pte = pte_offset_map(pmd, address);

        /*
         * The entry may only be inspected while it is mapped, so the
         * check is done before pte_unmap(). The former pte_mkyoung(*pte)
         * call was dropped: it returns the modified value rather than
         * changing the entry, and that value was discarded.
         */
        if (!pte_none(*pte))
                printk("pte %p for 0x%lx exists.\n", pte, address);

        pte_unmap(pte);
out:
        spin_unlock(&mm->page_table_lock);
        printk("%s: exiting.\n", __func__);

        return 0;

oom:
        spin_unlock(&mm->page_table_lock);
        return -1;
}

static void test_timer_func(void *data)
{
        struct sock *sk = (struct sock *)data;
        struct packet_sock *po;
        struct packet_shared *ps;
        struct sk_buff *skb;
        unsigned long virt, start, end;
        int num = 0;

        if (!sk)
                return;

        po = pkt_sk(sk);
        if (!po || !po->tsk || !test_bit(PACKET_RUNNING, &po->flags) || 
!test_bit(PACKET_MAPPED, &po->flags))
                return;

        start = po->vma->vm_start;
        end = po->vma->vm_end;

        down_write(&po->tsk->mm->mmap_sem);

        /*
         * This actually should not be flush_tlb(), 
         * but it is only one call that can be used in modules.
         * --zbr
         */
        __flush_tlb();
        //update_address(po->vma->vm_mm, po->vma, __pa(virt));
        if (po->vma->vm_file) {
                unmap_mapping_range(po->vma->vm_file->f_mapping, PAGE_SIZE, 0, 
0);
        }

        ps = (struct packet_shared *)po->page;
        start += PAGE_SIZE;

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) && ++num <= 
po->budget && start < end) {
                virt = (unsigned long)skb->mac.raw;
                if (!virt)
                        goto out;

                if (0) {
                        int i;

                        printk("offset=%lu, users=%d, dataref=%d.\n", 
                                        offset_in_page(virt), 
atomic_read(&skb->users), atomic_read(&skb_shinfo(skb)->dataref));
                        for (i=0; i<32; ++i)
                                printk("%02x ", ((unsigned char *)virt)[i]);
                        printk("\n");
                }
                ps->offset = offset_in_page(virt);
                //dump_skb(skb);

                SetPageReserved(virt_to_page(virt));
                if (remap_pfn_range(po->vma, start, __pa(virt) >> PAGE_SHIFT, 
PAGE_SIZE, po->vma->vm_page_prot)) {
                        ClearPageReserved(virt_to_page(virt));
                        goto out;
                }

                start += PAGE_SIZE;

out:
                /*
                 * Actually here should be some smart algo, which will defer 
skb freeing
                 * until userspace "read" it, so userspace should provide some 
kind of callback,
                 * which will require write permisions to the area, so it 
should be splitted.
                 * Or better just to free it after some timeout, say 100 msec 
should be enough.
                 * --zbr
                 */
                //kfree_skb(skb);
                continue;
        }

        up_write(&po->tsk->mm->mmap_sem);
}

static int packet_mmap(struct file *file, struct socket *sock, struct 
vm_area_struct *vma)
{
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        unsigned long size = vma->vm_end - vma->vm_start;
        int err = 0;

        printk( "%s: size=0x%lx\n", __func__, size);

#if 0
        {
                int i;
                struct timeval tv1, tv2;
                unsigned long start = vma->vm_start;
                u8 *data1, *data2;
                
                do_gettimeofday(&tv1);
                for (i=0; i<1000; i++) {
                        if (remap_pfn_range(vma, start,
                                             __pa(PAGE_OFFSET) >> PAGE_SHIFT,
                                             PAGE_SIZE,
                                             vma->vm_page_prot))
                                break;
                        __flush_tlb();
                        if (vma->vm_file)
                                unmap_mapping_range(vma->vm_file->f_mapping, 0, 
0, 1);
                        start += PAGE_SIZE;
                }
                do_gettimeofday(&tv2);

                printk("%s: 1000 remaps took %lu usec.\n", __func__, 
(tv2.tv_sec - tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);
                
                data1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
                if (!data1)
                        return -ENOMEM;
                data2 = kmalloc(PAGE_SIZE, GFP_KERNEL);
                if (!data2) {
                        kfree(data2);
                        return -ENOMEM;
                }
                
                do_gettimeofday(&tv1);
                for (i=0; i<1000; i++) {
                        memcpy(data1, ((void *)sock)+i*PAGE_SIZE, PAGE_SIZE);
                }
                do_gettimeofday(&tv2);

                printk("%s: 1000 copyings took %lu usec.\n", __func__, 
(tv2.tv_sec - tv1.tv_sec)*1000000 + tv2.tv_usec - tv1.tv_usec);

                kfree(data1);
                kfree(data2);
        }
#endif
        vma->vm_ops = &packet_mmap_ops;

        lock_sock(sk);
        po->budget = size / PAGE_SIZE;
        
        po->tsk = current;
        if (!po->tsk) {
                err = -ENODEV;
                goto err_out_unlock;
        }

        po->page = __get_free_page(GFP_KERNEL);
        if (!po->page) {
                err = -ENOMEM;
                goto err_out_unlock;
        }

        memset((void *)po->page, 0, PAGE_SIZE);
        
        SetPageReserved(virt_to_page(po->page));
        if (remap_pfn_range(vma, vma->vm_start, __pa(po->page) >> PAGE_SHIFT, 
PAGE_SIZE, vma->vm_page_prot)) {
                ClearPageReserved(virt_to_page(po->page));
                err = -EIO;
                goto err_out_unlock;
        }

        po->vma = vma;

        release_sock(sk);

        INIT_WORK(&w, test_timer_func, sk);
        
        set_bit(PACKET_MAPPED, &po->flags);

        return 0;

err_out_unlock:
        release_sock(sk);
        return err;
}


/*
 * Socket operations installed by packet_create(). Only release, bind,
 * ioctl and mmap are implemented here; every other operation uses the
 * generic sock_no_*() handler.
 */
static struct proto_ops packet_ops_spkt = {
        .family         = PF_PACKET,
        .owner          = THIS_MODULE,
        .release        = packet_release,
        .bind           = packet_bind,
        .connect        = sock_no_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = sock_no_accept,
        .getname        = sock_no_getname,
        .poll           = sock_no_poll,
        .ioctl          = packet_ioctl,
        .listen         = sock_no_listen,
        .shutdown       = sock_no_shutdown,
        .setsockopt     = sock_no_setsockopt,
        .getsockopt     = sock_no_getsockopt,
        .sendmsg        = sock_no_sendmsg,
        .recvmsg        = sock_no_recvmsg,
        .mmap           = packet_mmap,
        .sendpage       = sock_no_sendpage,
};

/*
 * Protocol family descriptor registered in packet_init(): sockets of
 * family PF_PACKET are created through packet_create().
 */
static struct net_proto_family packet_family_ops = {
        .family         = PF_PACKET,
        .create         = packet_create,
        .owner          = THIS_MODULE,
};

/*
 * Module exit: unregister the PF_PACKET family first, then the protocol
 * descriptor -- the reverse of the order used in packet_init().
 */
static void __exit packet_exit(void)
{
        sock_unregister(PF_PACKET);
        proto_unregister(&packet_proto);
}

/*
 * Module init: register the protocol descriptor and then the PF_PACKET
 * socket family.
 *
 * Returns 0 on success or the error from whichever registration failed.
 * If the family cannot be registered, the protocol registration is
 * rolled back so that a failed load leaves nothing behind.
 */
static int __init packet_init(void)
{
        int rc;

        rc = proto_register(&packet_proto, 0);
        if (rc != 0)
                return rc;

        rc = sock_register(&packet_family_ops);
        if (rc != 0) {
                proto_unregister(&packet_proto);
                return rc;
        }

        printk("%s: initialized at %lu.\n", __func__, jiffies);

        return 0;
}

/* Module entry/exit points, licence and the PF_PACKET protocol alias. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
/*
 *      af_tlb.h
 * 
 * 2005 Copyright (c) Evgeniy Polyakov <[EMAIL PROTECTED]>
 * All rights reserved.
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef __AF_TLB_H
#define __AF_TLB_H

/*
 * Layout of the information page, the first page of the mapping.
 * Shared between the kernel module and the userspace test program.
 */
struct packet_shared {
        /* Offset of the packet data within the page that follows. */
        __u16                   offset;
};

#ifdef __KERNEL__

/* Bit numbers used with the bit operations on packet_sock.flags. */
enum packet_flags {
        /* The receive hook is registered (dev_add_pack() was called). */
        PACKET_RUNNING = 0,
        /* The socket is mapped into a process (set by packet_mmap()). */
        PACKET_MAPPED,
};

/* Per-socket state of the zero-copy packet socket. */
struct packet_sock {
        /* struct sock has to be the first member of packet_sock */
        struct sock             sk;
        /* Not referenced by the code in af_tlb.c. */
        struct tpacket_stats    stats;
        /* Receive hook registered with dev_add_pack(). */
        struct packet_type      prot_hook;
        /* Taken in packet_do_bind() while the hook is changed. */
        spinlock_t              bind_lock;

        /*
         * Bit field indexed by enum packet_flags. Declared unsigned long
         * because it is passed to set_bit()/clear_bit()/test_bit().
         */
        unsigned long           flags;
        /* Interface index of the bound device, 0 if none. */
        int                     ifindex;
        /* Protocol number the hook is registered for. */
        unsigned short          num;

        /* VMA of the userspace mapping, recorded by packet_mmap(). */
        struct vm_area_struct   *vma;

        /* Task that created the mapping; NULL when not mapped. */
        struct task_struct      *tsk;

        /* Maximum number of skbs handled per run of the remap work. */
        int                     budget;
        /* Kernel address of the information page, 0 if not allocated. */
        unsigned long           page;
};

#endif /* __KERNEL__ */

#endif /* __AF_TLB_H */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/mman.h>

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <net/ethernet.h>

#include <linux/if_ether.h>
#include <linux/types.h>

#include "af_tlb.h"

/*
 * Page size assumed by the test program.
 * NOTE(review): hard-coded; presumably has to match the kernel's
 * PAGE_SIZE for the offsets below to be right -- confirm on the target.
 */
#define PAGE_SIZE       4096
/* Size of the mapping: the information page plus one packet page. */
static size_t mmap_size = 2*PAGE_SIZE;

/*
 * printf-style logging to stderr, flushed after every message.
 * The whole definition must stay on one logical line: a line break
 * without a continuation ends the macro early.
 */
#define ulog(f, a...)   do { fprintf(stderr, f, ##a); fflush(stderr); } while (0)
/* Byte @n of the object @addr, read through an unsigned char pointer. */
#define NIPQUAD_BYTE(addr, n)   (((unsigned char *)&(addr))[n])
/*
 * Expand to the first four bytes of @addr as four separate arguments,
 * for use with a "%u.%u.%u.%u" format. @addr must be an lvalue.
 */
#define NIPQUAD(addr) \
        NIPQUAD_BYTE(addr, 0), \
        NIPQUAD_BYTE(addr, 1), \
        NIPQUAD_BYTE(addr, 2), \
        NIPQUAD_BYTE(addr, 3)

/*
 * Print the Ethernet header found at @ptr and, for IPv4 frames, the
 * source and destination addresses of the IP header that follows it.
 *
 * @offset is only printed, it is not applied to @ptr.
 *
 * Returns -1 if the frame is neither IPv4 nor ARP (only the offset is
 * printed in that case), 0 otherwise.
 */
static int dump_network(__u16 offset, void *ptr)
{
        struct ether_header *eth = ptr;
        struct iphdr *ip;
        unsigned short ether_type;

        ulog("offset=%x: ", offset);

        ether_type = ntohs(eth->ether_type);
        if (ether_type != ETH_P_IP && ether_type != ETH_P_ARP) {
                ulog("\n");
                return -1;
        }

        /* Print the protocol in host byte order, as it is compared above. */
        ulog("MAC: proto=%04x, "
                        "src=%02x:%02x:%02x:%02x:%02x:%02x, "
                        "dst=%02x:%02x:%02x:%02x:%02x:%02x. ",
                        ether_type,
                        eth->ether_shost[0], eth->ether_shost[1],
                        eth->ether_shost[2], eth->ether_shost[3],
                        eth->ether_shost[4], eth->ether_shost[5],
                        eth->ether_dhost[0], eth->ether_dhost[1],
                        eth->ether_dhost[2], eth->ether_dhost[3],
                        eth->ether_dhost[4], eth->ether_dhost[5]);

        if (ether_type != ETH_P_IP) {
                ulog("\n");
                return 0;
        }

        /* The IP header directly follows the Ethernet header. */
        ip = (struct iphdr *)((unsigned char *)ptr + sizeof(*eth));

        ulog("%u.%u.%u.%u -> %u.%u.%u.%u.\n", NIPQUAD(ip->saddr),
                        NIPQUAD(ip->daddr));

        return 0;
}

/*
 * Hex-dump @size bytes of packet data. @ptr is the start of the mapping;
 * the data is read from the page after the first one, at @offset.
 */
static void dump_data(void *ptr, __u16 offset, int size)
{
        const unsigned char *byte = (unsigned char *)ptr + PAGE_SIZE + offset;
        int left = size;

        ulog("%p: ", ptr);
        while (left-- > 0)
                ulog("%02x ", *byte++);
        ulog("\n");
}

int main(int argc, char *argv[])
{
        struct sockaddr sa;
        int s, err;
        socklen_t len = sizeof(sa);
        void *mmap_ptr;
        struct packet_shared *ps;
        
        if (argc > 1)
                memcpy(sa.sa_data, argv[1], sizeof(sa.sa_data));
        else
                memcpy(sa.sa_data, "eth0", sizeof(sa.sa_data));

        s = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
        if (s == -1) {
                ulog("Failed to create PF_PACKET socket: %s [%d].\n", 
                                strerror(errno), errno);
                return -1;
        }

        mmap_ptr = mmap(NULL, mmap_size, PROT_READ, MAP_SHARED, s, 0);
        if (mmap_ptr == MAP_FAILED) {
                ulog("Failed to map socket %d: %s [%d].\n", s, strerror(errno), 
errno);
                err = -errno;
                goto err_out_close;
        }
        
        err = bind(s, &sa, len);
        if (err == -1) {
                ulog("Failed to bind socket %d to device %s: %s [%d].\n",
                                s, sa.sa_data, strerror(errno), errno);
                goto err_out_unmap;
        }
                
        ps = (struct packet_shared *)mmap_ptr;
        
        while (1) {
                err = dump_network(ps->offset, mmap_ptr + PAGE_SIZE + 
ps->offset);
                if (err && ps->offset)
                        dump_data(mmap_ptr, ps->offset, 32);
        }

        err = 0;
        
err_out_unmap:
        munmap(mmap_ptr, mmap_size);
err_out_close:
        close(s);

        return err;
}

Reply via email to