http://www.skyfree.org/linux/kernel_network/socket.html
INET
Sockets Tour: Where are the roots of socket()?
Last
updated 2002-01-31 10:43 am
SUMMARY
socket(), a glibc-2.0 library
function, executes a system call (software interrupt 0x80) with a
function number of 102 and a sub-function number of 1. Finally, the
system call handler (sys_socketcall) dispatches to sys_socket().
|
I can't find the definition of socket() in Linux kernel source listings!
| Do you know where the
socket function is defined? |
| It is not defined in
the kernel source; it resides in glibc-2.0. |
| Let's explore the source! |
Test code
| To examine the origin of socket
function, I wrote a simple program. |
| Do not forget to include the
<sys/socket.h> header file. |
| socktest.c |
#include <sys/socket.h>
/*
 * Minimal probe program: it exists only so that the executable carries a
 * reference to socket(), which nm and gdb can then locate.  The arguments
 * 1, 2, 3 are arbitrary placeholders; the result of the call is not used.
 */
int main(int argc, char** argv)
{
	int res = socket( 1, 2, 3 ); /* Call socket function */

	/* Nothing below consumes these; silence unused warnings explicitly. */
	(void)argc;
	(void)argv;
	(void)res;

	/*
	 * Return an explicit exit status: falling off the end of main()
	 * leaves the status unspecified before C99.
	 */
	return 0;
}
|
|
| Then compile the source and
generate an executable file. |
| Dump a.out using the nm command. |
|
nm reveals
that socket() is a library function from glibc-2.0.
|
| The location of socket() |
~$ gcc socktest.c
~$ nm a.out
.....
080483d4 T main
.....
U socket@@GLIBC_2.0
|
|
|
Next,
compile the source with debugging (-g) and static link (-static) options.
|
|
This
version of a.out is huge (the former is 4.7K bytes and this one is
246K bytes!), but it includes the body of socket().
|
| Execute gdb command. |
|
Finally,
you will find the substance of socket function.
|
It simply
invokes int 0x80 with a function number of 102 (decimal, 0x66) and a sub-function
number of 1.
|
| Compile, link statically and
disassemble |
~$ gcc -static -g socktest.c
~$ gdb a.out
GNU gdb 19990928
Copyright 1998 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i686-pc-linux-gnu"...
(gdb) disassemble socket
Dump of assembler code for function socket:
0x804ca90 <socket>: mov %ebx,%edx
0x804ca92 <socket+2>: mov $0x66,%eax
0x804ca97 <socket+7>: mov $0x1,%ebx
0x804ca9c <socket+12>: lea 0x4(%esp,1),%ecx
0x804caa0 <socket+16>: int $0x80
0x804caa2 <socket+18>: mov %edx,%ebx
0x804caa4 <socket+20>: cmp $0xffffff83,%eax
0x804caa7 <socket+23>: jae 0x804ccd0 <__syscall_error>
0x804caad <socket+29>: ret
0x804caae <socket+30>: nop
0x804caaf <socket+31>: nop
End of assembler dump.
(gdb)
|
|
Into the kernel
| There is a system call table
(sys_call_table) in the kernel. |
| The 102nd entry of the table is sys_socketcall. |
| arch/i386/kernel/entry.S |
ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
GET_CURRENT(%ebx)
cmpl $(NR_syscalls),%eax
jae badsys
testb $0x02,tsk_ptrace(%ebx) # PT_TRACESYS
jne tracesys
call *SYMBOL_NAME(sys_call_table)(,%eax,4)
movl %eax,EAX(%esp) # save the return value
.....
.....
.data
ENTRY(sys_call_table)
.....
.long SYMBOL_NAME(sys_fstatfs) /* 100 */
.long SYMBOL_NAME(sys_ioperm)
.long SYMBOL_NAME(sys_socketcall) /* 102 */
.....
|
|
| Sub-function number 1 is mapped to sys_socket() |
| include/linux/net.h |
/*
 * Sub-function numbers for sys_socketcall().  The value is passed as the
 * "call" argument and selects which sys_*() routine handles the request;
 * sys_socketcall() accepts values from 1 up to SYS_RECVMSG inclusive.
 */
#define SYS_SOCKET 1 /* sys_socket(2) */
#define SYS_BIND 2 /* sys_bind(2) */
#define SYS_CONNECT 3 /* sys_connect(2) */
#define SYS_LISTEN 4 /* sys_listen(2) */
#define SYS_ACCEPT 5 /* sys_accept(2) */
#define SYS_GETSOCKNAME 6 /* sys_getsockname(2) */
#define SYS_GETPEERNAME 7 /* sys_getpeername(2) */
#define SYS_SOCKETPAIR 8 /* sys_socketpair(2) */
#define SYS_SEND 9 /* sys_send(2) */
#define SYS_RECV 10 /* sys_recv(2) */
#define SYS_SENDTO 11 /* sys_sendto(2) */
#define SYS_RECVFROM 12 /* sys_recvfrom(2) */
#define SYS_SHUTDOWN 13 /* sys_shutdown(2) */
#define SYS_SETSOCKOPT 14 /* sys_setsockopt(2) */
#define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */
|
|
| Here is the entrance to the sockets
world! |
| net/socket.c/sys_socketcall |
/*
 *	sys_socketcall - single entry point multiplexing the socket calls.
 *
 *	@call: sub-function number (1 .. SYS_RECVMSG) selecting the handler
 *	@args: user-space pointer to the argument words for that handler
 *
 *	An out-of-range @call is rejected with -EINVAL.  Otherwise the
 *	arguments are copied in from user space, using the size found in
 *	nargs[call] (nargs[] is defined outside this listing), and handed to
 *	the matching sys_*() routine, whose result is returned.  A failed
 *	copy yields -EFAULT.
 *
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */
asmlinkage long sys_socketcall(int call, unsigned long *args)
{
	unsigned long uarg[6];
	int ret;

	if (!(call >= 1 && call <= SYS_RECVMSG))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(uarg, args, nargs[call]) != 0)
		return -EFAULT;

	switch (call) {
	case SYS_SOCKET:
		ret = sys_socket(uarg[0], uarg[1], uarg[2]);
		break;
	case SYS_BIND:
		ret = sys_bind(uarg[0], (struct sockaddr *)uarg[1], uarg[2]);
		break;
	case SYS_CONNECT:
		ret = sys_connect(uarg[0], (struct sockaddr *)uarg[1], uarg[2]);
		break;
	case SYS_LISTEN:
		ret = sys_listen(uarg[0], uarg[1]);
		break;
	case SYS_ACCEPT:
		ret = sys_accept(uarg[0], (struct sockaddr *)uarg[1],
				 (int *)uarg[2]);
		break;
	case SYS_GETSOCKNAME:
		ret = sys_getsockname(uarg[0], (struct sockaddr *)uarg[1],
				      (int *)uarg[2]);
		break;
	case SYS_GETPEERNAME:
		ret = sys_getpeername(uarg[0], (struct sockaddr *)uarg[1],
				      (int *)uarg[2]);
		break;
	case SYS_SOCKETPAIR:
		ret = sys_socketpair(uarg[0], uarg[1], uarg[2],
				     (int *)uarg[3]);
		break;
	case SYS_SEND:
		ret = sys_send(uarg[0], (void *)uarg[1], uarg[2], uarg[3]);
		break;
	case SYS_SENDTO:
		ret = sys_sendto(uarg[0], (void *)uarg[1], uarg[2], uarg[3],
				 (struct sockaddr *)uarg[4], uarg[5]);
		break;
	case SYS_RECV:
		ret = sys_recv(uarg[0], (void *)uarg[1], uarg[2], uarg[3]);
		break;
	case SYS_RECVFROM:
		ret = sys_recvfrom(uarg[0], (void *)uarg[1], uarg[2], uarg[3],
				   (struct sockaddr *)uarg[4], (int *)uarg[5]);
		break;
	case SYS_SHUTDOWN:
		ret = sys_shutdown(uarg[0], uarg[1]);
		break;
	case SYS_SETSOCKOPT:
		ret = sys_setsockopt(uarg[0], uarg[1], uarg[2],
				     (char *)uarg[3], uarg[4]);
		break;
	case SYS_GETSOCKOPT:
		ret = sys_getsockopt(uarg[0], uarg[1], uarg[2],
				     (char *)uarg[3], (int *)uarg[4]);
		break;
	case SYS_SENDMSG:
		ret = sys_sendmsg(uarg[0], (struct msghdr *)uarg[1], uarg[2]);
		break;
	case SYS_RECVMSG:
		ret = sys_recvmsg(uarg[0], (struct msghdr *)uarg[1], uarg[2]);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
|
|
| Finally, we have reached sys_socket()
from socket()! |
| net/socket.c/sys_socket() |
/*
 *	sys_socket - create a socket and bind it to a file descriptor.
 *
 *	Builds the socket with sock_create() and then maps it onto a file
 *	descriptor with sock_map_fd().  A negative value from either step is
 *	returned to the caller; if the mapping step fails the freshly created
 *	socket is released first.  On success the value produced by
 *	sock_map_fd() is returned.
 */
asmlinkage long sys_socket(int family, int type, int protocol)
{
	struct socket *sock;
	int fd;
	int err = sock_create(family, type, protocol, &sock);

	if (err < 0)
		return err;

	fd = sock_map_fd(sock);
	if (fd < 0)
		sock_release(sock);

	/* It may be already another descriptor 8) Not kernel problem. */
	return fd;
}
/*
 * sock_create - allocate a socket and let the protocol family initialise it.
 *
 * family, type, protocol: as requested by the caller; type is stored in
 * the new socket and protocol is passed to the family's create hook.
 * res: receives the new socket on success; left untouched on failure.
 *
 * Returns -EINVAL when family is outside 0..NPROTO-1 or has no entry in
 * net_families[], -ENFILE when sock_alloc() fails, or the negative value
 * from the family's create hook.  On success the (non-negative) value
 * returned by that hook is passed back.
 *
 * NOTE: part of the function is elided in this listing (the "....." line).
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
int i;
struct socket *sock;
/*
* Check that the protocol family is in range
*/
if(family<0 || family>=NPROTO)
return -EINVAL;
.....
/* Held from here until the "out" label, covering every path below. */
net_family_read_lock();
if (net_families[family] == NULL) {
i = -EINVAL;
goto out;
}
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
if (!(sock = sock_alloc()))
{
printk(KERN_WARNING "socket: no more sockets\n");
i = -ENFILE; /* Not exactly a match, but its the
closest posix thing */
goto out;
}
sock->type = type;
/* On failure the half-built socket is released before bailing out. */
if ((i = net_families[family]->create(sock, protocol)) < 0)
{
sock_release(sock);
goto out;
}
*res = sock;
out:
net_family_read_unlock();
return i;
}
/**
 *	sock_alloc - allocate a socket
 *
 *	Obtains an empty inode, looks up the socket associated with it via
 *	socki_lookup(), and initialises both: the inode is marked as a socket
 *	owned by the current fsuid/fsgid, and the socket starts out
 *	unconnected with no ops, sk or file attached.  The per-CPU
 *	sockets_in_use counter is bumped.  Returns the socket, or NULL when
 *	no inode could be obtained.
 */
struct socket *sock_alloc(void)
{
	struct socket *s;
	struct inode *ino = get_empty_inode();

	if (ino == NULL)
		return NULL;

	s = socki_lookup(ino);

	/* Inode side: mark it as a socket owned by the caller. */
	ino->i_mode = S_IFSOCK|S_IRWXUGO;
	ino->i_sock = 1;
	ino->i_uid = current->fsuid;
	ino->i_gid = current->fsgid;

	/* Socket side: blank, unconnected state. */
	s->inode = ino;
	init_waitqueue_head(&s->wait);
	s->fasync_list = NULL;
	s->state = SS_UNCONNECTED;
	s->flags = 0;
	s->ops = NULL;
	s->sk = NULL;
	s->file = NULL;

	++sockets_in_use[smp_processor_id()].counter;
	return s;
}
/*
 *	socki_lookup - return the socket stored in an inode.
 *
 *	The result is the address of the socket_i member of the inode's
 *	"u" field; nothing is allocated or modified.
 */
extern __inline__ struct socket *socki_lookup(struct inode *inode)
{
	struct socket *embedded = &inode->u.socket_i;

	return embedded;
}
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	A file structure is created and mapped into the fd space of the
 *	current process.  On success the file descriptor is returned and
 *	the file struct is implicitly stored in sock->file.  On failure a
 *	negative value is returned: whatever get_unused_fd() reported,
 *	-ENFILE when no file structure is available, or -ENOMEM when the
 *	dentry cannot be allocated.
 *
 *	Note that another thread may close the file descriptor before we
 *	return from this function.  We rely on the fact that we do not
 *	refer to the socket after mapping.  If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
static int sock_map_fd(struct socket *sock)
{
	struct file *filp;
	/* Find a file descriptor suitable for return to the user. */
	int fd = get_unused_fd();

	if (fd < 0)
		return fd;

	filp = get_empty_filp();
	if (filp == NULL) {
		put_unused_fd(fd);
		return -ENFILE;
	}

	filp->f_dentry = d_alloc_root(sock->inode);
	/* MOUNT_REWRITE: set to sockfs internal vfsmnt */
	filp->f_vfsmnt = NULL;
	if (filp->f_dentry == NULL) {
		/* Undo both acquisitions before reporting the failure. */
		put_filp(filp);
		put_unused_fd(fd);
		return -ENOMEM;
	}

	sock->file = filp;
	filp->f_op = &socket_file_ops;
	filp->f_mode = 3;	/* presumably read|write mode bits -- confirm */
	filp->f_flags = O_RDWR;
	filp->f_pos = 0;
	fd_install(fd, filp);

	return fd;
}
/*
 *	File operations table installed on every socket file.
 *
 *	Socket files have a set of 'special' operations as well as the
 *	generic file ones.  These don't appear in the operation structures
 *	but are done directly via the socketcall() multiplexor.
 */
static struct file_operations socket_file_ops = {
	.llseek  = sock_lseek,
	.read    = sock_read,
	.write   = sock_write,
	.poll    = sock_poll,
	.ioctl   = sock_ioctl,
	.mmap    = sock_mmap,
	.open    = sock_no_open, /* special open code to disallow open via /proc */
	.release = sock_close,
	.fasync  = sock_fasync,
	.readv   = sock_readv,
	.writev  = sock_writev
};
|
|
To be continued.
|