Implementation of the PF_KEY protocol family in the Linux kernel (1)

2011-01-10  来源:本站原创  分类:Internet  人气:91 

This document is Copyleft, owned by yfydz, and is released under the GPL. It may be freely copied and reprinted provided the document is kept intact; use for any commercial purpose is strictly prohibited.
msn: [email protected]
Source: http://yfydz.cublog.cn

1.   Foreword  

  The Linux 2.6 kernel ships with its own implementation of the PF_KEY protocol family, so it no longer has to be added as a patch as it was in 2.4. The job of the kernel's PF_KEY implementation is to maintain the kernel's security association (SA) and security policy (SP) databases, and to provide the interface to user space.

  The code discussed below is from kernel version 2.6.19.2. The PF_KEY code lives in the net/key/ directory and defines the interface between the kernel's PF_KEY and user space. That interface is defined by an RFC, so implementations are all basically similar. The internal implementation and management of SAs and SPs, however, is implementation dependent and differs from one implementation to another. In the Linux kernel it is built on the xfrm library, whose code is in the net/xfrm/ directory.

2.   Data Structures
  The data structures for SAs and SPs are defined in RFC 2367; the header file is include/linux/pfkeyv2.h. These are shared between user space and kernel space and serve only as interface data structures. The data structures the kernel actually uses internally are the xfrm structures, defined in include/net/xfrm.h.

2.1 PF_KEY  Type  sock

// Per-socket state for a PF_KEY socket: a generic sock plus two flags.
struct pfkey_sock {
 /* struct sock must be the first member of struct pfkey_sock */
 struct sock sk;
// Two fields are added on top of the ordinary sock:
// non-zero once the socket has registered
 int  registered;
// non-zero when the socket is in promiscuous mode
 int  promisc;
};

2.2   State  (SA)

The xfrm state structure is the kernel's concrete representation of an SA:
// Kernel-internal (xfrm) representation of a security association (SA).
struct xfrm_state
{
 /* Note: bydst is re-used during gc */
// Each state is linked into three hash lists:
 struct hlist_node bydst; // hash list keyed by destination address
 struct hlist_node bysrc; // hash list keyed by source address
 struct hlist_node byspi; // hash list keyed by SPI value
 atomic_t  refcnt; // reference (use) count
 spinlock_t  lock;   // lock for this state
 struct xfrm_id  id; // ID
 struct xfrm_selector sel; // state selector
 u32   genid;
 /* Key manager bits */
 struct {
  u8  state;
  u8  dying;
  u32  seq;
 } km;
 /* Parameters of this state. */
 struct {
  u32  reqid;
  u8  mode;
  u8  replay_window;
  u8  aalgo, ealgo, calgo;
  u8  flags;
  u16  family;
  xfrm_address_t saddr;
  int  header_len;
  int  trailer_len;
 } props;
 struct xfrm_lifetime_cfg lft; // lifetime configuration
 /* Data for transformer */
 struct xfrm_algo *aalg; // hash algorithm
 struct xfrm_algo *ealg; // encryption algorithm
 struct xfrm_algo *calg; // compression algorithm
 /* Data for encapsulator */
 struct xfrm_encap_tmpl *encap; // NAT-T encapsulation information
 /* Data for care-of address */
 xfrm_address_t *coaddr;
 /* IPComp needs an IPIP tunnel for handling uncompressed packets */
 struct xfrm_state *tunnel;
 /* If a tunnel, number of users + 1 */
 atomic_t  tunnel_users;
 /* State for replay detection */
 struct xfrm_replay_state replay;
 /* Replay detection state at the time we sent the last notification */
 struct xfrm_replay_state preplay;
 /* internal flag that only holds state for delayed aevent at the
  * moment
 */
 u32   xflags;
 /* Replay detection notification settings */
 u32   replay_maxage;
 u32   replay_maxdiff;
 /* Replay detection notification timer */
 struct timer_list rtimer;
 /* Statistics */
 struct xfrm_stats stats;
 struct xfrm_lifetime_cur curlft;
 struct timer_list timer;
 /* Last used time */
 u64   lastused;
 /* Reference to data common to all the instances of this
  * transformer. */
 struct xfrm_type *type;
 struct xfrm_mode *mode;
 /* Security context */
 struct xfrm_sec_ctx *security;
 /* Private data of this transformer, format is opaque,
  * interpreted by xfrm_type methods. */
 void   *data;
};

2.3   Policy  (SP)

// Kernel-internal (xfrm) representation of a security policy (SP).
struct xfrm_policy
{
 struct xfrm_policy *next; // next policy
 struct hlist_node bydst; // hash list keyed by destination address
 struct hlist_node byidx; // hash list keyed by index number
 /* This lock only affects elements except for entry. */
 rwlock_t  lock;
 atomic_t  refcnt;
 struct timer_list timer;
 u8   type;
 u32   priority;
 u32   index;
 struct xfrm_selector selector;
 struct xfrm_lifetime_cfg lft;
 struct xfrm_lifetime_cur curlft;
 struct dst_entry       *bundles;
 __u16   family;
 __u8   action;
 __u8   flags;
 __u8   dead;
 __u8   xfrm_nr;
 struct xfrm_sec_ctx *security;
 // array of up to XFRM_MAX_DEPTH templates; xfrm_nr is presumably the
 // number in use -- TODO confirm against the xfrm policy code
 struct xfrm_tmpl        xfrm_vec[XFRM_MAX_DEPTH];
};

2.4   Event
// Event record. The payload is a union of alternative u32 values, so only
// one of them is meaningful for any given event.
// NOTE(review): which union member goes with which event type is not shown
// in this excerpt; seq/pid/event are presumably the message sequence number,
// the originating process id and the event code -- confirm against the
// xfrm notification code.
struct km_event
{
 union {
  u32 hard;
  u32 proto;
  u32 byid;
  u32 aevent;
  u32 type;
 } data;
 u32 seq;
 u32 pid;
 u32 event;
};

3.   Initialization
/* net/key/af_key.c */
// Module initialisation for PF_KEY. Performs four registrations in order
// (proto, socket family, /proc entry when CONFIG_PROC_FS is set, key manager)
// and, if any step fails, undoes the earlier ones in reverse order.
// Returns 0 on success or the error code of the step that failed.
static int __init ipsec_pfkey_init(void)
{
// Register the key_proto structure, which is defined as follows:
// static struct proto key_proto = {
// .name   = "KEY",
// .owner   = THIS_MODULE,
// .obj_size = sizeof(struct pfkey_sock),
//};
// The last argument is 0, meaning no slab cache is allocated; key_proto is simply
// linked into the system's protocol list. Its main purpose is to announce the size of the pfkey sock structure.
 int err = proto_register(&key_proto, 0);
 if (err != 0)
  goto out;
// Register the operations structure of the pfkey protocol family
 err = sock_register(&pfkey_family_ops);
 if (err != 0)
  goto out_unregister_key_proto;
#ifdef CONFIG_PROC_FS
// Preset the error code: a NULL return from create_proc_read_entry is reported as -ENOMEM
 err = -ENOMEM;
// Create the read-only pfkey proc file: /proc/net/pfkey
 if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)
  goto out_sock_unregister;
#endif
// Register the notification (notify) handler pfkeyv2_mgr
 err = xfrm_register_km(&pfkeyv2_mgr);
 if (err != 0)
  goto out_remove_proc_entry;
out:
 return err;
// Error unwinding: each label undoes one step and falls through to the next.
out_remove_proc_entry:
#ifdef CONFIG_PROC_FS
 remove_proc_entry("net/pfkey", NULL);
// This label only exists when CONFIG_PROC_FS is set, because only the proc step jumps to it
out_sock_unregister:
#endif
 sock_unregister(PF_KEY);
out_unregister_key_proto:
 proto_unregister(&key_proto);
 goto out;
}

4. pfkey  Socket operation  

4.1    Establishing socket  

/* net/key/af_key.c */
// pfkey_family_ops holds the protocol-family operations used when a user program
// calls socket() to open a PF_KEY socket; its create function is invoked from
// __sock_create() (net/socket.c):
// Protocol-family descriptor for PF_KEY; passed to sock_register() in ipsec_pfkey_init().
static struct net_proto_family pfkey_family_ops = {
 .family = PF_KEY,
 .create = pfkey_create, // creation hook for new PF_KEY sockets
 .owner = THIS_MODULE,
};
//   In user space every time you open pfkey socket this function will be called  :
// Creation hook for PF_KEY sockets.
// Returns 0 on success, -EPERM without CAP_NET_ADMIN, -ESOCKTNOSUPPORT for a
// non-RAW socket type, -EPROTONOSUPPORT for a protocol other than PF_KEY_V2,
// and -ENOMEM when the sock cannot be allocated.
static int pfkey_create(struct socket *sock, int protocol)
{
 struct sock *new_sk;
// Opening a PF_KEY socket requires the CAP_NET_ADMIN capability
 if (!capable(CAP_NET_ADMIN))
  return -EPERM;
// Only SOCK_RAW sockets speaking PF_KEY_V2 are accepted
 if (sock->type != SOCK_RAW)
  return -ESOCKTNOSUPPORT;
 if (protocol != PF_KEY_V2)
  return -EPROTONOSUPPORT;
// Allocate (and clear) the sock structure
 new_sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1);
 if (new_sk == NULL)
  return -ENOMEM;
// Install the PF_KEY socket operations, then initialise the generic sock fields
 sock->ops = &pfkey_ops;
 sock_init_data(sock, new_sk);
// Set the family and the destructor of the new sock
 new_sk->sk_family = PF_KEY;
 new_sk->sk_destruct = pfkey_sock_destruct;
// Count the new socket and link it into the pfkey sock list
 atomic_inc(&pfkey_socks_nr);
 pfkey_insert(new_sk);
 return 0;
}

4.2 PF_KEY  Socket operation  

// Socket operations table for PF_KEY sockets (installed by pfkey_create()).
// Only release, poll, sendmsg and recvmsg have real implementations; every
// other operation is wired to the corresponding sock_no_* stub.
static const struct proto_ops pfkey_ops = {
 .family  = PF_KEY,
 .owner  = THIS_MODULE,
 /* Operations that make no sense on pfkey sockets. */
 .bind  = sock_no_bind,
 .connect = sock_no_connect,
 .socketpair = sock_no_socketpair,
 .accept  = sock_no_accept,
 .getname = sock_no_getname,
 .ioctl  = sock_no_ioctl,
 .listen  = sock_no_listen,
 .shutdown = sock_no_shutdown,
 .setsockopt = sock_no_setsockopt,
 .getsockopt = sock_no_getsockopt,
 .mmap  = sock_no_mmap,
 .sendpage = sock_no_sendpage,
 /* Now the operations that really occur. */
 .release = pfkey_release,
 .poll  = datagram_poll,
 .sendmsg = pfkey_sendmsg,
 .recvmsg = pfkey_recvmsg,
};

Most socket operations are left undefined for PF_KEY sockets. This is because PF_KEY data is only exchanged between kernel space and user space on the local machine, so none of the genuinely network-related operations are defined; sending and receiving are purely communication between the kernel and user space.

4.2.1   Release socket  

// Release hook for PF_KEY sockets. Always returns 0; does nothing when the
// socket has no sock attached.
static int pfkey_release(struct socket *sock)
{
 struct sock *dying = sock->sk;
 if (dying != NULL) {
// Unlink the sock from the pfkey sock list
  pfkey_remove(dying);
// Orphan the sock and detach it from the socket
  sock_orphan(dying);
  sock->sk = NULL;
// Drop anything still sitting in the write queue
  skb_queue_purge(&dying->sk_write_queue);
// Drop our reference to the sock
  sock_put(dying);
 }
 return 0;
}

4.2.2   Polling (select/poll)  

  The standard datagram poll function is used: datagram_poll

4.2.3   Send data  

  This actually sends data from a user-space program to kernel space (it corresponds to a write on the socket):
// sendmsg handler for PF_KEY sockets: copies the caller's message out of
// msg->msg_iov into a freshly allocated skb and passes it to pfkey_process().
// Returns len on success, otherwise a negative error code. If processing
// fails after the SADB header was located and pfkey_error() returns 0,
// the call still returns len.
static int pfkey_sendmsg(struct kiocb *kiocb,
    struct socket *sock, struct msghdr *msg, size_t len)
{
 struct sock *sk = sock->sk;
 struct sk_buff *skb = NULL;
 struct sadb_msg *hdr = NULL;
 int err;
 err = -EOPNOTSUPP;
// PF_KEY does not support the MSG_OOB flag
 if (msg->msg_flags & MSG_OOB)
  goto out;
 err = -EMSGSIZE;
// Reject messages longer than the send buffer size minus 32 bytes
 if ((unsigned)len > sk->sk_sndbuf - 32)
  goto out;
 err = -ENOBUFS;
// Allocate an skb large enough for the message
 skb = alloc_skb(len, GFP_KERNEL);
 if (skb == NULL)
  goto out;
 err = -EFAULT;
// Copy the message from the caller's buffer into the skb
 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
  goto out;
// Get a pointer to the SADB header of the message; on failure err is set by the helper
 hdr = pfkey_get_base_msg(skb, &err);
 if (!hdr)
  goto out;
 mutex_lock(&xfrm_cfg_mutex);
// Process the PF_KEY message while holding xfrm_cfg_mutex
 err = pfkey_process(sk, skb, hdr);
 mutex_unlock(&xfrm_cfg_mutex);
out:
// hdr is non-NULL only once the header was found, so only failures from
// pfkey_process() go through pfkey_error(); if that returns 0 the error is cleared
 if (err && hdr && pfkey_error(hdr, err, sk) == 0)
  err = 0;
 if (skb)
  kfree_skb(skb);
// GNU "?:" shorthand: yields err when it is non-zero, otherwise len
 return err ? : len;
}

// Dispatches one PF_KEY message to its handler.
// Returns the result of parse_exthdrs() if that fails, -EOPNOTSUPP when no
// handler exists for the message type, otherwise the handler's return value.
static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
{
 void *ext_hdrs[SADB_EXT_MAX];
 int rc;
// Hand a clone of the message to the promiscuous-mode pfkey socks
 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
   BROADCAST_PROMISC_ONLY, NULL);
// Start from an all-NULL extension table, then fill it in from the message
 memset(ext_hdrs, 0, sizeof(ext_hdrs));
 rc = parse_exthdrs(skb, hdr, ext_hdrs);
 if (rc)
  return rc;
// Look up the handler by message type; a missing entry means the type is unsupported
 if (!pfkey_funcs[hdr->sadb_msg_type])
  return -EOPNOTSUPP;
 return pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs);
}
4.2.4   Receive data  

  This actually delivers data from kernel space to a user-space program (it corresponds to a read on the socket):
// recvmsg handler for PF_KEY sockets: takes one queued message and copies it
// into the caller's buffer.
// Returns the number of bytes copied (or the full message length when
// MSG_TRUNC was passed in flags), -EINVAL for unsupported flags, or the error
// reported by skb_recv_datagram() / skb_copy_datagram_iovec().
static int pfkey_recvmsg(struct kiocb *kiocb,
    struct socket *sock, struct msghdr *msg, size_t len,
    int flags)
{
 struct sock *sk = sock->sk;
 struct sk_buff *pkt;
 int nbytes;
 int err = -EINVAL;
// Only these four flags are supported
 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
  return err;
 msg->msg_namelen = 0;
// Take the next message from the socket
 pkt = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 if (pkt == NULL)
  return err;
 nbytes = pkt->len;
// Message longer than the caller's buffer: mark it truncated and copy only len bytes
 if (nbytes > len) {
  msg->msg_flags |= MSG_TRUNC;
  nbytes = len;
 }
 pkt->h.raw = pkt->data;
// Copy the message into the caller's buffer
 err = skb_copy_datagram_iovec(pkt, 0, msg->msg_iov, nbytes);
 if (!err) {
// Record the timestamp, then work out the return length
  sock_recv_timestamp(msg, sk, pkt);
  err = (flags & MSG_TRUNC) ? pkt->len : nbytes;
 }
// The skb is released whether or not the copy succeeded
 skb_free_datagram(sk, pkt);
 return err;
}
4.2.5 pfkey  Broadcast  

A pfkey broadcast is how the kernel sends its responses to user space. Every user-space program that has a PF_KEY socket open can receive them, so when a program receives a message it has to determine whether the message is meant for it and ignore it if not. This is much like a netlink broadcast.
/* Send SKB to all pfkey sockets matching selected criteria.  */
#define BROADCAST_ALL  0
#define BROADCAST_ONE  1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
// Delivers skb to the pfkey socks selected by broadcast_flags, and finally to
// one_sk when it is non-NULL. The skb passed in is always released before
// returning (except for the early -ENOMEM return when skb is NULL).
// Returns -ENOMEM for a NULL skb; otherwise the result of the final send to
// one_sk if given, else -ESRCH unless a BROADCAST_REGISTERED delivery replaced it.
static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
      int broadcast_flags, struct sock *one_sk)
{
 struct sock *sk;
 struct hlist_node *node;
 struct sk_buff *skb2 = NULL;
 int err = -ESRCH;
 /* XXX Do we need something like netlink_overrun?  I think
  * XXX PF_KEY socket apps will not mind current behavior.
  */
// A NULL skb typically comes from a failed skb_clone() in the caller (see pfkey_process)
 if (!skb)
  return -ENOMEM;
 pfkey_lock_table();
// Walk every sock in the pfkey table
 sk_for_each(sk, node, &pfkey_table) {
// Get the pfkey_sock that owns this sock
  struct pfkey_sock *pfk = pfkey_sk(sk);
  int err2;
  /* Yes, it means that if you are meant to receive this
   * pfkey message you receive it twice as promiscuous
   * socket.
   */
// A promiscuous sock is sent a copy here first; because it may also match the
// broadcast criteria below, a promiscuous pfkey sock normally receives the message twice
  if (pfk->promisc)
   pfkey_broadcast_one(skb, &skb2, allocation, sk);
  /* the exact target will be processed later */
// one_sk is skipped inside the loop; it is served after the loop so that it
// receives the message last
  if (sk == one_sk)
   continue;
// Anything other than BROADCAST_ALL (0) restricts who receives the message
  if (broadcast_flags != BROADCAST_ALL) {
// Promiscuous-only broadcast: those socks were already served above, skip the rest
   if (broadcast_flags & BROADCAST_PROMISC_ONLY)
    continue;
// Registered-only broadcast: skip socks that have not registered
   if ((broadcast_flags & BROADCAST_REGISTERED) &&
       !pfk->registered)
    continue;
// BROADCAST_ONE is used together with one_sk: the message goes only to one_sk
// and to the promiscuous pfkey socks
   if (broadcast_flags & BROADCAST_ONE)
    continue;
  }
// Deliver to this pfkey sock
  err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
  /* Error is cleared after successful sending to at least one
   * registered KM */
// err is only overwritten while it is still non-zero, so once one delivery
// has succeeded (err2 == 0) later failures no longer change it
  if ((broadcast_flags & BROADCAST_REGISTERED) && err)
   err = err2;
 }
 pfkey_unlock_table();
// If one_sk was given, send to it now so that it is the last to receive the message
 if (one_sk != NULL)
  err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
// Release a leftover unsent copy, then the original skb
 if (skb2)
  kfree_skb(skb2);
 kfree_skb(skb);
 return err;
}

//   Send a packet
// Queues one copy of skb on sk's receive queue.
// *skb2 caches a not-yet-delivered copy between calls: it is filled when
// empty and reset to NULL once the copy has been queued.
// Returns 0 when the message was queued, -ENOBUFS when no copy could be
// obtained or the sock's receive memory already exceeds sk_rcvbuf.
static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
          gfp_t allocation, struct sock *sk)
{
 int rc = -ENOBUFS;
 struct sk_buff *out;
 sock_hold(sk);
 if (*skb2 == NULL) {
  if (atomic_read(&skb->users) == 1) {
// Sole user: send skb itself, bumping its use count because delivery will drop one reference
   *skb2 = skb;
   atomic_inc(&skb->users);
  } else {
// Otherwise send a clone of skb
   *skb2 = skb_clone(skb, allocation);
  }
 }
 out = *skb2;
// Deliver only if a copy exists and the receive buffer is not over its limit
 if (out != NULL && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
  skb_orphan(out);
  skb_set_owner_r(out, sk);
  skb_queue_tail(&sk->sk_receive_queue, out);
  sk->sk_data_ready(sk, out->len);
// The copy now belongs to the receive queue; a new one is needed next time
  *skb2 = NULL;
  rc = 0;
 }
 sock_put(sk);
 return rc;
}
......   Continued   ......

  At  : 2007-05-07,  Modified  : 2007-05-07 20:10,  View 3512 times  ,  There are 2 comments     Recommend Complaint
          Friends  :   Site users time  :2007-05-30 10:54:31 IP  Address  :58.211.149.  ★  

  Aren't the directions of pfkey_sendmsg and pfkey_recvmsg described the wrong way round? pfkey_sendmsg corresponds to a write on the socket, sending data from user space to kernel space; pfkey_recvmsg corresponds to a read on the socket: the kernel sends a notification to the user, and the user reads it.

BTW:   Man, look at the code is only linked  ,  To see whether it is doing otherwise is just a sentence  C  Language, what use is it  ?

          Friends  : yfydz         Time  :2007-05-30 13:43:09 IP  Address  :218.247.216.  ★  

  Well, is written backwards  

  Code and then how, taken together, do not have to look at a sentence  ?  I have cut open into blocks, and do not want to look around to see the process too much detail can be
相关文章