mirror of https://github.com/Qortal/Brooklyn.git synced 2025-01-30 14:52:17 +00:00

T3Q being reported to gay IRS cuz he is gay

Update / Redo the entire stack to juice up like no other was juiced before.
commit 442d22459d (parent 4dac4c855c)
Scare Crowe, 2021-10-22 13:54:53 +05:00
124 changed files with 56242 additions and 3 deletions

certs/.gitignore (vendored): 3 changed lines

@@ -1,3 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
/x509_certificate_list
/x509_revocation_list

init/do_mounts_md.c (new file): 304 lines

@@ -0,0 +1,304 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/delay.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
#include "do_mounts.h"
/*
* When md (and any required personalities) are compiled into the kernel
* (not a module), arrays can be assembled at boot time with AUTODETECT,
* where specially marked partitions are registered with md_autodetect_dev(),
* and with MD_BOOT where devices to be collected are given on the boot line
* with md=.....
* The code for that is here.
*/
#ifdef CONFIG_MD_AUTODETECT
static int __initdata raid_noautodetect;
#else
static int __initdata raid_noautodetect=1;
#endif
static int __initdata raid_autopart;
static struct {
int minor;
int partitioned;
int level;
int chunk;
char *device_names;
} md_setup_args[256] __initdata;
static int md_setup_ents __initdata;
/*
* Parse the command-line parameters given to our kernel, but do not
* actually try to invoke the MD device now; that is handled by
* md_setup_drive after the low-level disk drivers have initialised.
*
* 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
* assigns the task of parsing integer arguments to the
* invoked program now). Added ability to initialise all
* the MD devices (by specifying multiple "md=" lines)
* instead of just one. -- KTK
* 18May2000: Added support for persistent-superblock arrays:
* md=n,0,factor,fault,device-list uses RAID0 for device n
* md=n,-1,factor,fault,device-list uses LINEAR for device n
* md=n,device-list reads a RAID superblock from the devices
* elements in device-list are read by name_to_kdev_t so can be
* a hex number or something like /dev/hda1 /dev/sdb
* 2001-06-03: Dave Cinege <dcinege@psychosis.com>
* Shifted name_to_kdev_t() and related operations to md_set_drive()
* for later execution. Rewrote section to make devfs compatible.
*/
static int __init md_setup(char *str)
{
int minor, level, factor, fault, partitioned = 0;
char *pername = "";
char *str1;
int ent;
if (*str == 'd') {
partitioned = 1;
str++;
}
if (get_option(&str, &minor) != 2) { /* MD Number */
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
return 0;
}
str1 = str;
for (ent=0 ; ent< md_setup_ents ; ent++)
if (md_setup_args[ent].minor == minor &&
md_setup_args[ent].partitioned == partitioned) {
printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
"Replacing previous definition.\n", partitioned?"d":"", minor);
break;
}
if (ent >= ARRAY_SIZE(md_setup_args)) {
printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
return 0;
}
if (ent >= md_setup_ents)
md_setup_ents++;
switch (get_option(&str, &level)) { /* RAID level */
case 2: /* could be 0 or -1.. */
if (level == 0 || level == LEVEL_LINEAR) {
if (get_option(&str, &factor) != 2 || /* Chunk Size */
get_option(&str, &fault) != 2) {
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
return 0;
}
md_setup_args[ent].level = level;
md_setup_args[ent].chunk = 1 << (factor+12);
if (level == LEVEL_LINEAR)
pername = "linear";
else
pername = "raid0";
break;
}
/* FALL THROUGH */
case 1: /* the first device is numeric */
str = str1;
/* FALL THROUGH */
case 0:
md_setup_args[ent].level = LEVEL_NONE;
pername="super-block";
}
printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
minor, pername, str);
md_setup_args[ent].device_names = str;
md_setup_args[ent].partitioned = partitioned;
md_setup_args[ent].minor = minor;
return 1;
}
static void __init md_setup_drive(void)
{
int minor, i, ent, partitioned;
dev_t dev;
dev_t devices[MD_SB_DISKS+1];
for (ent = 0; ent < md_setup_ents ; ent++) {
int fd;
int err = 0;
char *devname;
mdu_disk_info_t dinfo;
char name[16];
minor = md_setup_args[ent].minor;
partitioned = md_setup_args[ent].partitioned;
devname = md_setup_args[ent].device_names;
sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
if (partitioned)
dev = MKDEV(mdp_major, minor << MdpMinorShift);
else
dev = MKDEV(MD_MAJOR, minor);
create_dev(name, dev);
for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
char *p;
char comp_name[64];
u32 rdev;
p = strchr(devname, ',');
if (p)
*p++ = 0;
dev = name_to_dev_t(devname);
if (strncmp(devname, "/dev/", 5) == 0)
devname += 5;
snprintf(comp_name, 63, "/dev/%s", devname);
rdev = bstat(comp_name);
if (rdev)
dev = new_decode_dev(rdev);
if (!dev) {
printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
break;
}
devices[i] = dev;
devname = p;
}
devices[i] = 0;
if (!i)
continue;
printk(KERN_INFO "md: Loading md%s%d: %s\n",
partitioned ? "_d" : "", minor,
md_setup_args[ent].device_names);
fd = ksys_open(name, 0, 0);
if (fd < 0) {
printk(KERN_ERR "md: open failed - cannot start "
"array %s\n", name);
continue;
}
if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
printk(KERN_WARNING
"md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
minor);
ksys_close(fd);
continue;
}
if (md_setup_args[ent].level != LEVEL_NONE) {
/* non-persistent */
mdu_array_info_t ainfo;
ainfo.level = md_setup_args[ent].level;
ainfo.size = 0;
ainfo.nr_disks =0;
ainfo.raid_disks =0;
while (devices[ainfo.raid_disks])
ainfo.raid_disks++;
ainfo.md_minor =minor;
ainfo.not_persistent = 1;
ainfo.state = (1 << MD_SB_CLEAN);
ainfo.layout = 0;
ainfo.chunk_size = md_setup_args[ent].chunk;
err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
for (i = 0; !err && i <= MD_SB_DISKS; i++) {
dev = devices[i];
if (!dev)
break;
dinfo.number = i;
dinfo.raid_disk = i;
dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
dinfo.major = MAJOR(dev);
dinfo.minor = MINOR(dev);
err = ksys_ioctl(fd, ADD_NEW_DISK,
(long)&dinfo);
}
} else {
/* persistent */
for (i = 0; i <= MD_SB_DISKS; i++) {
dev = devices[i];
if (!dev)
break;
dinfo.major = MAJOR(dev);
dinfo.minor = MINOR(dev);
ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
}
}
if (!err)
err = ksys_ioctl(fd, RUN_ARRAY, 0);
if (err)
printk(KERN_WARNING "md: starting md%d failed\n", minor);
else {
/* reread the partition table.
* I (neilb) am not sure why this is needed, but I cannot
* boot a kernel with devfs compiled in from partitioned md
* array without it
*/
ksys_close(fd);
fd = ksys_open(name, 0, 0);
ksys_ioctl(fd, BLKRRPART, 0);
}
ksys_close(fd);
}
}
static int __init raid_setup(char *str)
{
int len, pos;
len = strlen(str) + 1;
pos = 0;
while (pos < len) {
char *comma = strchr(str+pos, ',');
int wlen;
if (comma)
wlen = (comma-str)-pos;
else wlen = (len-1)-pos;
if (!strncmp(str, "noautodetect", wlen))
raid_noautodetect = 1;
if (!strncmp(str, "autodetect", wlen))
raid_noautodetect = 0;
if (strncmp(str, "partitionable", wlen)==0)
raid_autopart = 1;
if (strncmp(str, "part", wlen)==0)
raid_autopart = 1;
pos += wlen+1;
}
return 1;
}
__setup("raid=", raid_setup);
__setup("md=", md_setup);
static void __init autodetect_raid(void)
{
int fd;
/*
* Since we don't want to detect and use half a raid array, we need to
* wait for the known devices to complete their probing
*/
printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n");
printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");
wait_for_device_probe();
fd = ksys_open("/dev/md0", 0, 0);
if (fd >= 0) {
ksys_ioctl(fd, RAID_AUTORUN, raid_autopart);
ksys_close(fd);
}
}
void __init md_run_setup(void)
{
create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
if (raid_noautodetect)
printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
else
autodetect_raid();
md_setup_drive();
}
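As a worked example of the md= syntax documented at the top of this file (device names are illustrative): md=0,/dev/sda1,/dev/sdb1 assembles md0 by reading persistent RAID superblocks from the two partitions, md=d1,/dev/sdc1,/dev/sdd1 does the same for the partitionable array md_d1, and md=2,0,4,0,/dev/sde1,/dev/sdf1 builds md2 as a non-persistent RAID0 set with chunk factor 4 (a 64 KiB chunk, since the chunk size is 1 << (factor + 12)). Adding raid=noautodetect on the same command line disables the autodetection path handled by raid_setup() above.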

net/ceph/ceph_fs.c (new file): 104 lines

@@ -0,0 +1,104 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Some non-inline ceph helpers
*/
#include <linux/module.h>
#include <linux/ceph/types.h>
/*
* return true if @layout appears to be valid
*/
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
{
__u32 su = layout->stripe_unit;
__u32 sc = layout->stripe_count;
__u32 os = layout->object_size;
/* stripe unit, object size must be non-zero, 64k increment */
if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1)))
return 0;
if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1)))
return 0;
/* object size must be a multiple of stripe unit */
if (os < su || os % su)
return 0;
/* stripe count must be non-zero */
if (!sc)
return 0;
return 1;
}
void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
struct ceph_file_layout_legacy *legacy)
{
fl->stripe_unit = le32_to_cpu(legacy->fl_stripe_unit);
fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
fl->object_size = le32_to_cpu(legacy->fl_object_size);
fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
fl->stripe_count == 0 && fl->object_size == 0)
fl->pool_id = -1;
}
EXPORT_SYMBOL(ceph_file_layout_from_legacy);
void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
struct ceph_file_layout_legacy *legacy)
{
legacy->fl_stripe_unit = cpu_to_le32(fl->stripe_unit);
legacy->fl_stripe_count = cpu_to_le32(fl->stripe_count);
legacy->fl_object_size = cpu_to_le32(fl->object_size);
if (fl->pool_id >= 0)
legacy->fl_pg_pool = cpu_to_le32(fl->pool_id);
else
legacy->fl_pg_pool = 0;
}
EXPORT_SYMBOL(ceph_file_layout_to_legacy);
int ceph_flags_to_mode(int flags)
{
int mode;
#ifdef O_DIRECTORY /* fixme */
if ((flags & O_DIRECTORY) == O_DIRECTORY)
return CEPH_FILE_MODE_PIN;
#endif
switch (flags & O_ACCMODE) {
case O_WRONLY:
mode = CEPH_FILE_MODE_WR;
break;
case O_RDONLY:
mode = CEPH_FILE_MODE_RD;
break;
case O_RDWR:
case O_ACCMODE: /* this is what the VFS does */
mode = CEPH_FILE_MODE_RDWR;
break;
}
#ifdef O_LAZY
if (flags & O_LAZY)
mode |= CEPH_FILE_MODE_LAZY;
#endif
return mode;
}
EXPORT_SYMBOL(ceph_flags_to_mode);
int ceph_caps_for_mode(int mode)
{
int caps = CEPH_CAP_PIN;
if (mode & CEPH_FILE_MODE_RD)
caps |= CEPH_CAP_FILE_SHARED |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
if (mode & CEPH_FILE_MODE_WR)
caps |= CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
if (mode & CEPH_FILE_MODE_LAZY)
caps |= CEPH_CAP_FILE_LAZYIO;
return caps;
}
EXPORT_SYMBOL(ceph_caps_for_mode);
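A minimal sketch of how the two helpers above fit together (not part of this commit; the wrapper function name is hypothetical): a client open path converts POSIX open flags into a ceph file mode, then asks for the matching capability mask.

#include <linux/fcntl.h>
#include <linux/ceph/ceph_fs.h>

/* Hypothetical helper: translate open(2) flags into the caps we want to hold. */
static int example_wanted_caps(int open_flags)
{
	int mode = ceph_flags_to_mode(open_flags);	/* e.g. O_RDWR -> CEPH_FILE_MODE_RDWR */

	return ceph_caps_for_mode(mode);		/* e.g. adds FILE_WR/BUFFER caps for write modes */
}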

net/core/ethtool.c (new file): 3116 lines

File diff suppressed because it is too large.

net/ipv4/udp_tunnel.c (new file): 226 lines

@@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
int err;
struct socket *sock = NULL;
struct sockaddr_in udp_addr;
err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
if (cfg->bind_ifindex) {
err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
(void *)&cfg->bind_ifindex,
sizeof(cfg->bind_ifindex));
if (err < 0)
goto error;
}
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
sizeof(udp_addr));
if (err < 0)
goto error;
if (cfg->peer_udp_port) {
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->peer_ip;
udp_addr.sin_port = cfg->peer_udp_port;
err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
sizeof(udp_addr), 0);
if (err < 0)
goto error;
}
sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
*sockp = sock;
return 0;
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
*sockp = NULL;
return err;
}
EXPORT_SYMBOL(udp_sock_create4);
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
{
struct sock *sk = sock->sk;
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
inet_inc_convert_csum(sk);
rcu_assign_sk_user_data(sk, cfg->sk_user_data);
udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;
udp_tunnel_encap_enable(sock);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
unsigned short type)
{
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
if (!dev->netdev_ops->ndo_udp_tunnel_add ||
!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
unsigned short type)
{
struct sock *sk = sock->sk;
struct udp_tunnel_info ti;
if (!dev->netdev_ops->ndo_udp_tunnel_del ||
!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
return;
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
/* Notify netdevs that UDP port started listening */
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct udp_tunnel_info ti;
struct net_device *dev;
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_add)
continue;
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
continue;
dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port);
/* Notify netdevs that UDP port is no longer listening */
void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct udp_tunnel_info ti;
struct net_device *dev;
ti.type = type;
ti.sa_family = sk->sk_family;
ti.port = inet_sk(sk)->inet_sport;
rcu_read_lock();
for_each_netdev_rcu(net, dev) {
if (!dev->netdev_ops->ndo_udp_tunnel_del)
continue;
if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
continue;
dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port);
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
bool xnet, bool nocheck)
{
struct udphdr *uh;
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
uh->dest = dst_port;
uh->source = src_port;
uh->len = htons(skb->len);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
udp_set_csum(nocheck, skb, src, dst, skb->len);
iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
__be16 flags, __be64 tunnel_id, int md_size)
{
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
if (family == AF_INET)
tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size);
else
tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size);
if (!tun_dst)
return NULL;
info = &tun_dst->u.tun_info;
info->key.tp_src = udp_hdr(skb)->source;
info->key.tp_dst = udp_hdr(skb)->dest;
if (udp_hdr(skb)->check)
info->key.tun_flags |= TUNNEL_CSUM;
return tun_dst;
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);
MODULE_LICENSE("GPL");
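A minimal sketch of the intended call sequence for the helpers above (not part of this commit; MY_TUNNEL_PORT and my_encap_recv() are hypothetical placeholders for a driver's own port and receive callback): create the kernel UDP socket, then attach the encapsulation hooks.

#include <net/udp_tunnel.h>

#define MY_TUNNEL_PORT 4789	/* hypothetical local port */

static int my_encap_recv(struct sock *sk, struct sk_buff *skb);	/* hypothetical rx callback */

static int example_tunnel_open(struct net *net, struct socket **sockp)
{
	struct udp_port_cfg port_cfg = {
		.family		= AF_INET,
		.local_udp_port	= htons(MY_TUNNEL_PORT),
	};
	struct udp_tunnel_sock_cfg tunnel_cfg = {
		.encap_type	= 1,		/* plain UDP encapsulation */
		.encap_rcv	= my_encap_recv,
	};
	int err = udp_sock_create4(net, &port_cfg, sockp);

	if (err < 0)
		return err;
	setup_udp_tunnel_sock(net, *sockp, &tunnel_cfg);
	return 0;
}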

@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <net/netfilter/nf_tables_core.h>
static int __init nf_tables_set_module_init(void)
{
nft_register_set(&nft_set_hash_fast_type);
nft_register_set(&nft_set_hash_type);
nft_register_set(&nft_set_rhash_type);
nft_register_set(&nft_set_bitmap_type);
nft_register_set(&nft_set_rbtree_type);
return 0;
}
static void __exit nf_tables_set_module_exit(void)
{
nft_unregister_set(&nft_set_rbtree_type);
nft_unregister_set(&nft_set_bitmap_type);
nft_unregister_set(&nft_set_rhash_type);
nft_unregister_set(&nft_set_hash_type);
nft_unregister_set(&nft_set_hash_fast_type);
}
module_init(nf_tables_set_module_init);
module_exit(nf_tables_set_module_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NFT_SET();

net/rds/ib_fmr.c (new file): 269 lines

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2016 Oracle. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "ib_mr.h"
struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
{
struct rds_ib_mr_pool *pool;
struct rds_ib_mr *ibmr = NULL;
struct rds_ib_fmr *fmr;
int err = 0;
if (npages <= RDS_MR_8K_MSG_SIZE)
pool = rds_ibdev->mr_8k_pool;
else
pool = rds_ibdev->mr_1m_pool;
if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
/* Switch pools if one of the pools is reaching its upper limit */
if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
if (pool->pool_type == RDS_IB_MR_8K_POOL)
pool = rds_ibdev->mr_1m_pool;
else
pool = rds_ibdev->mr_8k_pool;
}
ibmr = rds_ib_try_reuse_ibmr(pool);
if (ibmr)
return ibmr;
ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
rdsibdev_to_node(rds_ibdev));
if (!ibmr) {
err = -ENOMEM;
goto out_no_cigar;
}
fmr = &ibmr->u.fmr;
fmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
(IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_ATOMIC),
&pool->fmr_attr);
if (IS_ERR(fmr->fmr)) {
err = PTR_ERR(fmr->fmr);
fmr->fmr = NULL;
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err);
goto out_no_cigar;
}
ibmr->pool = pool;
if (pool->pool_type == RDS_IB_MR_8K_POOL)
rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
return ibmr;
out_no_cigar:
kfree(ibmr);
atomic_dec(&pool->item_count);
return ERR_PTR(err);
}
static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
struct rds_ib_mr *ibmr, struct scatterlist *sg,
unsigned int nents)
{
struct ib_device *dev = rds_ibdev->dev;
struct rds_ib_fmr *fmr = &ibmr->u.fmr;
struct scatterlist *scat = sg;
u64 io_addr = 0;
u64 *dma_pages;
u32 len;
int page_cnt, sg_dma_len;
int i, j;
int ret;
sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
if (unlikely(!sg_dma_len)) {
pr_warn("RDS/IB: %s failed!\n", __func__);
return -EBUSY;
}
len = 0;
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
unsigned int dma_len = sg_dma_len(&scat[i]);
u64 dma_addr = sg_dma_address(&scat[i]);
if (dma_addr & ~PAGE_MASK) {
if (i > 0) {
ib_dma_unmap_sg(dev, sg, nents,
DMA_BIDIRECTIONAL);
return -EINVAL;
} else {
++page_cnt;
}
}
if ((dma_addr + dma_len) & ~PAGE_MASK) {
if (i < sg_dma_len - 1) {
ib_dma_unmap_sg(dev, sg, nents,
DMA_BIDIRECTIONAL);
return -EINVAL;
} else {
++page_cnt;
}
}
len += dma_len;
}
page_cnt += len >> PAGE_SHIFT;
if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -EINVAL;
}
dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
rdsibdev_to_node(rds_ibdev));
if (!dma_pages) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
return -ENOMEM;
}
page_cnt = 0;
for (i = 0; i < sg_dma_len; ++i) {
unsigned int dma_len = sg_dma_len(&scat[i]);
u64 dma_addr = sg_dma_address(&scat[i]);
for (j = 0; j < dma_len; j += PAGE_SIZE)
dma_pages[page_cnt++] =
(dma_addr & PAGE_MASK) + j;
}
ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
if (ret) {
ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
goto out;
}
/* Success - we successfully remapped the MR, so we can
* safely tear down the old mapping.
*/
rds_ib_teardown_mr(ibmr);
ibmr->sg = scat;
ibmr->sg_len = nents;
ibmr->sg_dma_len = sg_dma_len;
ibmr->remap_count++;
if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
ret = 0;
out:
kfree(dma_pages);
return ret;
}
struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
struct scatterlist *sg,
unsigned long nents,
u32 *key)
{
struct rds_ib_mr *ibmr = NULL;
struct rds_ib_fmr *fmr;
int ret;
ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
if (IS_ERR(ibmr))
return ibmr;
ibmr->device = rds_ibdev;
fmr = &ibmr->u.fmr;
ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
if (ret == 0)
*key = fmr->fmr->rkey;
else
rds_ib_free_mr(ibmr, 0);
return ibmr;
}
void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
unsigned long *unpinned, unsigned int goal)
{
struct rds_ib_mr *ibmr, *next;
struct rds_ib_fmr *fmr;
LIST_HEAD(fmr_list);
int ret = 0;
unsigned int freed = *nfreed;
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
list_for_each_entry(ibmr, list, unmap_list) {
fmr = &ibmr->u.fmr;
list_add(&fmr->fmr->list, &fmr_list);
}
ret = ib_unmap_fmr(&fmr_list);
if (ret)
pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);
/* Now we can destroy the DMA mapping and unpin any pages */
list_for_each_entry_safe(ibmr, next, list, unmap_list) {
fmr = &ibmr->u.fmr;
*unpinned += ibmr->sg_len;
__rds_ib_teardown_mr(ibmr);
if (freed < goal ||
ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
list_del(&ibmr->unmap_list);
ib_dealloc_fmr(fmr->fmr);
kfree(ibmr);
freed++;
}
}
*nfreed = freed;
}
void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
{
struct rds_ib_mr_pool *pool = ibmr->pool;
if (ibmr->remap_count >= pool->fmr_attr.max_maps)
llist_add(&ibmr->llnode, &pool->drop_list);
else
llist_add(&ibmr->llnode, &pool->free_list);
}

net/wireguard/Kbuild (new file): 16 lines

@@ -0,0 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
ccflags-y := -O3
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
ccflags-y += -Wframe-larger-than=2048
ccflags-$(if $(WIREGUARD_VERSION),y,) += -D'WIREGUARD_VERSION="$(WIREGUARD_VERSION)"'
wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o
include $(src)/crypto/Kbuild.include
include $(src)/compat/Kbuild.include
obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o

net/wireguard/Kconfig (new file): 33 lines

@@ -0,0 +1,33 @@
config WIREGUARD
tristate "IP: WireGuard secure network tunnel"
depends on NET && INET
depends on IPV6 || !IPV6
select NET_UDP_TUNNEL
select DST_CACHE
select CRYPTO
select CRYPTO_ALGAPI
select VFP
select VFPv3 if CPU_V7
select NEON if CPU_V7
select KERNEL_MODE_NEON if CPU_V7
default m
help
WireGuard is a secure, fast, and easy to use replacement for IPsec
that uses modern cryptography and clever networking tricks. It's
designed to be fairly general purpose and abstract enough to fit most
use cases, while at the same time remaining extremely simple to
configure. See www.wireguard.com for more info.
It's safe to say Y or M here, as the driver is very lightweight and
is only in use when an administrator chooses to add an interface.
config WIREGUARD_DEBUG
bool "Debugging checks and verbose messages"
depends on WIREGUARD
help
This will write log messages for handshake and other events
that occur for a WireGuard interface. It will also perform some
extra validation checks and unit tests at various points. This is
only useful for debugging.
Say N here unless you know what you're doing.

net/wireguard/Makefile (new file): 59 lines

@@ -0,0 +1,59 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
KERNELRELEASE ?= $(shell uname -r)
KERNELDIR ?= /lib/modules/$(KERNELRELEASE)/build
PREFIX ?= /usr
DESTDIR ?=
SRCDIR ?= $(PREFIX)/src
DKMSDIR ?= $(SRCDIR)/wireguard
DEPMOD ?= depmod
DEPMODBASEDIR ?= /
PWD := $(shell pwd)
all: module
debug: module-debug
ifneq ($(V),1)
MAKEFLAGS += --no-print-directory
endif
WIREGUARD_VERSION = $(patsubst v%,%,$(shell GIT_CEILING_DIRECTORIES="$(PWD)/../.." git describe --dirty 2>/dev/null))
module:
@$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules
module-debug:
@$(MAKE) -C $(KERNELDIR) M=$(PWD) V=1 CONFIG_WIREGUARD_DEBUG=y WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules
clean:
@$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
module-install:
@$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules_install
$(DEPMOD) -b "$(DEPMODBASEDIR)" -a $(KERNELRELEASE)
install: module-install
rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
DKMS_SOURCES := version.h Makefile Kbuild Kconfig dkms.conf $(filter-out version.h wireguard.mod.c tests/%,$(call rwildcard,,*.c *.h *.S *.pl *.include))
dkms-install: $(DKMS_SOURCES)
@$(foreach f,$(DKMS_SOURCES),install -v -m0644 -D $(f) $(DESTDIR)$(DKMSDIR)/$(f);)
style:
$(KERNELDIR)/scripts/checkpatch.pl -f --max-line-length=4000 --codespell --color=always $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) $(wildcard selftest/*.c)
check: clean
scan-build --html-title=wireguard-linux-compat -maxloop 100 --view --keep-going $(MAKE) module CONFIG_WIREGUARD_DEBUG=y C=2 CF="-D__CHECK_ENDIAN__"
coccicheck: clean
@$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_WIREGUARD_DEBUG=y coccicheck MODE=report
cloc:
@cloc --skip-uniqueness --by-file --extract-with="$$(readlink -f ../kernel-tree-scripts/filter-compat-defines.sh) >FILE< > \$$(basename >FILE<)" $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h)
-include tests/debug.mk
.PHONY: all module module-debug module-install install dkms-install clean cloc check style

net/wireguard/allowedips.c (new file): 382 lines

@@ -0,0 +1,382 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "allowedips.h"
#include "peer.h"
static void swap_endian(u8 *dst, const u8 *src, u8 bits)
{
if (bits == 32) {
*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
} else if (bits == 128) {
((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
}
}
static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
u8 cidr, u8 bits)
{
node->cidr = cidr;
node->bit_at_a = cidr / 8U;
#ifdef __LITTLE_ENDIAN
node->bit_at_a ^= (bits / 8U - 1U) % 8U;
#endif
node->bit_at_b = 7U - (cidr % 8U);
node->bitlen = bits;
memcpy(node->bits, src, bits / 8U);
}
#define CHOOSE_NODE(parent, key) \
parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
static void node_free_rcu(struct rcu_head *rcu)
{
kfree(container_of(rcu, struct allowedips_node, rcu));
}
static void push_rcu(struct allowedips_node **stack,
struct allowedips_node __rcu *p, unsigned int *len)
{
if (rcu_access_pointer(p)) {
WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
stack[(*len)++] = rcu_dereference_raw(p);
}
}
static void root_free_rcu(struct rcu_head *rcu)
{
struct allowedips_node *node, *stack[128] = {
container_of(rcu, struct allowedips_node, rcu) };
unsigned int len = 1;
while (len > 0 && (node = stack[--len])) {
push_rcu(stack, node->bit[0], &len);
push_rcu(stack, node->bit[1], &len);
kfree(node);
}
}
static void root_remove_peer_lists(struct allowedips_node *root)
{
struct allowedips_node *node, *stack[128] = { root };
unsigned int len = 1;
while (len > 0 && (node = stack[--len])) {
push_rcu(stack, node->bit[0], &len);
push_rcu(stack, node->bit[1], &len);
if (rcu_access_pointer(node->peer))
list_del(&node->peer_list);
}
}
static void walk_remove_by_peer(struct allowedips_node __rcu **top,
struct wg_peer *peer, struct mutex *lock)
{
#define REF(p) rcu_access_pointer(p)
#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
#define PUSH(p) ({ \
WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
stack[len++] = p; \
})
struct allowedips_node __rcu **stack[128], **nptr;
struct allowedips_node *node, *prev;
unsigned int len;
if (unlikely(!peer || !REF(*top)))
return;
for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
nptr = stack[len - 1];
node = DEREF(nptr);
if (!node) {
--len;
continue;
}
if (!prev || REF(prev->bit[0]) == node ||
REF(prev->bit[1]) == node) {
if (REF(node->bit[0]))
PUSH(&node->bit[0]);
else if (REF(node->bit[1]))
PUSH(&node->bit[1]);
} else if (REF(node->bit[0]) == prev) {
if (REF(node->bit[1]))
PUSH(&node->bit[1]);
} else {
if (rcu_dereference_protected(node->peer,
lockdep_is_held(lock)) == peer) {
RCU_INIT_POINTER(node->peer, NULL);
list_del_init(&node->peer_list);
if (!node->bit[0] || !node->bit[1]) {
rcu_assign_pointer(*nptr, DEREF(
&node->bit[!REF(node->bit[0])]));
call_rcu(&node->rcu, node_free_rcu);
node = DEREF(nptr);
}
}
--len;
}
}
#undef REF
#undef DEREF
#undef PUSH
}
static unsigned int fls128(u64 a, u64 b)
{
return a ? fls64(a) + 64U : fls64(b);
}
static u8 common_bits(const struct allowedips_node *node, const u8 *key,
u8 bits)
{
if (bits == 32)
return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
else if (bits == 128)
return 128U - fls128(
*(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
*(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
return 0;
}
static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
u8 bits)
{
/* This could be much faster if it actually just compared the common
* bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
* the rest, but it turns out that common_bits is already super fast on
* modern processors, even taking into account the unfortunate bswap.
* So, we just inline it like this instead.
*/
return common_bits(node, key, bits) >= node->cidr;
}
static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
const u8 *key)
{
struct allowedips_node *node = trie, *found = NULL;
while (node && prefix_matches(node, key, bits)) {
if (rcu_access_pointer(node->peer))
found = node;
if (node->cidr == bits)
break;
node = rcu_dereference_bh(CHOOSE_NODE(node, key));
}
return found;
}
/* Returns a strong reference to a peer */
static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
const void *be_ip)
{
/* Aligned so it can be passed to fls/fls64 */
u8 ip[16] __aligned(__alignof(u64));
struct allowedips_node *node;
struct wg_peer *peer = NULL;
swap_endian(ip, be_ip, bits);
rcu_read_lock_bh();
retry:
node = find_node(rcu_dereference_bh(root), bits, ip);
if (node) {
peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
if (!peer)
goto retry;
}
rcu_read_unlock_bh();
return peer;
}
static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
u8 cidr, u8 bits, struct allowedips_node **rnode,
struct mutex *lock)
{
struct allowedips_node *node = rcu_dereference_protected(trie,
lockdep_is_held(lock));
struct allowedips_node *parent = NULL;
bool exact = false;
while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
parent = node;
if (parent->cidr == cidr) {
exact = true;
break;
}
node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
lockdep_is_held(lock));
}
*rnode = parent;
return exact;
}
static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
struct allowedips_node *node, *parent, *down, *newnode;
if (unlikely(cidr > bits || !peer))
return -EINVAL;
if (!rcu_access_pointer(*trie)) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (unlikely(!node))
return -ENOMEM;
RCU_INIT_POINTER(node->peer, peer);
list_add_tail(&node->peer_list, &peer->allowedips_list);
copy_and_assign_cidr(node, key, cidr, bits);
rcu_assign_pointer(*trie, node);
return 0;
}
if (node_placement(*trie, key, cidr, bits, &node, lock)) {
rcu_assign_pointer(node->peer, peer);
list_move_tail(&node->peer_list, &peer->allowedips_list);
return 0;
}
newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
if (unlikely(!newnode))
return -ENOMEM;
RCU_INIT_POINTER(newnode->peer, peer);
list_add_tail(&newnode->peer_list, &peer->allowedips_list);
copy_and_assign_cidr(newnode, key, cidr, bits);
if (!node) {
down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
} else {
down = rcu_dereference_protected(CHOOSE_NODE(node, key),
lockdep_is_held(lock));
if (!down) {
rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
return 0;
}
}
cidr = min(cidr, common_bits(down, key, bits));
parent = node;
if (newnode->cidr == cidr) {
rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
if (!parent)
rcu_assign_pointer(*trie, newnode);
else
rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
newnode);
} else {
node = kzalloc(sizeof(*node), GFP_KERNEL);
if (unlikely(!node)) {
list_del(&newnode->peer_list);
kfree(newnode);
return -ENOMEM;
}
INIT_LIST_HEAD(&node->peer_list);
copy_and_assign_cidr(node, newnode->bits, cidr, bits);
rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
if (!parent)
rcu_assign_pointer(*trie, node);
else
rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
node);
}
return 0;
}
void wg_allowedips_init(struct allowedips *table)
{
table->root4 = table->root6 = NULL;
table->seq = 1;
}
void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
{
struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;
++table->seq;
RCU_INIT_POINTER(table->root4, NULL);
RCU_INIT_POINTER(table->root6, NULL);
if (rcu_access_pointer(old4)) {
struct allowedips_node *node = rcu_dereference_protected(old4,
lockdep_is_held(lock));
root_remove_peer_lists(node);
call_rcu(&node->rcu, root_free_rcu);
}
if (rcu_access_pointer(old6)) {
struct allowedips_node *node = rcu_dereference_protected(old6,
lockdep_is_held(lock));
root_remove_peer_lists(node);
call_rcu(&node->rcu, root_free_rcu);
}
}
int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
/* Aligned so it can be passed to fls */
u8 key[4] __aligned(__alignof(u32));
++table->seq;
swap_endian(key, (const u8 *)ip, 32);
return add(&table->root4, 32, key, cidr, peer, lock);
}
int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
/* Aligned so it can be passed to fls64 */
u8 key[16] __aligned(__alignof(u64));
++table->seq;
swap_endian(key, (const u8 *)ip, 128);
return add(&table->root6, 128, key, cidr, peer, lock);
}
void wg_allowedips_remove_by_peer(struct allowedips *table,
struct wg_peer *peer, struct mutex *lock)
{
++table->seq;
walk_remove_by_peer(&table->root4, peer, lock);
walk_remove_by_peer(&table->root6, peer, lock);
}
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
{
const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);
swap_endian(ip, node->bits, node->bitlen);
memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
if (node->cidr)
ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);
*cidr = node->cidr;
return node->bitlen == 32 ? AF_INET : AF_INET6;
}
/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
else if (skb->protocol == htons(ETH_P_IPV6))
return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
return NULL;
}
/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
struct sk_buff *skb)
{
if (skb->protocol == htons(ETH_P_IP))
return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
else if (skb->protocol == htons(ETH_P_IPV6))
return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
return NULL;
}
#include "selftest/allowedips.c"

@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_ALLOWEDIPS_H
#define _WG_ALLOWEDIPS_H
#include <linux/mutex.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
struct wg_peer;
struct allowedips_node {
struct wg_peer __rcu *peer;
struct allowedips_node __rcu *bit[2];
/* While it may seem scandalous that we waste space for v4,
* we're alloc'ing to the nearest power of 2 anyway, so this
* doesn't actually make a difference.
*/
u8 bits[16] __aligned(__alignof(u64));
u8 cidr, bit_at_a, bit_at_b, bitlen;
/* Keep rarely used list at bottom to be beyond cache line. */
union {
struct list_head peer_list;
struct rcu_head rcu;
};
};
struct allowedips {
struct allowedips_node __rcu *root4;
struct allowedips_node __rcu *root6;
u64 seq;
};
void wg_allowedips_init(struct allowedips *table);
void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
u8 cidr, struct wg_peer *peer, struct mutex *lock);
int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
u8 cidr, struct wg_peer *peer, struct mutex *lock);
void wg_allowedips_remove_by_peer(struct allowedips *table,
struct wg_peer *peer, struct mutex *lock);
/* The ip input pointer should be __aligned(__alignof(u64)) */
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);
/* These return a strong reference to a peer: */
struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
struct sk_buff *skb);
struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
struct sk_buff *skb);
#ifdef DEBUG
bool wg_allowedips_selftest(void);
#endif
#endif /* _WG_ALLOWEDIPS_H */
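A minimal usage sketch for the API above (not part of this commit; the 10.0.0.0/24 prefix is illustrative and wg_peer_put() is assumed from peer.h): insert an allowed prefix for a peer under the table lock, then resolve the peer owning an outgoing packet's destination and drop the strong reference.

static int example_route_peer(struct allowedips *table, struct wg_peer *peer,
			      struct mutex *lock, struct sk_buff *skb)
{
	struct in_addr prefix = { .s_addr = htonl(0x0a000000) };	/* 10.0.0.0, illustrative */
	struct wg_peer *owner;
	int ret;

	mutex_lock(lock);
	ret = wg_allowedips_insert_v4(table, &prefix, 24, peer, lock);
	mutex_unlock(lock);
	if (ret)
		return ret;

	owner = wg_allowedips_lookup_dst(table, skb);	/* returns a strong reference or NULL */
	if (owner)
		wg_peer_put(owner);			/* release the reference once routed */
	return 0;
}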

@@ -0,0 +1,102 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
ccflags-y += -include $(kbuild-dir)/compat/compat.h
asflags-y += -include $(kbuild-dir)/compat/compat-asm.h
ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),)
ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include
endif
ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
ccflags-y += -I$(kbuild-dir)/compat/siphash/include
wireguard-y += compat/siphash/siphash.o
endif
ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),)
ccflags-y += -I$(kbuild-dir)/compat/dst_cache/include
wireguard-y += compat/dst_cache/dst_cache.o
endif
ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h)$(CONFIG_X86),y)
ccflags-y += -I$(kbuild-dir)/compat/intel-family-x86/include
endif
ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h)$(CONFIG_X86),y)
ccflags-y += -I$(kbuild-dir)/compat/fpu-x86/include
endif
ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/simd.h)$(shell grep -s -F "generic-y += simd.h" "$(srctree)/arch/$(SRCARCH)/Kbuild" "$(srctree)/arch/$(SRCARCH)/Makefile"),)
ccflags-y += -I$(kbuild-dir)/compat/simd-asm/include
endif
ifeq ($(wildcard $(srctree)/include/linux/simd.h),)
ccflags-y += -I$(kbuild-dir)/compat/simd/include
endif
ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),)
ccflags-y += -I$(kbuild-dir)/compat/udp_tunnel/include
wireguard-y += compat/udp_tunnel/udp_tunnel.o
endif
ifeq ($(shell grep -s -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),)
ccflags-y += -include $(kbuild-dir)/compat/memneq/include.h
wireguard-y += compat/memneq/memneq.o
endif
ifeq ($(shell grep -s -F "addr_gen_mode" "$(srctree)/include/linux/ipv6.h"),)
ccflags-y += -DCOMPAT_CANNOT_USE_DEV_CNF
endif
ifdef CONFIG_HZ
ifeq ($(wildcard $(CURDIR)/include/generated/timeconst.h),)
ccflags-y += $(shell bash -c '((a=$(CONFIG_HZ), b=1000000)); while ((b > 0)); do ((t=b, b=a%b, a=t)); done; echo "-DHZ_TO_USEC_NUM=$$((1000000/a)) -DHZ_TO_USEC_DEN=$$(($(CONFIG_HZ)/a))";')
endif
endif
ifeq ($(wildcard $(srctree)/arch/arm/include/asm/neon.h)$(CONFIG_ARM),y)
ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
endif
ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y)
ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
endif
ifeq ($(CONFIG_X86_64),y)
ifeq ($(ssse3_instr),)
ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
ccflags-y += $(ssse3_instr)
asflags-y += $(ssse3_instr)
endif
ifeq ($(avx_instr),)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
ccflags-y += $(avx_instr)
asflags-y += $(avx_instr)
endif
ifeq ($(avx2_instr),)
avx2_instr := $(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
ccflags-y += $(avx2_instr)
asflags-y += $(avx2_instr)
endif
ifeq ($(avx512_instr),)
avx512_instr := $(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
ccflags-y += $(avx512_instr)
asflags-y += $(avx512_instr)
endif
ifeq ($(bmi2_instr),)
bmi2_instr :=$(call as-instr,mulx %rax$(comma)%rax$(comma)%rax,-DCONFIG_AS_BMI2=1)
ccflags-y += $(bmi2_instr)
asflags-y += $(bmi2_instr)
endif
ifeq ($(adx_instr),)
adx_instr :=$(call as-instr,adcx %rax$(comma)%rax,-DCONFIG_AS_ADX=1)
ccflags-y += $(adx_instr)
asflags-y += $(adx_instr)
endif
endif
ifneq ($(shell grep -s -F "\#define LINUX_PACKAGE_ID \" Debian " "$(CURDIR)/include/generated/package.h"),)
ccflags-y += -DISDEBIAN
endif

@@ -0,0 +1,208 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <net/route.h>
#include <net/esp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max)
{
if (skb_headlen(skb) >= len)
return 0;
if (max > skb->len)
max = skb->len;
if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
return -ENOMEM;
if (skb_headlen(skb) < len)
return -EPROTO;
return 0;
}
#define MAX_IP_HDR_LEN 128
static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
{
unsigned int off;
bool fragment;
int err;
fragment = false;
err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
fragment = true;
off = ip_hdrlen(skb);
err = -EPROTO;
if (fragment)
goto out;
switch (ip_hdr(skb)->protocol) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb,
off + sizeof(struct tcphdr),
MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct tcphdr, check))) {
err = -EPROTO;
goto out;
}
if (recalculate)
tcp_hdr(skb)->check =
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - off,
IPPROTO_TCP, 0);
break;
case IPPROTO_UDP:
err = skb_maybe_pull_tail(skb,
off + sizeof(struct udphdr),
MAX_IP_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct udphdr, check))) {
err = -EPROTO;
goto out;
}
if (recalculate)
udp_hdr(skb)->check =
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
ip_hdr(skb)->daddr,
skb->len - off,
IPPROTO_UDP, 0);
break;
default:
goto out;
}
err = 0;
out:
return err;
}
#define MAX_IPV6_HDR_LEN 256
#define OPT_HDR(type, skb, off) \
(type *)(skb_network_header(skb) + (off))
static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
{
int err;
u8 nexthdr;
unsigned int off;
unsigned int len;
bool fragment;
bool done;
fragment = false;
done = false;
off = sizeof(struct ipv6hdr);
err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
nexthdr = ipv6_hdr(skb)->nexthdr;
len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
while (off <= len && !done) {
switch (nexthdr) {
case IPPROTO_DSTOPTS:
case IPPROTO_HOPOPTS:
case IPPROTO_ROUTING: {
struct ipv6_opt_hdr *hp;
err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
nexthdr = hp->nexthdr;
off += ipv6_optlen(hp);
break;
}
case IPPROTO_FRAGMENT: {
struct frag_hdr *hp;
err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
hp = OPT_HDR(struct frag_hdr, skb, off);
if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
fragment = true;
nexthdr = hp->nexthdr;
off += sizeof(struct frag_hdr);
break;
}
default:
done = true;
break;
}
}
err = -EPROTO;
if (!done || fragment)
goto out;
switch (nexthdr) {
case IPPROTO_TCP:
err = skb_maybe_pull_tail(skb,
off + sizeof(struct tcphdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct tcphdr, check))) {
err = -EPROTO;
goto out;
}
if (recalculate)
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - off,
IPPROTO_TCP, 0);
break;
case IPPROTO_UDP:
err = skb_maybe_pull_tail(skb,
off + sizeof(struct udphdr),
MAX_IPV6_HDR_LEN);
if (err < 0)
goto out;
if (!skb_partial_csum_set(skb, off,
offsetof(struct udphdr, check))) {
err = -EPROTO;
goto out;
}
if (recalculate)
udp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len - off,
IPPROTO_UDP, 0);
break;
default:
goto out;
}
err = 0;
out:
return err;
}
static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
{
int err;
switch (skb->protocol) {
case htons(ETH_P_IP):
err = skb_checksum_setup_ip(skb, recalculate);
break;
case htons(ETH_P_IPV6):
err = skb_checksum_setup_ipv6(skb, recalculate);
break;
default:
err = -EPROTO;
break;
}
return err;
}

@@ -0,0 +1,78 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_COMPATASM_H
#define _WG_COMPATASM_H
#include <linux/linkage.h>
#include <linux/kconfig.h>
#include <linux/version.h>
/* PaX compatibility */
#if defined(RAP_PLUGIN)
#undef ENTRY
#define ENTRY RAP_ENTRY
#endif
#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro ret\c, reg
#if __LINUX_ARM_ARCH__ < 6
mov\c pc, \reg
#else
.ifeqs "\reg", "lr"
bx\c \reg
.else
mov\c pc, \reg
.endif
#endif
.endm
.endr
#endif
#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
#include <asm/assembler.h>
#define lspush push
#define lspull pull
#undef push
#undef pull
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0)
#define SYM_FUNC_START ENTRY
#define SYM_FUNC_END ENDPROC
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3
#define blake2s_compress_avx512 zinc_blake2s_compress_avx512
#define poly1305_init_arm zinc_poly1305_init_arm
#define poly1305_blocks_arm zinc_poly1305_blocks_arm
#define poly1305_emit_arm zinc_poly1305_emit_arm
#define poly1305_blocks_neon zinc_poly1305_blocks_neon
#define poly1305_emit_neon zinc_poly1305_emit_neon
#define poly1305_init_mips zinc_poly1305_init_mips
#define poly1305_blocks_mips zinc_poly1305_blocks_mips
#define poly1305_emit_mips zinc_poly1305_emit_mips
#define poly1305_init_x86_64 zinc_poly1305_init_x86_64
#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64
#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64
#define poly1305_emit_avx zinc_poly1305_emit_avx
#define poly1305_blocks_avx zinc_poly1305_blocks_avx
#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2
#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512
#define curve25519_neon zinc_curve25519_neon
#define hchacha20_ssse3 zinc_hchacha20_ssse3
#define chacha20_ssse3 zinc_chacha20_ssse3
#define chacha20_avx2 zinc_chacha20_avx2
#define chacha20_avx512 zinc_chacha20_avx512
#define chacha20_avx512vl zinc_chacha20_avx512vl
#define chacha20_mips zinc_chacha20_mips
#define chacha20_arm zinc_chacha20_arm
#define hchacha20_arm zinc_hchacha20_arm
#define chacha20_neon zinc_chacha20_neon
#endif
#endif /* _WG_COMPATASM_H */

File diff suppressed because it is too large.

@@ -0,0 +1,175 @@
/*
* net/core/dst_cache.c - dst entry cache
*
* Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <net/dst_cache.h>
#include <net/route.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 50)
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
rt = (struct rt6_info *)(rt->dst.from);
return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
}
#endif
#endif
#include <uapi/linux/in.h>
struct dst_cache_pcpu {
unsigned long refresh_ts;
struct dst_entry *dst;
u32 cookie;
union {
struct in_addr in_saddr;
struct in6_addr in6_saddr;
};
};
static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
struct dst_entry *dst, u32 cookie)
{
dst_release(dst_cache->dst);
if (dst)
dst_hold(dst);
dst_cache->cookie = cookie;
dst_cache->dst = dst;
}
static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
struct dst_cache_pcpu *idst)
{
struct dst_entry *dst;
dst = idst->dst;
if (!dst)
goto fail;
/* the cache already holds a dst reference; it can't go away */
dst_hold(dst);
if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
(dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
dst_cache_per_cpu_dst_set(idst, NULL, 0);
dst_release(dst);
goto fail;
}
return dst;
fail:
idst->refresh_ts = jiffies;
return NULL;
}
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
{
if (!dst_cache->cache)
return NULL;
return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
}
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
{
struct dst_cache_pcpu *idst;
struct dst_entry *dst;
if (!dst_cache->cache)
return NULL;
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
if (!dst)
return NULL;
*saddr = idst->in_saddr.s_addr;
return container_of(dst, struct rtable, dst);
}
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
__be32 saddr)
{
struct dst_cache_pcpu *idst;
if (!dst_cache->cache)
return;
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(idst, dst, 0);
idst->in_saddr.s_addr = saddr;
}
#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
const struct in6_addr *addr)
{
struct dst_cache_pcpu *idst;
if (!dst_cache->cache)
return;
idst = this_cpu_ptr(dst_cache->cache);
dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
rt6_get_cookie((struct rt6_info *)dst));
idst->in6_saddr = *addr;
}
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
struct in6_addr *saddr)
{
struct dst_cache_pcpu *idst;
struct dst_entry *dst;
if (!dst_cache->cache)
return NULL;
idst = this_cpu_ptr(dst_cache->cache);
dst = dst_cache_per_cpu_get(dst_cache, idst);
if (!dst)
return NULL;
*saddr = idst->in6_saddr;
return dst;
}
#endif
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
BUG_ON(gfp & GFP_ATOMIC);
dst_cache->cache = alloc_percpu(struct dst_cache_pcpu);
#else
dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
gfp | __GFP_ZERO);
#endif
if (!dst_cache->cache)
return -ENOMEM;
dst_cache_reset(dst_cache);
return 0;
}
void dst_cache_destroy(struct dst_cache *dst_cache)
{
int i;
if (!dst_cache->cache)
return;
for_each_possible_cpu(i)
dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
free_percpu(dst_cache->cache);
}

@@ -0,0 +1,97 @@
#ifndef _WG_NET_DST_CACHE_H
#define _WG_NET_DST_CACHE_H
#include <linux/jiffies.h>
#include <net/dst.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif
struct dst_cache {
struct dst_cache_pcpu __percpu *cache;
unsigned long reset_ts;
};
/**
* dst_cache_get - perform cache lookup
* @dst_cache: the cache
*
* The caller should use dst_cache_get_ip4() if it needs to retrieve the
* source address to be used when transmitting to the cached dst.
* local BH must be disabled.
*/
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
/**
* dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
* @dst_cache: the cache
* @saddr: return value for the retrieved source address
*
* local BH must be disabled.
*/
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
/**
* dst_cache_set_ip4 - store the ipv4 dst into the cache
* @dst_cache: the cache
* @dst: the entry to be cached
* @saddr: the source address to be stored inside the cache
*
* local BH must be disabled.
*/
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
__be32 saddr);
#if IS_ENABLED(CONFIG_IPV6)
/**
* dst_cache_set_ip6 - store the ipv6 dst into the cache
* @dst_cache: the cache
* @dst: the entry to be cached
* @addr: the source address to be stored inside the cache
*
* local BH must be disabled.
*/
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
const struct in6_addr *addr);
/**
* dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
* @dst_cache: the cache
* @saddr: return value for the retrieved source address
*
* local BH must be disabled.
*/
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
struct in6_addr *saddr);
#endif
/**
* dst_cache_reset - invalidate the cache contents
* @dst_cache: the cache
*
* This does not free the cached dst, to avoid races and contention.
* The dst will be freed on a later cache lookup.
*/
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
dst_cache->reset_ts = jiffies;
}
/**
* dst_cache_init - initialize the cache, allocating the required storage
* @dst_cache: the cache
* @gfp: allocation flags
*/
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
/**
* dst_cache_destroy - empty the cache and free the allocated storage
* @dst_cache: the cache
*
* No synchronization is enforced: it must be called only when the cache
* is unused.
*/
void dst_cache_destroy(struct dst_cache *dst_cache);
#endif /* _WG_NET_DST_CACHE_H */
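A minimal sketch of the pattern the kernel-doc above describes (not part of this commit; the routing call and the use of init_net are illustrative simplifications): check the per-cpu cache first, and on a miss perform the real route lookup and store the result.

#include <net/route.h>
#include <net/dst_cache.h>

static struct rtable *example_route_ip4(struct dst_cache *cache, struct flowi4 *fl4)
{
	__be32 saddr;
	struct rtable *rt = dst_cache_get_ip4(cache, &saddr);	/* local BHs assumed disabled */

	if (rt) {
		fl4->saddr = saddr;			/* reuse the cached source address */
		return rt;
	}
	rt = ip_route_output_key(&init_net, fl4);	/* cache miss: do the real lookup */
	if (IS_ERR(rt))
		return NULL;
	dst_cache_set_ip4(cache, &rt->dst, fl4->saddr);
	return rt;
}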

@@ -0,0 +1 @@
#include <asm/i387.h>

@@ -0,0 +1,73 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_INTEL_FAMILY_H
#define _ASM_X86_INTEL_FAMILY_H
/*
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
* The "_X" parts are generally the EP and EX Xeons, or the
* "Extreme" ones, like Broadwell-E.
*
* Things ending in "2" are usually because we have no better
* name for them. There's no processor called "SILVERMONT2".
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
#define INTEL_FAM6_CORE2_MEROM 0x0F
#define INTEL_FAM6_CORE2_MEROM_L 0x16
#define INTEL_FAM6_CORE2_PENRYN 0x17
#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
#define INTEL_FAM6_NEHALEM 0x1E
#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
#define INTEL_FAM6_NEHALEM_EP 0x1A
#define INTEL_FAM6_NEHALEM_EX 0x2E
#define INTEL_FAM6_WESTMERE 0x25
#define INTEL_FAM6_WESTMERE_EP 0x2C
#define INTEL_FAM6_WESTMERE_EX 0x2F
#define INTEL_FAM6_SANDYBRIDGE 0x2A
#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
#define INTEL_FAM6_IVYBRIDGE 0x3A
#define INTEL_FAM6_IVYBRIDGE_X 0x3E
#define INTEL_FAM6_HASWELL_CORE 0x3C
#define INTEL_FAM6_HASWELL_X 0x3F
#define INTEL_FAM6_HASWELL_ULT 0x45
#define INTEL_FAM6_HASWELL_GT3E 0x46
#define INTEL_FAM6_BROADWELL_CORE 0x3D
#define INTEL_FAM6_BROADWELL_GT3E 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
#define INTEL_FAM6_BROADWELL_XEON_D 0x56
#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
#define INTEL_FAM6_SKYLAKE_X 0x55
#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
#define INTEL_FAM6_ATOM_LINCROFT 0x26
#define INTEL_FAM6_ATOM_PENWELL 0x27
#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
/* Xeon Phi */
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
#endif /* _ASM_X86_INTEL_FAMILY_H */
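
For orientation, these model numbers are normally compared against the running CPU's family/model fields. A minimal, hypothetical check (the function name and the choice of model are illustrative only):

#include <linux/types.h>
#include <asm/processor.h>
#include <asm/intel-family.h>

/* Hypothetical example: detect a Skylake-SP (server) part. */
static bool example_is_skylake_server(void)
{
	return boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	       boot_cpu_data.x86 == 6 &&
	       boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X;
}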

View File

@ -0,0 +1,5 @@
extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
static inline int crypto_memneq(const void *a, const void *b, size_t size)
{
return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
}
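
To illustrate why the wrapper above only reports zero/non-zero, here is a hedged sketch of the intended use: comparing authentication tags through crypto_memneq() instead of memcmp(), so the comparison cost does not depend on where the tags first differ. The example_tag_ok() name is made up for this example.

#include <linux/types.h>
#include <crypto/algapi.h>

/* Hypothetical 16-byte authentication-tag check. */
static bool example_tag_ok(const u8 computed[16], const u8 received[16])
{
	return crypto_memneq(computed, received, 16) == 0;
}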

View File

@ -0,0 +1,170 @@
/*
* Constant-time equality testing of memory regions.
*
* Authors:
*
* James Yonan <james@openvpn.net>
* Daniel Borkmann <dborkman@redhat.com>
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* BSD LICENSE
*
* Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of OpenVPN Technologies nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <crypto/algapi.h>
/* Make the optimizer believe the variable can be manipulated arbitrarily. */
#define COMPILER_OPTIMIZER_HIDE_VAR(var) asm("" : "=r" (var) : "0" (var))
#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
/* Generic path for arbitrary size */
static inline unsigned long
__crypto_memneq_generic(const void *a, const void *b, size_t size)
{
unsigned long neq = 0;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
while (size >= sizeof(unsigned long)) {
neq |= *(unsigned long *)a ^ *(unsigned long *)b;
COMPILER_OPTIMIZER_HIDE_VAR(neq);
a += sizeof(unsigned long);
b += sizeof(unsigned long);
size -= sizeof(unsigned long);
}
#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
while (size > 0) {
neq |= *(unsigned char *)a ^ *(unsigned char *)b;
COMPILER_OPTIMIZER_HIDE_VAR(neq);
a += 1;
b += 1;
size -= 1;
}
return neq;
}
/* Loop-free fast-path for frequently used 16-byte size */
static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
{
unsigned long neq = 0;
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
if (sizeof(unsigned long) == 8) {
neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
} else if (sizeof(unsigned int) == 4) {
neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
} else
#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
{
neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
COMPILER_OPTIMIZER_HIDE_VAR(neq);
}
return neq;
}
/* Compare two areas of memory without leaking timing information,
* and with special optimizations for common sizes. Users should
* not call this function directly, but should instead use
* crypto_memneq defined in crypto/algapi.h.
*/
noinline unsigned long __crypto_memneq(const void *a, const void *b,
size_t size)
{
switch (size) {
case 16:
return __crypto_memneq_16(a, b);
default:
return __crypto_memneq_generic(a, b, size);
}
}
#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */

View File

@ -0,0 +1,7 @@
#ifndef _ARCH_ARM_ASM_NEON
#define _ARCH_ARM_ASM_NEON
#define kernel_neon_begin() \
BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
#define kernel_neon_end() \
BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
#endif

View File

@ -0,0 +1,674 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Definitions for the 'struct ptr_ring' data structure.
*
* Author:
* Michael S. Tsirkin <mst@redhat.com>
*
* Copyright (C) 2016 Red Hat, Inc.
*
* This is a limited-size FIFO maintaining pointers in FIFO order, with
* one CPU producing entries and another consuming entries from a FIFO.
*
* This implementation tries to minimize cache-contention when there is a
* single producer and a single consumer CPU.
*/
#ifndef _LINUX_PTR_RING_H
#define _LINUX_PTR_RING_H 1
#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/errno.h>
#endif
struct ptr_ring {
int producer ____cacheline_aligned_in_smp;
spinlock_t producer_lock;
int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
int consumer_tail; /* next entry to invalidate */
spinlock_t consumer_lock;
/* Shared consumer/producer data */
/* Read-only by both the producer and the consumer */
int size ____cacheline_aligned_in_smp; /* max entries in queue */
int batch; /* number of entries to consume in a batch */
void **queue;
};
/* Note: callers invoking this in a loop must use a compiler barrier,
* for example cpu_relax().
*
* NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
* see e.g. ptr_ring_full.
*/
static inline bool __ptr_ring_full(struct ptr_ring *r)
{
return r->queue[r->producer];
}
static inline bool ptr_ring_full(struct ptr_ring *r)
{
bool ret;
spin_lock(&r->producer_lock);
ret = __ptr_ring_full(r);
spin_unlock(&r->producer_lock);
return ret;
}
static inline bool ptr_ring_full_irq(struct ptr_ring *r)
{
bool ret;
spin_lock_irq(&r->producer_lock);
ret = __ptr_ring_full(r);
spin_unlock_irq(&r->producer_lock);
return ret;
}
static inline bool ptr_ring_full_any(struct ptr_ring *r)
{
unsigned long flags;
bool ret;
spin_lock_irqsave(&r->producer_lock, flags);
ret = __ptr_ring_full(r);
spin_unlock_irqrestore(&r->producer_lock, flags);
return ret;
}
static inline bool ptr_ring_full_bh(struct ptr_ring *r)
{
bool ret;
spin_lock_bh(&r->producer_lock);
ret = __ptr_ring_full(r);
spin_unlock_bh(&r->producer_lock);
return ret;
}
/* Note: callers invoking this in a loop must use a compiler barrier,
* for example cpu_relax(). Callers must hold producer_lock.
* Callers are responsible for making sure the pointer that is being queued
* points to valid data.
*/
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
if (unlikely(!r->size) || r->queue[r->producer])
return -ENOSPC;
/* Make sure the pointer we are storing points to valid data. */
/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
smp_wmb();
WRITE_ONCE(r->queue[r->producer++], ptr);
if (unlikely(r->producer >= r->size))
r->producer = 0;
return 0;
}
/*
* Note: resize (below) nests producer lock within consumer lock, so if you
* consume in interrupt or BH context, you must disable interrupts/BH when
* calling this.
*/
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
int ret;
spin_lock(&r->producer_lock);
ret = __ptr_ring_produce(r, ptr);
spin_unlock(&r->producer_lock);
return ret;
}
static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
int ret;
spin_lock_irq(&r->producer_lock);
ret = __ptr_ring_produce(r, ptr);
spin_unlock_irq(&r->producer_lock);
return ret;
}
static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&r->producer_lock, flags);
ret = __ptr_ring_produce(r, ptr);
spin_unlock_irqrestore(&r->producer_lock, flags);
return ret;
}
static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
int ret;
spin_lock_bh(&r->producer_lock);
ret = __ptr_ring_produce(r, ptr);
spin_unlock_bh(&r->producer_lock);
return ret;
}
static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
if (likely(r->size))
return READ_ONCE(r->queue[r->consumer_head]);
return NULL;
}
/*
* Test ring empty status without taking any locks.
*
* NB: This is only safe to call if ring is never resized.
*
* However, if some other CPU consumes ring entries at the same time, the value
* returned is not guaranteed to be correct.
*
* In this case - to avoid incorrectly detecting the ring
* as empty - the CPU consuming the ring entries is responsible
* for either consuming all ring entries until the ring is empty,
* or synchronizing with some other CPU and causing it to
* re-test __ptr_ring_empty and/or consume the ring entries
* after the synchronization point.
*
* Note: callers invoking this in a loop must use a compiler barrier,
* for example cpu_relax().
*/
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
if (likely(r->size))
return !r->queue[READ_ONCE(r->consumer_head)];
return true;
}
static inline bool ptr_ring_empty(struct ptr_ring *r)
{
bool ret;
spin_lock(&r->consumer_lock);
ret = __ptr_ring_empty(r);
spin_unlock(&r->consumer_lock);
return ret;
}
static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
bool ret;
spin_lock_irq(&r->consumer_lock);
ret = __ptr_ring_empty(r);
spin_unlock_irq(&r->consumer_lock);
return ret;
}
static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
unsigned long flags;
bool ret;
spin_lock_irqsave(&r->consumer_lock, flags);
ret = __ptr_ring_empty(r);
spin_unlock_irqrestore(&r->consumer_lock, flags);
return ret;
}
static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
bool ret;
spin_lock_bh(&r->consumer_lock);
ret = __ptr_ring_empty(r);
spin_unlock_bh(&r->consumer_lock);
return ret;
}
/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
/* Fundamentally, what we want to do is update consumer
* index and zero out the entry so producer can reuse it.
* Doing it naively at each consume would be as simple as:
* consumer = r->consumer;
* r->queue[consumer++] = NULL;
* if (unlikely(consumer >= r->size))
* consumer = 0;
* r->consumer = consumer;
* but that is suboptimal when the ring is full as producer is writing
* out new entries in the same cache line. Defer these updates until a
* batch of entries has been consumed.
*/
/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
* to work correctly.
*/
int consumer_head = r->consumer_head;
int head = consumer_head++;
/* Once we have processed enough entries invalidate them in
* the ring all at once so producer can reuse their space in the ring.
* We also do this when we reach end of the ring - not mandatory
* but helps keep the implementation simple.
*/
if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
consumer_head >= r->size)) {
/* Zero out entries in reverse order: this way the cache line that the
* producer might currently be reading is touched last; the producer
* won't make progress and touch other cache lines besides the first
* one until we have written out all entries.
*/
while (likely(head >= r->consumer_tail))
r->queue[head--] = NULL;
r->consumer_tail = consumer_head;
}
if (unlikely(consumer_head >= r->size)) {
consumer_head = 0;
r->consumer_tail = 0;
}
/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
WRITE_ONCE(r->consumer_head, consumer_head);
}
static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
void *ptr;
ptr = __ptr_ring_peek(r);
if (ptr)
__ptr_ring_discard_one(r);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
/* The READ_ONCE in __ptr_ring_peek doesn't imply a barrier on old kernels. */
smp_read_barrier_depends();
#endif
return ptr;
}
static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
void **array, int n)
{
void *ptr;
int i;
for (i = 0; i < n; i++) {
ptr = __ptr_ring_consume(r);
if (!ptr)
break;
array[i] = ptr;
}
return i;
}
/*
* Note: resize (below) nests producer lock within consumer lock, so if you
* call this in interrupt or BH context, you must disable interrupts/BH when
* producing.
*/
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
void *ptr;
spin_lock(&r->consumer_lock);
ptr = __ptr_ring_consume(r);
spin_unlock(&r->consumer_lock);
return ptr;
}
static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
void *ptr;
spin_lock_irq(&r->consumer_lock);
ptr = __ptr_ring_consume(r);
spin_unlock_irq(&r->consumer_lock);
return ptr;
}
static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
unsigned long flags;
void *ptr;
spin_lock_irqsave(&r->consumer_lock, flags);
ptr = __ptr_ring_consume(r);
spin_unlock_irqrestore(&r->consumer_lock, flags);
return ptr;
}
static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
void *ptr;
spin_lock_bh(&r->consumer_lock);
ptr = __ptr_ring_consume(r);
spin_unlock_bh(&r->consumer_lock);
return ptr;
}
static inline int ptr_ring_consume_batched(struct ptr_ring *r,
void **array, int n)
{
int ret;
spin_lock(&r->consumer_lock);
ret = __ptr_ring_consume_batched(r, array, n);
spin_unlock(&r->consumer_lock);
return ret;
}
static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
void **array, int n)
{
int ret;
spin_lock_irq(&r->consumer_lock);
ret = __ptr_ring_consume_batched(r, array, n);
spin_unlock_irq(&r->consumer_lock);
return ret;
}
static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
void **array, int n)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&r->consumer_lock, flags);
ret = __ptr_ring_consume_batched(r, array, n);
spin_unlock_irqrestore(&r->consumer_lock, flags);
return ret;
}
static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
void **array, int n)
{
int ret;
spin_lock_bh(&r->consumer_lock);
ret = __ptr_ring_consume_batched(r, array, n);
spin_unlock_bh(&r->consumer_lock);
return ret;
}
/* Cast to structure type and call a function without discarding from FIFO.
* Function must return a value.
* Callers must take consumer_lock.
*/
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))
#define PTR_RING_PEEK_CALL(r, f) ({ \
typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
\
spin_lock(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
spin_unlock(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v; \
})
#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
\
spin_lock_irq(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
spin_unlock_irq(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v; \
})
#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
\
spin_lock_bh(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
spin_unlock_bh(&(r)->consumer_lock); \
__PTR_RING_PEEK_CALL_v; \
})
#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
unsigned long __PTR_RING_PEEK_CALL_f;\
\
spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
__PTR_RING_PEEK_CALL_v; \
})
/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
* documentation for vmalloc for which of them are legal.
*/
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
if (size > KMALLOC_MAX_SIZE / sizeof(void *))
return NULL;
return kvmalloc(size * sizeof(void *), gfp | __GFP_ZERO);
}
static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
r->size = size;
r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
/* We need to set batch at least to 1 to make logic
* in __ptr_ring_discard_one work correctly.
* Batching too much (because ring is small) would cause a lot of
* burstiness. Needs tuning, for now disable batching.
*/
if (r->batch > r->size / 2 || !r->batch)
r->batch = 1;
}
static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
r->queue = __ptr_ring_init_queue_alloc(size, gfp);
if (!r->queue)
return -ENOMEM;
__ptr_ring_set_size(r, size);
r->producer = r->consumer_head = r->consumer_tail = 0;
spin_lock_init(&r->producer_lock);
spin_lock_init(&r->consumer_lock);
return 0;
}
/*
* Return entries into ring. Destroy entries that don't fit.
*
* Note: this is expected to be a rare slow path operation.
*
* Note: producer lock is nested within consumer lock, so if you
* resize you must make sure all uses nest correctly.
* In particular if you consume ring in interrupt or BH context, you must
* disable interrupts/BH when doing so.
*/
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
void (*destroy)(void *))
{
unsigned long flags;
int head;
spin_lock_irqsave(&r->consumer_lock, flags);
spin_lock(&r->producer_lock);
if (!r->size)
goto done;
/*
* Clean out buffered entries (for simplicity). This way following code
* can test entries for NULL and if not assume they are valid.
*/
head = r->consumer_head - 1;
while (likely(head >= r->consumer_tail))
r->queue[head--] = NULL;
r->consumer_tail = r->consumer_head;
/*
* Go over entries in batch, start moving head back and copy entries.
* Stop when we run into previously unconsumed entries.
*/
while (n) {
head = r->consumer_head - 1;
if (head < 0)
head = r->size - 1;
if (r->queue[head]) {
/* This batch entry will have to be destroyed. */
goto done;
}
r->queue[head] = batch[--n];
r->consumer_tail = head;
/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
WRITE_ONCE(r->consumer_head, head);
}
done:
/* Destroy all entries left in the batch. */
while (n)
destroy(batch[--n]);
spin_unlock(&r->producer_lock);
spin_unlock_irqrestore(&r->consumer_lock, flags);
}
static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
int size, gfp_t gfp,
void (*destroy)(void *))
{
int producer = 0;
void **old;
void *ptr;
while ((ptr = __ptr_ring_consume(r)))
if (producer < size)
queue[producer++] = ptr;
else if (destroy)
destroy(ptr);
if (producer >= size)
producer = 0;
__ptr_ring_set_size(r, size);
r->producer = producer;
r->consumer_head = 0;
r->consumer_tail = 0;
old = r->queue;
r->queue = queue;
return old;
}
/*
* Note: producer lock is nested within consumer lock, so if you
* resize you must make sure all uses nest correctly.
* In particular if you consume ring in interrupt or BH context, you must
* disable interrupts/BH when doing so.
*/
static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
void (*destroy)(void *))
{
unsigned long flags;
void **queue = __ptr_ring_init_queue_alloc(size, gfp);
void **old;
if (!queue)
return -ENOMEM;
spin_lock_irqsave(&(r)->consumer_lock, flags);
spin_lock(&(r)->producer_lock);
old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);
spin_unlock(&(r)->producer_lock);
spin_unlock_irqrestore(&(r)->consumer_lock, flags);
kvfree(old);
return 0;
}
/*
* Note: producer lock is nested within consumer lock, so if you
* resize you must make sure all uses nest correctly.
* In particular if you consume ring in interrupt or BH context, you must
* disable interrupts/BH when doing so.
*/
static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
unsigned int nrings,
int size,
gfp_t gfp, void (*destroy)(void *))
{
unsigned long flags;
void ***queues;
int i;
queues = kmalloc_array(nrings, sizeof(*queues), gfp);
if (!queues)
goto noqueues;
for (i = 0; i < nrings; ++i) {
queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
if (!queues[i])
goto nomem;
}
for (i = 0; i < nrings; ++i) {
spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
spin_lock(&(rings[i])->producer_lock);
queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
size, gfp, destroy);
spin_unlock(&(rings[i])->producer_lock);
spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
}
for (i = 0; i < nrings; ++i)
kvfree(queues[i]);
kfree(queues);
return 0;
nomem:
while (--i >= 0)
kvfree(queues[i]);
kfree(queues);
noqueues:
return -ENOMEM;
}
static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
void *ptr;
if (destroy)
while ((ptr = ptr_ring_consume(r)))
destroy(ptr);
kvfree(r->queue);
}
#endif /* _LINUX_PTR_RING_H */
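
A compact, hypothetical producer/consumer sketch of the API above, assuming process context (ptr_ring_produce() and ptr_ring_consume() here take plain spinlocks); the example_* name and the integer payload are illustrative only.

#include <linux/slab.h>
#include <linux/ptr_ring.h>

static int example_ptr_ring(void)
{
	struct ptr_ring ring;
	int *item, *got;

	if (ptr_ring_init(&ring, 64, GFP_KERNEL))
		return -ENOMEM;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (item) {
		*item = 42;
		/* NULL marks an empty slot, so only non-NULL pointers may be queued. */
		if (ptr_ring_produce(&ring, item))
			kfree(item);		/* ring full: produce returned -ENOSPC */
	}

	got = ptr_ring_consume(&ring);		/* NULL when the ring is empty */
	kfree(got);

	ptr_ring_cleanup(&ring, NULL);		/* ring already drained, no destructor needed */
	return 0;
}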

View File

@ -0,0 +1,21 @@
#ifndef _COMPAT_ASM_SIMD_H
#define _COMPAT_ASM_SIMD_H
#if defined(CONFIG_X86_64)
#include <asm/fpu/api.h>
#endif
static __must_check inline bool may_use_simd(void)
{
#if defined(CONFIG_X86_64)
return irq_fpu_usable();
#elif defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON)
return true;
#elif defined(CONFIG_ARM) && defined(CONFIG_KERNEL_MODE_NEON)
return !in_nmi() && !in_irq() && !in_serving_softirq();
#else
return false;
#endif
}
#endif

View File

@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_SIMD_H
#define _WG_SIMD_H
#include <linux/sched.h>
#include <asm/simd.h>
#if defined(CONFIG_X86_64)
#include <linux/version.h>
#include <asm/fpu/api.h>
#elif defined(CONFIG_KERNEL_MODE_NEON)
#include <asm/neon.h>
#endif
typedef enum {
HAVE_NO_SIMD = 1 << 0,
HAVE_FULL_SIMD = 1 << 1,
HAVE_SIMD_IN_USE = 1 << 31
} simd_context_t;
#define DONT_USE_SIMD ((simd_context_t []){ HAVE_NO_SIMD })
static inline void simd_get(simd_context_t *ctx)
{
*ctx = !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
}
static inline void simd_put(simd_context_t *ctx)
{
#if defined(CONFIG_X86_64)
if (*ctx & HAVE_SIMD_IN_USE)
kernel_fpu_end();
#elif defined(CONFIG_KERNEL_MODE_NEON)
if (*ctx & HAVE_SIMD_IN_USE)
kernel_neon_end();
#endif
*ctx = HAVE_NO_SIMD;
}
static inline bool simd_relax(simd_context_t *ctx)
{
#ifdef CONFIG_PREEMPT
if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
simd_put(ctx);
simd_get(ctx);
return true;
}
#endif
return false;
}
static __must_check inline bool simd_use(simd_context_t *ctx)
{
if (!(*ctx & HAVE_FULL_SIMD))
return false;
if (*ctx & HAVE_SIMD_IN_USE)
return true;
#if defined(CONFIG_X86_64)
kernel_fpu_begin();
#elif defined(CONFIG_KERNEL_MODE_NEON)
kernel_neon_begin();
#endif
*ctx |= HAVE_SIMD_IN_USE;
return true;
}
#endif /* _WG_SIMD_H */
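
The helpers above are meant to bracket a batch-processing loop: grab a SIMD context once, lazily enable the FPU/NEON on first use, and periodically relax it so long runs do not block preemption. A hedged sketch with a placeholder block size and stubbed scalar/vector paths:

#include <linux/types.h>
#include "simd.h"

static void example_process_blocks(u8 *data, size_t nblocks)
{
	simd_context_t simd_context;

	simd_get(&simd_context);
	while (nblocks--) {
		if (simd_use(&simd_context)) {
			/* FPU/NEON is now usable: take the vectorized path. */
		} else {
			/* Scalar fallback, e.g. when called from hard IRQ context. */
		}
		data += 64;			/* hypothetical 64-byte block size */
		simd_relax(&simd_context);	/* drop and re-acquire SIMD around rescheduling points */
	}
	simd_put(&simd_context);
}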

View File

@ -0,0 +1,140 @@
/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This file is provided under a dual BSD/GPLv2 license.
*
* SipHash: a fast short-input PRF
* https://131002.net/siphash/
*
* This implementation is specifically for SipHash2-4 for a secure PRF
* and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
* hashtables.
*/
#ifndef _WG_LINUX_SIPHASH_H
#define _WG_LINUX_SIPHASH_H
#include <linux/types.h>
#include <linux/kernel.h>
#define SIPHASH_ALIGNMENT __alignof__(u64)
typedef struct {
u64 key[2];
} siphash_key_t;
u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
#endif
u64 siphash_1u64(const u64 a, const siphash_key_t *key);
u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
const siphash_key_t *key);
u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
const siphash_key_t *key);
u64 siphash_1u32(const u32 a, const siphash_key_t *key);
u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
const siphash_key_t *key);
static inline u64 siphash_2u32(const u32 a, const u32 b,
const siphash_key_t *key)
{
return siphash_1u64((u64)b << 32 | a, key);
}
static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
const u32 d, const siphash_key_t *key)
{
return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
}
static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
const siphash_key_t *key)
{
if (__builtin_constant_p(len) && len == 4)
return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
if (__builtin_constant_p(len) && len == 8)
return siphash_1u64(le64_to_cpu(data[0]), key);
if (__builtin_constant_p(len) && len == 16)
return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
key);
if (__builtin_constant_p(len) && len == 24)
return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
le64_to_cpu(data[2]), key);
if (__builtin_constant_p(len) && len == 32)
return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
le64_to_cpu(data[2]), le64_to_cpu(data[3]),
key);
return __siphash_aligned(data, len, key);
}
/**
* siphash - compute 64-bit siphash PRF value
* @data: buffer to hash
* @len: length of @data
* @key: the siphash key
*/
static inline u64 siphash(const void *data, size_t len,
const siphash_key_t *key)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
return __siphash_unaligned(data, len, key);
#endif
return ___siphash_aligned(data, len, key);
}
#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
typedef struct {
unsigned long key[2];
} hsiphash_key_t;
u32 __hsiphash_aligned(const void *data, size_t len,
const hsiphash_key_t *key);
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
const hsiphash_key_t *key);
#endif
u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
const hsiphash_key_t *key);
u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
const hsiphash_key_t *key);
static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
const hsiphash_key_t *key)
{
if (__builtin_constant_p(len) && len == 4)
return hsiphash_1u32(le32_to_cpu(data[0]), key);
if (__builtin_constant_p(len) && len == 8)
return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
key);
if (__builtin_constant_p(len) && len == 12)
return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
le32_to_cpu(data[2]), key);
if (__builtin_constant_p(len) && len == 16)
return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
le32_to_cpu(data[2]), le32_to_cpu(data[3]),
key);
return __hsiphash_aligned(data, len, key);
}
/**
* hsiphash - compute 32-bit hsiphash PRF value
* @data: buffer to hash
* @len: length of @data
* @key: the hsiphash key
*/
static inline u32 hsiphash(const void *data, size_t len,
const hsiphash_key_t *key)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
return __hsiphash_unaligned(data, len, key);
#endif
return ___hsiphash_aligned(data, len, key);
}
#endif /* _WG_LINUX_SIPHASH_H */
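
A short, hedged example of the intended use: a randomly keyed SipHash over an IPv4 flow tuple, e.g. for hashtable bucketing. The example_* names are illustrative, and the include path may differ in this compat build.

#include <linux/cache.h>
#include <linux/random.h>
#include <linux/siphash.h>

static siphash_key_t example_key __read_mostly;

static void example_key_init(void)
{
	get_random_bytes(&example_key, sizeof(example_key));
}

/* Hash an IPv4 flow tuple into 64 bits using the fixed-length fast path. */
static u64 example_flow_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport)
{
	return siphash_3u32((__force u32)saddr, (__force u32)daddr,
			    ((u32)(__force u16)sport << 16) | (__force u16)dport,
			    &example_key);
}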

View File

@ -0,0 +1,539 @@
/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This file is provided under a dual BSD/GPLv2 license.
*
* SipHash: a fast short-input PRF
* https://131002.net/siphash/
*
* This implementation is specifically for SipHash2-4 for a secure PRF
* and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
* hashtables.
*/
#include <linux/siphash.h>
#include <asm/unaligned.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
#ifdef __LITTLE_ENDIAN
#define bytemask_from_count(cnt) (~(~0ul << (cnt)*8))
#else
#define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8))
#endif
#endif
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
#include <linux/dcache.h>
#include <asm/word-at-a-time.h>
#endif
#define SIPROUND \
do { \
v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
} while (0)
#define PREAMBLE(len) \
u64 v0 = 0x736f6d6570736575ULL; \
u64 v1 = 0x646f72616e646f6dULL; \
u64 v2 = 0x6c7967656e657261ULL; \
u64 v3 = 0x7465646279746573ULL; \
u64 b = ((u64)(len)) << 56; \
v3 ^= key->key[1]; \
v2 ^= key->key[0]; \
v1 ^= key->key[1]; \
v0 ^= key->key[0];
#define POSTAMBLE \
v3 ^= b; \
SIPROUND; \
SIPROUND; \
v0 ^= b; \
v2 ^= 0xff; \
SIPROUND; \
SIPROUND; \
SIPROUND; \
SIPROUND; \
return (v0 ^ v1) ^ (v2 ^ v3);
u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u64));
const u8 left = len & (sizeof(u64) - 1);
u64 m;
PREAMBLE(len)
for (; data != end; data += sizeof(u64)) {
m = le64_to_cpup(data);
v3 ^= m;
SIPROUND;
SIPROUND;
v0 ^= m;
}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
if (left)
b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
bytemask_from_count(left)));
#else
switch (left) {
case 7: b |= ((u64)end[6]) << 48;
case 6: b |= ((u64)end[5]) << 40;
case 5: b |= ((u64)end[4]) << 32;
case 4: b |= le32_to_cpup(data); break;
case 3: b |= ((u64)end[2]) << 16;
case 2: b |= le16_to_cpup(data); break;
case 1: b |= end[0];
}
#endif
POSTAMBLE
}
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u64));
const u8 left = len & (sizeof(u64) - 1);
u64 m;
PREAMBLE(len)
for (; data != end; data += sizeof(u64)) {
m = get_unaligned_le64(data);
v3 ^= m;
SIPROUND;
SIPROUND;
v0 ^= m;
}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
if (left)
b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
bytemask_from_count(left)));
#else
switch (left) {
case 7: b |= ((u64)end[6]) << 48;
case 6: b |= ((u64)end[5]) << 40;
case 5: b |= ((u64)end[4]) << 32;
case 4: b |= get_unaligned_le32(end); break;
case 3: b |= ((u64)end[2]) << 16;
case 2: b |= get_unaligned_le16(end); break;
case 1: b |= end[0];
}
#endif
POSTAMBLE
}
#endif
/**
* siphash_1u64 - compute 64-bit siphash PRF value of a u64
* @first: first u64
* @key: the siphash key
*/
u64 siphash_1u64(const u64 first, const siphash_key_t *key)
{
PREAMBLE(8)
v3 ^= first;
SIPROUND;
SIPROUND;
v0 ^= first;
POSTAMBLE
}
/**
* siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
* @first: first u64
* @second: second u64
* @key: the siphash key
*/
u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
{
PREAMBLE(16)
v3 ^= first;
SIPROUND;
SIPROUND;
v0 ^= first;
v3 ^= second;
SIPROUND;
SIPROUND;
v0 ^= second;
POSTAMBLE
}
/**
* siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
* @first: first u64
* @second: second u64
* @third: third u64
* @key: the siphash key
*/
u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
const siphash_key_t *key)
{
PREAMBLE(24)
v3 ^= first;
SIPROUND;
SIPROUND;
v0 ^= first;
v3 ^= second;
SIPROUND;
SIPROUND;
v0 ^= second;
v3 ^= third;
SIPROUND;
SIPROUND;
v0 ^= third;
POSTAMBLE
}
/**
* siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
* @first: first u64
* @second: second u64
* @third: third u64
* @forth: fourth u64
* @key: the siphash key
*/
u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
const u64 forth, const siphash_key_t *key)
{
PREAMBLE(32)
v3 ^= first;
SIPROUND;
SIPROUND;
v0 ^= first;
v3 ^= second;
SIPROUND;
SIPROUND;
v0 ^= second;
v3 ^= third;
SIPROUND;
SIPROUND;
v0 ^= third;
v3 ^= forth;
SIPROUND;
SIPROUND;
v0 ^= forth;
POSTAMBLE
}
u64 siphash_1u32(const u32 first, const siphash_key_t *key)
{
PREAMBLE(4)
b |= first;
POSTAMBLE
}
u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
const siphash_key_t *key)
{
u64 combined = (u64)second << 32 | first;
PREAMBLE(12)
v3 ^= combined;
SIPROUND;
SIPROUND;
v0 ^= combined;
b |= third;
POSTAMBLE
}
#if BITS_PER_LONG == 64
/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
* performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
*/
#define HSIPROUND SIPROUND
#define HPREAMBLE(len) PREAMBLE(len)
#define HPOSTAMBLE \
v3 ^= b; \
HSIPROUND; \
v0 ^= b; \
v2 ^= 0xff; \
HSIPROUND; \
HSIPROUND; \
HSIPROUND; \
return (v0 ^ v1) ^ (v2 ^ v3);
u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u64));
const u8 left = len & (sizeof(u64) - 1);
u64 m;
HPREAMBLE(len)
for (; data != end; data += sizeof(u64)) {
m = le64_to_cpup(data);
v3 ^= m;
HSIPROUND;
v0 ^= m;
}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
if (left)
b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
bytemask_from_count(left)));
#else
switch (left) {
case 7: b |= ((u64)end[6]) << 48;
case 6: b |= ((u64)end[5]) << 40;
case 5: b |= ((u64)end[4]) << 32;
case 4: b |= le32_to_cpup(data); break;
case 3: b |= ((u64)end[2]) << 16;
case 2: b |= le16_to_cpup(data); break;
case 1: b |= end[0];
}
#endif
HPOSTAMBLE
}
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
const hsiphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u64));
const u8 left = len & (sizeof(u64) - 1);
u64 m;
HPREAMBLE(len)
for (; data != end; data += sizeof(u64)) {
m = get_unaligned_le64(data);
v3 ^= m;
HSIPROUND;
v0 ^= m;
}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
if (left)
b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
bytemask_from_count(left)));
#else
switch (left) {
case 7: b |= ((u64)end[6]) << 48;
case 6: b |= ((u64)end[5]) << 40;
case 5: b |= ((u64)end[4]) << 32;
case 4: b |= get_unaligned_le32(end); break;
case 3: b |= ((u64)end[2]) << 16;
case 2: b |= get_unaligned_le16(end); break;
case 1: b |= end[0];
}
#endif
HPOSTAMBLE
}
#endif
/**
* hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
* @first: first u32
* @key: the hsiphash key
*/
u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
{
HPREAMBLE(4)
b |= first;
HPOSTAMBLE
}
/**
* hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
* @first: first u32
* @second: second u32
* @key: the hsiphash key
*/
u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
{
u64 combined = (u64)second << 32 | first;
HPREAMBLE(8)
v3 ^= combined;
HSIPROUND;
v0 ^= combined;
HPOSTAMBLE
}
/**
* hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
* @first: first u32
* @second: second u32
* @third: third u32
* @key: the hsiphash key
*/
u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
const hsiphash_key_t *key)
{
u64 combined = (u64)second << 32 | first;
HPREAMBLE(12)
v3 ^= combined;
HSIPROUND;
v0 ^= combined;
b |= third;
HPOSTAMBLE
}
/**
* hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
* @first: first u32
* @second: second u32
* @third: third u32
* @forth: fourth u32
* @key: the hsiphash key
*/
u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
const u32 forth, const hsiphash_key_t *key)
{
u64 combined = (u64)second << 32 | first;
HPREAMBLE(16)
v3 ^= combined;
HSIPROUND;
v0 ^= combined;
combined = (u64)forth << 32 | third;
v3 ^= combined;
HSIPROUND;
v0 ^= combined;
HPOSTAMBLE
}
#else
#define HSIPROUND \
do { \
v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
} while (0)
#define HPREAMBLE(len) \
u32 v0 = 0; \
u32 v1 = 0; \
u32 v2 = 0x6c796765U; \
u32 v3 = 0x74656462U; \
u32 b = ((u32)(len)) << 24; \
v3 ^= key->key[1]; \
v2 ^= key->key[0]; \
v1 ^= key->key[1]; \
v0 ^= key->key[0];
#define HPOSTAMBLE \
v3 ^= b; \
HSIPROUND; \
v0 ^= b; \
v2 ^= 0xff; \
HSIPROUND; \
HSIPROUND; \
HSIPROUND; \
return v1 ^ v3;
u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u32));
const u8 left = len & (sizeof(u32) - 1);
u32 m;
HPREAMBLE(len)
for (; data != end; data += sizeof(u32)) {
m = le32_to_cpup(data);
v3 ^= m;
HSIPROUND;
v0 ^= m;
}
switch (left) {
case 3: b |= ((u32)end[2]) << 16;
case 2: b |= le16_to_cpup(data); break;
case 1: b |= end[0];
}
HPOSTAMBLE
}
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
const hsiphash_key_t *key)
{
const u8 *end = data + len - (len % sizeof(u32));
const u8 left = len & (sizeof(u32) - 1);
u32 m;
HPREAMBLE(len)
for (; data != end; data += sizeof(u32)) {
m = get_unaligned_le32(data);
v3 ^= m;
HSIPROUND;
v0 ^= m;
}
switch (left) {
case 3: b |= ((u32)end[2]) << 16;
case 2: b |= get_unaligned_le16(end); break;
case 1: b |= end[0];
}
HPOSTAMBLE
}
#endif
/**
* hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
* @first: first u32
* @key: the hsiphash key
*/
u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
{
HPREAMBLE(4)
v3 ^= first;
HSIPROUND;
v0 ^= first;
HPOSTAMBLE
}
/**
* hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
* @first: first u32
* @second: second u32
* @key: the hsiphash key
*/
u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
{
HPREAMBLE(8)
v3 ^= first;
HSIPROUND;
v0 ^= first;
v3 ^= second;
HSIPROUND;
v0 ^= second;
HPOSTAMBLE
}
/**
* hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
* @first: first u32
* @second: second u32
* @third: third u32
* @key: the hsiphash key
*/
u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
const hsiphash_key_t *key)
{
HPREAMBLE(12)
v3 ^= first;
HSIPROUND;
v0 ^= first;
v3 ^= second;
HSIPROUND;
v0 ^= second;
v3 ^= third;
HSIPROUND;
v0 ^= third;
HPOSTAMBLE
}
/**
* hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
* @first: first u32
* @second: second u32
* @third: third u32
* @forth: fourth u32
* @key: the hsiphash key
*/
u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
const u32 forth, const hsiphash_key_t *key)
{
HPREAMBLE(16)
v3 ^= first;
HSIPROUND;
v0 ^= first;
v3 ^= second;
HSIPROUND;
v0 ^= second;
v3 ^= third;
HSIPROUND;
v0 ^= third;
v3 ^= forth;
HSIPROUND;
v0 ^= forth;
HPOSTAMBLE
}
#endif

View File

@ -0,0 +1,94 @@
#ifndef _WG_NET_UDP_TUNNEL_H
#define _WG_NET_UDP_TUNNEL_H
#include <net/ip_tunnels.h>
#include <net/udp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
#endif
struct udp_port_cfg {
u8 family;
/* Used only for kernel-created sockets */
union {
struct in_addr local_ip;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr local_ip6;
#endif
};
union {
struct in_addr peer_ip;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr peer_ip6;
#endif
};
__be16 local_udp_port;
__be16 peer_udp_port;
unsigned int use_udp_checksums:1,
use_udp6_tx_checksums:1,
use_udp6_rx_checksums:1,
ipv6_v6only:1;
};
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp);
#if IS_ENABLED(CONFIG_IPV6)
int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp);
#else
static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
return 0;
}
#endif
static inline int udp_sock_create(struct net *net,
struct udp_port_cfg *cfg,
struct socket **sockp)
{
if (cfg->family == AF_INET)
return udp_sock_create4(net, cfg, sockp);
if (cfg->family == AF_INET6)
return udp_sock_create6(net, cfg, sockp);
return -EPFNOSUPPORT;
}
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
struct udp_tunnel_sock_cfg {
void *sk_user_data;
__u8 encap_type;
udp_tunnel_encap_rcv_t encap_rcv;
};
/* Setup the given (UDP) sock to receive UDP encapsulated packets */
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *sock_cfg);
/* Transmit the skb using UDP encapsulation. */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
bool xnet, bool nocheck);
#if IS_ENABLED(CONFIG_IPV6)
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck);
#endif
void udp_tunnel_sock_release(struct socket *sock);
#endif /* _WG_NET_UDP_TUNNEL_H */
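
To show how the pieces above fit together, here is a minimal, hypothetical helper that creates a kernel-owned UDP socket and attaches an encapsulation receive callback via setup_udp_tunnel_sock(); the example_* names and the encap_type value are assumptions for illustration.

#include <linux/socket.h>
#include <linux/skbuff.h>
#include "udp_tunnel.h"

static int example_encap_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Hand the decapsulated payload to the tunnel driver here. */
	kfree_skb(skb);
	return 0;
}

static int example_open_socket(struct net *net, __be16 port, struct socket **sockp)
{
	struct udp_port_cfg port_cfg = {
		.family = AF_INET,
		.local_udp_port = port,
		.use_udp_checksums = 1,
	};
	struct udp_tunnel_sock_cfg tunnel_cfg = {
		.encap_type = 1,	/* non-zero marker; the compat receive path ignores it */
		.encap_rcv = example_encap_rcv,
	};
	int err = udp_sock_create(net, &port_cfg, sockp);

	if (err)
		return err;
	setup_udp_tunnel_sock(net, *sockp, &tunnel_cfg);
	return 0;
}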

View File

@ -0,0 +1,394 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
#include <net/inet_common.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
#endif
/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
static udp_tunnel_encap_rcv_t encap_rcv = NULL;
static void __compat_sk_data_ready(struct sock *sk
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
,int unused_vulnerable_length_param
#endif
)
{
struct sk_buff *skb;
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
skb_orphan(skb);
sk_mem_reclaim(sk);
encap_rcv(sk, skb);
}
}
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
int err;
struct socket *sock = NULL;
struct sockaddr_in udp_addr;
err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
if (err < 0)
goto error;
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
sizeof(udp_addr));
if (err < 0)
goto error;
if (cfg->peer_udp_port) {
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->peer_ip;
udp_addr.sin_port = cfg->peer_udp_port;
err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
sizeof(udp_addr), 0);
if (err < 0)
goto error;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
sock->sk->sk_no_check = !cfg->use_udp_checksums;
#else
sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
#endif
*sockp = sock;
return 0;
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
*sockp = NULL;
return err;
}
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
{
inet_sk(sock->sk)->mc_loop = 0;
encap_rcv = cfg->encap_rcv;
rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data);
/* We force the cast in this awful way, due to various Android kernels
* backporting things stupidly. */
*(void **)&sock->sk->sk_data_ready = (void *)__compat_sk_data_ready;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
static inline __sum16 udp_v4_check(int len, __be32 saddr,
__be32 daddr, __wsum base)
{
return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
}
static void udp_set_csum(bool nocheck, struct sk_buff *skb,
__be32 saddr, __be32 daddr, int len)
{
struct udphdr *uh = udp_hdr(skb);
if (nocheck)
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
else if (skb_dst(skb) && skb_dst(skb)->dev &&
(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v4_check(len, saddr, daddr, 0);
} else {
__wsum csum;
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
uh->check = 0;
csum = skb_checksum(skb, 0, len, 0);
uh->check = udp_v4_check(len, saddr, daddr, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
#endif
static void __compat_fake_destructor(struct sk_buff *skb)
{
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
static void __compat_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
struct iphdr *iph;
struct pcpu_tstats *tstats = this_cpu_ptr(skb->dev->tstats);
skb_scrub_packet(skb, xnet);
skb->rxhash = 0;
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
iph->frag_off = df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53)
__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
#else
__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
#endif
iptunnel_xmit(skb, skb->dev);
u64_stats_update_begin(&tstats->syncp);
tstats->tx_bytes -= 8;
u64_stats_update_end(&tstats->syncp);
}
#define iptunnel_xmit __compat_iptunnel_xmit
#endif
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl,
__be16 df, __be16 src_port, __be16 dst_port,
bool xnet, bool nocheck)
{
struct udphdr *uh;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
struct net_device *dev = skb->dev;
int ret;
#endif
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
uh->dest = dst_port;
uh->source = src_port;
uh->len = htons(skb->len);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
udp_set_csum(nocheck, skb, src, dst, skb->len);
if (!skb->sk)
skb->sk = sk;
if (!skb->destructor)
skb->destructor = __compat_fake_destructor;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
ret =
#endif
iptunnel_xmit(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
sk,
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
dev_net(dev),
#endif
rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
, xnet
#endif
);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
if (ret)
iptunnel_xmit_stats(ret - 8, &dev->stats, dev->tstats);
#endif
}
void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/in6.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/ip6_tunnel.h>
#include <net/ip6_checksum.h>
int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
{
struct sockaddr_in6 udp6_addr;
int err;
struct socket *sock = NULL;
err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
if (err < 0)
goto error;
if (cfg->ipv6_v6only) {
int val = 1;
err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
(char *) &val, sizeof(val));
if (err < 0)
goto error;
}
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
sizeof(udp6_addr.sin6_addr));
udp6_addr.sin6_port = cfg->local_udp_port;
err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
sizeof(udp6_addr));
if (err < 0)
goto error;
if (cfg->peer_udp_port) {
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
sizeof(udp6_addr.sin6_addr));
udp6_addr.sin6_port = cfg->peer_udp_port;
err = kernel_connect(sock,
(struct sockaddr *)&udp6_addr,
sizeof(udp6_addr), 0);
}
if (err < 0)
goto error;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
sock->sk->sk_no_check = !cfg->use_udp_checksums;
#else
udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
#endif
*sockp = sock;
return 0;
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
*sockp = NULL;
return err;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
static inline __sum16 udp_v6_check(int len,
const struct in6_addr *saddr,
const struct in6_addr *daddr,
__wsum base)
{
return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
}
static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
const struct in6_addr *saddr,
const struct in6_addr *daddr, int len)
{
struct udphdr *uh = udp_hdr(skb);
if (nocheck)
uh->check = 0;
else if (skb_is_gso(skb))
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
else if (skb_dst(skb) && skb_dst(skb)->dev &&
(skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~udp_v6_check(len, saddr, daddr, 0);
} else {
__wsum csum;
BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
uh->check = 0;
csum = skb_checksum(skb, 0, len, 0);
uh->check = udp_v6_check(len, saddr, daddr, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
}
#endif
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
uh->dest = dst_port;
uh->source = src_port;
uh->len = htons(skb->len);
skb_dst_set(skb, dst);
udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
ip6h = ipv6_hdr(skb);
ip6_flow_hdr(ip6h, prio, label);
ip6h->payload_len = htons(skb->len);
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = ttl;
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
if (!skb->sk)
skb->sk = sk;
if (!skb->destructor)
skb->destructor = __compat_fake_destructor;
ip6tunnel_xmit(skb, dev);
return 0;
}
#endif

View File

@ -0,0 +1,226 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
#define udp_sock_create4 udp_sock_create
#define udp_sock_create6 udp_sock_create
#include <linux/socket.h>
#include <linux/if.h>
#include <linux/in.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/inet_common.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#endif
static inline void __compat_fake_destructor(struct sk_buff *skb)
{
}
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
struct udp_tunnel_sock_cfg {
void *sk_user_data;
__u8 encap_type;
udp_tunnel_encap_rcv_t encap_rcv;
};
/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
static udp_tunnel_encap_rcv_t encap_rcv = NULL;
static void __compat_sk_data_ready(struct sock *sk)
{
struct sk_buff *skb;
while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
skb_orphan(skb);
sk_mem_reclaim(sk);
encap_rcv(sk, skb);
}
}
static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
{
struct sock *sk = sock->sk;
inet_sk(sk)->mc_loop = 0;
encap_rcv = cfg->encap_rcv;
rcu_assign_sk_user_data(sk, cfg->sk_user_data);
sk->sk_data_ready = __compat_sk_data_ready;
}
static inline void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
kernel_sock_shutdown(sock, SHUT_RDWR);
sk_release_kernel(sock->sk);
}
static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
struct sk_buff *skb, __be32 src, __be32 dst,
__u8 tos, __u8 ttl, __be16 df, __be16 src_port,
__be16 dst_port, bool xnet)
{
struct udphdr *uh;
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
uh->dest = dst_port;
uh->source = src_port;
uh->len = htons(skb->len);
udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
tos, ttl, df, xnet);
}
#if IS_ENABLED(CONFIG_IPV6)
static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
struct sk_buff *skb, struct net_device *dev,
struct in6_addr *saddr, struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be16 src_port,
__be16 dst_port)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
struct sock *sk = sock->sk;
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
uh = udp_hdr(skb);
uh->dest = dst_port;
uh->source = src_port;
uh->len = htons(skb->len);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
| IPSKB_REROUTED);
skb_dst_set(skb, dst);
udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
&sk->sk_v6_daddr, skb->len);
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
ip6h = ipv6_hdr(skb);
ip6_flow_hdr(ip6h, prio, htonl(0));
ip6h->payload_len = htons(skb->len);
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = ttl;
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
ip6tunnel_xmit(skb, dev);
return 0;
}
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/udp.h>
#include <linux/skbuff.h>
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#if IS_ENABLED(CONFIG_IPV6)
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k);
#endif
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
static inline void __compat_fake_destructor(struct sk_buff *skb)
{
}
#endif
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#if IS_ENABLED(CONFIG_IPV6)
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0)
#endif
#else
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l)
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/skbuff.h>
#include <linux/if.h>
#include <net/udp_tunnel.h>
struct __compat_udp_port_cfg {
u8 family;
union {
struct in_addr local_ip;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr local_ip6;
#endif
};
union {
struct in_addr peer_ip;
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr peer_ip6;
#endif
};
__be16 local_udp_port;
__be16 peer_udp_port;
unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1;
};
static inline int __maybe_unused __compat_udp_sock_create(struct net *net, struct __compat_udp_port_cfg *cfg, struct socket **sockp)
{
struct udp_port_cfg old_cfg = {
.family = cfg->family,
.local_ip = cfg->local_ip,
#if IS_ENABLED(CONFIG_IPV6)
.local_ip6 = cfg->local_ip6,
#endif
.peer_ip = cfg->peer_ip,
#if IS_ENABLED(CONFIG_IPV6)
.peer_ip6 = cfg->peer_ip6,
#endif
.local_udp_port = cfg->local_udp_port,
.peer_udp_port = cfg->peer_udp_port,
.use_udp_checksums = cfg->use_udp_checksums,
.use_udp6_tx_checksums = cfg->use_udp6_tx_checksums,
.use_udp6_rx_checksums = cfg->use_udp6_rx_checksums
};
if (cfg->family == AF_INET)
return udp_sock_create4(net, &old_cfg, sockp);
#if IS_ENABLED(CONFIG_IPV6)
if (cfg->family == AF_INET6) {
int ret;
int old_bindv6only;
struct net *nobns;
if (cfg->ipv6_v6only) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
nobns = &init_net;
#else
nobns = net;
#endif
/* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible
* hack here and set the sysctl variable temporarily to something that will
* set the right option for us in sock_create. It's super racy! */
old_bindv6only = nobns->ipv6.sysctl.bindv6only;
nobns->ipv6.sysctl.bindv6only = 1;
}
ret = udp_sock_create6(net, &old_cfg, sockp);
if (cfg->ipv6_v6only)
nobns->ipv6.sysctl.bindv6only = old_bindv6only;
return ret;
}
#endif
return -EPFNOSUPPORT;
}
#define udp_port_cfg __compat_udp_port_cfg
#define udp_sock_create(a, b, c) __compat_udp_sock_create(a, b, c)
#endif
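/* Editor's note: a minimal usage sketch of the compat socket-creation path
 * above, assuming only the udp_port_cfg / udp_sock_create names defined (or
 * aliased) in this file. The function and variable names below are
 * hypothetical and not part of the original sources.
 */
static inline int example_open_udp4_socket(struct net *net, __be16 port,
                                           struct socket **sockp)
{
        struct udp_port_cfg cfg = {
                .family = AF_INET,
                .local_udp_port = port,
                .use_udp_checksums = true,
        };

        /* On kernels older than 4.3 this resolves to __compat_udp_sock_create(). */
        return udp_sock_create(net, &cfg, sockp);
}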

236
net/wireguard/cookie.c Normal file

@ -0,0 +1,236 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "cookie.h"
#include "peer.h"
#include "device.h"
#include "messages.h"
#include "ratelimiter.h"
#include "timers.h"
#include <zinc/blake2s.h>
#include <zinc/chacha20poly1305.h>
#include <net/ipv6.h>
#include <crypto/algapi.h>
void wg_cookie_checker_init(struct cookie_checker *checker,
struct wg_device *wg)
{
init_rwsem(&checker->secret_lock);
checker->secret_birthdate = ktime_get_coarse_boottime_ns();
get_random_bytes(checker->secret, NOISE_HASH_LEN);
checker->device = wg;
}
enum { COOKIE_KEY_LABEL_LEN = 8 };
static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
const u8 label[COOKIE_KEY_LABEL_LEN])
{
struct blake2s_state blake;
blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
blake2s_final(&blake, key);
}
/* Must hold peer->handshake.static_identity->lock */
void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
{
if (likely(checker->device->static_identity.has_identity)) {
precompute_key(checker->cookie_encryption_key,
checker->device->static_identity.static_public,
cookie_key_label);
precompute_key(checker->message_mac1_key,
checker->device->static_identity.static_public,
mac1_key_label);
} else {
memset(checker->cookie_encryption_key, 0,
NOISE_SYMMETRIC_KEY_LEN);
memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
}
}
void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
{
precompute_key(peer->latest_cookie.cookie_decryption_key,
peer->handshake.remote_static, cookie_key_label);
precompute_key(peer->latest_cookie.message_mac1_key,
peer->handshake.remote_static, mac1_key_label);
}
void wg_cookie_init(struct cookie *cookie)
{
memset(cookie, 0, sizeof(*cookie));
init_rwsem(&cookie->lock);
}
static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
const u8 key[NOISE_SYMMETRIC_KEY_LEN])
{
len = len - sizeof(struct message_macs) +
offsetof(struct message_macs, mac1);
blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
}
static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
const u8 cookie[COOKIE_LEN])
{
len = len - sizeof(struct message_macs) +
offsetof(struct message_macs, mac2);
blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
}
static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
struct cookie_checker *checker)
{
struct blake2s_state state;
if (wg_birthdate_has_expired(checker->secret_birthdate,
COOKIE_SECRET_MAX_AGE)) {
down_write(&checker->secret_lock);
checker->secret_birthdate = ktime_get_coarse_boottime_ns();
get_random_bytes(checker->secret, NOISE_HASH_LEN);
up_write(&checker->secret_lock);
}
down_read(&checker->secret_lock);
blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
if (skb->protocol == htons(ETH_P_IP))
blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
sizeof(struct in_addr));
else if (skb->protocol == htons(ETH_P_IPV6))
blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr));
blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
blake2s_final(&state, cookie);
up_read(&checker->secret_lock);
}
enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
struct sk_buff *skb,
bool check_cookie)
{
struct message_macs *macs = (struct message_macs *)
(skb->data + skb->len - sizeof(*macs));
enum cookie_mac_state ret;
u8 computed_mac[COOKIE_LEN];
u8 cookie[COOKIE_LEN];
ret = INVALID_MAC;
compute_mac1(computed_mac, skb->data, skb->len,
checker->message_mac1_key);
if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
goto out;
ret = VALID_MAC_BUT_NO_COOKIE;
if (!check_cookie)
goto out;
make_cookie(cookie, skb, checker);
compute_mac2(computed_mac, skb->data, skb->len, cookie);
if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
goto out;
ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
goto out;
ret = VALID_MAC_WITH_COOKIE;
out:
return ret;
}
void wg_cookie_add_mac_to_packet(void *message, size_t len,
struct wg_peer *peer)
{
struct message_macs *macs = (struct message_macs *)
((u8 *)message + len - sizeof(*macs));
down_write(&peer->latest_cookie.lock);
compute_mac1(macs->mac1, message, len,
peer->latest_cookie.message_mac1_key);
memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
peer->latest_cookie.have_sent_mac1 = true;
up_write(&peer->latest_cookie.lock);
down_read(&peer->latest_cookie.lock);
if (peer->latest_cookie.is_valid &&
!wg_birthdate_has_expired(peer->latest_cookie.birthdate,
COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
compute_mac2(macs->mac2, message, len,
peer->latest_cookie.cookie);
else
memset(macs->mac2, 0, COOKIE_LEN);
up_read(&peer->latest_cookie.lock);
}
void wg_cookie_message_create(struct message_handshake_cookie *dst,
struct sk_buff *skb, __le32 index,
struct cookie_checker *checker)
{
struct message_macs *macs = (struct message_macs *)
((u8 *)skb->data + skb->len - sizeof(*macs));
u8 cookie[COOKIE_LEN];
dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
dst->receiver_index = index;
get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
make_cookie(cookie, skb, checker);
xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
macs->mac1, COOKIE_LEN, dst->nonce,
checker->cookie_encryption_key);
}
void wg_cookie_message_consume(struct message_handshake_cookie *src,
struct wg_device *wg)
{
struct wg_peer *peer = NULL;
u8 cookie[COOKIE_LEN];
bool ret;
if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
INDEX_HASHTABLE_HANDSHAKE |
INDEX_HASHTABLE_KEYPAIR,
src->receiver_index, &peer)))
return;
down_read(&peer->latest_cookie.lock);
if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
up_read(&peer->latest_cookie.lock);
goto out;
}
ret = xchacha20poly1305_decrypt(
cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
peer->latest_cookie.cookie_decryption_key);
up_read(&peer->latest_cookie.lock);
if (ret) {
down_write(&peer->latest_cookie.lock);
memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
peer->latest_cookie.is_valid = true;
peer->latest_cookie.have_sent_mac1 = false;
up_write(&peer->latest_cookie.lock);
} else {
net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
wg->dev->name);
}
out:
wg_peer_put(peer);
}

59
net/wireguard/cookie.h Normal file

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_COOKIE_H
#define _WG_COOKIE_H
#include "messages.h"
#include <linux/rwsem.h>
struct wg_peer;
struct cookie_checker {
u8 secret[NOISE_HASH_LEN];
u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
u64 secret_birthdate;
struct rw_semaphore secret_lock;
struct wg_device *device;
};
struct cookie {
u64 birthdate;
bool is_valid;
u8 cookie[COOKIE_LEN];
bool have_sent_mac1;
u8 last_mac1_sent[COOKIE_LEN];
u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
struct rw_semaphore lock;
};
enum cookie_mac_state {
INVALID_MAC,
VALID_MAC_BUT_NO_COOKIE,
VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
VALID_MAC_WITH_COOKIE
};
void wg_cookie_checker_init(struct cookie_checker *checker,
struct wg_device *wg);
void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
void wg_cookie_init(struct cookie *cookie);
enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
struct sk_buff *skb,
bool check_cookie);
void wg_cookie_add_mac_to_packet(void *message, size_t len,
struct wg_peer *peer);
void wg_cookie_message_create(struct message_handshake_cookie *src,
struct sk_buff *skb, __le32 index,
struct cookie_checker *checker);
void wg_cookie_message_consume(struct message_handshake_cookie *src,
struct wg_device *wg);
#endif /* _WG_COOKIE_H */
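/* Editor's note: a hedged sketch of how a receive path might act on the
 * cookie_mac_state values declared above. Only the wg_cookie_* API and the
 * enum come from this header; the function name and the under_load flag
 * (passed as check_cookie) are illustrative assumptions.
 */
static inline bool example_check_handshake_macs(struct cookie_checker *checker,
                                                struct sk_buff *skb,
                                                bool under_load)
{
        switch (wg_cookie_validate_packet(checker, skb, under_load)) {
        case VALID_MAC_WITH_COOKIE:
                return true;            /* mac1 and mac2 check out: process it */
        case VALID_MAC_BUT_NO_COOKIE:
                return !under_load;     /* under load, reply with a cookie instead */
        case VALID_MAC_WITH_COOKIE_BUT_RATELIMITED:
        case INVALID_MAC:
        default:
                return false;           /* drop silently */
        }
}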


@ -0,0 +1,57 @@
ifeq ($(CONFIG_X86_64)$(if $(CONFIG_UML),y,n),yn)
CONFIG_ZINC_ARCH_X86_64 := y
endif
ifeq ($(CONFIG_ARM)$(if $(CONFIG_CPU_32v3),y,n),yn)
CONFIG_ZINC_ARCH_ARM := y
endif
ifeq ($(CONFIG_ARM64),y)
CONFIG_ZINC_ARCH_ARM64 := y
endif
ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy)
CONFIG_ZINC_ARCH_MIPS := y
endif
ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy)
CONFIG_ZINC_ARCH_MIPS64 := y
endif
zinc-y += chacha20/chacha20.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += chacha20/chacha20-x86_64.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += chacha20/chacha20-arm.o chacha20/chacha20-unrolled-arm.o
zinc-$(CONFIG_ZINC_ARCH_ARM64) += chacha20/chacha20-arm64.o
zinc-$(CONFIG_ZINC_ARCH_MIPS) += chacha20/chacha20-mips.o
AFLAGS_chacha20-mips.o += -O2 # This is required to fill the branch delay slots
zinc-y += poly1305/poly1305.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += poly1305/poly1305-x86_64.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += poly1305/poly1305-arm.o
zinc-$(CONFIG_ZINC_ARCH_ARM64) += poly1305/poly1305-arm64.o
zinc-$(CONFIG_ZINC_ARCH_MIPS) += poly1305/poly1305-mips.o
AFLAGS_poly1305-mips.o += -O2 # This is required to fill the branch delay slots
zinc-$(CONFIG_ZINC_ARCH_MIPS64) += poly1305/poly1305-mips64.o
zinc-y += chacha20poly1305.o
zinc-y += blake2s/blake2s.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += blake2s/blake2s-x86_64.o
zinc-y += curve25519/curve25519.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += curve25519/curve25519-arm.o
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
$(call if_changed,perlasm)
kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
targets := $(patsubst $(kbuild-dir)/%.pl,%.S,$(wildcard $(patsubst %.o,$(kbuild-dir)/crypto/zinc/%.pl,$(zinc-y) $(zinc-m) $(zinc-))))
# Old kernels don't set this, which causes trouble.
.SECONDARY:
wireguard-y += $(addprefix crypto/zinc/,$(zinc-y))
ccflags-y += -I$(kbuild-dir)/crypto/include
ccflags-$(CONFIG_ZINC_ARCH_X86_64) += -DCONFIG_ZINC_ARCH_X86_64
ccflags-$(CONFIG_ZINC_ARCH_ARM) += -DCONFIG_ZINC_ARCH_ARM
ccflags-$(CONFIG_ZINC_ARCH_ARM64) += -DCONFIG_ZINC_ARCH_ARM64
ccflags-$(CONFIG_ZINC_ARCH_MIPS) += -DCONFIG_ZINC_ARCH_MIPS
ccflags-$(CONFIG_ZINC_ARCH_MIPS64) += -DCONFIG_ZINC_ARCH_MIPS64
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DCONFIG_ZINC_SELFTEST


@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_BLAKE2S_H
#define _ZINC_BLAKE2S_H
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/bug.h>
enum blake2s_lengths {
BLAKE2S_BLOCK_SIZE = 64,
BLAKE2S_HASH_SIZE = 32,
BLAKE2S_KEY_SIZE = 32
};
struct blake2s_state {
u32 h[8];
u32 t[2];
u32 f[2];
u8 buf[BLAKE2S_BLOCK_SIZE];
unsigned int buflen;
unsigned int outlen;
};
void blake2s_init(struct blake2s_state *state, const size_t outlen);
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen);
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, u8 *out);
static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
const size_t outlen, const size_t inlen,
const size_t keylen)
{
struct blake2s_state state;
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
(!key && keylen)));
if (keylen)
blake2s_init_key(&state, outlen, key, keylen);
else
blake2s_init(&state, outlen);
blake2s_update(&state, in, inlen);
blake2s_final(&state, out);
}
void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
const size_t inlen, const size_t keylen);
#endif /* _ZINC_BLAKE2S_H */
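/* Editor's note: illustrative use of the one-shot helpers declared above,
 * assuming only blake2s() and blake2s_hmac() as prototyped in this header.
 * The surrounding function and buffer names are hypothetical.
 */
static inline void example_blake2s_usage(const u8 *msg, size_t msg_len,
                                         const u8 key[BLAKE2S_KEY_SIZE])
{
        u8 digest[BLAKE2S_HASH_SIZE];
        u8 mac[BLAKE2S_HASH_SIZE];

        /* Unkeyed 32-byte hash of msg. */
        blake2s(digest, msg, NULL, BLAKE2S_HASH_SIZE, msg_len, 0);

        /* Keyed, HMAC-style MAC over the same message. */
        blake2s_hmac(mac, msg, key, BLAKE2S_HASH_SIZE, msg_len, BLAKE2S_KEY_SIZE);
}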


@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CHACHA20_H
#define _ZINC_CHACHA20_H
#include <asm/unaligned.h>
#include <linux/simd.h>
#include <linux/kernel.h>
#include <linux/types.h>
enum chacha20_lengths {
CHACHA20_NONCE_SIZE = 16,
CHACHA20_KEY_SIZE = 32,
CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
CHACHA20_BLOCK_SIZE = 64,
CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
};
enum chacha20_constants { /* expand 32-byte k */
CHACHA20_CONSTANT_EXPA = 0x61707865U,
CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
CHACHA20_CONSTANT_2_BY = 0x79622d32U,
CHACHA20_CONSTANT_TE_K = 0x6b206574U
};
struct chacha20_ctx {
union {
u32 state[16];
struct {
u32 constant[4];
u32 key[8];
u32 counter[4];
};
};
};
static inline void chacha20_init(struct chacha20_ctx *ctx,
const u8 key[CHACHA20_KEY_SIZE],
const u64 nonce)
{
ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
ctx->key[0] = get_unaligned_le32(key + 0);
ctx->key[1] = get_unaligned_le32(key + 4);
ctx->key[2] = get_unaligned_le32(key + 8);
ctx->key[3] = get_unaligned_le32(key + 12);
ctx->key[4] = get_unaligned_le32(key + 16);
ctx->key[5] = get_unaligned_le32(key + 20);
ctx->key[6] = get_unaligned_le32(key + 24);
ctx->key[7] = get_unaligned_le32(key + 28);
ctx->counter[0] = 0;
ctx->counter[1] = 0;
ctx->counter[2] = nonce & U32_MAX;
ctx->counter[3] = nonce >> 32;
}
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
simd_context_t *simd_context);
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);
#endif /* _ZINC_CHACHA20_H */
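/* Editor's note: a sketch of the streaming interface above. chacha20() XORs
 * the generated keystream over src into dst and advances ctx->counter, so a
 * single context can be fed consecutive chunks. simd_get()/simd_put() come
 * from <linux/simd.h>, already included here; the function name is
 * hypothetical.
 */
static inline void example_chacha20_xor(u8 *dst, const u8 *src, u32 len,
                                        const u8 key[CHACHA20_KEY_SIZE],
                                        const u64 nonce)
{
        struct chacha20_ctx ctx;
        simd_context_t simd_context;

        chacha20_init(&ctx, key, nonce);
        simd_get(&simd_context);
        chacha20(&ctx, dst, src, len, &simd_context);
        simd_put(&simd_context);
}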


@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CHACHA20POLY1305_H
#define _ZINC_CHACHA20POLY1305_H
#include <linux/simd.h>
#include <linux/types.h>
struct scatterlist;
enum chacha20poly1305_lengths {
XCHACHA20POLY1305_NONCE_SIZE = 24,
CHACHA20POLY1305_KEY_SIZE = 32,
CHACHA20POLY1305_AUTHTAG_SIZE = 16
};
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
bool __must_check chacha20poly1305_encrypt_sg_inplace(
struct scatterlist *src, const size_t src_len, const u8 *ad,
const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
bool __must_check
chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
bool __must_check chacha20poly1305_decrypt_sg_inplace(
struct scatterlist *src, size_t src_len, const u8 *ad,
const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
bool __must_check xchacha20poly1305_decrypt(
u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
#endif /* _ZINC_CHACHA20POLY1305_H */
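/* Editor's note: a minimal sketch of the AEAD helpers declared above. Only
 * the chacha20poly1305_* calls are from this header; the wrapper and buffer
 * names are hypothetical. Note that the ciphertext buffer must hold src_len
 * plus CHACHA20POLY1305_AUTHTAG_SIZE bytes.
 */
static inline bool example_seal_then_open(u8 *ct, u8 *pt_out,
                                          const u8 *pt, const size_t pt_len,
                                          const u8 *ad, const size_t ad_len,
                                          const u64 nonce,
                                          const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
        chacha20poly1305_encrypt(ct, pt, pt_len, ad, ad_len, nonce, key);

        /* Returns false if the authentication tag does not verify. */
        return chacha20poly1305_decrypt(pt_out, ct,
                                        pt_len + CHACHA20POLY1305_AUTHTAG_SIZE,
                                        ad, ad_len, nonce, key);
}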


@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_CURVE25519_H
#define _ZINC_CURVE25519_H
#include <linux/types.h>
enum curve25519_lengths {
CURVE25519_KEY_SIZE = 32
};
bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE]);
void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]);
bool __must_check curve25519_generate_public(
u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]);
static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
{
secret[0] &= 248;
secret[31] = (secret[31] & 127) | 64;
}
#endif /* _ZINC_CURVE25519_H */
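/* Editor's note: a hedged sketch of an X25519 key exchange using only the
 * functions declared above; all variable names are hypothetical. The
 * __must_check results are assumed to flag an all-zero (invalid) output
 * point and are therefore propagated.
 */
static inline bool example_curve25519_ecdh(u8 shared[CURVE25519_KEY_SIZE])
{
        u8 secret_a[CURVE25519_KEY_SIZE], public_a[CURVE25519_KEY_SIZE];
        u8 secret_b[CURVE25519_KEY_SIZE], public_b[CURVE25519_KEY_SIZE];

        curve25519_generate_secret(secret_a);   /* random and clamped */
        curve25519_generate_secret(secret_b);
        if (!curve25519_generate_public(public_a, secret_a) ||
            !curve25519_generate_public(public_b, secret_b))
                return false;

        /* Either side computes the same shared secret from its own secret
         * key and the peer's public key. */
        return curve25519(shared, secret_a, public_b);
}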


@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_POLY1305_H
#define _ZINC_POLY1305_H
#include <linux/simd.h>
#include <linux/types.h>
enum poly1305_lengths {
POLY1305_BLOCK_SIZE = 16,
POLY1305_KEY_SIZE = 32,
POLY1305_MAC_SIZE = 16
};
struct poly1305_ctx {
u8 opaque[24 * sizeof(u64)];
u32 nonce[4];
u8 data[POLY1305_BLOCK_SIZE];
size_t num;
} __aligned(8);
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]);
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
simd_context_t *simd_context);
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
simd_context_t *simd_context);
#endif /* _ZINC_POLY1305_H */
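/* Editor's note: a sketch of the incremental MAC interface above, assuming
 * only the poly1305_* functions and the simd helpers from <linux/simd.h>.
 * The function name is hypothetical; a Poly1305 one-time key must never be
 * reused across messages.
 */
static inline void example_poly1305_mac(u8 mac[POLY1305_MAC_SIZE],
                                        const u8 *msg, size_t len,
                                        const u8 key[POLY1305_KEY_SIZE])
{
        struct poly1305_ctx ctx;
        simd_context_t simd_context;

        simd_get(&simd_context);
        poly1305_init(&ctx, key);
        poly1305_update(&ctx, msg, len, &simd_context);
        poly1305_final(&ctx, mac, &simd_context);
        simd_put(&simd_context);
}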


@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_ZINC_H
#define _WG_ZINC_H
int chacha20_mod_init(void);
int poly1305_mod_init(void);
int chacha20poly1305_mod_init(void);
int blake2s_mod_init(void);
int curve25519_mod_init(void);
#endif


@ -0,0 +1,72 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <linux/simd.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/fpu/api.h>
asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
const u8 *block, const size_t nblocks,
const u32 inc);
asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
const u8 *block, const size_t nblocks,
const u32 inc);
static bool blake2s_use_ssse3 __ro_after_init;
static bool blake2s_use_avx512 __ro_after_init;
static bool *const blake2s_nobs[] __initconst = { &blake2s_use_ssse3,
&blake2s_use_avx512 };
static void __init blake2s_fpu_init(void)
{
blake2s_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
#ifndef COMPAT_CANNOT_USE_AVX512
blake2s_use_avx512 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL);
#endif
}
static inline bool blake2s_compress_arch(struct blake2s_state *state,
const u8 *block, size_t nblocks,
const u32 inc)
{
simd_context_t simd_context;
bool used_arch = false;
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
simd_get(&simd_context);
if (!IS_ENABLED(CONFIG_AS_SSSE3) || !blake2s_use_ssse3 ||
!simd_use(&simd_context))
goto out;
used_arch = true;
for (;;) {
const size_t blocks = min_t(size_t, nblocks,
PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
if (IS_ENABLED(CONFIG_AS_AVX512) && blake2s_use_avx512)
blake2s_compress_avx512(state, block, blocks, inc);
else
blake2s_compress_ssse3(state, block, blocks, inc);
nblocks -= blocks;
if (!nblocks)
break;
block += blocks * BLAKE2S_BLOCK_SIZE;
simd_relax(&simd_context);
}
out:
simd_put(&simd_context);
return used_arch;
}


@ -0,0 +1,258 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
*/
#include <linux/linkage.h>
.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
.align 32
IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
.octa 0x5BE0CD191F83D9AB9B05688C510E527F
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
.section .rodata.cst16.ROR328, "aM", @progbits, 16
.align 16
ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
.align 64
SIGMA:
.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
#ifdef CONFIG_AS_AVX512
.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
.align 64
SIGMA2:
.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
#endif /* CONFIG_AS_AVX512 */
.text
#ifdef CONFIG_AS_SSSE3
SYM_FUNC_START(blake2s_compress_ssse3)
testq %rdx,%rdx
je .Lendofloop
movdqu (%rdi),%xmm0
movdqu 0x10(%rdi),%xmm1
movdqa ROT16(%rip),%xmm12
movdqa ROR328(%rip),%xmm13
movdqu 0x20(%rdi),%xmm14
movq %rcx,%xmm15
leaq SIGMA+0xa0(%rip),%r8
jmp .Lbeginofloop
.align 32
.Lbeginofloop:
movdqa %xmm0,%xmm10
movdqa %xmm1,%xmm11
paddq %xmm15,%xmm14
movdqa IV(%rip),%xmm2
movdqa %xmm14,%xmm3
pxor IV+0x10(%rip),%xmm3
leaq SIGMA(%rip),%rcx
.Lroundloop:
movzbl (%rcx),%eax
movd (%rsi,%rax,4),%xmm4
movzbl 0x1(%rcx),%eax
movd (%rsi,%rax,4),%xmm5
movzbl 0x2(%rcx),%eax
movd (%rsi,%rax,4),%xmm6
movzbl 0x3(%rcx),%eax
movd (%rsi,%rax,4),%xmm7
punpckldq %xmm5,%xmm4
punpckldq %xmm7,%xmm6
punpcklqdq %xmm6,%xmm4
paddd %xmm4,%xmm0
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
pshufb %xmm12,%xmm3
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm8
psrld $0xc,%xmm1
pslld $0x14,%xmm8
por %xmm8,%xmm1
movzbl 0x4(%rcx),%eax
movd (%rsi,%rax,4),%xmm5
movzbl 0x5(%rcx),%eax
movd (%rsi,%rax,4),%xmm6
movzbl 0x6(%rcx),%eax
movd (%rsi,%rax,4),%xmm7
movzbl 0x7(%rcx),%eax
movd (%rsi,%rax,4),%xmm4
punpckldq %xmm6,%xmm5
punpckldq %xmm4,%xmm7
punpcklqdq %xmm7,%xmm5
paddd %xmm5,%xmm0
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
pshufb %xmm13,%xmm3
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm8
psrld $0x7,%xmm1
pslld $0x19,%xmm8
por %xmm8,%xmm1
pshufd $0x93,%xmm0,%xmm0
pshufd $0x4e,%xmm3,%xmm3
pshufd $0x39,%xmm2,%xmm2
movzbl 0x8(%rcx),%eax
movd (%rsi,%rax,4),%xmm6
movzbl 0x9(%rcx),%eax
movd (%rsi,%rax,4),%xmm7
movzbl 0xa(%rcx),%eax
movd (%rsi,%rax,4),%xmm4
movzbl 0xb(%rcx),%eax
movd (%rsi,%rax,4),%xmm5
punpckldq %xmm7,%xmm6
punpckldq %xmm5,%xmm4
punpcklqdq %xmm4,%xmm6
paddd %xmm6,%xmm0
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
pshufb %xmm12,%xmm3
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm8
psrld $0xc,%xmm1
pslld $0x14,%xmm8
por %xmm8,%xmm1
movzbl 0xc(%rcx),%eax
movd (%rsi,%rax,4),%xmm7
movzbl 0xd(%rcx),%eax
movd (%rsi,%rax,4),%xmm4
movzbl 0xe(%rcx),%eax
movd (%rsi,%rax,4),%xmm5
movzbl 0xf(%rcx),%eax
movd (%rsi,%rax,4),%xmm6
punpckldq %xmm4,%xmm7
punpckldq %xmm6,%xmm5
punpcklqdq %xmm5,%xmm7
paddd %xmm7,%xmm0
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
pshufb %xmm13,%xmm3
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm8
psrld $0x7,%xmm1
pslld $0x19,%xmm8
por %xmm8,%xmm1
pshufd $0x39,%xmm0,%xmm0
pshufd $0x4e,%xmm3,%xmm3
pshufd $0x93,%xmm2,%xmm2
addq $0x10,%rcx
cmpq %r8,%rcx
jnz .Lroundloop
pxor %xmm2,%xmm0
pxor %xmm3,%xmm1
pxor %xmm10,%xmm0
pxor %xmm11,%xmm1
addq $0x40,%rsi
decq %rdx
jnz .Lbeginofloop
movdqu %xmm0,(%rdi)
movdqu %xmm1,0x10(%rdi)
movdqu %xmm14,0x20(%rdi)
.Lendofloop:
ret
SYM_FUNC_END(blake2s_compress_ssse3)
#endif /* CONFIG_AS_SSSE3 */
#ifdef CONFIG_AS_AVX512
SYM_FUNC_START(blake2s_compress_avx512)
vmovdqu (%rdi),%xmm0
vmovdqu 0x10(%rdi),%xmm1
vmovdqu 0x20(%rdi),%xmm4
vmovq %rcx,%xmm5
vmovdqa IV(%rip),%xmm14
vmovdqa IV+16(%rip),%xmm15
jmp .Lblake2s_compress_avx512_mainloop
.align 32
.Lblake2s_compress_avx512_mainloop:
vmovdqa %xmm0,%xmm10
vmovdqa %xmm1,%xmm11
vpaddq %xmm5,%xmm4,%xmm4
vmovdqa %xmm14,%xmm2
vpxor %xmm15,%xmm4,%xmm3
vmovdqu (%rsi),%ymm6
vmovdqu 0x20(%rsi),%ymm7
addq $0x40,%rsi
leaq SIGMA2(%rip),%rax
movb $0xa,%cl
.Lblake2s_compress_avx512_roundloop:
addq $0x40,%rax
vmovdqa -0x40(%rax),%ymm8
vmovdqa -0x20(%rax),%ymm9
vpermi2d %ymm7,%ymm6,%ymm8
vpermi2d %ymm7,%ymm6,%ymm9
vmovdqa %ymm8,%ymm6
vmovdqa %ymm9,%ymm7
vpaddd %xmm8,%xmm0,%xmm0
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
vprord $0x10,%xmm3,%xmm3
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
vprord $0xc,%xmm1,%xmm1
vextracti128 $0x1,%ymm8,%xmm8
vpaddd %xmm8,%xmm0,%xmm0
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
vprord $0x8,%xmm3,%xmm3
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
vprord $0x7,%xmm1,%xmm1
vpshufd $0x93,%xmm0,%xmm0
vpshufd $0x4e,%xmm3,%xmm3
vpshufd $0x39,%xmm2,%xmm2
vpaddd %xmm9,%xmm0,%xmm0
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
vprord $0x10,%xmm3,%xmm3
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
vprord $0xc,%xmm1,%xmm1
vextracti128 $0x1,%ymm9,%xmm9
vpaddd %xmm9,%xmm0,%xmm0
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
vprord $0x8,%xmm3,%xmm3
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
vprord $0x7,%xmm1,%xmm1
vpshufd $0x39,%xmm0,%xmm0
vpshufd $0x4e,%xmm3,%xmm3
vpshufd $0x93,%xmm2,%xmm2
decb %cl
jne .Lblake2s_compress_avx512_roundloop
vpxor %xmm10,%xmm0,%xmm0
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm2,%xmm0,%xmm0
vpxor %xmm3,%xmm1,%xmm1
decq %rdx
jne .Lblake2s_compress_avx512_mainloop
vmovdqu %xmm0,(%rdi)
vmovdqu %xmm1,0x10(%rdi)
vmovdqu %xmm4,0x20(%rdi)
vzeroupper
retq
SYM_FUNC_END(blake2s_compress_avx512)
#endif /* CONFIG_AS_AVX512 */


@ -0,0 +1,271 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is an implementation of the BLAKE2s hash and PRF functions.
*
* Information: https://blake2.net/
*
*/
#include <zinc/blake2s.h>
#include "../selftest/run.h"
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bug.h>
#include <asm/unaligned.h>
static const u32 blake2s_iv[8] = {
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const u8 blake2s_sigma[10][16] = {
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
state->f[0] = -1;
}
static inline void blake2s_increment_counter(struct blake2s_state *state,
const u32 inc)
{
state->t[0] += inc;
state->t[1] += (state->t[0] < inc);
}
static inline void blake2s_init_param(struct blake2s_state *state,
const u32 param)
{
int i;
memset(state, 0, sizeof(*state));
for (i = 0; i < 8; ++i)
state->h[i] = blake2s_iv[i];
state->h[0] ^= param;
}
void blake2s_init(struct blake2s_state *state, const size_t outlen)
{
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE));
blake2s_init_param(state, 0x01010000 | outlen);
state->outlen = outlen;
}
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
const void *key, const size_t keylen)
{
u8 block[BLAKE2S_BLOCK_SIZE] = { 0 };
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
state->outlen = outlen;
memcpy(block, key, keylen);
blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
memzero_explicit(block, BLAKE2S_BLOCK_SIZE);
}
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "blake2s-x86_64-glue.c"
#else
static bool *const blake2s_nobs[] __initconst = { };
static void __init blake2s_fpu_init(void)
{
}
static inline bool blake2s_compress_arch(struct blake2s_state *state,
const u8 *block, size_t nblocks,
const u32 inc)
{
return false;
}
#endif
static inline void blake2s_compress(struct blake2s_state *state,
const u8 *block, size_t nblocks,
const u32 inc)
{
u32 m[16];
u32 v[16];
int i;
WARN_ON(IS_ENABLED(DEBUG) &&
(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
if (blake2s_compress_arch(state, block, nblocks, inc))
return;
while (nblocks > 0) {
blake2s_increment_counter(state, inc);
memcpy(m, block, BLAKE2S_BLOCK_SIZE);
le32_to_cpu_array(m, ARRAY_SIZE(m));
memcpy(v, state->h, 32);
v[ 8] = blake2s_iv[0];
v[ 9] = blake2s_iv[1];
v[10] = blake2s_iv[2];
v[11] = blake2s_iv[3];
v[12] = blake2s_iv[4] ^ state->t[0];
v[13] = blake2s_iv[5] ^ state->t[1];
v[14] = blake2s_iv[6] ^ state->f[0];
v[15] = blake2s_iv[7] ^ state->f[1];
#define G(r, i, a, b, c, d) do { \
a += b + m[blake2s_sigma[r][2 * i + 0]]; \
d = ror32(d ^ a, 16); \
c += d; \
b = ror32(b ^ c, 12); \
a += b + m[blake2s_sigma[r][2 * i + 1]]; \
d = ror32(d ^ a, 8); \
c += d; \
b = ror32(b ^ c, 7); \
} while (0)
#define ROUND(r) do { \
G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
G(r, 2, v[2], v[ 6], v[10], v[14]); \
G(r, 3, v[3], v[ 7], v[11], v[15]); \
G(r, 4, v[0], v[ 5], v[10], v[15]); \
G(r, 5, v[1], v[ 6], v[11], v[12]); \
G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
} while (0)
ROUND(0);
ROUND(1);
ROUND(2);
ROUND(3);
ROUND(4);
ROUND(5);
ROUND(6);
ROUND(7);
ROUND(8);
ROUND(9);
#undef G
#undef ROUND
for (i = 0; i < 8; ++i)
state->h[i] ^= v[i] ^ v[i + 8];
block += BLAKE2S_BLOCK_SIZE;
--nblocks;
}
}
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
if (unlikely(!inlen))
return;
if (inlen > fill) {
memcpy(state->buf + state->buflen, in, fill);
blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
state->buflen = 0;
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
/* Hash one less (full) block than strictly possible */
blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
memcpy(state->buf + state->buflen, in, inlen);
state->buflen += inlen;
}
void blake2s_final(struct blake2s_state *state, u8 *out)
{
WARN_ON(IS_ENABLED(DEBUG) && !out);
blake2s_set_lastblock(state);
memset(state->buf + state->buflen, 0,
BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
blake2s_compress(state, state->buf, 1, state->buflen);
cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
memcpy(out, state->h, state->outlen);
memzero_explicit(state, sizeof(*state));
}
void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
const size_t inlen, const size_t keylen)
{
struct blake2s_state state;
u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
int i;
if (keylen > BLAKE2S_BLOCK_SIZE) {
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, key, keylen);
blake2s_final(&state, x_key);
} else
memcpy(x_key, key, keylen);
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x36;
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
blake2s_update(&state, in, inlen);
blake2s_final(&state, i_hash);
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x5c ^ 0x36;
blake2s_init(&state, BLAKE2S_HASH_SIZE);
blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
blake2s_final(&state, i_hash);
memcpy(out, i_hash, outlen);
memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
}
#include "../selftest/blake2s.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init blake2s_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
blake2s_fpu_init();
if (!selftest_run("blake2s", blake2s_selftest, blake2s_nobs,
ARRAY_SIZE(blake2s_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

View File

@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#if defined(CONFIG_ZINC_ARCH_ARM)
#include <asm/system_info.h>
#include <asm/cputype.h>
#endif
asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]);
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
static bool chacha20_use_neon __ro_after_init;
static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon };
static void __init chacha20_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
chacha20_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
case ARM_CPU_PART_CORTEX_A5:
/* The Cortex-A7 and Cortex-A5 do not perform well with the NEON
* implementation but do incredibly well with the scalar one and
* use less power.
*/
break;
default:
chacha20_use_neon = elf_hwcap & HWCAP_NEON;
}
#endif
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
for (;;) {
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
chacha20_neon(dst, src, bytes, ctx->key, ctx->counter);
ctx->counter[0] += (bytes + 63) / 64;
len -= bytes;
if (!len)
break;
dst += bytes;
src += bytes;
simd_relax(simd_context);
} else {
chacha20_arm(dst, src, len, ctx->key, ctx->counter);
ctx->counter[0] += (len + 63) / 64;
break;
}
}
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM)) {
u32 x[] = { CHACHA20_CONSTANT_EXPA,
CHACHA20_CONSTANT_ND_3,
CHACHA20_CONSTANT_2_BY,
CHACHA20_CONSTANT_TE_K,
get_unaligned_le32(key + 0),
get_unaligned_le32(key + 4),
get_unaligned_le32(key + 8),
get_unaligned_le32(key + 12),
get_unaligned_le32(key + 16),
get_unaligned_le32(key + 20),
get_unaligned_le32(key + 24),
get_unaligned_le32(key + 28),
get_unaligned_le32(nonce + 0),
get_unaligned_le32(nonce + 4),
get_unaligned_le32(nonce + 8),
get_unaligned_le32(nonce + 12)
};
hchacha20_arm(x, derived_key);
return true;
}
return false;
}

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
const size_t len);
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
chacha20_mips(ctx->state, dst, src, len);
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
return false;
}


@ -0,0 +1,424 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#define MASK_U32 0x3c
#define CHACHA20_BLOCK_SIZE 64
#define STACK_SIZE 32
#define X0 $t0
#define X1 $t1
#define X2 $t2
#define X3 $t3
#define X4 $t4
#define X5 $t5
#define X6 $t6
#define X7 $t7
#define X8 $t8
#define X9 $t9
#define X10 $v1
#define X11 $s6
#define X12 $s5
#define X13 $s4
#define X14 $s3
#define X15 $s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0 $s1
#define T1 $s0
#define T(n) T ## n
#define X(n) X ## n
/* Input arguments */
#define STATE $a0
#define OUT $a1
#define IN $a2
#define BYTES $a3
/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
* We don't want to touch the original value in memory.
* It must be incremented every loop iteration.
*/
#define NONCE_0 $v0
/* SAVED_X and SAVED_CA are set in the jump table.
* Use regs which are overwritten on exit so we don't leak clear data.
* They are used to handle the last bytes, which are not a multiple of 4.
*/
#define SAVED_X X15
#define SAVED_CA $s7
#define IS_UNALIGNED $s7
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
wsbh n; \
rotr n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif
#define FOR_EACH_WORD(x) \
x( 0); \
x( 1); \
x( 2); \
x( 3); \
x( 4); \
x( 5); \
x( 6); \
x( 7); \
x( 8); \
x( 9); \
x(10); \
x(11); \
x(12); \
x(13); \
x(14); \
x(15);
#define FOR_EACH_WORD_REV(x) \
x(15); \
x(14); \
x(13); \
x(12); \
x(11); \
x(10); \
x( 9); \
x( 8); \
x( 7); \
x( 6); \
x( 5); \
x( 4); \
x( 3); \
x( 2); \
x( 1); \
x( 0);
#define PLUS_ONE_0 1
#define PLUS_ONE_1 2
#define PLUS_ONE_2 3
#define PLUS_ONE_3 4
#define PLUS_ONE_4 5
#define PLUS_ONE_5 6
#define PLUS_ONE_6 7
#define PLUS_ONE_7 8
#define PLUS_ONE_8 9
#define PLUS_ONE_9 10
#define PLUS_ONE_10 11
#define PLUS_ONE_11 12
#define PLUS_ONE_12 13
#define PLUS_ONE_13 14
#define PLUS_ONE_14 15
#define PLUS_ONE_15 16
#define PLUS_ONE(x) PLUS_ONE_ ## x
#define _CONCAT3(a,b,c) a ## b ## c
#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
lwl T1, (x*4)+MSB ## (IN); \
lwr T1, (x*4)+LSB ## (IN); \
.if (x == 12); \
addu X ## x, NONCE_0; \
.else; \
addu X ## x, T0; \
.endif; \
CPU_TO_LE32(X ## x); \
xor X ## x, T1; \
swl X ## x, (x*4)+MSB ## (OUT); \
swr X ## x, (x*4)+LSB ## (OUT);
#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
.if (x != 12); \
lw T0, (x*4)(STATE); \
.endif; \
lw T1, (x*4) ## (IN); \
.if (x == 12); \
addu X ## x, NONCE_0; \
.else; \
addu X ## x, T0; \
.endif; \
CPU_TO_LE32(X ## x); \
xor X ## x, T1; \
sw X ## x, (x*4) ## (OUT);
/* Jump table macro.
* Used for setup and for handling the last bytes, which are not a multiple of 4.
* X15 is free to store Xn.
* Every jump table entry must be equal in size.
*/
#define JMPTBL_ALIGNED(x) \
.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
.set noreorder; \
b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
addu SAVED_X, X ## x, SAVED_CA; \
.endif; \
.set reorder
#define JMPTBL_UNALIGNED(x) \
.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
.set noreorder; \
b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
.if (x == 12); \
addu SAVED_X, X ## x, NONCE_0; \
.else; \
addu SAVED_X, X ## x, SAVED_CA; \
.endif; \
.set reorder
#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
addu X(A), X(K); \
addu X(B), X(L); \
addu X(C), X(M); \
addu X(D), X(N); \
xor X(V), X(A); \
xor X(W), X(B); \
xor X(Y), X(C); \
xor X(Z), X(D); \
rotl X(V), S; \
rotl X(W), S; \
rotl X(Y), S; \
rotl X(Z), S;
.text
.set reorder
.set noat
.globl chacha20_mips
.ent chacha20_mips
chacha20_mips:
.frame $sp, STACK_SIZE, $ra
addiu $sp, -STACK_SIZE
/* Return if BYTES == 0. */
beqz BYTES, .Lchacha20_mips_end
lw NONCE_0, 48(STATE)
/* Save s0-s7 */
sw $s0, 0($sp)
sw $s1, 4($sp)
sw $s2, 8($sp)
sw $s3, 12($sp)
sw $s4, 16($sp)
sw $s5, 20($sp)
sw $s6, 24($sp)
sw $s7, 28($sp)
/* Test whether IN or OUT is unaligned.
* IS_UNALIGNED = ( IN | OUT ) & 0x00000003
*/
or IS_UNALIGNED, IN, OUT
andi IS_UNALIGNED, 0x3
/* Set number of rounds */
li $at, 20
b .Lchacha20_rounds_start
.align 4
.Loop_chacha20_rounds:
addiu IN, CHACHA20_BLOCK_SIZE
addiu OUT, CHACHA20_BLOCK_SIZE
addiu NONCE_0, 1
.Lchacha20_rounds_start:
lw X0, 0(STATE)
lw X1, 4(STATE)
lw X2, 8(STATE)
lw X3, 12(STATE)
lw X4, 16(STATE)
lw X5, 20(STATE)
lw X6, 24(STATE)
lw X7, 28(STATE)
lw X8, 32(STATE)
lw X9, 36(STATE)
lw X10, 40(STATE)
lw X11, 44(STATE)
move X12, NONCE_0
lw X13, 52(STATE)
lw X14, 56(STATE)
lw X15, 60(STATE)
.Loop_chacha20_xor_rounds:
addiu $at, -2
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
bnez $at, .Loop_chacha20_xor_rounds
addiu BYTES, -(CHACHA20_BLOCK_SIZE)
/* Is data src/dst unaligned? Jump */
bnez IS_UNALIGNED, .Loop_chacha20_unaligned
/* Set number of rounds here to fill the delay slot. */
li $at, 20
/* BYTES < 0, it has no full block. */
bltz BYTES, .Lchacha20_mips_no_full_block_aligned
FOR_EACH_WORD_REV(STORE_ALIGNED)
/* BYTES > 0? Loop again. */
bgtz BYTES, .Loop_chacha20_rounds
/* Place this here to fill delay slot */
addiu NONCE_0, 1
/* BYTES < 0? Handle last bytes */
bltz BYTES, .Lchacha20_mips_xor_bytes
.Lchacha20_mips_xor_done:
/* Restore used registers */
lw $s0, 0($sp)
lw $s1, 4($sp)
lw $s2, 8($sp)
lw $s3, 12($sp)
lw $s4, 16($sp)
lw $s5, 20($sp)
lw $s6, 24($sp)
lw $s7, 28($sp)
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.Lchacha20_mips_end:
addiu $sp, STACK_SIZE
jr $ra
.Lchacha20_mips_no_full_block_aligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
/* Get number of full WORDS */
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
/* Add offset to STATE */
addu T1, STATE, $at
/* Add lower half jump table addr */
addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
/* Store remaining bytecounter as negative value */
subu BYTES, $at, BYTES
jr T0
/* Jump table */
FOR_EACH_WORD(JMPTBL_ALIGNED)
.Loop_chacha20_unaligned:
/* Set number of rounds here to fill the delay slot. */
li $at, 20
/* BYTES < 0, it has no full block. */
bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
FOR_EACH_WORD_REV(STORE_UNALIGNED)
/* BYTES > 0? Loop again. */
bgtz BYTES, .Loop_chacha20_rounds
/* Write NONCE_0 back to right location in state */
sw NONCE_0, 48(STATE)
.set noreorder
/* Fall through to byte handling */
bgez BYTES, .Lchacha20_mips_xor_done
.Lchacha20_mips_xor_unaligned_0_b:
.Lchacha20_mips_xor_aligned_0_b:
/* Place this here to fill delay slot */
addiu NONCE_0, 1
.set reorder
.Lchacha20_mips_xor_bytes:
addu IN, $at
addu OUT, $at
/* First byte */
lbu T1, 0(IN)
addiu $at, BYTES, 1
CPU_TO_LE32(SAVED_X)
ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
beqz $at, .Lchacha20_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
beqz $at, .Lchacha20_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
ROTx SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
b .Lchacha20_mips_xor_done
.Lchacha20_mips_no_full_block_unaligned:
/* Restore the offset on BYTES */
addiu BYTES, CHACHA20_BLOCK_SIZE
/* Get number of full WORDS */
andi $at, BYTES, MASK_U32
/* Load upper half of jump table addr */
lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
/* Calculate lower half jump table offset */
ins T0, $at, 1, 6
/* Add offset to STATE */
addu T1, STATE, $at
/* Add lower half jump table addr */
addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
/* Read value from STATE */
lw SAVED_CA, 0(T1)
/* Store remaining bytecounter as negative value */
subu BYTES, $at, BYTES
jr T0
/* Jump table */
FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha20_mips
.set at


@ -0,0 +1,461 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018 Google, Inc.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* Design notes:
*
* 16 registers would be needed to hold the state matrix, but only 14 are
* available because 'sp' and 'pc' cannot be used. So we spill the elements
* (x8, x9) to the stack and swap them out with (x10, x11). This adds one
* 'ldrd' and one 'strd' instruction per round.
*
* All rotates are performed using the implicit rotate operand accepted by the
* 'add' and 'eor' instructions. This is faster than using explicit rotate
* instructions. To make this work, we allow the values in the second and last
* rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
* wrong rotation amount. The rotation amount is then fixed up just in time
* when the values are used. 'brot' is the number of bits the values in row 'b'
* need to be rotated right to arrive at the correct values, and 'drot'
* similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
* that they end up as (25, 24) after every round.
*/
// ChaCha state registers
X0 .req r0
X1 .req r1
X2 .req r2
X3 .req r3
X4 .req r4
X5 .req r5
X6 .req r6
X7 .req r7
X8_X10 .req r8 // shared by x8 and x10
X9_X11 .req r9 // shared by x9 and x11
X12 .req r10
X13 .req r11
X14 .req r12
X15 .req r14
.Lexpand_32byte_k:
// "expand 32-byte k"
.word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
#ifdef __thumb2__
# define adrl adr
#endif
.macro __rev out, in, t0, t1, t2
.if __LINUX_ARM_ARCH__ >= 6
rev \out, \in
.else
lsl \t0, \in, #24
and \t1, \in, #0xff00
and \t2, \in, #0xff0000
orr \out, \t0, \in, lsr #24
orr \out, \out, \t1, lsl #8
orr \out, \out, \t2, lsr #8
.endif
.endm
.macro _le32_bswap x, t0, t1, t2
#ifdef __ARMEB__
__rev \x, \x, \t0, \t1, \t2
#endif
.endm
.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
_le32_bswap \a, \t0, \t1, \t2
_le32_bswap \b, \t0, \t1, \t2
_le32_bswap \c, \t0, \t1, \t2
_le32_bswap \d, \t0, \t1, \t2
.endm
.macro __ldrd a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
ldrd \a, \b, [\src, #\offset]
#else
ldr \a, [\src, #\offset]
ldr \b, [\src, #\offset + 4]
#endif
.endm
.macro __strd a, b, dst, offset
#if __LINUX_ARM_ARCH__ >= 6
strd \a, \b, [\dst, #\offset]
#else
str \a, [\dst, #\offset]
str \b, [\dst, #\offset + 4]
#endif
.endm
.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
// a += b; d ^= a; d = rol(d, 16);
add \a1, \a1, \b1, ror #brot
add \a2, \a2, \b2, ror #brot
eor \d1, \a1, \d1, ror #drot
eor \d2, \a2, \d2, ror #drot
// drot == 32 - 16 == 16
// c += d; b ^= c; b = rol(b, 12);
add \c1, \c1, \d1, ror #16
add \c2, \c2, \d2, ror #16
eor \b1, \c1, \b1, ror #brot
eor \b2, \c2, \b2, ror #brot
// brot == 32 - 12 == 20
// a += b; d ^= a; d = rol(d, 8);
add \a1, \a1, \b1, ror #20
add \a2, \a2, \b2, ror #20
eor \d1, \a1, \d1, ror #16
eor \d2, \a2, \d2, ror #16
// drot == 32 - 8 == 24
// c += d; b ^= c; b = rol(b, 7);
add \c1, \c1, \d1, ror #24
add \c2, \c2, \d2, ror #24
eor \b1, \c1, \b1, ror #20
eor \b2, \c2, \b2, ror #20
// brot == 32 - 7 == 25
.endm
.macro _doubleround
// column round
// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
_halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
// save (x8, x9); restore (x10, x11)
__strd X8_X10, X9_X11, sp, 0
__ldrd X8_X10, X9_X11, sp, 8
// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
_halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
.set brot, 25
.set drot, 24
// diagonal round
// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
_halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
// save (x10, x11); restore (x8, x9)
__strd X8_X10, X9_X11, sp, 8
__ldrd X8_X10, X9_X11, sp, 0
// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
_halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
.endm
.macro _chacha_permute nrounds
.set brot, 0
.set drot, 0
.rept \nrounds / 2
_doubleround
.endr
.endm
.macro _chacha nrounds
.Lnext_block\@:
// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
// Registers contain x0-x9,x12-x15.
// Do the core ChaCha permutation to update x0-x15.
_chacha_permute \nrounds
add sp, #8
// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers contain x0-x9,x12-x15.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
push {X8_X10, X9_X11, X12, X13, X14, X15}
// Load (OUT, IN, LEN).
ldr r14, [sp, #96]
ldr r12, [sp, #100]
ldr r11, [sp, #104]
orr r10, r14, r12
// Use slow path if fewer than 64 bytes remain.
cmp r11, #64
blt .Lxor_slowpath\@
// Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
// ARMv6+, since ldmia and stmia (used below) still require alignment.
tst r10, #3
bne .Lxor_slowpath\@
// Fast path: XOR 64 bytes of aligned data.
// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// x0-x3
__ldrd r8, r9, sp, 32
__ldrd r10, r11, sp, 40
add X0, X0, r8
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
ldmia r12!, {r8-r11}
eor X0, X0, r8
eor X1, X1, r9
eor X2, X2, r10
eor X3, X3, r11
stmia r14!, {X0-X3}
// x4-x7
__ldrd r8, r9, sp, 48
__ldrd r10, r11, sp, 56
add X4, r8, X4, ror #brot
add X5, r9, X5, ror #brot
ldmia r12!, {X0-X3}
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
eor X4, X4, X0
eor X5, X5, X1
eor X6, X6, X2
eor X7, X7, X3
stmia r14!, {X4-X7}
// x8-x15
pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
__ldrd r8, r9, sp, 32
__ldrd r10, r11, sp, 40
add r0, r0, r8 // x8
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
ldmia r12!, {r8-r11}
eor r0, r0, r8 // x8
eor r1, r1, r9 // x9
eor r6, r6, r10 // x10
eor r7, r7, r11 // x11
stmia r14!, {r0,r1,r6,r7}
ldmia r12!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 48
__ldrd r10, r11, sp, 56
add r2, r8, r2, ror #drot // x12
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
ldr r9, [sp, #72] // load LEN
eor r2, r2, r0 // x12
eor r3, r3, r1 // x13
eor r4, r4, r6 // x14
eor r5, r5, r7 // x15
subs r9, #64 // decrement and check LEN
stmia r14!, {r2-r5}
beq .Ldone\@
.Lprepare_for_next_block\@:
// Stack: x0-x15 OUT IN LEN
// Increment block counter (x12)
add r8, #1
// Store updated (OUT, IN, LEN)
str r14, [sp, #64]
str r12, [sp, #68]
str r9, [sp, #72]
mov r14, sp
// Store updated block counter (x12)
str r8, [sp, #48]
sub sp, #16
// Reload state and do next block
ldmia r14!, {r0-r11} // load x0-x11
__strd r10, r11, sp, 8 // store x10-x11 before state
ldmia r14, {r10-r12,r14} // load x12-x15
b .Lnext_block\@
.Lxor_slowpath\@:
// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
// We handle it by storing the 64 bytes of keystream to the stack, then
// XOR-ing the needed portion with the data.
// Allocate keystream buffer
sub sp, #64
mov r14, sp
// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
// Save keystream for x0-x3
__ldrd r8, r9, sp, 96
__ldrd r10, r11, sp, 104
add X0, X0, r8
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
_le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
stmia r14!, {X0-X3}
// Save keystream for x4-x7
__ldrd r8, r9, sp, 112
__ldrd r10, r11, sp, 120
add X4, r8, X4, ror #brot
add X5, r9, X5, ror #brot
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
_le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
add r8, sp, #64
stmia r14!, {X4-X7}
// Save keystream for x8-x15
ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
__ldrd r8, r9, sp, 128
__ldrd r10, r11, sp, 136
add r0, r0, r8 // x8
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
_le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
stmia r14!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 144
__ldrd r10, r11, sp, 152
add r2, r8, r2, ror #drot // x12
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
_le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
stmia r14, {r2-r5}
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
// Registers: r8 is block counter, r12 is IN.
ldr r9, [sp, #168] // LEN
ldr r14, [sp, #160] // OUT
cmp r9, #64
mov r0, sp
movle r1, r9
movgt r1, #64
// r1 is number of bytes to XOR, in range [1, 64]
.if __LINUX_ARM_ARCH__ < 6
orr r2, r12, r14
tst r2, #3 // IN or OUT misaligned?
bne .Lxor_next_byte\@
.endif
// XOR a word at a time
.rept 16
subs r1, #4
blt .Lxor_words_done\@
ldr r2, [r12], #4
ldr r3, [r0], #4
eor r2, r2, r3
str r2, [r14], #4
.endr
b .Lxor_slowpath_done\@
.Lxor_words_done\@:
ands r1, r1, #3
beq .Lxor_slowpath_done\@
// XOR a byte at a time
.Lxor_next_byte\@:
ldrb r2, [r12], #1
ldrb r3, [r0], #1
eor r2, r2, r3
strb r2, [r14], #1
subs r1, #1
bne .Lxor_next_byte\@
.Lxor_slowpath_done\@:
subs r9, #64
add sp, #96
bgt .Lprepare_for_next_block\@
.Ldone\@:
.endm // _chacha
/*
* void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
* const u32 iv[4]);
*/
SYM_FUNC_START(chacha20_arm)
cmp r2, #0 // len == 0?
reteq lr
push {r0-r2,r4-r11,lr}
// Push state x0-x15 onto stack.
// Also store an extra copy of x10-x11 just before the state.
ldr r4, [sp, #48] // iv
mov r0, sp
sub sp, #80
// iv: x12-x15
ldm r4, {X12,X13,X14,X15}
stmdb r0!, {X12,X13,X14,X15}
// key: x4-x11
__ldrd X8_X10, X9_X11, r3, 24
__strd X8_X10, X9_X11, sp, 8
stmdb r0!, {X8_X10, X9_X11}
ldm r3, {X4-X9_X11}
stmdb r0!, {X4-X9_X11}
// constants: x0-x3
adrl X3, .Lexpand_32byte_k
ldm X3, {X0-X3}
__strd X0, X1, sp, 16
__strd X2, X3, sp, 24
_chacha 20
add sp, #76
pop {r4-r11, pc}
SYM_FUNC_END(chacha20_arm)
/*
* void hchacha20_arm(const u32 state[16], u32 out[8]);
*/
SYM_FUNC_START(hchacha20_arm)
push {r1,r4-r11,lr}
mov r14, r0
ldmia r14!, {r0-r11} // load x0-x11
push {r10-r11} // store x10-x11 to stack
ldm r14, {r10-r12,r14} // load x12-x15
sub sp, #8
_chacha_permute 20
// Skip over (unused0-unused1, x10-x11)
add sp, #16
// Fix up rotations of x12-x15
ror X12, X12, #drot
ror X13, X13, #drot
pop {r4} // load 'out'
ror X14, X14, #drot
ror X15, X15, #drot
// Store (x0-x3,x12-x15) to 'out'
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
pop {r4-r11,pc}
SYM_FUNC_END(hchacha20_arm)

View File

@@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>
asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce,
const u8 *key);
asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len,
const u32 key[8], const u32 counter[4]);
static bool chacha20_use_ssse3 __ro_after_init;
static bool chacha20_use_avx2 __ro_after_init;
static bool chacha20_use_avx512 __ro_after_init;
static bool chacha20_use_avx512vl __ro_after_init;
static bool *const chacha20_nobs[] __initconst = {
&chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512,
&chacha20_use_avx512vl };
static void __init chacha20_fpu_init(void)
{
chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
chacha20_use_avx2 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
chacha20_use_avx512 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL) &&
/* Skylake downclocks unacceptably much when using zmm. */
boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
chacha20_use_avx512vl =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
boot_cpu_has(X86_FEATURE_AVX512VL) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL);
#endif
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
PAGE_SIZE % CHACHA20_BLOCK_SIZE);
if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3 ||
len <= CHACHA20_BLOCK_SIZE || !simd_use(simd_context))
return false;
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512 &&
len >= CHACHA20_BLOCK_SIZE * 8)
chacha20_avx512(dst, src, bytes, ctx->key, ctx->counter);
else if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512vl &&
len >= CHACHA20_BLOCK_SIZE * 4)
chacha20_avx512vl(dst, src, bytes, ctx->key, ctx->counter);
else if (IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2 &&
len >= CHACHA20_BLOCK_SIZE * 4)
chacha20_avx2(dst, src, bytes, ctx->key, ctx->counter);
else
chacha20_ssse3(dst, src, bytes, ctx->key, ctx->counter);
ctx->counter[0] += (bytes + 63) / 64;
len -= bytes;
if (!len)
break;
dst += bytes;
src += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
if (IS_ENABLED(CONFIG_AS_SSSE3) && chacha20_use_ssse3 &&
simd_use(simd_context)) {
hchacha20_ssse3(derived_key, nonce, key);
return true;
}
return false;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Implementation of the ChaCha20 stream cipher.
*
* Information: https://cr.yp.to/chacha.html
*/
#include <zinc/chacha20.h>
#include "../selftest/run.h"
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <crypto/algapi.h> // For crypto_xor_cpy.
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "chacha20-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
#include "chacha20-arm-glue.c"
#elif defined(CONFIG_ZINC_ARCH_MIPS)
#include "chacha20-mips-glue.c"
#else
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
const u8 *src, size_t len,
simd_context_t *simd_context)
{
return false;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE],
simd_context_t *simd_context)
{
return false;
}
#endif
#define QUARTER_ROUND(x, a, b, c, d) ( \
x[a] += x[b], \
x[d] = rol32((x[d] ^ x[a]), 16), \
x[c] += x[d], \
x[b] = rol32((x[b] ^ x[c]), 12), \
x[a] += x[b], \
x[d] = rol32((x[d] ^ x[a]), 8), \
x[c] += x[d], \
x[b] = rol32((x[b] ^ x[c]), 7) \
)
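/*
 * Note (explanatory comment, not in the original file): the rotation
 * amounts 16, 12, 8 and 7 above are the standard ChaCha quarter-round
 * constants from RFC 8439; each quarter round mixes one word from each
 * of the four rows of the 4x4 state matrix.
 */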
#define C(i, j) (i * 4 + j)
#define DOUBLE_ROUND(x) ( \
/* Column Round */ \
QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
/* Diagonal Round */ \
QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
)
#define TWENTY_ROUNDS(x) ( \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x), \
DOUBLE_ROUND(x) \
)
static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
{
u32 x[CHACHA20_BLOCK_WORDS];
int i;
for (i = 0; i < ARRAY_SIZE(x); ++i)
x[i] = ctx->state[i];
TWENTY_ROUNDS(x);
for (i = 0; i < ARRAY_SIZE(x); ++i)
stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
ctx->counter[0] += 1;
}
static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
u32 len)
{
__le32 buf[CHACHA20_BLOCK_WORDS];
while (len >= CHACHA20_BLOCK_SIZE) {
chacha20_block_generic(ctx, buf);
crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE);
len -= CHACHA20_BLOCK_SIZE;
out += CHACHA20_BLOCK_SIZE;
in += CHACHA20_BLOCK_SIZE;
}
if (len) {
chacha20_block_generic(ctx, buf);
crypto_xor_cpy(out, in, (u8 *)buf, len);
}
}
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
simd_context_t *simd_context)
{
if (!chacha20_arch(ctx, dst, src, len, simd_context))
chacha20_generic(ctx, dst, src, len);
}
static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE])
{
u32 x[] = { CHACHA20_CONSTANT_EXPA,
CHACHA20_CONSTANT_ND_3,
CHACHA20_CONSTANT_2_BY,
CHACHA20_CONSTANT_TE_K,
get_unaligned_le32(key + 0),
get_unaligned_le32(key + 4),
get_unaligned_le32(key + 8),
get_unaligned_le32(key + 12),
get_unaligned_le32(key + 16),
get_unaligned_le32(key + 20),
get_unaligned_le32(key + 24),
get_unaligned_le32(key + 28),
get_unaligned_le32(nonce + 0),
get_unaligned_le32(nonce + 4),
get_unaligned_le32(nonce + 8),
get_unaligned_le32(nonce + 12)
};
TWENTY_ROUNDS(x);
memcpy(derived_key + 0, x + 0, sizeof(u32) * 4);
memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
}
/* Derived key should be 32-bit aligned */
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
const u8 nonce[HCHACHA20_NONCE_SIZE],
const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
{
if (!hchacha20_arch(derived_key, nonce, key, simd_context))
hchacha20_generic(derived_key, nonce, key);
}
#include "../selftest/chacha20.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init chacha20_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
chacha20_fpu_init();
if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
ARRAY_SIZE(chacha20_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20 stream cipher");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

View File

@@ -0,0 +1,398 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is an implementation of the ChaCha20Poly1305 AEAD construction.
*
* Information: https://tools.ietf.org/html/rfc8439
*/
#include <zinc/chacha20poly1305.h>
#include <zinc/chacha20.h>
#include <zinc/poly1305.h>
#include "selftest/run.h"
#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <crypto/scatterwalk.h> // For blkcipher_walk.
static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 };
static inline void
__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
union {
u8 block0[POLY1305_KEY_SIZE];
__le64 lens[2];
} b = { { 0 } };
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
chacha20(&chacha20_state, dst, src, src_len, simd_context);
poly1305_update(&poly1305_state, dst, src_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
poly1305_final(&poly1305_state, dst + src_len, simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
}
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
simd_get(&simd_context);
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key,
&simd_context);
simd_put(&simd_context);
}
bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src,
const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
struct sg_mapping_iter miter;
size_t partial = 0;
ssize_t sl;
union {
u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
u8 block0[POLY1305_KEY_SIZE];
u8 mac[POLY1305_MAC_SIZE];
__le64 lens[2];
} b __aligned(16) = { { 0 } };
if (WARN_ON(src_len > INT_MAX))
return false;
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
u8 *addr = miter.addr;
size_t length = min_t(size_t, sl, miter.length);
if (unlikely(partial)) {
size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);
crypto_xor(addr, b.chacha20_stream + partial, l);
partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);
addr += l;
length -= l;
}
if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
size_t l = length;
if (unlikely(length < sl))
l &= ~(CHACHA20_BLOCK_SIZE - 1);
chacha20(&chacha20_state, addr, addr, l, simd_context);
addr += l;
length -= l;
}
if (unlikely(length > 0)) {
chacha20(&chacha20_state, b.chacha20_stream, pad0,
CHACHA20_BLOCK_SIZE, simd_context);
crypto_xor(addr, b.chacha20_stream, length);
partial = length;
}
poly1305_update(&poly1305_state, miter.addr,
min_t(size_t, sl, miter.length), simd_context);
simd_relax(simd_context);
}
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
if (likely(sl <= -POLY1305_MAC_SIZE))
poly1305_final(&poly1305_state, miter.addr + miter.length + sl,
simd_context);
sg_miter_stop(&miter);
if (unlikely(sl > -POLY1305_MAC_SIZE)) {
poly1305_final(&poly1305_state, b.mac, simd_context);
scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1);
}
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
return true;
}
static inline bool
__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
int ret;
size_t dst_len;
union {
u8 block0[POLY1305_KEY_SIZE];
u8 mac[POLY1305_MAC_SIZE];
__le64 lens[2];
} b = { { 0 } };
if (unlikely(src_len < POLY1305_MAC_SIZE))
return false;
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
dst_len = src_len - POLY1305_MAC_SIZE;
poly1305_update(&poly1305_state, src, dst_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(dst_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
poly1305_final(&poly1305_state, b.mac, simd_context);
ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE);
if (likely(!ret))
chacha20(&chacha20_state, dst, src, dst_len, simd_context);
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
return !ret;
}
bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
bool ret;
simd_get(&simd_context);
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
key, &simd_context);
simd_put(&simd_context);
return ret;
}
bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src,
size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
const u8 key[CHACHA20POLY1305_KEY_SIZE],
simd_context_t *simd_context)
{
struct poly1305_ctx poly1305_state;
struct chacha20_ctx chacha20_state;
struct sg_mapping_iter miter;
size_t partial = 0;
ssize_t sl;
union {
u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
u8 block0[POLY1305_KEY_SIZE];
struct {
u8 read_mac[POLY1305_MAC_SIZE];
u8 computed_mac[POLY1305_MAC_SIZE];
};
__le64 lens[2];
} b __aligned(16) = { { 0 } };
bool ret = false;
if (unlikely(src_len < POLY1305_MAC_SIZE || WARN_ON(src_len > INT_MAX)))
return ret;
src_len -= POLY1305_MAC_SIZE;
chacha20_init(&chacha20_state, key, nonce);
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
simd_context);
poly1305_init(&poly1305_state, b.block0);
poly1305_update(&poly1305_state, ad, ad_len, simd_context);
poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
simd_context);
sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
u8 *addr = miter.addr;
size_t length = min_t(size_t, sl, miter.length);
poly1305_update(&poly1305_state, addr, length, simd_context);
if (unlikely(partial)) {
size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);
crypto_xor(addr, b.chacha20_stream + partial, l);
partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);
addr += l;
length -= l;
}
if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
size_t l = length;
if (unlikely(length < sl))
l &= ~(CHACHA20_BLOCK_SIZE - 1);
chacha20(&chacha20_state, addr, addr, l, simd_context);
addr += l;
length -= l;
}
if (unlikely(length > 0)) {
chacha20(&chacha20_state, b.chacha20_stream, pad0,
CHACHA20_BLOCK_SIZE, simd_context);
crypto_xor(addr, b.chacha20_stream, length);
partial = length;
}
simd_relax(simd_context);
}
poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
simd_context);
b.lens[0] = cpu_to_le64(ad_len);
b.lens[1] = cpu_to_le64(src_len);
poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
simd_context);
if (likely(sl <= -POLY1305_MAC_SIZE)) {
poly1305_final(&poly1305_state, b.computed_mac, simd_context);
ret = !crypto_memneq(b.computed_mac,
miter.addr + miter.length + sl,
POLY1305_MAC_SIZE);
}
sg_miter_stop(&miter);
if (unlikely(sl > -POLY1305_MAC_SIZE)) {
poly1305_final(&poly1305_state, b.computed_mac, simd_context);
scatterwalk_map_and_copy(b.read_mac, src, src_len,
sizeof(b.read_mac), 0);
ret = !crypto_memneq(b.read_mac, b.computed_mac,
POLY1305_MAC_SIZE);
}
memzero_explicit(&chacha20_state, sizeof(chacha20_state));
memzero_explicit(&b, sizeof(b));
return ret;
}
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
simd_context_t simd_context;
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
simd_get(&simd_context);
hchacha20(derived_key, nonce, key, &simd_context);
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
get_unaligned_le64(nonce + 16),
(u8 *)derived_key, &simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
simd_put(&simd_context);
}
bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
bool ret;
simd_context_t simd_context;
u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
simd_get(&simd_context);
hchacha20(derived_key, nonce, key, &simd_context);
cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
get_unaligned_le64(nonce + 16),
(u8 *)derived_key, &simd_context);
memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
simd_put(&simd_context);
return ret;
}
#include "selftest/chacha20poly1305.c"
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init chacha20poly1305_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest,
NULL, 0))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

View File

@@ -0,0 +1,43 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <linux/simd.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE]);
static bool curve25519_use_neon __ro_after_init;
static bool *const curve25519_nobs[] __initconst = { &curve25519_use_neon };
static void __init curve25519_fpu_init(void)
{
curve25519_use_neon = elf_hwcap & HWCAP_NEON;
}
static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
simd_context_t simd_context;
bool used_arch = false;
simd_get(&simd_context);
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
!IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && curve25519_use_neon &&
simd_use(&simd_context)) {
curve25519_neon(mypublic, secret, basepoint);
used_arch = true;
}
simd_put(&simd_context);
return used_arch;
}
static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
return false;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,860 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2016 The fiat-crypto Authors.
* Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is a machine-generated formally verified implementation of Curve25519
* ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
* machine generated, it has been tweaked to be suitable for use in the kernel.
* It is optimized for 32-bit machines and machines that cannot work efficiently
* with 128-bit integer types.
*/
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
* entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
* t[3]+2^102 t[4]+...+2^230 t[9].
* fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
* Multiplication and carrying produce fe from fe_loose.
*/
typedef struct fe { u32 v[10]; } fe;
/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
* Addition and subtraction produce fe_loose from (fe, fe).
*/
typedef struct fe_loose { u32 v[10]; } fe_loose;
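/*
 * Worked example (explanatory comment, not in the original file): the ten
 * limbs alternate between 26 and 25 bits (26+25+26+25+... = 255 bits), so
 * the limb weights are 2^0, 2^26, 2^51, 2^77, 2^102, 2^128, 2^153, 2^179,
 * 2^204 and 2^230, matching the shift amounts used in fe_frombytes_impl()
 * below.
 */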
static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
/* Ignores top bit of s. */
u32 a0 = get_unaligned_le32(s);
u32 a1 = get_unaligned_le32(s+4);
u32 a2 = get_unaligned_le32(s+8);
u32 a3 = get_unaligned_le32(s+12);
u32 a4 = get_unaligned_le32(s+16);
u32 a5 = get_unaligned_le32(s+20);
u32 a6 = get_unaligned_le32(s+24);
u32 a7 = get_unaligned_le32(s+28);
h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
h[4] = (a3>> 6); /* (32- 6) = 26 */
h[5] = a4&((1<<25)-1); /* 25 */
h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
h[9] = (a7>> 6)&((1<<25)-1); /* 25 */
}
static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
fe_frombytes_impl(h->v, s);
}
static __always_inline u8 /*bool*/
addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 25 bits of result and 1 bit of carry
* (26 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a + b + c;
*low = x & ((1 << 25) - 1);
return (x >> 25) & 1;
}
static __always_inline u8 /*bool*/
addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 26 bits of result and 1 bit of carry
* (27 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a + b + c;
*low = x & ((1 << 26) - 1);
return (x >> 26) & 1;
}
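/*
 * Worked example (explanatory comment, not in the original file):
 * addcarryx_u26(0, 0x3ffffff, 1, &low) computes x = 0x4000000, so
 * low = 0 and the returned carry is 1; addcarryx_u25() behaves the
 * same way with a 25-bit result.
 */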
static __always_inline u8 /*bool*/
subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 25 bits of result and 1 bit of borrow
* (26 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a - b - c;
*low = x & ((1 << 25) - 1);
return x >> 31;
}
static __always_inline u8 /*bool*/
subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
/* This function extracts 26 bits of result and 1 bit of borrow
* (27 total), so a 32-bit intermediate is sufficient.
*/
u32 x = a - b - c;
*low = x & ((1 << 26) - 1);
return x >> 31;
}
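/*
 * Note (explanatory comment, not in the original file): cmovznz32() is a
 * branchless select: it returns nz when t is nonzero and z when t is zero,
 * so callers can avoid data-dependent branches.
 */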
static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
t = -!!t; /* all set if nonzero, 0 if 0 */
return (t&nz) | ((~t)&z);
}
static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];
{ const u32 x16 = in1[7];
{ const u32 x14 = in1[6];
{ const u32 x12 = in1[5];
{ const u32 x10 = in1[4];
{ const u32 x8 = in1[3];
{ const u32 x6 = in1[2];
{ const u32 x4 = in1[1];
{ const u32 x2 = in1[0];
{ u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
{ u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
{ u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
{ u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
{ u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
{ u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
{ u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
{ u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
{ u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
{ u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
{ u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
{ u32 x50 = (x49 & 0x3ffffed);
{ u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
{ u32 x54 = (x49 & 0x1ffffff);
{ u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
{ u32 x58 = (x49 & 0x3ffffff);
{ u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
{ u32 x62 = (x49 & 0x1ffffff);
{ u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
{ u32 x66 = (x49 & 0x3ffffff);
{ u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
{ u32 x70 = (x49 & 0x1ffffff);
{ u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
{ u32 x74 = (x49 & 0x3ffffff);
{ u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
{ u32 x78 = (x49 & 0x1ffffff);
{ u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
{ u32 x82 = (x49 & 0x3ffffff);
{ u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
{ u32 x86 = (x49 & 0x1ffffff);
{ u32 x88; addcarryx_u25(x85, x47, x86, &x88);
out[0] = x52;
out[1] = x56;
out[2] = x60;
out[3] = x64;
out[4] = x68;
out[5] = x72;
out[6] = x76;
out[7] = x80;
out[8] = x84;
out[9] = x88;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
u32 h[10];
fe_freeze(h, f->v);
s[0] = h[0] >> 0;
s[1] = h[0] >> 8;
s[2] = h[0] >> 16;
s[3] = (h[0] >> 24) | (h[1] << 2);
s[4] = h[1] >> 6;
s[5] = h[1] >> 14;
s[6] = (h[1] >> 22) | (h[2] << 3);
s[7] = h[2] >> 5;
s[8] = h[2] >> 13;
s[9] = (h[2] >> 21) | (h[3] << 5);
s[10] = h[3] >> 3;
s[11] = h[3] >> 11;
s[12] = (h[3] >> 19) | (h[4] << 6);
s[13] = h[4] >> 2;
s[14] = h[4] >> 10;
s[15] = h[4] >> 18;
s[16] = h[5] >> 0;
s[17] = h[5] >> 8;
s[18] = h[5] >> 16;
s[19] = (h[5] >> 24) | (h[6] << 1);
s[20] = h[6] >> 7;
s[21] = h[6] >> 15;
s[22] = (h[6] >> 23) | (h[7] << 3);
s[23] = h[7] >> 5;
s[24] = h[7] >> 13;
s[25] = (h[7] >> 21) | (h[8] << 4);
s[26] = h[8] >> 4;
s[27] = h[8] >> 12;
s[28] = (h[8] >> 20) | (h[9] << 6);
s[29] = h[9] >> 2;
s[30] = h[9] >> 10;
s[31] = h[9] >> 18;
}
/* h = f */
static __always_inline void fe_copy(fe *h, const fe *f)
{
memmove(h, f, sizeof(u32) * 10);
}
static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
memmove(h, f, sizeof(u32) * 10);
}
/* h = 0 */
static __always_inline void fe_0(fe *h)
{
memset(h, 0, sizeof(u32) * 10);
}
/* h = 1 */
static __always_inline void fe_1(fe *h)
{
memset(h, 0, sizeof(u32) * 10);
h->v[0] = 1;
}
static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
out[0] = (x5 + x23);
out[1] = (x7 + x25);
out[2] = (x9 + x27);
out[3] = (x11 + x29);
out[4] = (x13 + x31);
out[5] = (x15 + x33);
out[6] = (x17 + x35);
out[7] = (x19 + x37);
out[8] = (x21 + x39);
out[9] = (x20 + x38);
}}}}}}}}}}}}}}}}}}}}
}
/* h = f + g
* Can overlap h with f or g.
*/
static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
fe_add_impl(h->v, f->v, g->v);
}
static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
out[0] = ((0x7ffffda + x5) - x23);
out[1] = ((0x3fffffe + x7) - x25);
out[2] = ((0x7fffffe + x9) - x27);
out[3] = ((0x3fffffe + x11) - x29);
out[4] = ((0x7fffffe + x13) - x31);
out[5] = ((0x3fffffe + x15) - x33);
out[6] = ((0x7fffffe + x17) - x35);
out[7] = ((0x3fffffe + x19) - x37);
out[8] = ((0x7fffffe + x21) - x39);
out[9] = ((0x3fffffe + x20) - x38);
}}}}}}}}}}}}}}}}}}}}
}
/* h = f - g
* Can overlap h with f or g.
*/
static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
fe_sub_impl(h->v, f->v, g->v);
}
static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = in2[9];
{ const u32 x39 = in2[8];
{ const u32 x37 = in2[7];
{ const u32 x35 = in2[6];
{ const u32 x33 = in2[5];
{ const u32 x31 = in2[4];
{ const u32 x29 = in2[3];
{ const u32 x27 = in2[2];
{ const u32 x25 = in2[1];
{ const u32 x23 = in2[0];
{ u64 x40 = ((u64)x23 * x5);
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
{ u64 x58 = ((u64)(0x2 * x38) * x20);
{ u64 x59 = (x48 + (x58 << 0x4));
{ u64 x60 = (x59 + (x58 << 0x1));
{ u64 x61 = (x60 + x58);
{ u64 x62 = (x47 + (x57 << 0x4));
{ u64 x63 = (x62 + (x57 << 0x1));
{ u64 x64 = (x63 + x57);
{ u64 x65 = (x46 + (x56 << 0x4));
{ u64 x66 = (x65 + (x56 << 0x1));
{ u64 x67 = (x66 + x56);
{ u64 x68 = (x45 + (x55 << 0x4));
{ u64 x69 = (x68 + (x55 << 0x1));
{ u64 x70 = (x69 + x55);
{ u64 x71 = (x44 + (x54 << 0x4));
{ u64 x72 = (x71 + (x54 << 0x1));
{ u64 x73 = (x72 + x54);
{ u64 x74 = (x43 + (x53 << 0x4));
{ u64 x75 = (x74 + (x53 << 0x1));
{ u64 x76 = (x75 + x53);
{ u64 x77 = (x42 + (x52 << 0x4));
{ u64 x78 = (x77 + (x52 << 0x1));
{ u64 x79 = (x78 + x52);
{ u64 x80 = (x41 + (x51 << 0x4));
{ u64 x81 = (x80 + (x51 << 0x1));
{ u64 x82 = (x81 + x51);
{ u64 x83 = (x40 + (x50 << 0x4));
{ u64 x84 = (x83 + (x50 << 0x1));
{ u64 x85 = (x84 + x50);
{ u64 x86 = (x85 >> 0x1a);
{ u32 x87 = ((u32)x85 & 0x3ffffff);
{ u64 x88 = (x86 + x82);
{ u64 x89 = (x88 >> 0x19);
{ u32 x90 = ((u32)x88 & 0x1ffffff);
{ u64 x91 = (x89 + x79);
{ u64 x92 = (x91 >> 0x1a);
{ u32 x93 = ((u32)x91 & 0x3ffffff);
{ u64 x94 = (x92 + x76);
{ u64 x95 = (x94 >> 0x19);
{ u32 x96 = ((u32)x94 & 0x1ffffff);
{ u64 x97 = (x95 + x73);
{ u64 x98 = (x97 >> 0x1a);
{ u32 x99 = ((u32)x97 & 0x3ffffff);
{ u64 x100 = (x98 + x70);
{ u64 x101 = (x100 >> 0x19);
{ u32 x102 = ((u32)x100 & 0x1ffffff);
{ u64 x103 = (x101 + x67);
{ u64 x104 = (x103 >> 0x1a);
{ u32 x105 = ((u32)x103 & 0x3ffffff);
{ u64 x106 = (x104 + x64);
{ u64 x107 = (x106 >> 0x19);
{ u32 x108 = ((u32)x106 & 0x1ffffff);
{ u64 x109 = (x107 + x61);
{ u64 x110 = (x109 >> 0x1a);
{ u32 x111 = ((u32)x109 & 0x3ffffff);
{ u64 x112 = (x110 + x49);
{ u64 x113 = (x112 >> 0x19);
{ u32 x114 = ((u32)x112 & 0x1ffffff);
{ u64 x115 = (x87 + (0x13 * x113));
{ u32 x116 = (u32) (x115 >> 0x1a);
{ u32 x117 = ((u32)x115 & 0x3ffffff);
{ u32 x118 = (x116 + x90);
{ u32 x119 = (x118 >> 0x19);
{ u32 x120 = (x118 & 0x1ffffff);
out[0] = x117;
out[1] = x120;
out[2] = (x119 + x93);
out[3] = x96;
out[4] = x99;
out[5] = x102;
out[6] = x105;
out[7] = x108;
out[8] = x111;
out[9] = x114;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static __always_inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
fe_mul_impl(h->v, f->v, g->v);
}
static void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x17 = in1[9];
{ const u32 x18 = in1[8];
{ const u32 x16 = in1[7];
{ const u32 x14 = in1[6];
{ const u32 x12 = in1[5];
{ const u32 x10 = in1[4];
{ const u32 x8 = in1[3];
{ const u32 x6 = in1[2];
{ const u32 x4 = in1[1];
{ const u32 x2 = in1[0];
{ u64 x19 = ((u64)x2 * x2);
{ u64 x20 = ((u64)(0x2 * x2) * x4);
{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
{ u64 x36 = ((u64)(0x2 * x18) * x17);
{ u64 x37 = ((u64)(0x2 * x17) * x17);
{ u64 x38 = (x27 + (x37 << 0x4));
{ u64 x39 = (x38 + (x37 << 0x1));
{ u64 x40 = (x39 + x37);
{ u64 x41 = (x26 + (x36 << 0x4));
{ u64 x42 = (x41 + (x36 << 0x1));
{ u64 x43 = (x42 + x36);
{ u64 x44 = (x25 + (x35 << 0x4));
{ u64 x45 = (x44 + (x35 << 0x1));
{ u64 x46 = (x45 + x35);
{ u64 x47 = (x24 + (x34 << 0x4));
{ u64 x48 = (x47 + (x34 << 0x1));
{ u64 x49 = (x48 + x34);
{ u64 x50 = (x23 + (x33 << 0x4));
{ u64 x51 = (x50 + (x33 << 0x1));
{ u64 x52 = (x51 + x33);
{ u64 x53 = (x22 + (x32 << 0x4));
{ u64 x54 = (x53 + (x32 << 0x1));
{ u64 x55 = (x54 + x32);
{ u64 x56 = (x21 + (x31 << 0x4));
{ u64 x57 = (x56 + (x31 << 0x1));
{ u64 x58 = (x57 + x31);
{ u64 x59 = (x20 + (x30 << 0x4));
{ u64 x60 = (x59 + (x30 << 0x1));
{ u64 x61 = (x60 + x30);
{ u64 x62 = (x19 + (x29 << 0x4));
{ u64 x63 = (x62 + (x29 << 0x1));
{ u64 x64 = (x63 + x29);
{ u64 x65 = (x64 >> 0x1a);
{ u32 x66 = ((u32)x64 & 0x3ffffff);
{ u64 x67 = (x65 + x61);
{ u64 x68 = (x67 >> 0x19);
{ u32 x69 = ((u32)x67 & 0x1ffffff);
{ u64 x70 = (x68 + x58);
{ u64 x71 = (x70 >> 0x1a);
{ u32 x72 = ((u32)x70 & 0x3ffffff);
{ u64 x73 = (x71 + x55);
{ u64 x74 = (x73 >> 0x19);
{ u32 x75 = ((u32)x73 & 0x1ffffff);
{ u64 x76 = (x74 + x52);
{ u64 x77 = (x76 >> 0x1a);
{ u32 x78 = ((u32)x76 & 0x3ffffff);
{ u64 x79 = (x77 + x49);
{ u64 x80 = (x79 >> 0x19);
{ u32 x81 = ((u32)x79 & 0x1ffffff);
{ u64 x82 = (x80 + x46);
{ u64 x83 = (x82 >> 0x1a);
{ u32 x84 = ((u32)x82 & 0x3ffffff);
{ u64 x85 = (x83 + x43);
{ u64 x86 = (x85 >> 0x19);
{ u32 x87 = ((u32)x85 & 0x1ffffff);
{ u64 x88 = (x86 + x40);
{ u64 x89 = (x88 >> 0x1a);
{ u32 x90 = ((u32)x88 & 0x3ffffff);
{ u64 x91 = (x89 + x28);
{ u64 x92 = (x91 >> 0x19);
{ u32 x93 = ((u32)x91 & 0x1ffffff);
{ u64 x94 = (x66 + (0x13 * x92));
{ u32 x95 = (u32) (x94 >> 0x1a);
{ u32 x96 = ((u32)x94 & 0x3ffffff);
{ u32 x97 = (x95 + x69);
{ u32 x98 = (x97 >> 0x19);
{ u32 x99 = (x97 & 0x1ffffff);
out[0] = x96;
out[1] = x99;
out[2] = (x98 + x72);
out[3] = x75;
out[4] = x78;
out[5] = x81;
out[6] = x84;
out[7] = x87;
out[8] = x90;
out[9] = x93;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
fe_sqr_impl(h->v, f->v);
}
static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
fe_sqr_impl(h->v, f->v);
}
static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
fe t0;
fe t1;
fe t2;
fe t3;
int i;
fe_sq_tl(&t0, z);
fe_sq_tt(&t1, &t0);
for (i = 1; i < 2; ++i)
fe_sq_tt(&t1, &t1);
fe_mul_tlt(&t1, z, &t1);
fe_mul_ttt(&t0, &t0, &t1);
fe_sq_tt(&t2, &t0);
fe_mul_ttt(&t1, &t1, &t2);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 5; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 10; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t2, &t2, &t1);
fe_sq_tt(&t3, &t2);
for (i = 1; i < 20; ++i)
fe_sq_tt(&t3, &t3);
fe_mul_ttt(&t2, &t3, &t2);
fe_sq_tt(&t2, &t2);
for (i = 1; i < 10; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t2, &t1);
for (i = 1; i < 50; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t2, &t2, &t1);
fe_sq_tt(&t3, &t2);
for (i = 1; i < 100; ++i)
fe_sq_tt(&t3, &t3);
fe_mul_ttt(&t2, &t3, &t2);
fe_sq_tt(&t2, &t2);
for (i = 1; i < 50; ++i)
fe_sq_tt(&t2, &t2);
fe_mul_ttt(&t1, &t2, &t1);
fe_sq_tt(&t1, &t1);
for (i = 1; i < 5; ++i)
fe_sq_tt(&t1, &t1);
fe_mul_ttt(out, &t1, &t0);
}
static __always_inline void fe_invert(fe *out, const fe *z)
{
fe_loose l;
fe_copy_lt(&l, z);
fe_loose_invert(out, &l);
}
/* Replace (f,g) with (g,f) if b == 1;
* replace (f,g) with (f,g) if b == 0.
*
* Preconditions: b in {0,1}
*/
static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
{
unsigned i;
b = 0 - b;
for (i = 0; i < 10; i++) {
u32 x = f->v[i] ^ g->v[i];
x &= b;
f->v[i] ^= x;
g->v[i] ^= x;
}
}
/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
{
{ const u32 x20 = in1[9];
{ const u32 x21 = in1[8];
{ const u32 x19 = in1[7];
{ const u32 x17 = in1[6];
{ const u32 x15 = in1[5];
{ const u32 x13 = in1[4];
{ const u32 x11 = in1[3];
{ const u32 x9 = in1[2];
{ const u32 x7 = in1[1];
{ const u32 x5 = in1[0];
{ const u32 x38 = 0;
{ const u32 x39 = 0;
{ const u32 x37 = 0;
{ const u32 x35 = 0;
{ const u32 x33 = 0;
{ const u32 x31 = 0;
{ const u32 x29 = 0;
{ const u32 x27 = 0;
{ const u32 x25 = 0;
{ const u32 x23 = 121666;
{ u64 x40 = ((u64)x23 * x5);
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
{ u64 x58 = ((u64)(0x2 * x38) * x20);
{ u64 x59 = (x48 + (x58 << 0x4));
{ u64 x60 = (x59 + (x58 << 0x1));
{ u64 x61 = (x60 + x58);
{ u64 x62 = (x47 + (x57 << 0x4));
{ u64 x63 = (x62 + (x57 << 0x1));
{ u64 x64 = (x63 + x57);
{ u64 x65 = (x46 + (x56 << 0x4));
{ u64 x66 = (x65 + (x56 << 0x1));
{ u64 x67 = (x66 + x56);
{ u64 x68 = (x45 + (x55 << 0x4));
{ u64 x69 = (x68 + (x55 << 0x1));
{ u64 x70 = (x69 + x55);
{ u64 x71 = (x44 + (x54 << 0x4));
{ u64 x72 = (x71 + (x54 << 0x1));
{ u64 x73 = (x72 + x54);
{ u64 x74 = (x43 + (x53 << 0x4));
{ u64 x75 = (x74 + (x53 << 0x1));
{ u64 x76 = (x75 + x53);
{ u64 x77 = (x42 + (x52 << 0x4));
{ u64 x78 = (x77 + (x52 << 0x1));
{ u64 x79 = (x78 + x52);
{ u64 x80 = (x41 + (x51 << 0x4));
{ u64 x81 = (x80 + (x51 << 0x1));
{ u64 x82 = (x81 + x51);
{ u64 x83 = (x40 + (x50 << 0x4));
{ u64 x84 = (x83 + (x50 << 0x1));
{ u64 x85 = (x84 + x50);
{ u64 x86 = (x85 >> 0x1a);
{ u32 x87 = ((u32)x85 & 0x3ffffff);
{ u64 x88 = (x86 + x82);
{ u64 x89 = (x88 >> 0x19);
{ u32 x90 = ((u32)x88 & 0x1ffffff);
{ u64 x91 = (x89 + x79);
{ u64 x92 = (x91 >> 0x1a);
{ u32 x93 = ((u32)x91 & 0x3ffffff);
{ u64 x94 = (x92 + x76);
{ u64 x95 = (x94 >> 0x19);
{ u32 x96 = ((u32)x94 & 0x1ffffff);
{ u64 x97 = (x95 + x73);
{ u64 x98 = (x97 >> 0x1a);
{ u32 x99 = ((u32)x97 & 0x3ffffff);
{ u64 x100 = (x98 + x70);
{ u64 x101 = (x100 >> 0x19);
{ u32 x102 = ((u32)x100 & 0x1ffffff);
{ u64 x103 = (x101 + x67);
{ u64 x104 = (x103 >> 0x1a);
{ u32 x105 = ((u32)x103 & 0x3ffffff);
{ u64 x106 = (x104 + x64);
{ u64 x107 = (x106 >> 0x19);
{ u32 x108 = ((u32)x106 & 0x1ffffff);
{ u64 x109 = (x107 + x61);
{ u64 x110 = (x109 >> 0x1a);
{ u32 x111 = ((u32)x109 & 0x3ffffff);
{ u64 x112 = (x110 + x49);
{ u64 x113 = (x112 >> 0x19);
{ u32 x114 = ((u32)x112 & 0x1ffffff);
{ u64 x115 = (x87 + (0x13 * x113));
{ u32 x116 = (u32) (x115 >> 0x1a);
{ u32 x117 = ((u32)x115 & 0x3ffffff);
{ u32 x118 = (x116 + x90);
{ u32 x119 = (x118 >> 0x19);
{ u32 x120 = (x118 & 0x1ffffff);
out[0] = x117;
out[1] = x120;
out[2] = (x119 + x93);
out[3] = x96;
out[4] = x99;
out[5] = x102;
out[6] = x105;
out[7] = x108;
out[8] = x111;
out[9] = x114;
}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}
static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
{
fe_mul_121666_impl(h->v, f->v);
}
static void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
const u8 scalar[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
fe x1, x2, z2, x3, z3;
fe_loose x2l, z2l, x3l;
unsigned swap = 0;
int pos;
u8 e[32];
memcpy(e, scalar, 32);
curve25519_clamp_secret(e);
/* The following implementation was transcribed to Coq and proven to
* correspond to unary scalar multiplication in affine coordinates given
* that x1 != 0 is the x coordinate of some point on the curve. It was
* also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
* z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
* quantified over the underlying field, so it applies to Curve25519
* itself and the quadratic twist of Curve25519. It was not proven in
* Coq that prime-field arithmetic correctly simulates extension-field
* arithmetic on prime-field values. The decoding of the byte array
* representation of e was not considered.
*
* Specification of Montgomery curves in affine coordinates:
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
*
* Proof that these form a group that is isomorphic to a Weierstrass
* curve:
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
*
* Coq transcription and correctness proof of the loop
* (where scalarbits=255):
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
* preconditions: 0 <= e < 2^255 (not necessarily e < order),
* fe_invert(0) = 0
*/
fe_frombytes(&x1, point);
fe_1(&x2);
fe_0(&z2);
fe_copy(&x3, &x1);
fe_1(&z3);
for (pos = 254; pos >= 0; --pos) {
fe tmp0, tmp1;
fe_loose tmp0l, tmp1l;
/* loop invariant as of right before the test, for the case
* where x1 != 0:
* pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
* is nonzero
* let r := e >> (pos+1) in the following equalities of
* projective points:
* to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
* to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
* x1 is the nonzero x coordinate of the nonzero
* point (r*P-(r+1)*P)
*/
unsigned b = 1 & (e[pos / 8] >> (pos & 7));
swap ^= b;
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
swap = b;
/* Coq transcription of ladderstep formula (called from
* transcribed loop):
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
* <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
* x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
* x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
*/
fe_sub(&tmp0l, &x3, &z3);
fe_sub(&tmp1l, &x2, &z2);
fe_add(&x2l, &x2, &z2);
fe_add(&z2l, &x3, &z3);
fe_mul_tll(&z3, &tmp0l, &x2l);
fe_mul_tll(&z2, &z2l, &tmp1l);
fe_sq_tl(&tmp0, &tmp1l);
fe_sq_tl(&tmp1, &x2l);
fe_add(&x3l, &z3, &z2);
fe_sub(&z2l, &z3, &z2);
fe_mul_ttt(&x2, &tmp1, &tmp0);
fe_sub(&tmp1l, &tmp1, &tmp0);
fe_sq_tl(&z2, &z2l);
fe_mul121666(&z3, &tmp1l);
fe_sq_tl(&x3, &x3l);
fe_add(&tmp0l, &tmp0, &z3);
fe_mul_ttt(&z3, &x1, &z2);
fe_mul_tll(&z2, &tmp1l, &tmp0l);
}
/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
* else (x2, z2)
*/
fe_cswap(&x2, &x3, swap);
fe_cswap(&z2, &z3, swap);
fe_invert(&z2, &z2);
fe_mul_ttt(&x2, &x2, &z2);
fe_tobytes(out, &x2);
memzero_explicit(&x1, sizeof(x1));
memzero_explicit(&x2, sizeof(x2));
memzero_explicit(&z2, sizeof(z2));
memzero_explicit(&x3, sizeof(x3));
memzero_explicit(&z3, sizeof(z3));
memzero_explicit(&x2l, sizeof(x2l));
memzero_explicit(&z2l, sizeof(z2l));
memzero_explicit(&x3l, sizeof(x3l));
memzero_explicit(&e, sizeof(e));
}

View File

@@ -0,0 +1,779 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2016-2017 INRIA and Microsoft Corporation.
* Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is a machine-generated formally verified implementation of Curve25519
* ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine
* generated, it has been tweaked to be suitable for use in the kernel. It is
* optimized for 64-bit machines that can efficiently work with 128-bit
* integer types.
*/
typedef __uint128_t u128;
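/*
 * Note (explanatory comment, not in the original file): u64_eq_mask()
 * returns the all-ones mask 0xffffffffffffffff when a == b and 0 otherwise,
 * computed without branches so it is suitable for constant-time code.
 */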
static __always_inline u64 u64_eq_mask(u64 a, u64 b)
{
u64 x = a ^ b;
u64 minus_x = ~x + (u64)1U;
u64 x_or_minus_x = x | minus_x;
u64 xnx = x_or_minus_x >> (u32)63U;
u64 c = xnx - (u64)1U;
return c;
}
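/*
 * Note (explanatory comment, not in the original file): u64_gte_mask()
 * returns the all-ones mask when a >= b and 0 otherwise, again without
 * any data-dependent branches.
 */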
static __always_inline u64 u64_gte_mask(u64 a, u64 b)
{
u64 x = a;
u64 y = b;
u64 x_xor_y = x ^ y;
u64 x_sub_y = x - y;
u64 x_sub_y_xor_y = x_sub_y ^ y;
u64 q = x_xor_y | x_sub_y_xor_y;
u64 x_xor_q = x ^ q;
u64 x_xor_q_ = x_xor_q >> (u32)63U;
u64 c = x_xor_q_ - (u64)1U;
return c;
}
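/*
 * Note (explanatory comment, not in the original file): the limbs here use
 * radix 2^51, so the part of b[4] above 2^51 represents a multiple of 2^255,
 * and 2^255 == 19 (mod 2^255 - 19); modulo_carry_top() therefore folds
 * (b[4] >> 51) * 19 back into b[0].
 */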
static __always_inline void modulo_carry_top(u64 *b)
{
u64 b4 = b[4];
u64 b0 = b[0];
u64 b4_ = b4 & 0x7ffffffffffffLLU;
u64 b0_ = b0 + 19 * (b4 >> 51);
b[4] = b4_;
b[0] = b0_;
}
static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
{
{
u128 xi = input[0];
output[0] = ((u64)(xi));
}
{
u128 xi = input[1];
output[1] = ((u64)(xi));
}
{
u128 xi = input[2];
output[2] = ((u64)(xi));
}
{
u128 xi = input[3];
output[3] = ((u64)(xi));
}
{
u128 xi = input[4];
output[4] = ((u64)(xi));
}
}
static __always_inline void
fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
{
output[0] += (u128)input[0] * s;
output[1] += (u128)input[1] * s;
output[2] += (u128)input[2] * s;
output[3] += (u128)input[3] * s;
output[4] += (u128)input[4] * s;
}
static __always_inline void fproduct_carry_wide_(u128 *tmp)
{
{
u32 ctr = 0;
u128 tctr = tmp[ctr];
u128 tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
u128 c = ((tctr) >> (51));
tmp[ctr] = ((u128)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
{
u32 ctr = 1;
u128 tctr = tmp[ctr];
u128 tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
u128 c = ((tctr) >> (51));
tmp[ctr] = ((u128)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
{
u32 ctr = 2;
u128 tctr = tmp[ctr];
u128 tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
u128 c = ((tctr) >> (51));
tmp[ctr] = ((u128)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
{
u32 ctr = 3;
u128 tctr = tmp[ctr];
u128 tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
u128 c = ((tctr) >> (51));
tmp[ctr] = ((u128)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
}
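/*
 * Note (explanatory comment, not in the original file): fmul_shift_reduce()
 * multiplies the element by 2^51 (one limb rotation); the limb that wraps
 * around past 2^255 is multiplied by 19, since 2^255 == 19 (mod 2^255 - 19).
 */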
static __always_inline void fmul_shift_reduce(u64 *output)
{
u64 tmp = output[4];
u64 b0;
{
u32 ctr = 5 - 0 - 1;
u64 z = output[ctr - 1];
output[ctr] = z;
}
{
u32 ctr = 5 - 1 - 1;
u64 z = output[ctr - 1];
output[ctr] = z;
}
{
u32 ctr = 5 - 2 - 1;
u64 z = output[ctr - 1];
output[ctr] = z;
}
{
u32 ctr = 5 - 3 - 1;
u64 z = output[ctr - 1];
output[ctr] = z;
}
output[0] = tmp;
b0 = output[0];
output[0] = 19 * b0;
}
static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
u64 *input21)
{
u32 i;
u64 input2i;
{
u64 input2i = input21[0];
fproduct_sum_scalar_multiplication_(output, input, input2i);
fmul_shift_reduce(input);
}
{
u64 input2i = input21[1];
fproduct_sum_scalar_multiplication_(output, input, input2i);
fmul_shift_reduce(input);
}
{
u64 input2i = input21[2];
fproduct_sum_scalar_multiplication_(output, input, input2i);
fmul_shift_reduce(input);
}
{
u64 input2i = input21[3];
fproduct_sum_scalar_multiplication_(output, input, input2i);
fmul_shift_reduce(input);
}
i = 4;
input2i = input21[i];
fproduct_sum_scalar_multiplication_(output, input, input2i);
}
static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21)
{
u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] };
{
u128 b4;
u128 b0;
u128 b4_;
u128 b0_;
u64 i0;
u64 i1;
u64 i0_;
u64 i1_;
u128 t[5] = { 0 };
fmul_mul_shift_reduce_(t, tmp, input21);
fproduct_carry_wide_(t);
b4 = t[4];
b0 = t[0];
b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
t[4] = b4_;
t[0] = b0_;
fproduct_copy_from_wide_(output, t);
i0 = output[0];
i1 = output[1];
i0_ = i0 & 0x7ffffffffffffLLU;
i1_ = i1 + (i0 >> 51);
output[0] = i0_;
output[1] = i1_;
}
}
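/*
 * Dedicated squaring: reuses the symmetric cross terms (2 * ri * rj) and
 * folds contributions above 2^255 back in with the factor 19, since
 * 2^255 = 19 (mod p).
 */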
static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
{
u64 r0 = output[0];
u64 r1 = output[1];
u64 r2 = output[2];
u64 r3 = output[3];
u64 r4 = output[4];
u64 d0 = r0 * 2;
u64 d1 = r1 * 2;
u64 d2 = r2 * 2 * 19;
u64 d419 = r4 * 19;
u64 d4 = d419 * 2;
u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
(((u128)(d2) * (r3))));
u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
(((u128)(r3 * 19) * (r3))));
u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
(((u128)(d4) * (r3))));
u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
(((u128)(r4) * (d419))));
u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
(((u128)(r2) * (r2))));
tmp[0] = s0;
tmp[1] = s1;
tmp[2] = s2;
tmp[3] = s3;
tmp[4] = s4;
}
static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
{
u128 b4;
u128 b0;
u128 b4_;
u128 b0_;
u64 i0;
u64 i1;
u64 i0_;
u64 i1_;
fsquare_fsquare__(tmp, output);
fproduct_carry_wide_(tmp);
b4 = tmp[4];
b0 = tmp[0];
b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
tmp[4] = b4_;
tmp[0] = b0_;
fproduct_copy_from_wide_(output, tmp);
i0 = output[0];
i1 = output[1];
i0_ = i0 & 0x7ffffffffffffLLU;
i1_ = i1 + (i0 >> 51);
output[0] = i0_;
output[1] = i1_;
}
static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
u32 count1)
{
u32 i;
fsquare_fsquare_(tmp, output);
for (i = 1; i < count1; ++i)
fsquare_fsquare_(tmp, output);
}
static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
u32 count1)
{
u128 t[5];
memcpy(output, input, 5 * sizeof(*input));
fsquare_fsquare_times_(output, t, count1);
}
static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
u32 count1)
{
u128 t[5];
fsquare_fsquare_times_(output, t, count1);
}
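/*
 * Modular inversion via Fermat's little theorem: out = z^(p-2) mod p,
 * computed with the usual Curve25519 addition chain of squaring runs
 * (1, 2, 5, 10, 20, 50, 100) interleaved with multiplications.
 */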
static __always_inline void crecip_crecip(u64 *out, u64 *z)
{
u64 buf[20] = { 0 };
u64 *a0 = buf;
u64 *t00 = buf + 5;
u64 *b0 = buf + 10;
u64 *t01;
u64 *b1;
u64 *c0;
u64 *a;
u64 *t0;
u64 *b;
u64 *c;
fsquare_fsquare_times(a0, z, 1);
fsquare_fsquare_times(t00, a0, 2);
fmul_fmul(b0, t00, z);
fmul_fmul(a0, b0, a0);
fsquare_fsquare_times(t00, a0, 1);
fmul_fmul(b0, t00, b0);
fsquare_fsquare_times(t00, b0, 5);
t01 = buf + 5;
b1 = buf + 10;
c0 = buf + 15;
fmul_fmul(b1, t01, b1);
fsquare_fsquare_times(t01, b1, 10);
fmul_fmul(c0, t01, b1);
fsquare_fsquare_times(t01, c0, 20);
fmul_fmul(t01, t01, c0);
fsquare_fsquare_times_inplace(t01, 10);
fmul_fmul(b1, t01, b1);
fsquare_fsquare_times(t01, b1, 50);
a = buf;
t0 = buf + 5;
b = buf + 10;
c = buf + 15;
fmul_fmul(c, t0, b);
fsquare_fsquare_times(t0, c, 100);
fmul_fmul(t0, t0, c);
fsquare_fsquare_times_inplace(t0, 50);
fmul_fmul(t0, t0, b);
fsquare_fsquare_times_inplace(t0, 5);
fmul_fmul(out, t0, a);
}
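/*
 * fsum: limb-wise addition a += b; the 51-bit limbs leave enough headroom in
 * the 64-bit words that no immediate carry propagation is required.
 */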
static __always_inline void fsum(u64 *a, u64 *b)
{
a[0] += b[0];
a[1] += b[1];
a[2] += b[2];
a[3] += b[3];
a[4] += b[4];
}
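/*
 * fdifference: a = b - a. A multiple of p (8*p, spread across the limbs) is
 * added to b first so that no limb can underflow.
 */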
static __always_inline void fdifference(u64 *a, u64 *b)
{
u64 tmp[5] = { 0 };
u64 b0;
u64 b1;
u64 b2;
u64 b3;
u64 b4;
memcpy(tmp, b, 5 * sizeof(*b));
b0 = tmp[0];
b1 = tmp[1];
b2 = tmp[2];
b3 = tmp[3];
b4 = tmp[4];
tmp[0] = b0 + 0x3fffffffffff68LLU;
tmp[1] = b1 + 0x3ffffffffffff8LLU;
tmp[2] = b2 + 0x3ffffffffffff8LLU;
tmp[3] = b3 + 0x3ffffffffffff8LLU;
tmp[4] = b4 + 0x3ffffffffffff8LLU;
{
u64 xi = a[0];
u64 yi = tmp[0];
a[0] = yi - xi;
}
{
u64 xi = a[1];
u64 yi = tmp[1];
a[1] = yi - xi;
}
{
u64 xi = a[2];
u64 yi = tmp[2];
a[2] = yi - xi;
}
{
u64 xi = a[3];
u64 yi = tmp[3];
a[3] = yi - xi;
}
{
u64 xi = a[4];
u64 yi = tmp[4];
a[4] = yi - xi;
}
}
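/*
 * fscalar: multiply a field element by a small constant (121665 in the ladder
 * step), followed by a carry pass and top-limb folding.
 */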
static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
{
u128 tmp[5];
u128 b4;
u128 b0;
u128 b4_;
u128 b0_;
{
u64 xi = b[0];
tmp[0] = ((u128)(xi) * (s));
}
{
u64 xi = b[1];
tmp[1] = ((u128)(xi) * (s));
}
{
u64 xi = b[2];
tmp[2] = ((u128)(xi) * (s));
}
{
u64 xi = b[3];
tmp[3] = ((u128)(xi) * (s));
}
{
u64 xi = b[4];
tmp[4] = ((u128)(xi) * (s));
}
fproduct_carry_wide_(tmp);
b4 = tmp[4];
b0 = tmp[0];
b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
tmp[4] = b4_;
tmp[0] = b0_;
fproduct_copy_from_wide_(output, tmp);
}
static __always_inline void crecip(u64 *output, u64 *input)
{
crecip_crecip(output, input);
}
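/*
 * Constant-time conditional swap: swap1 is either 0 or all-ones, so the XOR
 * mask exchanges the limbs only when the scalar bit is set, with no
 * secret-dependent branches.
 */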
static __always_inline void point_swap_conditional_step(u64 *a, u64 *b,
u64 swap1, u32 ctr)
{
u32 i = ctr - 1;
u64 ai = a[i];
u64 bi = b[i];
u64 x = swap1 & (ai ^ bi);
u64 ai1 = ai ^ x;
u64 bi1 = bi ^ x;
a[i] = ai1;
b[i] = bi1;
}
static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1)
{
point_swap_conditional_step(a, b, swap1, 5);
point_swap_conditional_step(a, b, swap1, 4);
point_swap_conditional_step(a, b, swap1, 3);
point_swap_conditional_step(a, b, swap1, 2);
point_swap_conditional_step(a, b, swap1, 1);
}
static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap)
{
u64 swap1 = 0 - iswap;
point_swap_conditional5(a, b, swap1);
point_swap_conditional5(a + 5, b + 5, swap1);
}
static __always_inline void point_copy(u64 *output, u64 *input)
{
memcpy(output, input, 5 * sizeof(*input));
memcpy(output + 5, input + 5, 5 * sizeof(*input));
}
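/*
 * One combined Montgomery double-and-add step in projective (X:Z)
 * coordinates: (pp, ppq) = (2*p, p + pq), where qmqp holds the affine
 * x-coordinate of the difference of the two working points (here, the base
 * point).
 */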
static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p,
u64 *pq, u64 *qmqp)
{
u64 *qx = qmqp;
u64 *x2 = pp;
u64 *z2 = pp + 5;
u64 *x3 = ppq;
u64 *z3 = ppq + 5;
u64 *x = p;
u64 *z = p + 5;
u64 *xprime = pq;
u64 *zprime = pq + 5;
u64 buf[40] = { 0 };
u64 *origx = buf;
u64 *origxprime0 = buf + 5;
u64 *xxprime0;
u64 *zzprime0;
u64 *origxprime;
xxprime0 = buf + 25;
zzprime0 = buf + 30;
memcpy(origx, x, 5 * sizeof(*x));
fsum(x, z);
fdifference(z, origx);
memcpy(origxprime0, xprime, 5 * sizeof(*xprime));
fsum(xprime, zprime);
fdifference(zprime, origxprime0);
fmul_fmul(xxprime0, xprime, z);
fmul_fmul(zzprime0, x, zprime);
origxprime = buf + 5;
{
u64 *xx0;
u64 *zz0;
u64 *xxprime;
u64 *zzprime;
u64 *zzzprime;
xx0 = buf + 15;
zz0 = buf + 20;
xxprime = buf + 25;
zzprime = buf + 30;
zzzprime = buf + 35;
memcpy(origxprime, xxprime, 5 * sizeof(*xxprime));
fsum(xxprime, zzprime);
fdifference(zzprime, origxprime);
fsquare_fsquare_times(x3, xxprime, 1);
fsquare_fsquare_times(zzzprime, zzprime, 1);
fmul_fmul(z3, zzzprime, qx);
fsquare_fsquare_times(xx0, x, 1);
fsquare_fsquare_times(zz0, z, 1);
{
u64 *zzz;
u64 *xx;
u64 *zz;
u64 scalar;
zzz = buf + 10;
xx = buf + 15;
zz = buf + 20;
fmul_fmul(x2, xx, zz);
fdifference(zz, xx);
scalar = 121665;
fscalar(zzz, zz, scalar);
fsum(zzz, xx);
fmul_fmul(z2, zzz, zz);
}
}
}
static __always_inline void
ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
u64 *q, u8 byt)
{
u64 bit0 = (u64)(byt >> 7);
u64 bit;
point_swap_conditional(nq, nqpq, bit0);
addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q);
bit = (u64)(byt >> 7);
point_swap_conditional(nq2, nqpq2, bit);
}
static __always_inline void
ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2,
u64 *nqpq2, u64 *q, u8 byt)
{
u8 byt1;
ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
byt1 = byt << 1;
ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
}
static __always_inline void
ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
u64 *q, u8 byt, u32 i)
{
while (i--) {
ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2,
nqpq2, q, byt);
byt <<= 2;
}
}
static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq,
u64 *nqpq, u64 *nq2,
u64 *nqpq2, u64 *q,
u32 i)
{
while (i--) {
u8 byte = n1[i];
ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q,
byte, 4);
}
}
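/*
 * Montgomery ladder: nq starts as the point at infinity (X=1, Z=0) and nqpq
 * as the input point q. The 32 scalar bytes are consumed most-significant
 * byte first, two bits at a time, with constant-time conditional swaps driven
 * by each scalar bit.
 */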
static void ladder_cmult(u64 *result, u8 *n1, u64 *q)
{
u64 point_buf[40] = { 0 };
u64 *nq = point_buf;
u64 *nqpq = point_buf + 10;
u64 *nq2 = point_buf + 20;
u64 *nqpq2 = point_buf + 30;
point_copy(nqpq, q);
nq[0] = 1;
ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32);
point_copy(result, nq);
}
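/*
 * Unpack a 32-byte little-endian value into five 51-bit limbs. The most
 * significant bit of the final byte is discarded, as RFC 7748 requires for
 * the u-coordinate.
 */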
static __always_inline void format_fexpand(u64 *output, const u8 *input)
{
const u8 *x00 = input + 6;
const u8 *x01 = input + 12;
const u8 *x02 = input + 19;
const u8 *x0 = input + 24;
u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
i0 = get_unaligned_le64(input);
i1 = get_unaligned_le64(x00);
i2 = get_unaligned_le64(x01);
i3 = get_unaligned_le64(x02);
i4 = get_unaligned_le64(x0);
output0 = i0 & 0x7ffffffffffffLLU;
output1 = i1 >> 3 & 0x7ffffffffffffLLU;
output2 = i2 >> 6 & 0x7ffffffffffffLLU;
output3 = i3 >> 1 & 0x7ffffffffffffLLU;
output4 = i4 >> 12 & 0x7ffffffffffffLLU;
output[0] = output0;
output[1] = output1;
output[2] = output2;
output[3] = output3;
output[4] = output4;
}
static __always_inline void format_fcontract_first_carry_pass(u64 *input)
{
u64 t0 = input[0];
u64 t1 = input[1];
u64 t2 = input[2];
u64 t3 = input[3];
u64 t4 = input[4];
u64 t1_ = t1 + (t0 >> 51);
u64 t0_ = t0 & 0x7ffffffffffffLLU;
u64 t2_ = t2 + (t1_ >> 51);
u64 t1__ = t1_ & 0x7ffffffffffffLLU;
u64 t3_ = t3 + (t2_ >> 51);
u64 t2__ = t2_ & 0x7ffffffffffffLLU;
u64 t4_ = t4 + (t3_ >> 51);
u64 t3__ = t3_ & 0x7ffffffffffffLLU;
input[0] = t0_;
input[1] = t1__;
input[2] = t2__;
input[3] = t3__;
input[4] = t4_;
}
static __always_inline void format_fcontract_first_carry_full(u64 *input)
{
format_fcontract_first_carry_pass(input);
modulo_carry_top(input);
}
static __always_inline void format_fcontract_second_carry_pass(u64 *input)
{
u64 t0 = input[0];
u64 t1 = input[1];
u64 t2 = input[2];
u64 t3 = input[3];
u64 t4 = input[4];
u64 t1_ = t1 + (t0 >> 51);
u64 t0_ = t0 & 0x7ffffffffffffLLU;
u64 t2_ = t2 + (t1_ >> 51);
u64 t1__ = t1_ & 0x7ffffffffffffLLU;
u64 t3_ = t3 + (t2_ >> 51);
u64 t2__ = t2_ & 0x7ffffffffffffLLU;
u64 t4_ = t4 + (t3_ >> 51);
u64 t3__ = t3_ & 0x7ffffffffffffLLU;
input[0] = t0_;
input[1] = t1__;
input[2] = t2__;
input[3] = t3__;
input[4] = t4_;
}
static __always_inline void format_fcontract_second_carry_full(u64 *input)
{
u64 i0;
u64 i1;
u64 i0_;
u64 i1_;
format_fcontract_second_carry_pass(input);
modulo_carry_top(input);
i0 = input[0];
i1 = input[1];
i0_ = i0 & 0x7ffffffffffffLLU;
i1_ = i1 + (i0 >> 51);
input[0] = i0_;
input[1] = i1_;
}
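/*
 * Final reduction: if the value is >= p = 2^255 - 19, subtract p once. The
 * comparison and subtraction are done with masks, so the code path does not
 * depend on the value.
 */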
static __always_inline void format_fcontract_trim(u64 *input)
{
u64 a0 = input[0];
u64 a1 = input[1];
u64 a2 = input[2];
u64 a3 = input[3];
u64 a4 = input[4];
u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU);
u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU);
u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU);
u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU);
u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU);
u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
u64 a0_ = a0 - (0x7ffffffffffedLLU & mask);
u64 a1_ = a1 - (0x7ffffffffffffLLU & mask);
u64 a2_ = a2 - (0x7ffffffffffffLLU & mask);
u64 a3_ = a3 - (0x7ffffffffffffLLU & mask);
u64 a4_ = a4 - (0x7ffffffffffffLLU & mask);
input[0] = a0_;
input[1] = a1_;
input[2] = a2_;
input[3] = a3_;
input[4] = a4_;
}
static __always_inline void format_fcontract_store(u8 *output, u64 *input)
{
u64 t0 = input[0];
u64 t1 = input[1];
u64 t2 = input[2];
u64 t3 = input[3];
u64 t4 = input[4];
u64 o0 = t1 << 51 | t0;
u64 o1 = t2 << 38 | t1 >> 13;
u64 o2 = t3 << 25 | t2 >> 26;
u64 o3 = t4 << 12 | t3 >> 39;
u8 *b0 = output;
u8 *b1 = output + 8;
u8 *b2 = output + 16;
u8 *b3 = output + 24;
put_unaligned_le64(o0, b0);
put_unaligned_le64(o1, b1);
put_unaligned_le64(o2, b2);
put_unaligned_le64(o3, b3);
}
static __always_inline void format_fcontract(u8 *output, u64 *input)
{
format_fcontract_first_carry_full(input);
format_fcontract_second_carry_full(input);
format_fcontract_trim(input);
format_fcontract_store(output, input);
}
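/*
 * Convert a projective (X:Z) point to its affine x-coordinate, x * Z^-1 mod p,
 * and serialize it as 32 little-endian bytes.
 */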
static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point)
{
u64 *x = point;
u64 *z = point + 5;
u64 buf[10] __aligned(32) = { 0 };
u64 *zmone = buf;
u64 *sc = buf + 5;
crecip(zmone, z);
fmul_fmul(sc, x, zmone);
format_fcontract(scalar, sc);
}
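/*
 * Generic constant-time X25519 scalar multiplication: expand the base point,
 * clamp the secret scalar, run the Montgomery ladder, then convert the result
 * to an affine x-coordinate and write it out. Buffers holding secret
 * intermediate values are wiped with memzero_explicit().
 */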
static void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
u64 buf0[10] __aligned(32) = { 0 };
u64 *x0 = buf0;
u64 *z = buf0 + 5;
u64 *q;
format_fexpand(x0, basepoint);
z[0] = 1;
q = buf0;
{
u8 e[32] __aligned(32) = { 0 };
u8 *scalar;
memcpy(e, secret, 32);
curve25519_clamp_secret(e);
scalar = e;
{
u64 buf[15] = { 0 };
u64 *nq = buf;
u64 *x = nq;
x[0] = 1;
ladder_cmult(nq, scalar, q);
format_scalar_of_point(mypublic, nq);
memzero_explicit(buf, sizeof(buf));
}
memzero_explicit(e, sizeof(e));
}
memzero_explicit(buf0, sizeof(buf0));
}

View File

@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include "curve25519-x86_64.c"
static bool curve25519_use_bmi2_adx __ro_after_init;
static bool *const curve25519_nobs[] __initconst = {
&curve25519_use_bmi2_adx };
static void __init curve25519_fpu_init(void)
{
curve25519_use_bmi2_adx = IS_ENABLED(CONFIG_AS_BMI2) &&
IS_ENABLED(CONFIG_AS_ADX) &&
boot_cpu_has(X86_FEATURE_BMI2) &&
boot_cpu_has(X86_FEATURE_ADX);
}
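/* Use the BMI2/ADX assembly only when both the toolchain and the CPU support
 * it; returning false makes the caller fall back to the generic C code.
 */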
static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
curve25519_use_bmi2_adx) {
curve25519_ever64(mypublic, secret, basepoint);
return true;
}
return false;
}
static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
curve25519_use_bmi2_adx) {
curve25519_ever64_base(pub, secret);
return true;
}
return false;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,110 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is an implementation of the Curve25519 ECDH algorithm, using either
* a 32-bit implementation or a 64-bit implementation with 128-bit integers,
* depending on what is supported by the target compiler.
*
* Information: https://cr.yp.to/ecdh.html
*/
#include <zinc/curve25519.h>
#include "../selftest/run.h"
#include <asm/unaligned.h>
#include <linux/version.h>
#include <linux/string.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/init.h>
#include <crypto/algapi.h> // For crypto_memneq.
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "curve25519-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM)
#include "curve25519-arm-glue.c"
#else
static bool *const curve25519_nobs[] __initconst = { };
static void __init curve25519_fpu_init(void)
{
}
static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
return false;
}
static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
return false;
}
#endif
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
#include "curve25519-hacl64.c"
#else
#include "curve25519-fiat32.c"
#endif
static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 };
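/* An all-zero shared secret results from a low-order or otherwise invalid
 * input point; the functions below compare against null_point and report
 * failure in that case.
 */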
bool curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE],
const u8 basepoint[CURVE25519_KEY_SIZE])
{
if (!curve25519_arch(mypublic, secret, basepoint))
curve25519_generic(mypublic, secret, basepoint);
return crypto_memneq(mypublic, null_point, CURVE25519_KEY_SIZE);
}
bool curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
const u8 secret[CURVE25519_KEY_SIZE])
{
static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
if (unlikely(!crypto_memneq(secret, null_point, CURVE25519_KEY_SIZE)))
return false;
if (curve25519_base_arch(pub, secret))
return crypto_memneq(pub, null_point, CURVE25519_KEY_SIZE);
return curve25519(pub, secret, basepoint);
}
void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
{
get_random_bytes_wait(secret, CURVE25519_KEY_SIZE);
curve25519_clamp_secret(secret);
}
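/*
 * Hedged usage sketch (illustrative only, not part of the API surface;
 * bob_public is a hypothetical peer key obtained out of band):
 *
 *	u8 alice_secret[CURVE25519_KEY_SIZE], alice_public[CURVE25519_KEY_SIZE];
 *	u8 shared[CURVE25519_KEY_SIZE];
 *
 *	curve25519_generate_secret(alice_secret);
 *	if (!curve25519_generate_public(alice_public, alice_secret))
 *		return -EINVAL;
 *	if (!curve25519(shared, alice_secret, bob_public))
 *		return -EINVAL;	// all-zero shared secret is rejected
 */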
#include "../selftest/curve25519.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init curve25519_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
curve25519_fpu_init();
if (!selftest_run("curve25519", curve25519_selftest, curve25519_nobs,
ARRAY_SIZE(curve25519_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

View File

@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
static bool poly1305_use_neon __ro_after_init;
static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon };
static void __init poly1305_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
poly1305_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
poly1305_use_neon = elf_hwcap & HWCAP_NEON;
#endif
}
#if defined(CONFIG_ZINC_ARCH_ARM64)
struct poly1305_arch_internal {
union {
u32 h[5];
struct {
u64 h0, h1, h2;
};
};
u64 is_base2_26;
u64 r[2];
};
#elif defined(CONFIG_ZINC_ARCH_ARM)
struct poly1305_arch_internal {
union {
u32 h[5];
struct {
u64 h0, h1;
u32 h2;
} __packed;
};
u32 r[4];
u32 is_base2_26;
};
#endif
/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
* and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
* and then having to go back to scalar -- because the user is silly and has
* called the update function from two separate contexts -- then we need to
* convert back to the original base before proceeding. The below function is
* written for 64-bit integers, and so we have to swap words at the end on
* big-endian 32-bit. It is possible to reason that the initial reduction below
* is sufficient given the implementation invariants. However, for an avoidance
* of doubt and because this is not performance critical, we do the full
* reduction anyway.
*/
static void convert_to_base2_64(void *ctx)
{
struct poly1305_arch_internal *state = ctx;
u32 cy;
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
return;
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
state->h2 = state->h[4] >> 24;
if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
state->h0 = rol64(state->h0, 32);
state->h1 = rol64(state->h1, 32);
}
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
state->h2 &= 3;
state->h0 += cy;
state->h1 += (cy = ULT(state->h0, cy));
state->h2 += ULT(state->h1, cy);
#undef ULT
state->is_base2_26 = 0;
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_arm(ctx, key);
return true;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_blocks_arm(ctx, inp, len, padbit);
return true;
}
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
poly1305_blocks_neon(ctx, inp, bytes, padbit);
len -= bytes;
if (!len)
break;
inp += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_emit_arm(ctx, mac, nonce);
} else
poly1305_emit_neon(ctx, mac, nonce);
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,974 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
# has relicensed it under the licenses specified in the SPDX header above.
# The original headers, including the original license headers, are
# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for ARMv8.
#
# June 2015
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#		IALU/gcc-4.9	NEON
#
# Apple A7	1.86/+5%	0.72
# Cortex-A53	2.69/+58%	1.47
# Cortex-A57	2.70/+7%	1.14
# Denver	1.64/+50%	1.18(*)
# X-Gene	2.13/+68%	2.27
# Mongoose	1.77/+75%	1.12
# Kryo		2.70/+55%	1.13
#
# (*) estimate based on resource availability is less than 1.0, i.e. the
# measured result is worse than expected, presumably because the binary
# translator is not almighty;
$flavour=shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
} else {
open STDOUT,">$output";
}
my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
my ($mac,$nonce)=($inp,$len);
my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
.extern OPENSSL_armcap_P
#else
# define poly1305_init poly1305_init_arm
# define poly1305_blocks poly1305_blocks_arm
# define poly1305_emit poly1305_emit_arm
#endif
.text
// forward "declarations" are required for Apple
.globl poly1305_blocks
.globl poly1305_emit
.globl poly1305_init
.type poly1305_init,%function
.align 5
poly1305_init:
cmp $inp,xzr
stp xzr,xzr,[$ctx] // zero hash value
stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
csel x0,xzr,x0,eq
b.eq .Lno_key
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw $t1,.LOPENSSL_armcap_P
# else
ldr $t1,.LOPENSSL_armcap_P
# endif
adr $t0,.LOPENSSL_armcap_P
ldr w17,[$t0,$t1]
#endif
ldp $r0,$r1,[$inp] // load key
mov $s1,#0xfffffffc0fffffff
movk $s1,#0x0fff,lsl#48
#ifdef __AARCH64EB__
rev $r0,$r0 // flip bytes
rev $r1,$r1
#endif
and $r0,$r0,$s1 // &=0ffffffc0fffffff
and $s1,$s1,#-4
and $r1,$r1,$s1 // &=0ffffffc0ffffffc
stp $r0,$r1,[$ctx,#32] // save key value
#ifndef __KERNEL__
tst w17,#ARMV7_NEON
adr $d0,poly1305_blocks
adr $r0,poly1305_blocks_neon
adr $d1,poly1305_emit
adr $r1,poly1305_emit_neon
csel $d0,$d0,$r0,eq
csel $d1,$d1,$r1,eq
# ifdef __ILP32__
stp w12,w13,[$len]
# else
stp $d0,$d1,[$len]
# endif
mov x0,#1
#else
mov x0,#0
#endif
.Lno_key:
ret
.size poly1305_init,.-poly1305_init
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
ands $len,$len,#-16
b.eq .Lno_data
ldp $h0,$h1,[$ctx] // load hash value
ldp $r0,$r1,[$ctx,#32] // load key value
ldr $h2,[$ctx,#16]
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
b .Loop
.align 5
.Loop:
ldp $t0,$t1,[$inp],#16 // load input
sub $len,$len,#16
#ifdef __AARCH64EB__
rev $t0,$t0
rev $t1,$t1
#endif
adds $h0,$h0,$t0 // accumulate input
adcs $h1,$h1,$t1
mul $d0,$h0,$r0 // h0*r0
adc $h2,$h2,$padbit
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
cbnz $len,.Loop
stp $h0,$h1,[$ctx] // store hash value
str $h2,[$ctx,#16]
.Lno_data:
ret
.size poly1305_blocks,.-poly1305_blocks
.type poly1305_emit,%function
.align 5
poly1305_emit:
ldp $h0,$h1,[$ctx] // load hash base 2^64
ldr $h2,[$ctx,#16]
ldp $t0,$t1,[$nonce] // load nonce
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __AARCH64EB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __AARCH64EB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit,.-poly1305_emit
___
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
my ($T0,$T1,$MASK) = map("v$_",(29..31));
my ($in2,$zeros)=("x16","x17");
my $is_base2_26 = $zeros; # borrow
$code.=<<___;
.type __poly1305_mult,%function
.align 5
__poly1305_mult:
mul $d0,$h0,$r0 // h0*r0
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
ret
.size __poly1305_mult,.-__poly1305_mult
.type __poly1305_splat,%function
.align 5
__poly1305_splat:
and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x13,$h0,#26,#26
extr x14,$h1,$h0,#52
and x14,x14,#0x03ffffff
ubfx x15,$h1,#14,#26
extr x16,$h2,$h1,#40
str w12,[$ctx,#16*0] // r0
add w12,w13,w13,lsl#2 // r1*5
str w13,[$ctx,#16*1] // r1
add w13,w14,w14,lsl#2 // r2*5
str w12,[$ctx,#16*2] // s1
str w14,[$ctx,#16*3] // r2
add w14,w15,w15,lsl#2 // r3*5
str w13,[$ctx,#16*4] // s2
str w15,[$ctx,#16*5] // r3
add w15,w16,w16,lsl#2 // r4*5
str w14,[$ctx,#16*6] // s3
str w16,[$ctx,#16*7] // r4
str w15,[$ctx,#16*8] // s4
ret
.size __poly1305_splat,.-__poly1305_splat
#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
#ifdef __KERNEL__
.globl poly1305_blocks_neon
.globl poly1305_emit_neon
#endif
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
ldr $is_base2_26,[$ctx,#24]
cmp $len,#128
b.hs .Lblocks_neon
cbz $is_base2_26,poly1305_blocks
.Lblocks_neon:
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands $len,$len,#-16
b.eq .Lno_data_neon
cbz $is_base2_26,.Lbase2_64_neon
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
tst $len,#31
b.eq .Leven_neon
ldp $r0,$r1,[$ctx,#32] // load key value
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $d2,$h2,xzr // can be partially reduced...
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
and $t0,$d2,#-4 // ... so reduce
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$h0,$t0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
#ifdef __AARCH64EB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl __poly1305_mult
ldr x30,[sp,#8]
cbz $padbit,.Lstore_base2_64_neon
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
cbnz $len,.Leven_neon
stp w10,w11,[$ctx] // store hash value base 2^26
stp w12,w13,[$ctx,#8]
str w14,[$ctx,#16]
b .Lno_data_neon
.align 4
.Lstore_base2_64_neon:
stp $h0,$h1,[$ctx] // store hash value base 2^64
stp $h2,xzr,[$ctx,#16] // note that is_base2_26 is zeroed
b .Lno_data_neon
.align 4
.Lbase2_64_neon:
ldp $r0,$r1,[$ctx,#32] // load key value
ldp $h0,$h1,[$ctx] // load hash value base 2^64
ldr $h2,[$ctx,#16]
tst $len,#31
b.eq .Linit_neon
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
#ifdef __AARCH64EB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl __poly1305_mult
.Linit_neon:
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
////////////////////////////////// initialize r^n table
mov $h0,$r0 // r^1
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
mov $h1,$r1
mov $h2,xzr
add $ctx,$ctx,#48+12
bl __poly1305_splat
bl __poly1305_mult // r^2
sub $ctx,$ctx,#4
bl __poly1305_splat
bl __poly1305_mult // r^3
sub $ctx,$ctx,#4
bl __poly1305_splat
bl __poly1305_mult // r^4
sub $ctx,$ctx,#4
bl __poly1305_splat
ldr x30,[sp,#8]
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
mov x4,#1
str x4,[$ctx,#-24] // set is_base2_26
sub $ctx,$ctx,#48 // restore original $ctx
b .Ldo_neon
.align 4
.Leven_neon:
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
.Ldo_neon:
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
ldp x9,x13,[$in2],#48
lsl $padbit,$padbit,#24
add x15,$ctx,#48
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN23_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN23_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
fmov $IN23_2,x8
fmov $IN23_3,x10
fmov $IN23_4,x12
ldp x8,x12,[$inp],#16 // inp[0:1]
ldp x9,x13,[$inp],#48
ld1 {$R0,$R1,$S1,$R2},[x15],#64
ld1 {$S2,$R3,$S3,$R4},[x15],#64
ld1 {$S4},[x15]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN01_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN01_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
movi $MASK.2d,#-1
fmov $IN01_2,x8
fmov $IN01_3,x10
fmov $IN01_4,x12
ushr $MASK.2d,$MASK.2d,#38
b.ls .Lskip_loop
.align 4
.Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
// \___________________/
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
// \___________________/ \____________________/
//
// Note that we start with inp[2:3]*r^2. This is because it
// doesn't depend on reduction in previous iteration.
////////////////////////////////////////////////////////////////
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
subs $len,$len,#64
umull $ACC4,$IN23_0,${R4}[2]
csel $in2,$zeros,$in2,lo
umull $ACC3,$IN23_0,${R3}[2]
umull $ACC2,$IN23_0,${R2}[2]
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
umull $ACC1,$IN23_0,${R1}[2]
ldp x9,x13,[$in2],#48
umull $ACC0,$IN23_0,${R0}[2]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
umlal $ACC4,$IN23_1,${R3}[2]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC3,$IN23_1,${R2}[2]
and x5,x9,#0x03ffffff
umlal $ACC2,$IN23_1,${R1}[2]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN23_1,${R0}[2]
ubfx x7,x9,#26,#26
umlal $ACC0,$IN23_1,${S4}[2]
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC4,$IN23_2,${R2}[2]
extr x8,x12,x8,#52
umlal $ACC3,$IN23_2,${R1}[2]
extr x9,x13,x9,#52
umlal $ACC2,$IN23_2,${R0}[2]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC1,$IN23_2,${S4}[2]
fmov $IN23_0,x4
umlal $ACC0,$IN23_2,${S3}[2]
and x8,x8,#0x03ffffff
umlal $ACC4,$IN23_3,${R1}[2]
and x9,x9,#0x03ffffff
umlal $ACC3,$IN23_3,${R0}[2]
ubfx x10,x12,#14,#26
umlal $ACC2,$IN23_3,${S4}[2]
ubfx x11,x13,#14,#26
umlal $ACC1,$IN23_3,${S3}[2]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC0,$IN23_3,${S2}[2]
fmov $IN23_1,x6
add $IN01_2,$IN01_2,$H2
add x12,$padbit,x12,lsr#40
umlal $ACC4,$IN23_4,${R0}[2]
add x13,$padbit,x13,lsr#40
umlal $ACC3,$IN23_4,${S4}[2]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC2,$IN23_4,${S3}[2]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN23_4,${S2}[2]
fmov $IN23_2,x8
umlal $ACC0,$IN23_4,${S1}[2]
fmov $IN23_3,x10
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4 and accumulate
add $IN01_0,$IN01_0,$H0
fmov $IN23_4,x12
umlal $ACC3,$IN01_2,${R1}[0]
ldp x8,x12,[$inp],#16 // inp[0:1]
umlal $ACC0,$IN01_2,${S3}[0]
ldp x9,x13,[$inp],#48
umlal $ACC4,$IN01_2,${R2}[0]
umlal $ACC1,$IN01_2,${S4}[0]
umlal $ACC2,$IN01_2,${R0}[0]
#ifdef __AARCH64EB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}[0]
umlal $ACC4,$IN01_0,${R4}[0]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC2,$IN01_0,${R2}[0]
and x5,x9,#0x03ffffff
umlal $ACC0,$IN01_0,${R0}[0]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN01_0,${R1}[0]
ubfx x7,x9,#26,#26
add $IN01_3,$IN01_3,$H3
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC3,$IN01_1,${R2}[0]
extr x8,x12,x8,#52
umlal $ACC4,$IN01_1,${R3}[0]
extr x9,x13,x9,#52
umlal $ACC0,$IN01_1,${S4}[0]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC2,$IN01_1,${R1}[0]
fmov $IN01_0,x4
umlal $ACC1,$IN01_1,${R0}[0]
and x8,x8,#0x03ffffff
add $IN01_4,$IN01_4,$H4
and x9,x9,#0x03ffffff
umlal $ACC3,$IN01_3,${R0}[0]
ubfx x10,x12,#14,#26
umlal $ACC0,$IN01_3,${S2}[0]
ubfx x11,x13,#14,#26
umlal $ACC4,$IN01_3,${R1}[0]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC1,$IN01_3,${S3}[0]
fmov $IN01_1,x6
umlal $ACC2,$IN01_3,${S4}[0]
add x12,$padbit,x12,lsr#40
umlal $ACC3,$IN01_4,${S4}[0]
add x13,$padbit,x13,lsr#40
umlal $ACC0,$IN01_4,${S1}[0]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC4,$IN01_4,${R0}[0]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN01_4,${S2}[0]
fmov $IN01_2,x8
umlal $ACC2,$IN01_4,${S3}[0]
fmov $IN01_3,x10
fmov $IN01_4,x12
/////////////////////////////////////////////////////////////////
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
// and P. Schwabe
//
// [see discussion in poly1305-armv4 module]
ushr $T0.2d,$ACC3,#26
xtn $H3,$ACC3
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
bic $H3,#0xfc,lsl#24 // &=0x03ffffff
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
xtn $H4,$ACC4
ushr $T1.2d,$ACC1,#26
xtn $H1,$ACC1
bic $H4,#0xfc,lsl#24
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
shrn $T1.2s,$ACC2,#26
xtn $H2,$ACC2
add $ACC0,$ACC0,$T0.2d // h4 -> h0
bic $H1,#0xfc,lsl#24
add $H3,$H3,$T1.2s // h2 -> h3
bic $H2,#0xfc,lsl#24
shrn $T0.2s,$ACC0,#26
xtn $H0,$ACC0
ushr $T1.2s,$H3,#26
bic $H3,#0xfc,lsl#24
bic $H0,#0xfc,lsl#24
add $H1,$H1,$T0.2s // h0 -> h1
add $H4,$H4,$T1.2s // h3 -> h4
b.hi .Loop_neon
.Lskip_loop:
dup $IN23_2,${IN23_2}[0]
add $IN01_2,$IN01_2,$H2
////////////////////////////////////////////////////////////////
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
adds $len,$len,#32
b.ne .Long_tail
dup $IN23_2,${IN01_2}[0]
add $IN23_0,$IN01_0,$H0
add $IN23_3,$IN01_3,$H3
add $IN23_1,$IN01_1,$H1
add $IN23_4,$IN01_4,$H4
.Long_tail:
dup $IN23_0,${IN23_0}[0]
umull2 $ACC0,$IN23_2,${S3}
umull2 $ACC3,$IN23_2,${R1}
umull2 $ACC4,$IN23_2,${R2}
umull2 $ACC2,$IN23_2,${R0}
umull2 $ACC1,$IN23_2,${S4}
dup $IN23_1,${IN23_1}[0]
umlal2 $ACC0,$IN23_0,${R0}
umlal2 $ACC2,$IN23_0,${R2}
umlal2 $ACC3,$IN23_0,${R3}
umlal2 $ACC4,$IN23_0,${R4}
umlal2 $ACC1,$IN23_0,${R1}
dup $IN23_3,${IN23_3}[0]
umlal2 $ACC0,$IN23_1,${S4}
umlal2 $ACC3,$IN23_1,${R2}
umlal2 $ACC2,$IN23_1,${R1}
umlal2 $ACC4,$IN23_1,${R3}
umlal2 $ACC1,$IN23_1,${R0}
dup $IN23_4,${IN23_4}[0]
umlal2 $ACC3,$IN23_3,${R0}
umlal2 $ACC4,$IN23_3,${R1}
umlal2 $ACC0,$IN23_3,${S2}
umlal2 $ACC1,$IN23_3,${S3}
umlal2 $ACC2,$IN23_3,${S4}
umlal2 $ACC3,$IN23_4,${S4}
umlal2 $ACC0,$IN23_4,${S1}
umlal2 $ACC4,$IN23_4,${R0}
umlal2 $ACC1,$IN23_4,${S2}
umlal2 $ACC2,$IN23_4,${S3}
b.eq .Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
add $IN01_0,$IN01_0,$H0
umlal $ACC3,$IN01_2,${R1}
umlal $ACC0,$IN01_2,${S3}
umlal $ACC4,$IN01_2,${R2}
umlal $ACC1,$IN01_2,${S4}
umlal $ACC2,$IN01_2,${R0}
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}
umlal $ACC0,$IN01_0,${R0}
umlal $ACC4,$IN01_0,${R4}
umlal $ACC1,$IN01_0,${R1}
umlal $ACC2,$IN01_0,${R2}
add $IN01_3,$IN01_3,$H3
umlal $ACC3,$IN01_1,${R2}
umlal $ACC0,$IN01_1,${S4}
umlal $ACC4,$IN01_1,${R3}
umlal $ACC1,$IN01_1,${R0}
umlal $ACC2,$IN01_1,${R1}
add $IN01_4,$IN01_4,$H4
umlal $ACC3,$IN01_3,${R0}
umlal $ACC0,$IN01_3,${S2}
umlal $ACC4,$IN01_3,${R1}
umlal $ACC1,$IN01_3,${S3}
umlal $ACC2,$IN01_3,${S4}
umlal $ACC3,$IN01_4,${S4}
umlal $ACC0,$IN01_4,${S1}
umlal $ACC4,$IN01_4,${R0}
umlal $ACC1,$IN01_4,${S2}
umlal $ACC2,$IN01_4,${S3}
.Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
addp $ACC3,$ACC3,$ACC3
ldp d8,d9,[sp,#16] // meet ABI requirements
addp $ACC0,$ACC0,$ACC0
ldp d10,d11,[sp,#32]
addp $ACC4,$ACC4,$ACC4
ldp d12,d13,[sp,#48]
addp $ACC1,$ACC1,$ACC1
ldp d14,d15,[sp,#64]
addp $ACC2,$ACC2,$ACC2
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr $T0.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
and $ACC4,$ACC4,$MASK.2d
ushr $T1.2d,$ACC1,#26
and $ACC1,$ACC1,$MASK.2d
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
ushr $T1.2d,$ACC2,#26
and $ACC2,$ACC2,$MASK.2d
add $ACC0,$ACC0,$T0.2d // h4 -> h0
add $ACC3,$ACC3,$T1.2d // h2 -> h3
ushr $T0.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
ushr $T1.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
add $ACC1,$ACC1,$T0.2d // h0 -> h1
add $ACC4,$ACC4,$T1.2d // h3 -> h4
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
st1 {$ACC4}[0],[$ctx]
.Lno_data_neon:
ldr x29,[sp],#80
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
ldr $is_base2_26,[$ctx,#24]
cbz $is_base2_26,poly1305_emit
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $h2,$h2,xzr // can be partially reduced...
ldp $t0,$t1,[$nonce] // load nonce
and $d0,$h2,#-4 // ... so reduce
add $d0,$d0,$h2,lsr#2
and $h2,$h2,#3
adds $h0,$h0,$d0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __AARCH64EB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __AARCH64EB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit_neon,.-poly1305_emit_neon
#endif
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
#ifndef __KERNEL__
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
#endif
.align 2
___
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
last if (!s/^#/\/\// and !/^$/);
print;
}
close SELF;
foreach (split("\n",$code)) {
s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
s/\.[124]([sd])\[/.$1\[/;
print $_,"\n";
}
close STDOUT;

View File

@ -0,0 +1,205 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is based in part on Andrew Moon's poly1305-donna, which is in the
* public domain.
*/
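/* State in radix 2^26: five 26-bit limbs for r and h keep every partial
 * product in the block function comfortably within 64 bits, so no 128-bit
 * type is needed.
 */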
struct poly1305_internal {
u32 h[5];
u32 r[5];
u32 s[4];
};
static void poly1305_init_generic(void *ctx, const u8 key[16])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
/* s = 5*r */
st->s[0] = st->r[1] * 5;
st->s[1] = st->r[2] * 5;
st->s[2] = st->r[3] * 5;
st->s[3] = st->r[4] * 5;
/* h = 0 */
st->h[0] = 0;
st->h[1] = 0;
st->h[2] = 0;
st->h[3] = 0;
st->h[4] = 0;
}
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
const u32 padbit)
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
const u32 hibit = padbit << 24;
u32 r0, r1, r2, r3, r4;
u32 s1, s2, s3, s4;
u32 h0, h1, h2, h3, h4;
u64 d0, d1, d2, d3, d4;
u32 c;
r0 = st->r[0];
r1 = st->r[1];
r2 = st->r[2];
r3 = st->r[3];
r4 = st->r[4];
s1 = st->s[0];
s2 = st->s[1];
s3 = st->s[2];
s4 = st->s[3];
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
while (len >= POLY1305_BLOCK_SIZE) {
/* h += m[i] */
h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
/* h *= r */
d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
((u64)h2 * s3) + ((u64)h3 * s2) +
((u64)h4 * s1);
d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
((u64)h2 * s4) + ((u64)h3 * s3) +
((u64)h4 * s2);
d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
((u64)h2 * r0) + ((u64)h3 * s4) +
((u64)h4 * s3);
d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
((u64)h2 * r1) + ((u64)h3 * r0) +
((u64)h4 * s4);
d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
((u64)h2 * r2) + ((u64)h3 * r1) +
((u64)h4 * r0);
/* (partial) h %= p */
c = (u32)(d0 >> 26);
h0 = (u32)d0 & 0x3ffffff;
d1 += c;
c = (u32)(d1 >> 26);
h1 = (u32)d1 & 0x3ffffff;
d2 += c;
c = (u32)(d2 >> 26);
h2 = (u32)d2 & 0x3ffffff;
d3 += c;
c = (u32)(d3 >> 26);
h3 = (u32)d3 & 0x3ffffff;
d4 += c;
c = (u32)(d4 >> 26);
h4 = (u32)d4 & 0x3ffffff;
h0 += c * 5;
c = (h0 >> 26);
h0 = h0 & 0x3ffffff;
h1 += c;
input += POLY1305_BLOCK_SIZE;
len -= POLY1305_BLOCK_SIZE;
}
st->h[0] = h0;
st->h[1] = h1;
st->h[2] = h2;
st->h[3] = h3;
st->h[4] = h4;
}
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u32 h0, h1, h2, h3, h4, c;
u32 g0, g1, g2, g3, g4;
u64 f;
u32 mask;
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
h3 = st->h[3];
h4 = st->h[4];
c = h1 >> 26;
h1 = h1 & 0x3ffffff;
h2 += c;
c = h2 >> 26;
h2 = h2 & 0x3ffffff;
h3 += c;
c = h3 >> 26;
h3 = h3 & 0x3ffffff;
h4 += c;
c = h4 >> 26;
h4 = h4 & 0x3ffffff;
h0 += c * 5;
c = h0 >> 26;
h0 = h0 & 0x3ffffff;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 26;
g0 &= 0x3ffffff;
g1 = h1 + c;
c = g1 >> 26;
g1 &= 0x3ffffff;
g2 = h2 + c;
c = g2 >> 26;
g2 &= 0x3ffffff;
g3 = h3 + c;
c = g3 >> 26;
g3 &= 0x3ffffff;
g4 = h4 + c - (1UL << 26);
/* select h if h < p, or h + -p if h >= p */
mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
g0 &= mask;
g1 &= mask;
g2 &= mask;
g3 &= mask;
g4 &= mask;
mask = ~mask;
h0 = (h0 & mask) | g0;
h1 = (h1 & mask) | g1;
h2 = (h2 & mask) | g2;
h3 = (h3 & mask) | g3;
h4 = (h4 & mask) | g4;
/* h = h % (2^128) */
h0 = ((h0) | (h1 << 26)) & 0xffffffff;
h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
/* mac = (h + nonce) % (2^128) */
f = (u64)h0 + nonce[0];
h0 = (u32)f;
f = (u64)h1 + nonce[1] + (f >> 32);
h1 = (u32)f;
f = (u64)h2 + nonce[2] + (f >> 32);
h2 = (u32)f;
f = (u64)h3 + nonce[3] + (f >> 32);
h3 = (u32)f;
put_unaligned_le32(h0, &mac[0]);
put_unaligned_le32(h1, &mac[4]);
put_unaligned_le32(h2, &mac[8]);
put_unaligned_le32(h3, &mac[12]);
}

View File

@ -0,0 +1,182 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This is based in part on Andrew Moon's poly1305-donna, which is in the
* public domain.
*/
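/* State in radix 2^44 (limbs of 44, 44 and 42 bits); partial products are
 * accumulated in 128-bit integers, which is why this variant requires
 * __uint128_t support.
 */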
typedef __uint128_t u128;
struct poly1305_internal {
u64 r[3];
u64 h[3];
u64 s[2];
};
static void poly1305_init_generic(void *ctx, const u8 key[16])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u64 t0, t1;
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
t0 = get_unaligned_le64(&key[0]);
t1 = get_unaligned_le64(&key[8]);
st->r[0] = t0 & 0xffc0fffffffULL;
st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
/* s = 20*r */
st->s[0] = st->r[1] * 20;
st->s[1] = st->r[2] * 20;
/* h = 0 */
st->h[0] = 0;
st->h[1] = 0;
st->h[2] = 0;
}
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
const u32 padbit)
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
const u64 hibit = ((u64)padbit) << 40;
u64 r0, r1, r2;
u64 s1, s2;
u64 h0, h1, h2;
u64 c;
u128 d0, d1, d2, d;
r0 = st->r[0];
r1 = st->r[1];
r2 = st->r[2];
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
s1 = st->s[0];
s2 = st->s[1];
while (len >= POLY1305_BLOCK_SIZE) {
u64 t0, t1;
/* h += m[i] */
t0 = get_unaligned_le64(&input[0]);
t1 = get_unaligned_le64(&input[8]);
h0 += t0 & 0xfffffffffffULL;
h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit;
/* h *= r */
d0 = (u128)h0 * r0;
d = (u128)h1 * s2;
d0 += d;
d = (u128)h2 * s1;
d0 += d;
d1 = (u128)h0 * r1;
d = (u128)h1 * r0;
d1 += d;
d = (u128)h2 * s2;
d1 += d;
d2 = (u128)h0 * r2;
d = (u128)h1 * r1;
d2 += d;
d = (u128)h2 * r0;
d2 += d;
/* (partial) h %= p */
c = (u64)(d0 >> 44);
h0 = (u64)d0 & 0xfffffffffffULL;
d1 += c;
c = (u64)(d1 >> 44);
h1 = (u64)d1 & 0xfffffffffffULL;
d2 += c;
c = (u64)(d2 >> 42);
h2 = (u64)d2 & 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 = h0 & 0xfffffffffffULL;
h1 += c;
input += POLY1305_BLOCK_SIZE;
len -= POLY1305_BLOCK_SIZE;
}
st->h[0] = h0;
st->h[1] = h1;
st->h[2] = h2;
}
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
{
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
u64 h0, h1, h2, c;
u64 g0, g1, g2;
u64 t0, t1;
/* fully carry h */
h0 = st->h[0];
h1 = st->h[1];
h2 = st->h[2];
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += c;
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += c;
c = h2 >> 42;
h2 &= 0x3ffffffffffULL;
h0 += c * 5;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += c;
/* compute h + -p */
g0 = h0 + 5;
c = g0 >> 44;
g0 &= 0xfffffffffffULL;
g1 = h1 + c;
c = g1 >> 44;
g1 &= 0xfffffffffffULL;
g2 = h2 + c - (1ULL << 42);
/* select h if h < p, or h + -p if h >= p */
c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
g0 &= c;
g1 &= c;
g2 &= c;
c = ~c;
h0 = (h0 & c) | g0;
h1 = (h1 & c) | g1;
h2 = (h2 & c) | g2;
/* h = (h + nonce) */
t0 = ((u64)nonce[1] << 32) | nonce[0];
t1 = ((u64)nonce[3] << 32) | nonce[2];
h0 += t0 & 0xfffffffffffULL;
c = h0 >> 44;
h0 &= 0xfffffffffffULL;
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
c = h1 >> 44;
h1 &= 0xfffffffffffULL;
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
h2 &= 0x3ffffffffffULL;
/* mac = h % (2^128) */
h0 = h0 | (h1 << 44);
h1 = (h1 >> 20) | (h2 << 24);
put_unaligned_le64(h0, &mac[0]);
put_unaligned_le64(h1, &mac[8]);
}

View File

@ -0,0 +1,37 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
static bool *const poly1305_nobs[] __initconst = { };
static void __init poly1305_fpu_init(void)
{
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_mips(ctx, key);
return true;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
poly1305_blocks_mips(ctx, inp, len, padbit);
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
poly1305_emit_mips(ctx, mac, nonce);
return true;
}

View File

@ -0,0 +1,407 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#else
#define MSB 3
#define LSB 0
#endif
#define POLY1305_BLOCK_SIZE 16
.text
#define H0 $t0
#define H1 $t1
#define H2 $t2
#define H3 $t3
#define H4 $t4
#define R0 $t5
#define R1 $t6
#define R2 $t7
#define R3 $t8
#define O0 $s0
#define O1 $s4
#define O2 $v1
#define O3 $t9
#define O4 $s5
#define S1 $s1
#define S2 $s2
#define S3 $s3
#define SC $at
#define CA $v0
/* Input arguments */
#define poly $a0
#define src $a1
#define srclen $a2
#define hibit $a3
/* Location in the opaque buffer
* R[0..3], CA, H[0..4]
*/
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
#define PTR_POLY1305_CA (16 ) ## ($a0)
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
#define POLY1305_BLOCK_SIZE 16
#define POLY1305_STACK_SIZE 32
.set noat
.align 4
.globl poly1305_blocks_mips
.ent poly1305_blocks_mips
poly1305_blocks_mips:
.frame $sp, POLY1305_STACK_SIZE, $ra
/* srclen &= 0xFFFFFFF0 */
ins srclen, $zero, 0, 4
addiu $sp, -(POLY1305_STACK_SIZE)
/* check srclen >= 16 bytes */
beqz srclen, .Lpoly1305_blocks_mips_end
/* Calculate last round based on src address pointer.
* last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
*/
addu srclen, src
lw R0, PTR_POLY1305_R(0)
lw R1, PTR_POLY1305_R(1)
lw R2, PTR_POLY1305_R(2)
lw R3, PTR_POLY1305_R(3)
/* store the used save registers. */
sw $s0, 0($sp)
sw $s1, 4($sp)
sw $s2, 8($sp)
sw $s3, 12($sp)
sw $s4, 16($sp)
sw $s5, 20($sp)
/* load Hx and Carry */
lw CA, PTR_POLY1305_CA
lw H0, PTR_POLY1305_H(0)
lw H1, PTR_POLY1305_H(1)
lw H2, PTR_POLY1305_H(2)
lw H3, PTR_POLY1305_H(3)
lw H4, PTR_POLY1305_H(4)
/* Sx = Rx + (Rx >> 2) */
srl S1, R1, 2
srl S2, R2, 2
srl S3, R3, 2
addu S1, R1
addu S2, R2
addu S3, R3
addiu SC, $zero, 1
.Lpoly1305_loop:
lwl O0, 0+MSB(src)
lwl O1, 4+MSB(src)
lwl O2, 8+MSB(src)
lwl O3,12+MSB(src)
lwr O0, 0+LSB(src)
lwr O1, 4+LSB(src)
lwr O2, 8+LSB(src)
lwr O3,12+LSB(src)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh O0
wsbh O1
wsbh O2
wsbh O3
rotr O0, 16
rotr O1, 16
rotr O2, 16
rotr O3, 16
#endif
/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
addu H0, CA
sltu CA, H0, CA
addu O0, H0
sltu H0, O0, H0
addu CA, H0
/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
addu H1, CA
sltu CA, H1, CA
addu O1, H1
sltu H1, O1, H1
addu CA, H1
/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
addu H2, CA
sltu CA, H2, CA
addu O2, H2
sltu H2, O2, H2
addu CA, H2
/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
addu H3, CA
sltu CA, H3, CA
addu O3, H3
sltu H3, O3, H3
addu CA, H3
/* h4 += (u32)(d3 >> 32) + padbit; */
addu H4, hibit
addu O4, H4, CA
/* D0 */
multu O0, R0
maddu O1, S3
maddu O2, S2
maddu O3, S1
mfhi CA
mflo H0
/* D1 */
multu O0, R1
maddu O1, R0
maddu O2, S3
maddu O3, S2
maddu O4, S1
maddu CA, SC
mfhi CA
mflo H1
/* D2 */
multu O0, R2
maddu O1, R1
maddu O2, R0
maddu O3, S3
maddu O4, S2
maddu CA, SC
mfhi CA
mflo H2
/* D4 */
mul H4, O4, R0
/* D3 */
multu O0, R3
maddu O1, R2
maddu O2, R1
maddu O3, R0
maddu O4, S3
maddu CA, SC
mfhi CA
mflo H3
addiu src, POLY1305_BLOCK_SIZE
/* h4 += (u32)(d3 >> 32); */
addu O4, H4, CA
/* h4 &= 3 */
andi H4, O4, 3
/* c = (h4 >> 2) + (h4 & ~3U); */
srl CA, O4, 2
ins O4, $zero, 0, 2
addu CA, O4
/* loop while another 16-byte block remains. */
bne src, srclen, .Lpoly1305_loop
/* restore the used save registers. */
lw $s0, 0($sp)
lw $s1, 4($sp)
lw $s2, 8($sp)
lw $s3, 12($sp)
lw $s4, 16($sp)
lw $s5, 20($sp)
/* store Hx and Carry */
sw CA, PTR_POLY1305_CA
sw H0, PTR_POLY1305_H(0)
sw H1, PTR_POLY1305_H(1)
sw H2, PTR_POLY1305_H(2)
sw H3, PTR_POLY1305_H(3)
sw H4, PTR_POLY1305_H(4)
.Lpoly1305_blocks_mips_end:
addiu $sp, POLY1305_STACK_SIZE
/* Jump Back */
jr $ra
.end poly1305_blocks_mips
.set at
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
#define MAC $a1
#define NONCE $a2
#define G0 $t5
#define G1 $t6
#define G2 $t7
#define G3 $t8
#define G4 $t9
.set noat
.align 4
.globl poly1305_emit_mips
.ent poly1305_emit_mips
poly1305_emit_mips:
/* load Hx and Carry */
lw CA, PTR_POLY1305_CA
lw H0, PTR_POLY1305_H(0)
lw H1, PTR_POLY1305_H(1)
lw H2, PTR_POLY1305_H(2)
lw H3, PTR_POLY1305_H(3)
lw H4, PTR_POLY1305_H(4)
/* Add left over carry */
addu H0, CA
sltu CA, H0, CA
addu H1, CA
sltu CA, H1, CA
addu H2, CA
sltu CA, H2, CA
addu H3, CA
sltu CA, H3, CA
addu H4, CA
/* compare to modulus by computing h + -p */
addiu G0, H0, 5
sltu CA, G0, H0
addu G1, H1, CA
sltu CA, G1, H1
addu G2, H2, CA
sltu CA, G2, H2
addu G3, H3, CA
sltu CA, G3, H3
addu G4, H4, CA
srl SC, G4, 2
/* if there was carry into 131st bit, h3:h0 = g3:g0 */
movn H0, G0, SC
movn H1, G1, SC
movn H2, G2, SC
movn H3, G3, SC
lwl G0, 0+MSB(NONCE)
lwl G1, 4+MSB(NONCE)
lwl G2, 8+MSB(NONCE)
lwl G3,12+MSB(NONCE)
lwr G0, 0+LSB(NONCE)
lwr G1, 4+LSB(NONCE)
lwr G2, 8+LSB(NONCE)
lwr G3,12+LSB(NONCE)
/* mac = (h + nonce) % (2^128) */
addu H0, G0
sltu CA, H0, G0
/* H1 */
addu H1, CA
sltu CA, H1, CA
addu H1, G1
sltu G1, H1, G1
addu CA, G1
/* H2 */
addu H2, CA
sltu CA, H2, CA
addu H2, G2
sltu G2, H2, G2
addu CA, G2
/* H3 */
addu H3, CA
addu H3, G3
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh H0
wsbh H1
wsbh H2
wsbh H3
rotr H0, 16
rotr H1, 16
rotr H2, 16
rotr H3, 16
#endif
/* store MAC */
swl H0, 0+MSB(MAC)
swl H1, 4+MSB(MAC)
swl H2, 8+MSB(MAC)
swl H3,12+MSB(MAC)
swr H0, 0+LSB(MAC)
swr H1, 4+LSB(MAC)
swr H2, 8+LSB(MAC)
swr H3,12+LSB(MAC)
jr $ra
.end poly1305_emit_mips
#define PR0 $t0
#define PR1 $t1
#define PR2 $t2
#define PR3 $t3
#define PT0 $t4
/* Input arguments CTX=$a0, KEY=$a1 */
.align 4
.globl poly1305_init_mips
.ent poly1305_init_mips
poly1305_init_mips:
lwl PR0, 0+MSB($a1)
lwl PR1, 4+MSB($a1)
lwl PR2, 8+MSB($a1)
lwl PR3,12+MSB($a1)
lwr PR0, 0+LSB($a1)
lwr PR1, 4+LSB($a1)
lwr PR2, 8+LSB($a1)
lwr PR3,12+LSB($a1)
/* store Hx and Carry */
sw $zero, PTR_POLY1305_CA
sw $zero, PTR_POLY1305_H(0)
sw $zero, PTR_POLY1305_H(1)
sw $zero, PTR_POLY1305_H(2)
sw $zero, PTR_POLY1305_H(3)
sw $zero, PTR_POLY1305_H(4)
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
wsbh PR0
wsbh PR1
wsbh PR2
wsbh PR3
rotr PR0, 16
rotr PR1, 16
rotr PR2, 16
rotr PR3, 16
#endif
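/* Clamp r as the Poly1305 spec requires: clear the top 4 bits of each 32-bit word and the low 2 bits of r[1..3]; PT0 holds the 0x0ffffffc mask. */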
lui PT0, 0x0FFF
ori PT0, 0xFFFC
/* AND 0x0fffffff; */
ext PR0, PR0, 0, (32-4)
/* AND 0x0ffffffc; */
and PR1, PT0
and PR2, PT0
and PR3, PT0
/* store Rx */
sw PR0, PTR_POLY1305_R(0)
sw PR1, PTR_POLY1305_R(1)
sw PR2, PTR_POLY1305_R(2)
sw PR3, PTR_POLY1305_R(3)
/* Jump Back */
jr $ra
.end poly1305_init_mips


@ -0,0 +1,467 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
# has relicensed it under the licenses specified in the SPDX header above.
# The original headers, including the original license headers, are
# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# Poly1305 hash for MIPS64.
#
# May 2016
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
# IALU/gcc
# R1x000 5.64/+120% (big-endian)
# Octeon II 3.80/+280% (little-endian)
######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
# excluded from the rule, because it's specified volatile];
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
# old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# <appro@openssl.org>
#
######################################################################
$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
die "MIPS64 only" unless ($flavour =~ /64|n32/i);
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
$code.=<<___;
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
defined(_MIPS_ARCH_MIPS64R6)) \\
&& !defined(_MIPS_ARCH_MIPS64R2)
# define _MIPS_ARCH_MIPS64R2
#endif
#if defined(_MIPS_ARCH_MIPS64R6)
# define dmultu(rs,rt)
# define mflo(rd,rs,rt) dmulu rd,rs,rt
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
#else
# define dmultu(rs,rt) dmultu rs,rt
# define mflo(rd,rs,rt) mflo rd
# define mfhi(rd,rs,rt) mfhi rd
#endif
#ifdef __KERNEL__
# define poly1305_init poly1305_init_mips
# define poly1305_blocks poly1305_blocks_mips
# define poly1305_emit poly1305_emit_mips
#endif
#if defined(__MIPSEB__) && !defined(MIPSEB)
# define MIPSEB
#endif
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define MSB 7
# define LSB 0
#endif
.text
.set noat
.set noreorder
.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder
sd $zero,0($ctx)
sd $zero,8($ctx)
sd $zero,16($ctx)
beqz $inp,.Lno_key
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp)
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif
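# Clamp the key: r &= 0ffffffc0ffffffc0ffffffc0fffffff (the two masks are built below).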
li $tmp0,1
dsll $tmp0,32
daddiu $tmp0,-63
dsll $tmp0,28
daddiu $tmp0,-1 # 0ffffffc0fffffff
and $in0,$tmp0
daddiu $tmp0,-3 # 0ffffffc0ffffffc
and $in1,$tmp0
sd $in0,24($ctx)
dsrl $tmp0,$in1,2
sd $in1,32($ctx)
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
sd $tmp0,40($ctx)
.Lno_key:
li $v0,0 # return 0
jr $ra
.end poly1305_init
___
{
my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
$code.=<<___;
.align 5
.globl poly1305_blocks
.ent poly1305_blocks
poly1305_blocks:
.set noreorder
dsrl $len,4 # number of complete blocks
bnez $len,poly1305_blocks_internal
nop
jr $ra
nop
.end poly1305_blocks
.align 5
.ent poly1305_blocks_internal
poly1305_blocks_internal:
.frame $sp,6*8,$ra
.mask $SAVED_REGS_MASK,-8
.set noreorder
dsubu $sp,6*8
sd $s5,40($sp)
sd $s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
sd $s3,24($sp)
sd $s2,16($sp)
sd $s1,8($sp)
sd $s0,0($sp)
___
$code.=<<___;
.set reorder
ld $h0,0($ctx) # load hash value
ld $h1,8($ctx)
ld $h2,16($ctx)
ld $r0,24($ctx) # load key
ld $r1,32($ctx)
ld $s1,40($ctx)
.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp) # load input
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp) # load input
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
daddiu $len,-1
daddiu $inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF
and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif
daddu $h0,$in0 # accumulate input
daddu $h1,$in1
sltu $tmp0,$h0,$in0
sltu $tmp1,$h1,$in1
daddu $h1,$tmp0
dmultu ($r0,$h0) # h0*r0
daddu $h2,$padbit
sltu $tmp0,$h1,$tmp0
mflo ($d0,$r0,$h0)
mfhi ($d1,$r0,$h0)
dmultu ($s1,$h1) # h1*5*r1
daddu $tmp0,$tmp1
daddu $h2,$tmp0
mflo ($tmp0,$s1,$h1)
mfhi ($tmp1,$s1,$h1)
dmultu ($r1,$h0) # h0*r1
daddu $d0,$tmp0
daddu $d1,$tmp1
mflo ($tmp2,$r1,$h0)
mfhi ($d2,$r1,$h0)
sltu $tmp0,$d0,$tmp0
daddu $d1,$tmp0
dmultu ($r0,$h1) # h1*r0
daddu $d1,$tmp2
sltu $tmp2,$d1,$tmp2
mflo ($tmp0,$r0,$h1)
mfhi ($tmp1,$r0,$h1)
daddu $d2,$tmp2
dmultu ($s1,$h2) # h2*5*r1
daddu $d1,$tmp0
daddu $d2,$tmp1
mflo ($tmp2,$s1,$h2)
dmultu ($r0,$h2) # h2*r0
sltu $tmp0,$d1,$tmp0
daddu $d2,$tmp0
mflo ($tmp3,$r0,$h2)
daddu $d1,$tmp2
daddu $d2,$tmp3
sltu $tmp2,$d1,$tmp2
daddu $d2,$tmp2
li $tmp0,-4 # final reduction
and $tmp0,$d2
dsrl $tmp1,$d2,2
andi $h2,$d2,3
daddu $tmp0,$tmp1
daddu $h0,$d0,$tmp0
sltu $tmp0,$h0,$tmp0
daddu $h1,$d1,$tmp0
sltu $tmp0,$h1,$tmp0
daddu $h2,$h2,$tmp0
bnez $len,.Loop
sd $h0,0($ctx) # store hash value
sd $h1,8($ctx)
sd $h2,16($ctx)
.set noreorder
ld $s5,40($sp) # epilogue
ld $s4,32($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
ld $s3,24($sp)
ld $s2,16($sp)
ld $s1,8($sp)
ld $s0,0($sp)
___
$code.=<<___;
jr $ra
daddu $sp,6*8
.end poly1305_blocks_internal
___
}
{
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
$code.=<<___;
.align 5
.globl poly1305_emit
.ent poly1305_emit
poly1305_emit:
.frame $sp,0,$ra
.set reorder
ld $tmp0,0($ctx)
ld $tmp1,8($ctx)
ld $tmp2,16($ctx)
daddiu $in0,$tmp0,5 # compare to modulus
sltiu $tmp3,$in0,5
daddu $in1,$tmp1,$tmp3
sltu $tmp3,$in1,$tmp3
daddu $tmp2,$tmp2,$tmp3
dsrl $tmp2,2 # see if it carried/borrowed
dsubu $tmp2,$zero,$tmp2
nor $tmp3,$zero,$tmp2
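# tmp2 is all-ones when h >= 2^130 - 5 (select h + 5); tmp3 is its complement (keep h)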
and $in0,$tmp2
and $tmp0,$tmp3
and $in1,$tmp2
and $tmp1,$tmp3
or $in0,$tmp0
or $in1,$tmp1
lwu $tmp0,0($nonce) # load nonce
lwu $tmp1,4($nonce)
lwu $tmp2,8($nonce)
lwu $tmp3,12($nonce)
dsll $tmp1,32
dsll $tmp3,32
or $tmp0,$tmp1
or $tmp2,$tmp3
daddu $in0,$tmp0 # accumulate nonce
daddu $in1,$tmp2
sltu $tmp0,$in0,$tmp0
daddu $in1,$tmp0
dsrl $tmp0,$in0,8 # write mac value
dsrl $tmp1,$in0,16
dsrl $tmp2,$in0,24
sb $in0,0($mac)
dsrl $tmp3,$in0,32
sb $tmp0,1($mac)
dsrl $tmp0,$in0,40
sb $tmp1,2($mac)
dsrl $tmp1,$in0,48
sb $tmp2,3($mac)
dsrl $tmp2,$in0,56
sb $tmp3,4($mac)
dsrl $tmp3,$in1,8
sb $tmp0,5($mac)
dsrl $tmp0,$in1,16
sb $tmp1,6($mac)
dsrl $tmp1,$in1,24
sb $tmp2,7($mac)
sb $in1,8($mac)
dsrl $tmp2,$in1,32
sb $tmp3,9($mac)
dsrl $tmp3,$in1,40
sb $tmp0,10($mac)
dsrl $tmp0,$in1,48
sb $tmp1,11($mac)
dsrl $tmp1,$in1,56
sb $tmp2,12($mac)
sb $tmp3,13($mac)
sb $tmp0,14($mac)
sb $tmp1,15($mac)
jr $ra
.end poly1305_emit
.rdata
.align 2
___
}
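# Re-emit this script's own leading '#' comment block (the license header) as '//' comments at the top of the generated output.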
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
last if (!s/^#/\/\// and !/^$/);
print;
}
close SELF;
$output=pop and open STDOUT,">$output";
print $code;
close STDOUT;


@ -0,0 +1,156 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>
asmlinkage void poly1305_init_x86_64(void *ctx,
const u8 key[POLY1305_KEY_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
const size_t len, const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4]);
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4]);
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
const u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
const size_t len, const u32 padbit);
static bool poly1305_use_avx __ro_after_init;
static bool poly1305_use_avx2 __ro_after_init;
static bool poly1305_use_avx512 __ro_after_init;
static bool *const poly1305_nobs[] __initconst = {
&poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 };
static void __init poly1305_fpu_init(void)
{
poly1305_use_avx =
boot_cpu_has(X86_FEATURE_AVX) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
poly1305_use_avx2 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
poly1305_use_avx512 =
boot_cpu_has(X86_FEATURE_AVX) &&
boot_cpu_has(X86_FEATURE_AVX2) &&
boot_cpu_has(X86_FEATURE_AVX512F) &&
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
XFEATURE_MASK_AVX512, NULL) &&
/* Skylake downclocks unacceptably when using zmm registers. */
boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
#endif
}
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_x86_64(ctx, key);
return true;
}
struct poly1305_arch_internal {
union {
struct {
u32 h[5];
u32 is_base2_26;
};
u64 hs[3];
};
u64 r[2];
u64 pad;
struct { u32 r2, r1, r4, r3; } rn[9];
};
/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
* the unfortunate situation of using AVX and then having to go back to scalar
* -- because the user is silly and has called the update function from two
* separate contexts -- then we need to convert back to the original base before
* proceeding. It is possible to reason that the initial reduction below is
* sufficient given the implementation invariants. However, for the avoidance of
* doubt and because this is not performance critical, we do the full reduction
* anyway.
*/
static void convert_to_base2_64(void *ctx)
{
struct poly1305_arch_internal *state = ctx;
u32 cy;
if (!state->is_base2_26)
return;
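/* Propagate the carries between the 26-bit limbs, then repack the five base-2^26 limbs h[0..4] into the three base-2^64 words hs[0..2]. */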
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
state->hs[2] = state->h[4] >> 24;
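/* ULT(a, b) is a branch-free unsigned "a < b": it computes the borrow of a - b without a data-dependent branch. */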
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
state->hs[2] &= 3;
state->hs[0] += cy;
state->hs[1] += (cy = ULT(state->hs[0], cy));
state->hs[2] += ULT(state->hs[1], cy);
#undef ULT
state->is_base2_26 = 0;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
struct poly1305_arch_internal *state = ctx;
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
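/* Use the scalar base-2^64 code when AVX isn't usable here, or when the input is short and the state isn't already in base 2^26. */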
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
!simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_blocks_x86_64(ctx, inp, len, padbit);
return true;
}
for (;;) {
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512)
poly1305_blocks_avx512(ctx, inp, bytes, padbit);
else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2)
poly1305_blocks_avx2(ctx, inp, bytes, padbit);
else
poly1305_blocks_avx(ctx, inp, bytes, padbit);
len -= bytes;
if (!len)
break;
inp += bytes;
simd_relax(simd_context);
}
return true;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
struct poly1305_arch_internal *state = ctx;
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
!state->is_base2_26 || !simd_use(simd_context)) {
convert_to_base2_64(ctx);
poly1305_emit_x86_64(ctx, mac, nonce);
} else
poly1305_emit_avx(ctx, mac, nonce);
return true;
}

File diff suppressed because it is too large


@ -0,0 +1,165 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* Implementation of the Poly1305 message authenticator.
*
* Information: https://cr.yp.to/mac.html
*/
#include <zinc/poly1305.h>
#include "../selftest/run.h"
#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/init.h>
#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "poly1305-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
#include "poly1305-arm-glue.c"
#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
#include "poly1305-mips-glue.c"
#else
static inline bool poly1305_init_arch(void *ctx,
const u8 key[POLY1305_KEY_SIZE])
{
return false;
}
static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
size_t len, const u32 padbit,
simd_context_t *simd_context)
{
return false;
}
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
return false;
}
static bool *const poly1305_nobs[] __initconst = { };
static void __init poly1305_fpu_init(void)
{
}
#endif
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
#include "poly1305-donna64.c"
#else
#include "poly1305-donna32.c"
#endif
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
{
ctx->nonce[0] = get_unaligned_le32(&key[16]);
ctx->nonce[1] = get_unaligned_le32(&key[20]);
ctx->nonce[2] = get_unaligned_le32(&key[24]);
ctx->nonce[3] = get_unaligned_le32(&key[28]);
if (!poly1305_init_arch(ctx->opaque, key))
poly1305_init_generic(ctx->opaque, key);
ctx->num = 0;
}
static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
const u32 padbit,
simd_context_t *simd_context)
{
if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
poly1305_blocks_generic(ctx, input, len, padbit);
}
static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE],
const u32 nonce[4],
simd_context_t *simd_context)
{
if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
poly1305_emit_generic(ctx, mac, nonce);
}
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
simd_context_t *simd_context)
{
const size_t num = ctx->num;
size_t rem;
if (num) {
rem = POLY1305_BLOCK_SIZE - num;
if (len < rem) {
memcpy(ctx->data + num, input, len);
ctx->num = num + len;
return;
}
memcpy(ctx->data + num, input, rem);
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
simd_context);
input += rem;
len -= rem;
}
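/* Hash all complete blocks straight from the input; any trailing partial block is buffered for the next update or for final. */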
rem = len % POLY1305_BLOCK_SIZE;
len -= rem;
if (len >= POLY1305_BLOCK_SIZE) {
poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
input += len;
}
if (rem)
memcpy(ctx->data, input, rem);
ctx->num = rem;
}
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
simd_context_t *simd_context)
{
size_t num = ctx->num;
if (num) {
ctx->data[num++] = 1;
while (num < POLY1305_BLOCK_SIZE)
ctx->data[num++] = 0;
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
simd_context);
}
poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
memzero_explicit(ctx, sizeof(*ctx));
}
#include "../selftest/poly1305.c"
static bool nosimd __initdata = false;
#ifndef COMPAT_ZINC_IS_A_MODULE
int __init poly1305_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
if (!nosimd)
poly1305_fpu_init();
if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs,
ARRAY_SIZE(poly1305_nobs)))
return -ENOTRECOVERABLE;
return 0;
}
#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}
module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Poly1305 one-time authenticator");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _ZINC_SELFTEST_RUN_H
#define _ZINC_SELFTEST_RUN_H
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/bug.h>
static inline bool selftest_run(const char *name, bool (*selftest)(void),
bool *const nobs[], unsigned int nobs_len)
{
unsigned long set = 0, subset = 0, largest_subset = 0;
unsigned int i;
BUILD_BUG_ON(!__builtin_constant_p(nobs_len) ||
nobs_len >= BITS_PER_LONG);
if (!IS_ENABLED(CONFIG_ZINC_SELFTEST))
return true;
for (i = 0; i < nobs_len; ++i)
set |= ((unsigned long)*nobs[i]) << i;
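/* Try every combination of the "no-SIMD" override bits: subset = (subset - set) & set enumerates all subsets of set, ending back at 0. */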
do {
for (i = 0; i < nobs_len; ++i)
*nobs[i] = BIT(i) & subset;
if (selftest())
largest_subset = max(subset, largest_subset);
else
pr_err("%s self-test combination 0x%lx: FAIL\n", name,
subset);
subset = (subset - set) & set;
} while (subset);
for (i = 0; i < nobs_len; ++i)
*nobs[i] = BIT(i) & largest_subset;
if (largest_subset == set)
pr_info("%s self-tests: pass\n", name);
return !WARN_ON(largest_subset != set);
}
#endif

470
net/wireguard/device.c Normal file

@ -0,0 +1,470 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "queueing.h"
#include "socket.h"
#include "timers.h"
#include "device.h"
#include "ratelimiter.h"
#include "peer.h"
#include "messages.h"
#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/suspend.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
#include <net/ip_tunnels.h>
#include <net/addrconf.h>
static LIST_HEAD(device_list);
static int wg_open(struct net_device *dev)
{
struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
struct inet6_dev *dev_v6 = __in6_dev_get(dev);
#endif
struct wg_device *wg = netdev_priv(dev);
struct wg_peer *peer;
int ret;
if (dev_v4) {
/* At some point we might put this check near the ip_rt_send_redirect
* call of ip_forward in net/ipv4/ip_forward.c, similar
* to the current secpath check.
*/
IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
}
#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
if (dev_v6)
#ifndef COMPAT_CANNOT_USE_DEV_CNF
dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
#else
dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
#endif
#endif
mutex_lock(&wg->device_update_lock);
ret = wg_socket_init(wg, wg->incoming_port);
if (ret < 0)
goto out;
list_for_each_entry(peer, &wg->peer_list, peer_list) {
wg_packet_send_staged_packets(peer);
if (peer->persistent_keepalive_interval)
wg_packet_send_keepalive(peer);
}
out:
mutex_unlock(&wg->device_update_lock);
return ret;
}
#ifdef CONFIG_PM_SLEEP
static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
void *data)
{
struct wg_device *wg;
struct wg_peer *peer;
/* If the machine is constantly suspending and resuming, as part of
* its normal operation rather than as a somewhat rare event, then we
* don't actually want to clear keys.
*/
if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
return 0;
if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
return 0;
rtnl_lock();
list_for_each_entry(wg, &device_list, device_list) {
mutex_lock(&wg->device_update_lock);
list_for_each_entry(peer, &wg->peer_list, peer_list) {
del_timer(&peer->timer_zero_key_material);
wg_noise_handshake_clear(&peer->handshake);
wg_noise_keypairs_clear(&peer->keypairs);
}
mutex_unlock(&wg->device_update_lock);
}
rtnl_unlock();
rcu_barrier();
return 0;
}
static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
#endif
static int wg_stop(struct net_device *dev)
{
struct wg_device *wg = netdev_priv(dev);
struct wg_peer *peer;
mutex_lock(&wg->device_update_lock);
list_for_each_entry(peer, &wg->peer_list, peer_list) {
wg_packet_purge_staged_packets(peer);
wg_timers_stop(peer);
wg_noise_handshake_clear(&peer->handshake);
wg_noise_keypairs_clear(&peer->keypairs);
wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
}
mutex_unlock(&wg->device_update_lock);
skb_queue_purge(&wg->incoming_handshakes);
wg_socket_reinit(wg, NULL, NULL);
return 0;
}
static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct wg_device *wg = netdev_priv(dev);
struct sk_buff_head packets;
struct wg_peer *peer;
struct sk_buff *next;
sa_family_t family;
u32 mtu;
int ret;
if (unlikely(!wg_check_packet_protocol(skb))) {
ret = -EPROTONOSUPPORT;
net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
goto err;
}
peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
if (unlikely(!peer)) {
ret = -ENOKEY;
if (skb->protocol == htons(ETH_P_IP))
net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
dev->name, &ip_hdr(skb)->daddr);
else if (skb->protocol == htons(ETH_P_IPV6))
net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
dev->name, &ipv6_hdr(skb)->daddr);
goto err;
}
family = READ_ONCE(peer->endpoint.addr.sa_family);
if (unlikely(family != AF_INET && family != AF_INET6)) {
ret = -EDESTADDRREQ;
net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
dev->name, peer->internal_id);
goto err_peer;
}
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
__skb_queue_head_init(&packets);
if (!skb_is_gso(skb)) {
skb_mark_not_on_list(skb);
} else {
struct sk_buff *segs = skb_gso_segment(skb, 0);
if (unlikely(IS_ERR(segs))) {
ret = PTR_ERR(segs);
goto err_peer;
}
dev_kfree_skb(skb);
skb = segs;
}
skb_list_walk_safe(skb, skb, next) {
skb_mark_not_on_list(skb);
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
continue;
/* We only need to keep the original dst around for icmp,
* so at this point we're in a position to drop it.
*/
skb_dst_drop(skb);
PACKET_CB(skb)->mtu = mtu;
__skb_queue_tail(&packets, skb);
}
spin_lock_bh(&peer->staged_packet_queue.lock);
/* If the queue is getting too big, we start removing the oldest packets
* until it's small again. We do this before adding the new packet, so
* we don't remove GSO segments that are in excess.
*/
while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
++dev->stats.tx_dropped;
}
skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
spin_unlock_bh(&peer->staged_packet_queue.lock);
wg_packet_send_staged_packets(peer);
wg_peer_put(peer);
return NETDEV_TX_OK;
err_peer:
wg_peer_put(peer);
err:
++dev->stats.tx_errors;
if (skb->protocol == htons(ETH_P_IP))
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
else if (skb->protocol == htons(ETH_P_IPV6))
icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
kfree_skb(skb);
return ret;
}
static const struct net_device_ops netdev_ops = {
.ndo_open = wg_open,
.ndo_stop = wg_stop,
.ndo_start_xmit = wg_xmit,
.ndo_get_stats64 = ip_tunnel_get_stats64
};
static void wg_destruct(struct net_device *dev)
{
struct wg_device *wg = netdev_priv(dev);
rtnl_lock();
list_del(&wg->device_list);
rtnl_unlock();
mutex_lock(&wg->device_update_lock);
rcu_assign_pointer(wg->creating_net, NULL);
wg->incoming_port = 0;
wg_socket_reinit(wg, NULL, NULL);
/* The final references are cleared in the below calls to destroy_workqueue. */
wg_peer_remove_all(wg);
destroy_workqueue(wg->handshake_receive_wq);
destroy_workqueue(wg->handshake_send_wq);
destroy_workqueue(wg->packet_crypt_wq);
wg_packet_queue_free(&wg->decrypt_queue, true);
wg_packet_queue_free(&wg->encrypt_queue, true);
rcu_barrier(); /* Wait for all the peers to be actually freed. */
wg_ratelimiter_uninit();
memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
skb_queue_purge(&wg->incoming_handshakes);
free_percpu(dev->tstats);
free_percpu(wg->incoming_handshakes_worker);
kvfree(wg->index_hashtable);
kvfree(wg->peer_hashtable);
mutex_unlock(&wg->device_update_lock);
pr_debug("%s: Interface destroyed\n", dev->name);
free_netdev(dev);
}
static const struct device_type device_type = { .name = KBUILD_MODNAME };
static void wg_setup(struct net_device *dev)
{
struct wg_device *wg = netdev_priv(dev);
enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
NETIF_F_SG | NETIF_F_GSO |
NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
dev->type = ARPHRD_NONE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP;
#ifndef COMPAT_CANNOT_USE_IFF_NO_QUEUE
dev->priv_flags |= IFF_NO_QUEUE;
#else
dev->tx_queue_len = 0;
#endif
dev->features |= NETIF_F_LLTX;
dev->features |= WG_NETDEV_FEATURES;
dev->hw_features |= WG_NETDEV_FEATURES;
dev->hw_enc_features |= WG_NETDEV_FEATURES;
dev->mtu = ETH_DATA_LEN - overhead;
#ifndef COMPAT_CANNOT_USE_MAX_MTU
dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
#endif
SET_NETDEV_DEVTYPE(dev, &device_type);
/* We need to keep the dst around in case of icmp replies. */
netif_keep_dst(dev);
memset(wg, 0, sizeof(*wg));
wg->dev = dev;
}
static int wg_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct wg_device *wg = netdev_priv(dev);
int ret = -ENOMEM;
rcu_assign_pointer(wg->creating_net, src_net);
init_rwsem(&wg->static_identity.lock);
mutex_init(&wg->socket_update_lock);
mutex_init(&wg->device_update_lock);
skb_queue_head_init(&wg->incoming_handshakes);
wg_allowedips_init(&wg->peer_allowedips);
wg_cookie_checker_init(&wg->cookie_checker, wg);
INIT_LIST_HEAD(&wg->peer_list);
wg->device_update_gen = 1;
wg->peer_hashtable = wg_pubkey_hashtable_alloc();
if (!wg->peer_hashtable)
return ret;
wg->index_hashtable = wg_index_hashtable_alloc();
if (!wg->index_hashtable)
goto err_free_peer_hashtable;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
goto err_free_index_hashtable;
wg->incoming_handshakes_worker =
wg_packet_percpu_multicore_worker_alloc(
wg_packet_handshake_receive_worker, wg);
if (!wg->incoming_handshakes_worker)
goto err_free_tstats;
wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
if (!wg->handshake_receive_wq)
goto err_free_incoming_handshakes;
wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
if (!wg->handshake_send_wq)
goto err_destroy_handshake_receive;
wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
if (!wg->packet_crypt_wq)
goto err_destroy_handshake_send;
ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
true, MAX_QUEUED_PACKETS);
if (ret < 0)
goto err_destroy_packet_crypt;
ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
true, MAX_QUEUED_PACKETS);
if (ret < 0)
goto err_free_encrypt_queue;
ret = wg_ratelimiter_init();
if (ret < 0)
goto err_free_decrypt_queue;
ret = register_netdevice(dev);
if (ret < 0)
goto err_uninit_ratelimiter;
list_add(&wg->device_list, &device_list);
/* We wait until the end to assign priv_destructor, so that
* register_netdevice doesn't call it for us if it fails.
*/
dev->priv_destructor = wg_destruct;
pr_debug("%s: Interface created\n", dev->name);
return ret;
err_uninit_ratelimiter:
wg_ratelimiter_uninit();
err_free_decrypt_queue:
wg_packet_queue_free(&wg->decrypt_queue, true);
err_free_encrypt_queue:
wg_packet_queue_free(&wg->encrypt_queue, true);
err_destroy_packet_crypt:
destroy_workqueue(wg->packet_crypt_wq);
err_destroy_handshake_send:
destroy_workqueue(wg->handshake_send_wq);
err_destroy_handshake_receive:
destroy_workqueue(wg->handshake_receive_wq);
err_free_incoming_handshakes:
free_percpu(wg->incoming_handshakes_worker);
err_free_tstats:
free_percpu(dev->tstats);
err_free_index_hashtable:
kvfree(wg->index_hashtable);
err_free_peer_hashtable:
kvfree(wg->peer_hashtable);
return ret;
}
static struct rtnl_link_ops link_ops __read_mostly = {
.kind = KBUILD_MODNAME,
.priv_size = sizeof(struct wg_device),
.setup = wg_setup,
.newlink = wg_newlink,
};
static void wg_netns_pre_exit(struct net *net)
{
struct wg_device *wg;
rtnl_lock();
list_for_each_entry(wg, &device_list, device_list) {
if (rcu_access_pointer(wg->creating_net) == net) {
pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
netif_carrier_off(wg->dev);
mutex_lock(&wg->device_update_lock);
rcu_assign_pointer(wg->creating_net, NULL);
wg_socket_reinit(wg, NULL, NULL);
mutex_unlock(&wg->device_update_lock);
}
}
rtnl_unlock();
}
static struct pernet_operations pernet_ops = {
.pre_exit = wg_netns_pre_exit
};
int __init wg_device_init(void)
{
int ret;
#ifdef CONFIG_PM_SLEEP
ret = register_pm_notifier(&pm_notifier);
if (ret)
return ret;
#endif
ret = register_pernet_device(&pernet_ops);
if (ret)
goto error_pm;
ret = rtnl_link_register(&link_ops);
if (ret)
goto error_pernet;
return 0;
error_pernet:
unregister_pernet_device(&pernet_ops);
error_pm:
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pm_notifier);
#endif
return ret;
}
void wg_device_uninit(void)
{
rtnl_link_unregister(&link_ops);
unregister_pernet_device(&pernet_ops);
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pm_notifier);
#endif
rcu_barrier();
}

64
net/wireguard/device.h Normal file

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_DEVICE_H
#define _WG_DEVICE_H
#include "noise.h"
#include "allowedips.h"
#include "peerlookup.h"
#include "cookie.h"
#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/ptr_ring.h>
struct wg_device;
struct multicore_worker {
void *ptr;
struct work_struct work;
};
struct crypt_queue {
struct ptr_ring ring;
union {
struct {
struct multicore_worker __percpu *worker;
int last_cpu;
};
struct work_struct work;
};
};
struct wg_device {
struct net_device *dev;
struct crypt_queue encrypt_queue, decrypt_queue;
struct sock __rcu *sock4, *sock6;
struct net __rcu *creating_net;
struct noise_static_identity static_identity;
struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
struct workqueue_struct *packet_crypt_wq;
struct sk_buff_head incoming_handshakes;
int incoming_handshake_cpu;
struct multicore_worker __percpu *incoming_handshakes_worker;
struct cookie_checker cookie_checker;
struct pubkey_hashtable *peer_hashtable;
struct index_hashtable *index_hashtable;
struct allowedips peer_allowedips;
struct mutex device_update_lock, socket_update_lock;
struct list_head device_list, peer_list;
unsigned int num_peers, device_update_gen;
u32 fwmark;
u16 incoming_port;
};
int wg_device_init(void);
void wg_device_uninit(void);
#endif /* _WG_DEVICE_H */

9
net/wireguard/dkms.conf Normal file

@ -0,0 +1,9 @@
PACKAGE_NAME="wireguard"
PACKAGE_VERSION="1.0.20200908"
AUTOINSTALL=yes
BUILT_MODULE_NAME="wireguard"
DEST_MODULE_LOCATION="/kernel/net"
# requires kernel 3.10 - 5.5, inclusive:
BUILD_EXCLUSIVE_KERNEL="^((5\.[0-5]($|[.-]))|(4\.)|(3\.1[0-9]))"

69
net/wireguard/main.c Normal file

@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "version.h"
#include "device.h"
#include "noise.h"
#include "queueing.h"
#include "ratelimiter.h"
#include "netlink.h"
#include "uapi/wireguard.h"
#include "crypto/zinc.h"
#include <linux/init.h>
#include <linux/module.h>
#include <linux/genetlink.h>
#include <net/rtnetlink.h>
static int __init mod_init(void)
{
int ret;
if ((ret = chacha20_mod_init()) || (ret = poly1305_mod_init()) ||
(ret = chacha20poly1305_mod_init()) || (ret = blake2s_mod_init()) ||
(ret = curve25519_mod_init()))
return ret;
#ifdef DEBUG
if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
!wg_ratelimiter_selftest())
return -ENOTRECOVERABLE;
#endif
wg_noise_init();
ret = wg_device_init();
if (ret < 0)
goto err_device;
ret = wg_genetlink_init();
if (ret < 0)
goto err_netlink;
pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
return 0;
err_netlink:
wg_device_uninit();
err_device:
return ret;
}
static void __exit mod_exit(void)
{
wg_genetlink_uninit();
wg_device_uninit();
}
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("WireGuard secure network tunnel");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
MODULE_VERSION(WIREGUARD_VERSION);
MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
MODULE_INFO(intree, "Y");

128
net/wireguard/messages.h Normal file

@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_MESSAGES_H
#define _WG_MESSAGES_H
#include <zinc/curve25519.h>
#include <zinc/chacha20poly1305.h>
#include <zinc/blake2s.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/skbuff.h>
enum noise_lengths {
NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
};
#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
enum cookie_values {
COOKIE_SECRET_MAX_AGE = 2 * 60,
COOKIE_SECRET_LATENCY = 5,
COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
COOKIE_LEN = 16
};
enum counter_values {
COUNTER_BITS_TOTAL = 8192,
COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
};
enum limits {
REKEY_AFTER_MESSAGES = 1ULL << 60,
REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
REKEY_TIMEOUT = 5,
REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
REKEY_AFTER_TIME = 120,
REJECT_AFTER_TIME = 180,
INITIATIONS_PER_SECOND = 50,
MAX_PEERS_PER_DEVICE = 1U << 20,
KEEPALIVE_TIMEOUT = 10,
MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
MAX_STAGED_PACKETS = 128,
MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
};
enum message_type {
MESSAGE_INVALID = 0,
MESSAGE_HANDSHAKE_INITIATION = 1,
MESSAGE_HANDSHAKE_RESPONSE = 2,
MESSAGE_HANDSHAKE_COOKIE = 3,
MESSAGE_DATA = 4
};
struct message_header {
/* The actual layout of this that we want is:
* u8 type
* u8 reserved_zero[3]
*
* But it turns out that by encoding this as little endian,
* we achieve the same thing, and it makes checking faster.
*/
__le32 type;
};
struct message_macs {
u8 mac1[COOKIE_LEN];
u8 mac2[COOKIE_LEN];
};
struct message_handshake_initiation {
struct message_header header;
__le32 sender_index;
u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
struct message_macs macs;
};
struct message_handshake_response {
struct message_header header;
__le32 sender_index;
__le32 receiver_index;
u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
u8 encrypted_nothing[noise_encrypted_len(0)];
struct message_macs macs;
};
struct message_handshake_cookie {
struct message_header header;
__le32 receiver_index;
u8 nonce[COOKIE_NONCE_LEN];
u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
};
struct message_data {
struct message_header header;
__le32 key_idx;
__le64 counter;
u8 encrypted_data[];
};
#define message_data_len(plain_len) \
(noise_encrypted_len(plain_len) + sizeof(struct message_data))
enum message_alignments {
MESSAGE_PADDING_MULTIPLE = 16,
MESSAGE_MINIMUM_LENGTH = message_data_len(0)
};
#define SKB_HEADER_LEN \
(max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
sizeof(struct udphdr) + NET_SKB_PAD)
#define DATA_PACKET_HEAD_ROOM \
ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
#endif /* _WG_MESSAGES_H */

658
net/wireguard/netlink.c Normal file

@ -0,0 +1,658 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "netlink.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "queueing.h"
#include "messages.h"
#include "uapi/wireguard.h"
#include <linux/if.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include <crypto/algapi.h>
static struct genl_family genl_family;
static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
[WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
[WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
[WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
[WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
[WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
[WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
[WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
[WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
};
static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
[WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
[WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN),
[WGPEER_A_FLAGS] = { .type = NLA_U32 },
[WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)),
[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
[WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)),
[WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
[WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
[WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
[WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
};
static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
[WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
[WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)),
[WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
};
static struct wg_device *lookup_interface(struct nlattr **attrs,
struct sk_buff *skb)
{
struct net_device *dev = NULL;
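/* Exactly one of ifindex or ifname must be given. */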
if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
return ERR_PTR(-EBADR);
if (attrs[WGDEVICE_A_IFINDEX])
dev = dev_get_by_index(sock_net(skb->sk),
nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
else if (attrs[WGDEVICE_A_IFNAME])
dev = dev_get_by_name(sock_net(skb->sk),
nla_data(attrs[WGDEVICE_A_IFNAME]));
if (!dev)
return ERR_PTR(-ENODEV);
if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
dev_put(dev);
return ERR_PTR(-EOPNOTSUPP);
}
return netdev_priv(dev);
}
static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
int family)
{
struct nlattr *allowedip_nest;
allowedip_nest = nla_nest_start(skb, 0);
if (!allowedip_nest)
return -EMSGSIZE;
if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
nla_nest_cancel(skb, allowedip_nest);
return -EMSGSIZE;
}
nla_nest_end(skb, allowedip_nest);
return 0;
}
struct dump_ctx {
struct wg_device *wg;
struct wg_peer *next_peer;
u64 allowedips_seq;
struct allowedips_node *next_allowedip;
};
#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)
static int
get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
{
struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
struct allowedips_node *allowedips_node = ctx->next_allowedip;
bool fail;
if (!peer_nest)
return -EMSGSIZE;
down_read(&peer->handshake.lock);
fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
peer->handshake.remote_static);
up_read(&peer->handshake.lock);
if (fail)
goto err;
if (!allowedips_node) {
const struct __kernel_timespec last_handshake = {
.tv_sec = peer->walltime_last_handshake.tv_sec,
.tv_nsec = peer->walltime_last_handshake.tv_nsec
};
down_read(&peer->handshake.lock);
fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
NOISE_SYMMETRIC_KEY_LEN,
peer->handshake.preshared_key);
up_read(&peer->handshake.lock);
if (fail)
goto err;
if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
sizeof(last_handshake), &last_handshake) ||
nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
peer->persistent_keepalive_interval) ||
nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
WGPEER_A_UNSPEC) ||
nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
WGPEER_A_UNSPEC) ||
nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
goto err;
read_lock_bh(&peer->endpoint_lock);
if (peer->endpoint.addr.sa_family == AF_INET)
fail = nla_put(skb, WGPEER_A_ENDPOINT,
sizeof(peer->endpoint.addr4),
&peer->endpoint.addr4);
else if (peer->endpoint.addr.sa_family == AF_INET6)
fail = nla_put(skb, WGPEER_A_ENDPOINT,
sizeof(peer->endpoint.addr6),
&peer->endpoint.addr6);
read_unlock_bh(&peer->endpoint_lock);
if (fail)
goto err;
allowedips_node =
list_first_entry_or_null(&peer->allowedips_list,
struct allowedips_node, peer_list);
}
if (!allowedips_node)
goto no_allowedips;
if (!ctx->allowedips_seq)
ctx->allowedips_seq = peer->device->peer_allowedips.seq;
else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
goto no_allowedips;
allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
if (!allowedips_nest)
goto err;
list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
peer_list) {
u8 cidr, ip[16] __aligned(__alignof(u64));
int family;
family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
if (get_allowedips(skb, ip, cidr, family)) {
nla_nest_end(skb, allowedips_nest);
nla_nest_end(skb, peer_nest);
ctx->next_allowedip = allowedips_node;
return -EMSGSIZE;
}
}
nla_nest_end(skb, allowedips_nest);
no_allowedips:
nla_nest_end(skb, peer_nest);
ctx->next_allowedip = NULL;
ctx->allowedips_seq = 0;
return 0;
err:
nla_nest_cancel(skb, peer_nest);
return -EMSGSIZE;
}
static int wg_get_device_start(struct netlink_callback *cb)
{
struct wg_device *wg;
wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb);
if (IS_ERR(wg))
return PTR_ERR(wg);
DUMP_CTX(cb)->wg = wg;
return 0;
}
static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct wg_peer *peer, *next_peer_cursor;
struct dump_ctx *ctx = DUMP_CTX(cb);
struct wg_device *wg = ctx->wg;
struct nlattr *peers_nest;
int ret = -EMSGSIZE;
bool done = true;
void *hdr;
rtnl_lock();
mutex_lock(&wg->device_update_lock);
cb->seq = wg->device_update_gen;
next_peer_cursor = ctx->next_peer;
hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
if (!hdr)
goto out;
genl_dump_check_consistent(cb, hdr);
if (!ctx->next_peer) {
if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
wg->incoming_port) ||
nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
goto out;
down_read(&wg->static_identity.lock);
if (wg->static_identity.has_identity) {
if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
NOISE_PUBLIC_KEY_LEN,
wg->static_identity.static_private) ||
nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
NOISE_PUBLIC_KEY_LEN,
wg->static_identity.static_public)) {
up_read(&wg->static_identity.lock);
goto out;
}
}
up_read(&wg->static_identity.lock);
}
peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
if (!peers_nest)
goto out;
ret = 0;
/* If the last cursor was removed via list_del_init in peer_remove, then
* we just treat this the same as there being no more peers left. The
* reason is that seq_nr should indicate to userspace that this isn't a
* coherent dump anyway, so they'll try again.
*/
if (list_empty(&wg->peer_list) ||
(ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
nla_nest_cancel(skb, peers_nest);
goto out;
}
lockdep_assert_held(&wg->device_update_lock);
peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
if (get_peer(peer, skb, ctx)) {
done = false;
break;
}
next_peer_cursor = peer;
}
nla_nest_end(skb, peers_nest);
out:
if (!ret && !done && next_peer_cursor)
wg_peer_get(next_peer_cursor);
wg_peer_put(ctx->next_peer);
mutex_unlock(&wg->device_update_lock);
rtnl_unlock();
if (ret) {
genlmsg_cancel(skb, hdr);
return ret;
}
genlmsg_end(skb, hdr);
if (done) {
ctx->next_peer = NULL;
return 0;
}
ctx->next_peer = next_peer_cursor;
return skb->len;
/* At this point, we can't really deal with safely zeroing out
* the private key material after usage. This will need an additional API
* in the kernel for marking skbs as zero_on_free.
*/
}
static int wg_get_device_done(struct netlink_callback *cb)
{
struct dump_ctx *ctx = DUMP_CTX(cb);
if (ctx->wg)
dev_put(ctx->wg->dev);
wg_peer_put(ctx->next_peer);
return 0;
}
static int set_port(struct wg_device *wg, u16 port)
{
struct wg_peer *peer;
if (wg->incoming_port == port)
return 0;
list_for_each_entry(peer, &wg->peer_list, peer_list)
wg_socket_clear_peer_endpoint_src(peer);
if (!netif_running(wg->dev)) {
wg->incoming_port = port;
return 0;
}
return wg_socket_init(wg, port);
}
static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
{
int ret = -EINVAL;
u16 family;
u8 cidr;
if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
!attrs[WGALLOWEDIP_A_CIDR_MASK])
return ret;
family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
if (family == AF_INET && cidr <= 32 &&
nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
ret = wg_allowedips_insert_v4(
&peer->device->peer_allowedips,
nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
&peer->device->device_update_lock);
else if (family == AF_INET6 && cidr <= 128 &&
nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
ret = wg_allowedips_insert_v6(
&peer->device->peer_allowedips,
nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
&peer->device->device_update_lock);
return ret;
}
static int set_peer(struct wg_device *wg, struct nlattr **attrs)
{
u8 *public_key = NULL, *preshared_key = NULL;
struct wg_peer *peer = NULL;
u32 flags = 0;
int ret;
ret = -EINVAL;
if (attrs[WGPEER_A_PUBLIC_KEY] &&
nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
else
goto out;
if (attrs[WGPEER_A_PRESHARED_KEY] &&
nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);
if (attrs[WGPEER_A_FLAGS])
flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
ret = -EOPNOTSUPP;
if (flags & ~__WGPEER_F_ALL)
goto out;
ret = -EPFNOSUPPORT;
if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
goto out;
}
peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
ret = 0;
if (!peer) { /* Peer doesn't exist yet. Add a new one. */
if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
goto out;
/* The peer is new, so there are no allowed IPs to remove. */
flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;
down_read(&wg->static_identity.lock);
if (wg->static_identity.has_identity &&
!memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
wg->static_identity.static_public,
NOISE_PUBLIC_KEY_LEN)) {
/* We silently ignore peers that have the same public
* key as the device. The reason we do it silently is
* that we'd like for people to be able to reuse the
* same set of API calls across peers.
*/
up_read(&wg->static_identity.lock);
ret = 0;
goto out;
}
up_read(&wg->static_identity.lock);
peer = wg_peer_create(wg, public_key, preshared_key);
if (IS_ERR(peer)) {
ret = PTR_ERR(peer);
peer = NULL;
goto out;
}
/* Take additional reference, as though we've just been
* looked up.
*/
wg_peer_get(peer);
}
if (flags & WGPEER_F_REMOVE_ME) {
wg_peer_remove(peer);
goto out;
}
if (preshared_key) {
down_write(&peer->handshake.lock);
memcpy(&peer->handshake.preshared_key, preshared_key,
NOISE_SYMMETRIC_KEY_LEN);
up_write(&peer->handshake.lock);
}
if (attrs[WGPEER_A_ENDPOINT]) {
struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
if ((len == sizeof(struct sockaddr_in) &&
addr->sa_family == AF_INET) ||
(len == sizeof(struct sockaddr_in6) &&
addr->sa_family == AF_INET6)) {
struct endpoint endpoint = { { { 0 } } };
memcpy(&endpoint.addr, addr, len);
wg_socket_set_peer_endpoint(peer, &endpoint);
}
}
if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
&wg->device_update_lock);
if (attrs[WGPEER_A_ALLOWEDIPS]) {
struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
int rem;
nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
attr, allowedip_policy, NULL);
if (ret < 0)
goto out;
ret = set_allowedip(peer, allowedip);
if (ret < 0)
goto out;
}
}
if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
const u16 persistent_keepalive_interval = nla_get_u16(
attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
const bool send_keepalive =
!peer->persistent_keepalive_interval &&
persistent_keepalive_interval &&
netif_running(wg->dev);
peer->persistent_keepalive_interval = persistent_keepalive_interval;
if (send_keepalive)
wg_packet_send_keepalive(peer);
}
if (netif_running(wg->dev))
wg_packet_send_staged_packets(peer);
out:
wg_peer_put(peer);
if (attrs[WGPEER_A_PRESHARED_KEY])
memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
return ret;
}
static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
{
struct wg_device *wg = lookup_interface(info->attrs, skb);
u32 flags = 0;
int ret;
if (IS_ERR(wg)) {
ret = PTR_ERR(wg);
goto out_nodev;
}
rtnl_lock();
mutex_lock(&wg->device_update_lock);
if (info->attrs[WGDEVICE_A_FLAGS])
flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
ret = -EOPNOTSUPP;
if (flags & ~__WGDEVICE_F_ALL)
goto out;
if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) {
struct net *net;
rcu_read_lock();
net = rcu_dereference(wg->creating_net);
ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0;
rcu_read_unlock();
if (ret)
goto out;
}
++wg->device_update_gen;
if (info->attrs[WGDEVICE_A_FWMARK]) {
struct wg_peer *peer;
wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
list_for_each_entry(peer, &wg->peer_list, peer_list)
wg_socket_clear_peer_endpoint_src(peer);
}
if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
ret = set_port(wg,
nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
if (ret)
goto out;
}
if (flags & WGDEVICE_F_REPLACE_PEERS)
wg_peer_remove_all(wg);
if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
NOISE_PUBLIC_KEY_LEN) {
u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
u8 public_key[NOISE_PUBLIC_KEY_LEN];
struct wg_peer *peer, *temp;
if (!crypto_memneq(wg->static_identity.static_private,
private_key, NOISE_PUBLIC_KEY_LEN))
goto skip_set_private_key;
/* We remove before setting, to prevent a race, which means doing
* two 25519-genpub ops.
*/
if (curve25519_generate_public(public_key, private_key)) {
peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
public_key);
if (peer) {
wg_peer_put(peer);
wg_peer_remove(peer);
}
}
down_write(&wg->static_identity.lock);
wg_noise_set_static_identity_private_key(&wg->static_identity,
private_key);
list_for_each_entry_safe(peer, temp, &wg->peer_list,
peer_list) {
wg_noise_precompute_static_static(peer);
wg_noise_expire_current_peer_keypairs(peer);
}
wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
up_write(&wg->static_identity.lock);
}
skip_set_private_key:
if (info->attrs[WGDEVICE_A_PEERS]) {
struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
int rem;
nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
peer_policy, NULL);
if (ret < 0)
goto out;
ret = set_peer(wg, peer);
if (ret < 0)
goto out;
}
}
ret = 0;
out:
mutex_unlock(&wg->device_update_lock);
rtnl_unlock();
dev_put(wg->dev);
out_nodev:
if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
return ret;
}
#ifndef COMPAT_CANNOT_USE_CONST_GENL_OPS
static const
#else
static
#endif
struct genl_ops genl_ops[] = {
{
.cmd = WG_CMD_GET_DEVICE,
#ifndef COMPAT_CANNOT_USE_NETLINK_START
.start = wg_get_device_start,
#endif
.dumpit = wg_get_device_dump,
.done = wg_get_device_done,
#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
.policy = device_policy,
#endif
.flags = GENL_UNS_ADMIN_PERM
}, {
.cmd = WG_CMD_SET_DEVICE,
.doit = wg_set_device,
#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
.policy = device_policy,
#endif
.flags = GENL_UNS_ADMIN_PERM
}
};
static struct genl_family genl_family
#ifndef COMPAT_CANNOT_USE_GENL_NOPS
__ro_after_init = {
.ops = genl_ops,
.n_ops = ARRAY_SIZE(genl_ops),
#else
= {
#endif
.name = WG_GENL_NAME,
.version = WG_GENL_VERSION,
.maxattr = WGDEVICE_A_MAX,
.module = THIS_MODULE,
#ifndef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
.policy = device_policy,
#endif
.netnsok = true
};
int __init wg_genetlink_init(void)
{
return genl_register_family(&genl_family);
}
void __exit wg_genetlink_uninit(void)
{
genl_unregister_family(&genl_family);
}

12
net/wireguard/netlink.h Normal file
View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_NETLINK_H
#define _WG_NETLINK_H
int wg_genetlink_init(void);
void wg_genetlink_uninit(void);
#endif /* _WG_NETLINK_H */

830
net/wireguard/noise.c Normal file
View File

@ -0,0 +1,830 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "noise.h"
#include "device.h"
#include "peer.h"
#include "messages.h"
#include "queueing.h"
#include "peerlookup.h"
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <crypto/algapi.h>
/* This implements Noise_IKpsk2:
*
* <- s
* ******
* -> e, es, s, ss, {t}
* <- e, ee, se, psk, {}
*/
static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
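/* Roughly, the tokens in the pattern above map onto the helpers below:
 * "e" is absorbed by message_ephemeral(), "es"/"ee"/"se" by mix_dh(),
 * "ss" by mix_precomputed_dh(), "psk" by mix_psk(), and the encrypted
 * payloads "s", "{t}" and "{}" by message_encrypt()/message_decrypt(),
 * which also mix the resulting ciphertext into the transcript hash.
 */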
static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
static atomic64_t keypair_counter = ATOMIC64_INIT(0);
void __init wg_noise_init(void)
{
struct blake2s_state blake;
blake2s(handshake_init_chaining_key, handshake_name, NULL,
NOISE_HASH_LEN, sizeof(handshake_name), 0);
blake2s_init(&blake, NOISE_HASH_LEN);
blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
blake2s_update(&blake, identifier_name, sizeof(identifier_name));
blake2s_final(&blake, handshake_init_hash);
}
/* Must hold peer->handshake.static_identity->lock */
void wg_noise_precompute_static_static(struct wg_peer *peer)
{
down_write(&peer->handshake.lock);
if (!peer->handshake.static_identity->has_identity ||
!curve25519(peer->handshake.precomputed_static_static,
peer->handshake.static_identity->static_private,
peer->handshake.remote_static))
memset(peer->handshake.precomputed_static_static, 0,
NOISE_PUBLIC_KEY_LEN);
up_write(&peer->handshake.lock);
}
void wg_noise_handshake_init(struct noise_handshake *handshake,
struct noise_static_identity *static_identity,
const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
struct wg_peer *peer)
{
memset(handshake, 0, sizeof(*handshake));
init_rwsem(&handshake->lock);
handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
handshake->entry.peer = peer;
memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
if (peer_preshared_key)
memcpy(handshake->preshared_key, peer_preshared_key,
NOISE_SYMMETRIC_KEY_LEN);
handshake->static_identity = static_identity;
handshake->state = HANDSHAKE_ZEROED;
wg_noise_precompute_static_static(peer);
}
static void handshake_zero(struct noise_handshake *handshake)
{
memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
memset(&handshake->hash, 0, NOISE_HASH_LEN);
memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
handshake->remote_index = 0;
handshake->state = HANDSHAKE_ZEROED;
}
void wg_noise_handshake_clear(struct noise_handshake *handshake)
{
down_write(&handshake->lock);
wg_index_hashtable_remove(
handshake->entry.peer->device->index_hashtable,
&handshake->entry);
handshake_zero(handshake);
up_write(&handshake->lock);
}
static struct noise_keypair *keypair_create(struct wg_peer *peer)
{
struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
if (unlikely(!keypair))
return NULL;
spin_lock_init(&keypair->receiving_counter.lock);
keypair->internal_id = atomic64_inc_return(&keypair_counter);
keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
keypair->entry.peer = peer;
kref_init(&keypair->refcount);
return keypair;
}
static void keypair_free_rcu(struct rcu_head *rcu)
{
kfree_sensitive(container_of(rcu, struct noise_keypair, rcu));
}
static void keypair_free_kref(struct kref *kref)
{
struct noise_keypair *keypair =
container_of(kref, struct noise_keypair, refcount);
net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
keypair->entry.peer->device->dev->name,
keypair->internal_id,
keypair->entry.peer->internal_id);
wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
&keypair->entry);
call_rcu(&keypair->rcu, keypair_free_rcu);
}
void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
{
if (unlikely(!keypair))
return;
if (unlikely(unreference_now))
wg_index_hashtable_remove(
keypair->entry.peer->device->index_hashtable,
&keypair->entry);
kref_put(&keypair->refcount, keypair_free_kref);
}
struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
"Taking noise keypair reference without holding the RCU BH read lock");
if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
return NULL;
return keypair;
}
void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
{
struct noise_keypair *old;
spin_lock_bh(&keypairs->keypair_update_lock);
/* We zero the next_keypair before zeroing the others, so that
* wg_noise_received_with_keypair returns early before subsequent ones
* are zeroed.
*/
old = rcu_dereference_protected(keypairs->next_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
RCU_INIT_POINTER(keypairs->next_keypair, NULL);
wg_noise_keypair_put(old, true);
old = rcu_dereference_protected(keypairs->previous_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
wg_noise_keypair_put(old, true);
old = rcu_dereference_protected(keypairs->current_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
RCU_INIT_POINTER(keypairs->current_keypair, NULL);
wg_noise_keypair_put(old, true);
spin_unlock_bh(&keypairs->keypair_update_lock);
}
void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
{
struct noise_keypair *keypair;
wg_noise_handshake_clear(&peer->handshake);
wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
spin_lock_bh(&peer->keypairs.keypair_update_lock);
keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
lockdep_is_held(&peer->keypairs.keypair_update_lock));
if (keypair)
keypair->sending.is_valid = false;
keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
lockdep_is_held(&peer->keypairs.keypair_update_lock));
if (keypair)
keypair->sending.is_valid = false;
spin_unlock_bh(&peer->keypairs.keypair_update_lock);
}
static void add_new_keypair(struct noise_keypairs *keypairs,
struct noise_keypair *new_keypair)
{
struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
spin_lock_bh(&keypairs->keypair_update_lock);
previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
next_keypair = rcu_dereference_protected(keypairs->next_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
current_keypair = rcu_dereference_protected(keypairs->current_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
if (new_keypair->i_am_the_initiator) {
/* If we're the initiator, it means we've sent a handshake, and
* received a confirmation response, which means this new
* keypair can now be used.
*/
if (next_keypair) {
/* If there already was a next keypair pending, we
* demote it to be the previous keypair, and free the
* existing current. Note that this means KCI can result
* in this transition. It would perhaps be more sound to
* always just get rid of the unused next keypair
* instead of putting it in the previous slot, but this
* might be a bit less robust. Something to think about
* for the future.
*/
RCU_INIT_POINTER(keypairs->next_keypair, NULL);
rcu_assign_pointer(keypairs->previous_keypair,
next_keypair);
wg_noise_keypair_put(current_keypair, true);
} else /* If there wasn't an existing next keypair, we replace
* the previous with the current one.
*/
rcu_assign_pointer(keypairs->previous_keypair,
current_keypair);
/* At this point we can get rid of the old previous keypair, and
* set up the new keypair.
*/
wg_noise_keypair_put(previous_keypair, true);
rcu_assign_pointer(keypairs->current_keypair, new_keypair);
} else {
/* If we're the responder, it means we can't use the new keypair
* until we receive confirmation via the first data packet, so
* we get rid of the existing previous one, the possibly
* existing next one, and slide in the new next one.
*/
rcu_assign_pointer(keypairs->next_keypair, new_keypair);
wg_noise_keypair_put(next_keypair, true);
RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
wg_noise_keypair_put(previous_keypair, true);
}
spin_unlock_bh(&keypairs->keypair_update_lock);
}
bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
struct noise_keypair *received_keypair)
{
struct noise_keypair *old_keypair;
bool key_is_new;
/* We first check without taking the spinlock. */
key_is_new = received_keypair ==
rcu_access_pointer(keypairs->next_keypair);
if (likely(!key_is_new))
return false;
spin_lock_bh(&keypairs->keypair_update_lock);
/* After locking, we double check that things didn't change from
* beneath us.
*/
if (unlikely(received_keypair !=
rcu_dereference_protected(keypairs->next_keypair,
lockdep_is_held(&keypairs->keypair_update_lock)))) {
spin_unlock_bh(&keypairs->keypair_update_lock);
return false;
}
/* When we've finally received the confirmation, we slide the next
* into the current, the current into the previous, and get rid of
* the old previous.
*/
old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
lockdep_is_held(&keypairs->keypair_update_lock));
rcu_assign_pointer(keypairs->previous_keypair,
rcu_dereference_protected(keypairs->current_keypair,
lockdep_is_held(&keypairs->keypair_update_lock)));
wg_noise_keypair_put(old_keypair, true);
rcu_assign_pointer(keypairs->current_keypair, received_keypair);
RCU_INIT_POINTER(keypairs->next_keypair, NULL);
spin_unlock_bh(&keypairs->keypair_update_lock);
return true;
}
/* Must hold static_identity->lock */
void wg_noise_set_static_identity_private_key(
struct noise_static_identity *static_identity,
const u8 private_key[NOISE_PUBLIC_KEY_LEN])
{
memcpy(static_identity->static_private, private_key,
NOISE_PUBLIC_KEY_LEN);
curve25519_clamp_secret(static_identity->static_private);
static_identity->has_identity = curve25519_generate_public(
static_identity->static_public, private_key);
}
/* This is Hugo Krawczyk's HKDF:
* - https://eprint.iacr.org/2010/264.pdf
* - https://tools.ietf.org/html/rfc5869
*/
static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
size_t first_len, size_t second_len, size_t third_len,
size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
{
u8 output[BLAKE2S_HASH_SIZE + 1];
u8 secret[BLAKE2S_HASH_SIZE];
WARN_ON(IS_ENABLED(DEBUG) &&
(first_len > BLAKE2S_HASH_SIZE ||
second_len > BLAKE2S_HASH_SIZE ||
third_len > BLAKE2S_HASH_SIZE ||
((second_len || second_dst || third_len || third_dst) &&
(!first_len || !first_dst)) ||
((third_len || third_dst) && (!second_len || !second_dst))));
/* Extract entropy from data into secret */
blake2s_hmac(secret, data, chaining_key, BLAKE2S_HASH_SIZE, data_len,
NOISE_HASH_LEN);
if (!first_dst || !first_len)
goto out;
/* Expand first key: key = secret, data = 0x1 */
output[0] = 1;
blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, 1,
BLAKE2S_HASH_SIZE);
memcpy(first_dst, output, first_len);
if (!second_dst || !second_len)
goto out;
/* Expand second key: key = secret, data = first-key || 0x2 */
output[BLAKE2S_HASH_SIZE] = 2;
blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
memcpy(second_dst, output, second_len);
if (!third_dst || !third_len)
goto out;
/* Expand third key: key = secret, data = second-key || 0x3 */
output[BLAKE2S_HASH_SIZE] = 3;
blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
memcpy(third_dst, output, third_len);
out:
/* Clear sensitive data from stack */
memzero_explicit(secret, BLAKE2S_HASH_SIZE);
memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
}
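/* A sketch of the expansion performed above (HKDF per RFC 5869, using
 * HMAC-BLAKE2s): with secret = HMAC(chaining_key, data),
 *   T1 = HMAC(secret, 0x1)
 *   T2 = HMAC(secret, T1 || 0x2)
 *   T3 = HMAC(secret, T2 || 0x3)
 * and first_dst, second_dst, third_dst receive prefixes of T1, T2, T3.
 * The WARN_ON above enforces that outputs are requested in order and that
 * each requested length is at most BLAKE2S_HASH_SIZE bytes.
 */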
static void derive_keys(struct noise_symmetric_key *first_dst,
struct noise_symmetric_key *second_dst,
const u8 chaining_key[NOISE_HASH_LEN])
{
u64 birthdate = ktime_get_coarse_boottime_ns();
kdf(first_dst->key, second_dst->key, NULL, NULL,
NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
chaining_key);
first_dst->birthdate = second_dst->birthdate = birthdate;
first_dst->is_valid = second_dst->is_valid = true;
}
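/* The first key derived above is the initiator's sending key and the
 * responder's receiving key: wg_noise_handshake_begin_session() below passes
 * (sending, receiving) when we initiated and (receiving, sending) otherwise,
 * so both sides agree on the direction of each key.
 */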
static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
u8 key[NOISE_SYMMETRIC_KEY_LEN],
const u8 private[NOISE_PUBLIC_KEY_LEN],
const u8 public[NOISE_PUBLIC_KEY_LEN])
{
u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
if (unlikely(!curve25519(dh_calculation, private, public)))
return false;
kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
return true;
}
static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
u8 key[NOISE_SYMMETRIC_KEY_LEN],
const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
{
static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
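/* An all-zero precomputed value means the static-static DH could not be
 * computed (missing identity or a degenerate point); see
 * wg_noise_precompute_static_static(), which stores zeros in that case.
 */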
if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
return false;
kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
chaining_key);
return true;
}
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
struct blake2s_state blake;
blake2s_init(&blake, NOISE_HASH_LEN);
blake2s_update(&blake, hash, NOISE_HASH_LEN);
blake2s_update(&blake, src, src_len);
blake2s_final(&blake, hash);
}
static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
u8 key[NOISE_SYMMETRIC_KEY_LEN],
const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
{
u8 temp_hash[NOISE_HASH_LEN];
kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
mix_hash(hash, temp_hash, NOISE_HASH_LEN);
memzero_explicit(temp_hash, NOISE_HASH_LEN);
}
static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
u8 hash[NOISE_HASH_LEN],
const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
{
memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
}
static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
u8 hash[NOISE_HASH_LEN])
{
chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
NOISE_HASH_LEN,
0 /* Always zero for Noise_IK */, key);
mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
}
static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
u8 hash[NOISE_HASH_LEN])
{
if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
hash, NOISE_HASH_LEN,
0 /* Always zero for Noise_IK */, key))
return false;
mix_hash(hash, src_ciphertext, src_len);
return true;
}
static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
u8 chaining_key[NOISE_HASH_LEN],
u8 hash[NOISE_HASH_LEN])
{
if (ephemeral_dst != ephemeral_src)
memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0,
NOISE_PUBLIC_KEY_LEN, chaining_key);
}
static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
{
struct timespec64 now;
ktime_get_real_ts64(&now);
/* In order to prevent some sort of infoleak from precise timers, we
* round down the nanoseconds part to the closest rounded-down power of
* two to the maximum initiations per second allowed anyway by the
* implementation.
*/
now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));
/* https://cr.yp.to/libtai/tai64.html */
*(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec);
*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
}
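/* Worked example of the rounding above, assuming INITIATIONS_PER_SECOND is 50
 * as in the upstream messages.h: NSEC_PER_SEC / 50 = 20,000,000 ns, and
 * rounddown_pow_of_two(20,000,000) = 2^24 = 16,777,216, so tv_nsec is
 * truncated to a multiple of roughly 16.8 ms before being encoded.
 */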
bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
struct noise_handshake *handshake)
{
u8 timestamp[NOISE_TIMESTAMP_LEN];
u8 key[NOISE_SYMMETRIC_KEY_LEN];
bool ret = false;
/* We need to wait for crng _before_ taking any locks, since
* curve25519_generate_secret uses get_random_bytes_wait.
*/
wait_for_random_bytes();
down_read(&handshake->static_identity->lock);
down_write(&handshake->lock);
if (unlikely(!handshake->static_identity->has_identity))
goto out;
dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
handshake_init(handshake->chaining_key, handshake->hash,
handshake->remote_static);
/* e */
curve25519_generate_secret(handshake->ephemeral_private);
if (!curve25519_generate_public(dst->unencrypted_ephemeral,
handshake->ephemeral_private))
goto out;
message_ephemeral(dst->unencrypted_ephemeral,
dst->unencrypted_ephemeral, handshake->chaining_key,
handshake->hash);
/* es */
if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
handshake->remote_static))
goto out;
/* s */
message_encrypt(dst->encrypted_static,
handshake->static_identity->static_public,
NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
/* ss */
if (!mix_precomputed_dh(handshake->chaining_key, key,
handshake->precomputed_static_static))
goto out;
/* {t} */
tai64n_now(timestamp);
message_encrypt(dst->encrypted_timestamp, timestamp,
NOISE_TIMESTAMP_LEN, key, handshake->hash);
dst->sender_index = wg_index_hashtable_insert(
handshake->entry.peer->device->index_hashtable,
&handshake->entry);
handshake->state = HANDSHAKE_CREATED_INITIATION;
ret = true;
out:
up_write(&handshake->lock);
up_read(&handshake->static_identity->lock);
memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
return ret;
}
struct wg_peer *
wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
struct wg_device *wg)
{
struct wg_peer *peer = NULL, *ret_peer = NULL;
struct noise_handshake *handshake;
bool replay_attack, flood_attack;
u8 key[NOISE_SYMMETRIC_KEY_LEN];
u8 chaining_key[NOISE_HASH_LEN];
u8 hash[NOISE_HASH_LEN];
u8 s[NOISE_PUBLIC_KEY_LEN];
u8 e[NOISE_PUBLIC_KEY_LEN];
u8 t[NOISE_TIMESTAMP_LEN];
u64 initiation_consumption;
down_read(&wg->static_identity.lock);
if (unlikely(!wg->static_identity.has_identity))
goto out;
handshake_init(chaining_key, hash, wg->static_identity.static_public);
/* e */
message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
/* es */
if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
goto out;
/* s */
if (!message_decrypt(s, src->encrypted_static,
sizeof(src->encrypted_static), key, hash))
goto out;
/* Lookup which peer we're actually talking to */
peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s);
if (!peer)
goto out;
handshake = &peer->handshake;
/* ss */
if (!mix_precomputed_dh(chaining_key, key,
handshake->precomputed_static_static))
goto out;
/* {t} */
if (!message_decrypt(t, src->encrypted_timestamp,
sizeof(src->encrypted_timestamp), key, hash))
goto out;
down_read(&handshake->lock);
replay_attack = memcmp(t, handshake->latest_timestamp,
NOISE_TIMESTAMP_LEN) <= 0;
flood_attack = (s64)handshake->last_initiation_consumption +
NSEC_PER_SEC / INITIATIONS_PER_SECOND >
(s64)ktime_get_coarse_boottime_ns();
up_read(&handshake->lock);
if (replay_attack || flood_attack)
goto out;
/* Success! Copy everything to peer */
down_write(&handshake->lock);
memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0)
memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
memcpy(handshake->hash, hash, NOISE_HASH_LEN);
memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
handshake->remote_index = src->sender_index;
initiation_consumption = ktime_get_coarse_boottime_ns();
if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0)
handshake->last_initiation_consumption = initiation_consumption;
handshake->state = HANDSHAKE_CONSUMED_INITIATION;
up_write(&handshake->lock);
ret_peer = peer;
out:
memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
memzero_explicit(hash, NOISE_HASH_LEN);
memzero_explicit(chaining_key, NOISE_HASH_LEN);
up_read(&wg->static_identity.lock);
if (!ret_peer)
wg_peer_put(peer);
return ret_peer;
}
bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
struct noise_handshake *handshake)
{
u8 key[NOISE_SYMMETRIC_KEY_LEN];
bool ret = false;
/* We need to wait for crng _before_ taking any locks, since
* curve25519_generate_secret uses get_random_bytes_wait.
*/
wait_for_random_bytes();
down_read(&handshake->static_identity->lock);
down_write(&handshake->lock);
if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
goto out;
dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
dst->receiver_index = handshake->remote_index;
/* e */
curve25519_generate_secret(handshake->ephemeral_private);
if (!curve25519_generate_public(dst->unencrypted_ephemeral,
handshake->ephemeral_private))
goto out;
message_ephemeral(dst->unencrypted_ephemeral,
dst->unencrypted_ephemeral, handshake->chaining_key,
handshake->hash);
/* ee */
if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
handshake->remote_ephemeral))
goto out;
/* se */
if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
handshake->remote_static))
goto out;
/* psk */
mix_psk(handshake->chaining_key, handshake->hash, key,
handshake->preshared_key);
/* {} */
message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
dst->sender_index = wg_index_hashtable_insert(
handshake->entry.peer->device->index_hashtable,
&handshake->entry);
handshake->state = HANDSHAKE_CREATED_RESPONSE;
ret = true;
out:
up_write(&handshake->lock);
up_read(&handshake->static_identity->lock);
memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
return ret;
}
struct wg_peer *
wg_noise_handshake_consume_response(struct message_handshake_response *src,
struct wg_device *wg)
{
enum noise_handshake_state state = HANDSHAKE_ZEROED;
struct wg_peer *peer = NULL, *ret_peer = NULL;
struct noise_handshake *handshake;
u8 key[NOISE_SYMMETRIC_KEY_LEN];
u8 hash[NOISE_HASH_LEN];
u8 chaining_key[NOISE_HASH_LEN];
u8 e[NOISE_PUBLIC_KEY_LEN];
u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
u8 static_private[NOISE_PUBLIC_KEY_LEN];
u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
down_read(&wg->static_identity.lock);
if (unlikely(!wg->static_identity.has_identity))
goto out;
handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
src->receiver_index, &peer);
if (unlikely(!handshake))
goto out;
down_read(&handshake->lock);
state = handshake->state;
memcpy(hash, handshake->hash, NOISE_HASH_LEN);
memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
memcpy(ephemeral_private, handshake->ephemeral_private,
NOISE_PUBLIC_KEY_LEN);
memcpy(preshared_key, handshake->preshared_key,
NOISE_SYMMETRIC_KEY_LEN);
up_read(&handshake->lock);
if (state != HANDSHAKE_CREATED_INITIATION)
goto fail;
/* e */
message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
/* ee */
if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
goto fail;
/* se */
if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
goto fail;
/* psk */
mix_psk(chaining_key, hash, key, preshared_key);
/* {} */
if (!message_decrypt(NULL, src->encrypted_nothing,
sizeof(src->encrypted_nothing), key, hash))
goto fail;
/* Success! Copy everything to peer */
down_write(&handshake->lock);
/* It's important to check that the state is still the same, while we
* have an exclusive lock.
*/
if (handshake->state != state) {
up_write(&handshake->lock);
goto fail;
}
memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
memcpy(handshake->hash, hash, NOISE_HASH_LEN);
memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
handshake->remote_index = src->sender_index;
handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
up_write(&handshake->lock);
ret_peer = peer;
goto out;
fail:
wg_peer_put(peer);
out:
memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
memzero_explicit(hash, NOISE_HASH_LEN);
memzero_explicit(chaining_key, NOISE_HASH_LEN);
memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN);
up_read(&wg->static_identity.lock);
return ret_peer;
}
bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
struct noise_keypairs *keypairs)
{
struct noise_keypair *new_keypair;
bool ret = false;
down_write(&handshake->lock);
if (handshake->state != HANDSHAKE_CREATED_RESPONSE &&
handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
goto out;
new_keypair = keypair_create(handshake->entry.peer);
if (!new_keypair)
goto out;
new_keypair->i_am_the_initiator = handshake->state ==
HANDSHAKE_CONSUMED_RESPONSE;
new_keypair->remote_index = handshake->remote_index;
if (new_keypair->i_am_the_initiator)
derive_keys(&new_keypair->sending, &new_keypair->receiving,
handshake->chaining_key);
else
derive_keys(&new_keypair->receiving, &new_keypair->sending,
handshake->chaining_key);
handshake_zero(handshake);
rcu_read_lock_bh();
if (likely(!READ_ONCE(container_of(handshake, struct wg_peer,
handshake)->is_dead))) {
add_new_keypair(keypairs, new_keypair);
net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
handshake->entry.peer->device->dev->name,
new_keypair->internal_id,
handshake->entry.peer->internal_id);
ret = wg_index_hashtable_replace(
handshake->entry.peer->device->index_hashtable,
&handshake->entry, &new_keypair->entry);
} else {
kfree_sensitive(new_keypair);
}
rcu_read_unlock_bh();
out:
up_write(&handshake->lock);
return ret;
}

135
net/wireguard/noise.h Normal file
View File

@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_NOISE_H
#define _WG_NOISE_H
#include "messages.h"
#include "peerlookup.h"
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/kref.h>
struct noise_replay_counter {
u64 counter;
spinlock_t lock;
unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
};
struct noise_symmetric_key {
u8 key[NOISE_SYMMETRIC_KEY_LEN];
u64 birthdate;
bool is_valid;
};
struct noise_keypair {
struct index_hashtable_entry entry;
struct noise_symmetric_key sending;
atomic64_t sending_counter;
struct noise_symmetric_key receiving;
struct noise_replay_counter receiving_counter;
__le32 remote_index;
bool i_am_the_initiator;
struct kref refcount;
struct rcu_head rcu;
u64 internal_id;
};
struct noise_keypairs {
struct noise_keypair __rcu *current_keypair;
struct noise_keypair __rcu *previous_keypair;
struct noise_keypair __rcu *next_keypair;
spinlock_t keypair_update_lock;
};
struct noise_static_identity {
u8 static_public[NOISE_PUBLIC_KEY_LEN];
u8 static_private[NOISE_PUBLIC_KEY_LEN];
struct rw_semaphore lock;
bool has_identity;
};
enum noise_handshake_state {
HANDSHAKE_ZEROED,
HANDSHAKE_CREATED_INITIATION,
HANDSHAKE_CONSUMED_INITIATION,
HANDSHAKE_CREATED_RESPONSE,
HANDSHAKE_CONSUMED_RESPONSE
};
struct noise_handshake {
struct index_hashtable_entry entry;
enum noise_handshake_state state;
u64 last_initiation_consumption;
struct noise_static_identity *static_identity;
u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
u8 remote_static[NOISE_PUBLIC_KEY_LEN];
u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
u8 hash[NOISE_HASH_LEN];
u8 chaining_key[NOISE_HASH_LEN];
u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
__le32 remote_index;
/* Protects all members except the immutable (after noise_handshake_
* init): remote_static, precomputed_static_static, static_identity.
*/
struct rw_semaphore lock;
};
struct wg_device;
void wg_noise_init(void);
void wg_noise_handshake_init(struct noise_handshake *handshake,
struct noise_static_identity *static_identity,
const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
struct wg_peer *peer);
void wg_noise_handshake_clear(struct noise_handshake *handshake);
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
{
atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() -
(u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
}
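/* Dating the last sent handshake (REKEY_TIMEOUT + 1) seconds into the past
 * means the usual REKEY_TIMEOUT spacing between initiations is treated as
 * already elapsed, so the next handshake initiation is not delayed.
 */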
void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
struct noise_keypair *received_keypair);
void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
void wg_noise_set_static_identity_private_key(
struct noise_static_identity *static_identity,
const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
void wg_noise_precompute_static_static(struct wg_peer *peer);
bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
struct noise_handshake *handshake);
struct wg_peer *
wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
struct wg_device *wg);
bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
struct noise_handshake *handshake);
struct wg_peer *
wg_noise_handshake_consume_response(struct message_handshake_response *src,
struct wg_device *wg);
bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
struct noise_keypairs *keypairs);
#endif /* _WG_NOISE_H */

237
net/wireguard/peer.c Normal file
View File

@ -0,0 +1,237 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "peer.h"
#include "device.h"
#include "queueing.h"
#include "timers.h"
#include "peerlookup.h"
#include "noise.h"
#include <linux/kref.h>
#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
static atomic64_t peer_counter = ATOMIC64_INIT(0);
struct wg_peer *wg_peer_create(struct wg_device *wg,
const u8 public_key[NOISE_PUBLIC_KEY_LEN],
const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
{
struct wg_peer *peer;
int ret = -ENOMEM;
lockdep_assert_held(&wg->device_update_lock);
if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
return ERR_PTR(ret);
peer = kzalloc(sizeof(*peer), GFP_KERNEL);
if (unlikely(!peer))
return ERR_PTR(ret);
peer->device = wg;
wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
public_key, preshared_key, peer);
if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
goto err_1;
if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
MAX_QUEUED_PACKETS))
goto err_2;
if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
MAX_QUEUED_PACKETS))
goto err_3;
peer->internal_id = atomic64_inc_return(&peer_counter);
peer->serial_work_cpu = nr_cpumask_bits;
wg_cookie_init(&peer->latest_cookie);
wg_timers_init(peer);
wg_cookie_checker_precompute_peer_keys(peer);
spin_lock_init(&peer->keypairs.keypair_update_lock);
INIT_WORK(&peer->transmit_handshake_work,
wg_packet_handshake_send_worker);
rwlock_init(&peer->endpoint_lock);
kref_init(&peer->refcount);
skb_queue_head_init(&peer->staged_packet_queue);
wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
NAPI_POLL_WEIGHT);
napi_enable(&peer->napi);
list_add_tail(&peer->peer_list, &wg->peer_list);
INIT_LIST_HEAD(&peer->allowedips_list);
wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
++wg->num_peers;
pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
return peer;
err_3:
wg_packet_queue_free(&peer->tx_queue, false);
err_2:
dst_cache_destroy(&peer->endpoint_cache);
err_1:
kfree(peer);
return ERR_PTR(ret);
}
struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
{
RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
"Taking peer reference without holding the RCU read lock");
if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
return NULL;
return peer;
}
static void peer_make_dead(struct wg_peer *peer)
{
/* Remove from configuration-time lookup structures. */
list_del_init(&peer->peer_list);
wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
&peer->device->device_update_lock);
wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
/* Mark as dead, so that we don't allow jumping contexts after. */
WRITE_ONCE(peer->is_dead, true);
/* The caller must now synchronize_rcu() for this to take effect. */
}
static void peer_remove_after_dead(struct wg_peer *peer)
{
WARN_ON(!peer->is_dead);
/* No more keypairs can be created for this peer, since is_dead protects
* add_new_keypair, so we can now destroy existing ones.
*/
wg_noise_keypairs_clear(&peer->keypairs);
/* Destroy all ongoing timers that were in-flight at the beginning of
* this function.
*/
wg_timers_stop(peer);
/* The transition between packet encryption/decryption queues isn't
* guarded by is_dead, but each reference's life is strictly bounded by
* two generations: once for parallel crypto and once for serial
* ingestion, so we can simply flush twice, and be sure that we no
* longer have references inside these queues.
*/
/* a) For encrypt/decrypt. */
flush_workqueue(peer->device->packet_crypt_wq);
/* b.1) For send (but not receive, since that's napi). */
flush_workqueue(peer->device->packet_crypt_wq);
/* b.2.1) For receive (but not send, since that's wq). */
napi_disable(&peer->napi);
/* b.2.1) It's now safe to remove the napi struct, which must be done
* here from process context.
*/
netif_napi_del(&peer->napi);
/* Ensure any workstructs we own (like transmit_handshake_work or
* clear_peer_work) no longer are in use.
*/
flush_workqueue(peer->device->handshake_send_wq);
/* After the above flushes, a peer might still be active in a few
* different contexts: 1) from xmit(), before hitting is_dead and
* returning, 2) from wg_packet_consume_data(), before hitting is_dead
* and returning, 3) from wg_receive_handshake_packet() after a point
* where it has processed an incoming handshake packet, but where
* all calls to pass it off to timers fails because of is_dead. We won't
* have new references in (1) eventually, because we're removed from
* allowedips; we won't have new references in (2) eventually, because
* wg_index_hashtable_lookup will always return NULL, since we removed
* all existing keypairs and no more can be created; we won't have new
* references in (3) eventually, because we're removed from the pubkey
* hash table, which allows for a maximum of one handshake response,
* via the still-uncleared index hashtable entry, but not more than one,
* and in wg_cookie_message_consume, the lookup eventually gets a peer
* with a refcount of zero, so no new reference is taken.
*/
--peer->device->num_peers;
wg_peer_put(peer);
}
/* We have a separate "remove" function to make sure that all active places where
* a peer is currently operating will eventually come to an end and not pass
* their reference onto another context.
*/
void wg_peer_remove(struct wg_peer *peer)
{
if (unlikely(!peer))
return;
lockdep_assert_held(&peer->device->device_update_lock);
peer_make_dead(peer);
synchronize_rcu();
peer_remove_after_dead(peer);
}
void wg_peer_remove_all(struct wg_device *wg)
{
struct wg_peer *peer, *temp;
LIST_HEAD(dead_peers);
lockdep_assert_held(&wg->device_update_lock);
/* Avoid having to traverse individually for each one. */
wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
peer_make_dead(peer);
list_add_tail(&peer->peer_list, &dead_peers);
}
synchronize_rcu();
list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
peer_remove_after_dead(peer);
}
static void rcu_release(struct rcu_head *rcu)
{
struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
dst_cache_destroy(&peer->endpoint_cache);
wg_packet_queue_free(&peer->rx_queue, false);
wg_packet_queue_free(&peer->tx_queue, false);
/* The final zeroing takes care of clearing any remaining handshake key
* material and other potentially sensitive information.
*/
kfree_sensitive(peer);
}
static void kref_release(struct kref *refcount)
{
struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
peer->device->dev->name, peer->internal_id,
&peer->endpoint.addr);
/* Remove ourself from dynamic runtime lookup structures, now that the
* last reference is gone.
*/
wg_index_hashtable_remove(peer->device->index_hashtable,
&peer->handshake.entry);
/* Remove any lingering packets that didn't have a chance to be
* transmitted.
*/
wg_packet_purge_staged_packets(peer);
/* Free the memory used. */
call_rcu(&peer->rcu, rcu_release);
}
void wg_peer_put(struct wg_peer *peer)
{
if (unlikely(!peer))
return;
kref_put(&peer->refcount, kref_release);
}

83
net/wireguard/peer.h Normal file
View File

@ -0,0 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_PEER_H
#define _WG_PEER_H
#include "device.h"
#include "noise.h"
#include "cookie.h"
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/spinlock.h>
#include <linux/kref.h>
#include <net/dst_cache.h>
struct wg_device;
struct endpoint {
union {
struct sockaddr addr;
struct sockaddr_in addr4;
struct sockaddr_in6 addr6;
};
union {
struct {
struct in_addr src4;
/* Essentially the same as addr6->scope_id */
int src_if4;
};
struct in6_addr src6;
};
};
struct wg_peer {
struct wg_device *device;
struct crypt_queue tx_queue, rx_queue;
struct sk_buff_head staged_packet_queue;
int serial_work_cpu;
struct noise_keypairs keypairs;
struct endpoint endpoint;
struct dst_cache endpoint_cache;
rwlock_t endpoint_lock;
struct noise_handshake handshake;
atomic64_t last_sent_handshake;
struct work_struct transmit_handshake_work, clear_peer_work;
struct cookie latest_cookie;
struct hlist_node pubkey_hash;
u64 rx_bytes, tx_bytes;
struct timer_list timer_retransmit_handshake, timer_send_keepalive;
struct timer_list timer_new_handshake, timer_zero_key_material;
struct timer_list timer_persistent_keepalive;
unsigned int timer_handshake_attempts;
u16 persistent_keepalive_interval;
bool timer_need_another_keepalive;
bool sent_lastminute_handshake;
struct timespec64 walltime_last_handshake;
struct kref refcount;
struct rcu_head rcu;
struct list_head peer_list;
struct list_head allowedips_list;
u64 internal_id;
struct napi_struct napi;
bool is_dead;
};
struct wg_peer *wg_peer_create(struct wg_device *wg,
const u8 public_key[NOISE_PUBLIC_KEY_LEN],
const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
{
kref_get(&peer->refcount);
return peer;
}
void wg_peer_put(struct wg_peer *peer);
void wg_peer_remove(struct wg_peer *peer);
void wg_peer_remove_all(struct wg_device *wg);
#endif /* _WG_PEER_H */

226
net/wireguard/peerlookup.c Normal file
View File

@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "peerlookup.h"
#include "peer.h"
#include "noise.h"
static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
{
/* siphash gives us a secure 64bit number based on a random key. Since
* the bits are uniformly distributed, we can then mask off to get the
* bits we need.
*/
const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
}
struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
{
struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
get_random_bytes(&table->key, sizeof(table->key));
hash_init(table->hashtable);
mutex_init(&table->lock);
return table;
}
void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
struct wg_peer *peer)
{
mutex_lock(&table->lock);
hlist_add_head_rcu(&peer->pubkey_hash,
pubkey_bucket(table, peer->handshake.remote_static));
mutex_unlock(&table->lock);
}
void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
struct wg_peer *peer)
{
mutex_lock(&table->lock);
hlist_del_init_rcu(&peer->pubkey_hash);
mutex_unlock(&table->lock);
}
/* Returns a strong reference to a peer */
struct wg_peer *
wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
{
struct wg_peer *iter_peer, *peer = NULL;
rcu_read_lock_bh();
hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
pubkey_hash) {
if (!memcmp(pubkey, iter_peer->handshake.remote_static,
NOISE_PUBLIC_KEY_LEN)) {
peer = iter_peer;
break;
}
}
peer = wg_peer_get_maybe_zero(peer);
rcu_read_unlock_bh();
return peer;
}
static struct hlist_head *index_bucket(struct index_hashtable *table,
const __le32 index)
{
/* Since the indices are random and thus all bits are uniformly
* distributed, we can find its bucket simply by masking.
*/
return &table->hashtable[(__force u32)index &
(HASH_SIZE(table->hashtable) - 1)];
}
struct index_hashtable *wg_index_hashtable_alloc(void)
{
struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
hash_init(table->hashtable);
spin_lock_init(&table->lock);
return table;
}
/* At the moment, we limit ourselves to 2^20 total peers, which generally might
* amount to 2^20*3 items in this hashtable. The algorithm below works by
* picking a random number and testing it. We can see that these limits mean we
* usually succeed pretty quickly:
*
* >>> def calculation(tries, size):
* ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32))
* ...
* >>> calculation(1, 2**20 * 3)
* 0.999267578125
* >>> calculation(2, 2**20 * 3)
* 0.0007318854331970215
* >>> calculation(3, 2**20 * 3)
* 5.360489012673497e-07
* >>> calculation(4, 2**20 * 3)
* 3.9261394135792216e-10
*
* At the moment, we don't do any masking, so this algorithm isn't exactly
* constant time in either the random guessing or in the hash list lookup. We
* could require a minimum of 3 tries, which would successfully mask the
* guessing. This would not, however, help with the growing hash lengths, which
* is another thing to consider moving forward.
*/
__le32 wg_index_hashtable_insert(struct index_hashtable *table,
struct index_hashtable_entry *entry)
{
struct index_hashtable_entry *existing_entry;
spin_lock_bh(&table->lock);
hlist_del_init_rcu(&entry->index_hash);
spin_unlock_bh(&table->lock);
rcu_read_lock_bh();
search_unused_slot:
/* First we try to find an unused slot, randomly, while unlocked. */
entry->index = (__force __le32)get_random_u32();
hlist_for_each_entry_rcu_bh(existing_entry,
index_bucket(table, entry->index),
index_hash) {
if (existing_entry->index == entry->index)
/* If it's already in use, we continue searching. */
goto search_unused_slot;
}
/* Once we've found an unused slot, we lock it, and then double-check
* that nobody else stole it from us.
*/
spin_lock_bh(&table->lock);
hlist_for_each_entry_rcu_bh(existing_entry,
index_bucket(table, entry->index),
index_hash) {
if (existing_entry->index == entry->index) {
spin_unlock_bh(&table->lock);
/* If it was stolen, we start over. */
goto search_unused_slot;
}
}
/* Otherwise, we know we have it exclusively (since we're locked),
* so we insert.
*/
hlist_add_head_rcu(&entry->index_hash,
index_bucket(table, entry->index));
spin_unlock_bh(&table->lock);
rcu_read_unlock_bh();
return entry->index;
}
bool wg_index_hashtable_replace(struct index_hashtable *table,
struct index_hashtable_entry *old,
struct index_hashtable_entry *new)
{
bool ret;
spin_lock_bh(&table->lock);
ret = !hlist_unhashed(&old->index_hash);
if (unlikely(!ret))
goto out;
new->index = old->index;
hlist_replace_rcu(&old->index_hash, &new->index_hash);
/* Calling init here NULLs out index_hash, and in fact after this
* function returns, it's theoretically possible for this to get
* reinserted elsewhere. That means the RCU lookup below might either
* terminate early or jump between buckets, in which case the packet
* simply gets dropped, which isn't terrible.
*/
INIT_HLIST_NODE(&old->index_hash);
out:
spin_unlock_bh(&table->lock);
return ret;
}
void wg_index_hashtable_remove(struct index_hashtable *table,
struct index_hashtable_entry *entry)
{
spin_lock_bh(&table->lock);
hlist_del_init_rcu(&entry->index_hash);
spin_unlock_bh(&table->lock);
}
/* Returns a strong reference to a entry->peer */
struct index_hashtable_entry *
wg_index_hashtable_lookup(struct index_hashtable *table,
const enum index_hashtable_type type_mask,
const __le32 index, struct wg_peer **peer)
{
struct index_hashtable_entry *iter_entry, *entry = NULL;
rcu_read_lock_bh();
hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
index_hash) {
if (iter_entry->index == index) {
if (likely(iter_entry->type & type_mask))
entry = iter_entry;
break;
}
}
if (likely(entry)) {
entry->peer = wg_peer_get_maybe_zero(entry->peer);
if (likely(entry->peer))
*peer = entry->peer;
else
entry = NULL;
}
rcu_read_unlock_bh();
return entry;
}

64
net/wireguard/peerlookup.h Normal file
View File

@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_PEERLOOKUP_H
#define _WG_PEERLOOKUP_H
#include "messages.h"
#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/siphash.h>
struct wg_peer;
struct pubkey_hashtable {
/* TODO: move to rhashtable */
DECLARE_HASHTABLE(hashtable, 11);
siphash_key_t key;
struct mutex lock;
};
struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
struct wg_peer *peer);
void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
struct wg_peer *peer);
struct wg_peer *
wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
struct index_hashtable {
/* TODO: move to rhashtable */
DECLARE_HASHTABLE(hashtable, 13);
spinlock_t lock;
};
enum index_hashtable_type {
INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
INDEX_HASHTABLE_KEYPAIR = 1U << 1
};
struct index_hashtable_entry {
struct wg_peer *peer;
struct hlist_node index_hash;
enum index_hashtable_type type;
__le32 index;
};
struct index_hashtable *wg_index_hashtable_alloc(void);
__le32 wg_index_hashtable_insert(struct index_hashtable *table,
struct index_hashtable_entry *entry);
bool wg_index_hashtable_replace(struct index_hashtable *table,
struct index_hashtable_entry *old,
struct index_hashtable_entry *new);
void wg_index_hashtable_remove(struct index_hashtable *table,
struct index_hashtable_entry *entry);
struct index_hashtable_entry *
wg_index_hashtable_lookup(struct index_hashtable *table,
const enum index_hashtable_type type_mask,
const __le32 index, struct wg_peer **peer);
#endif /* _WG_PEERLOOKUP_H */

55
net/wireguard/queueing.c Normal file
View File

@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "queueing.h"
struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
{
int cpu;
struct multicore_worker __percpu *worker =
alloc_percpu(struct multicore_worker);
if (!worker)
return NULL;
for_each_possible_cpu(cpu) {
per_cpu_ptr(worker, cpu)->ptr = ptr;
INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
}
return worker;
}
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
bool multicore, unsigned int len)
{
int ret;
memset(queue, 0, sizeof(*queue));
ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
if (ret)
return ret;
if (function) {
if (multicore) {
queue->worker = wg_packet_percpu_multicore_worker_alloc(
function, queue);
if (!queue->worker) {
ptr_ring_cleanup(&queue->ring, NULL);
return -ENOMEM;
}
} else {
INIT_WORK(&queue->work, function);
}
}
return 0;
}
void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
{
if (multicore)
free_percpu(queue->worker);
WARN_ON(!__ptr_ring_empty(&queue->ring));
ptr_ring_cleanup(&queue->ring, NULL);
}

196
net/wireguard/queueing.h Normal file
View File

@ -0,0 +1,196 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_QUEUEING_H
#define _WG_QUEUEING_H
#include "peer.h"
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip_tunnels.h>
struct wg_device;
struct wg_peer;
struct multicore_worker;
struct crypt_queue;
struct sk_buff;
/* queueing.c APIs: */
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
bool multicore, unsigned int len);
void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
/* receive.c APIs: */
void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
void wg_packet_handshake_receive_worker(struct work_struct *work);
/* NAPI poll function: */
int wg_packet_rx_poll(struct napi_struct *napi, int budget);
/* Workqueue worker: */
void wg_packet_decrypt_worker(struct work_struct *work);
/* send.c APIs: */
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
bool is_retry);
void wg_packet_send_handshake_response(struct wg_peer *peer);
void wg_packet_send_handshake_cookie(struct wg_device *wg,
struct sk_buff *initiating_skb,
__le32 sender_index);
void wg_packet_send_keepalive(struct wg_peer *peer);
void wg_packet_purge_staged_packets(struct wg_peer *peer);
void wg_packet_send_staged_packets(struct wg_peer *peer);
/* Workqueue workers: */
void wg_packet_handshake_send_worker(struct work_struct *work);
void wg_packet_tx_worker(struct work_struct *work);
void wg_packet_encrypt_worker(struct work_struct *work);
enum packet_state {
PACKET_STATE_UNCRYPTED,
PACKET_STATE_CRYPTED,
PACKET_STATE_DEAD
};
struct packet_cb {
u64 nonce;
struct noise_keypair *keypair;
atomic_t state;
u32 mtu;
u8 ds;
};
#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{
__be16 real_protocol = ip_tunnel_parse_protocol(skb);
return real_protocol && skb->protocol == real_protocol;
}
static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
{
const int pfmemalloc = skb->pfmemalloc;
u32 hash = skb->hash;
u8 l4_hash = skb->l4_hash;
u8 sw_hash = skb->sw_hash;
skb_scrub_packet(skb, true);
memset(&skb->headers_start, 0,
offsetof(struct sk_buff, headers_end) -
offsetof(struct sk_buff, headers_start));
skb->pfmemalloc = pfmemalloc;
if (encapsulating) {
skb->hash = hash;
skb->l4_hash = l4_hash;
skb->sw_hash = sw_hash;
}
skb->queue_mapping = 0;
skb->nohdr = 0;
skb->peeked = 0;
skb->mac_len = 0;
skb->dev = NULL;
#ifdef CONFIG_NET_SCHED
skb->tc_index = 0;
#endif
skb_reset_redirect(skb);
skb->hdr_len = skb_headroom(skb);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb_probe_transport_header(skb);
skb_reset_inner_headers(skb);
}
static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
{
unsigned int cpu = *stored_cpu, cpu_index, i;
if (unlikely(cpu == nr_cpumask_bits ||
!cpumask_test_cpu(cpu, cpu_online_mask))) {
cpu_index = id % cpumask_weight(cpu_online_mask);
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < cpu_index; ++i)
cpu = cpumask_next(cpu, cpu_online_mask);
*stored_cpu = cpu;
}
return cpu;
}
/* This function is racy, in the sense that next is unlocked, so it could return
* the same CPU twice. A race-free version of this would be to instead store an
* atomic sequence number, do an increment-and-return, and then iterate through
* every possible CPU until we get to that index -- choose_cpu. However that's
* a bit slower, and it doesn't seem like this potential race actually
* introduces any performance loss, so we live with it.
*/
static inline int wg_cpumask_next_online(int *next)
{
int cpu = *next;
while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
return cpu;
}
static inline int wg_queue_enqueue_per_device_and_peer(
struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
{
int cpu;
atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
/* We first queue this up for the peer ingestion, but the consumer
* will wait for the state to change to CRYPTED or DEAD before using it.
*/
if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
return -ENOSPC;
/* Then we queue it up in the device queue, which consumes the
* packet as soon as it can.
*/
cpu = wg_cpumask_next_online(next_cpu);
if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
return -EPIPE;
queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
return 0;
}
static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
struct sk_buff *skb,
enum packet_state state)
{
/* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us.
*/
struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
atomic_set_release(&PACKET_CB(skb)->state, state);
queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
peer->internal_id),
peer->device->packet_crypt_wq, &queue->work);
wg_peer_put(peer);
}
static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
enum packet_state state)
{
/* We take a reference, because as soon as we call atomic_set, the
* peer can be freed from below us.
*/
struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
atomic_set_release(&PACKET_CB(skb)->state, state);
napi_schedule(&peer->napi);
wg_peer_put(peer);
}
#ifdef DEBUG
bool wg_packet_counter_selftest(void);
#endif
#endif /* _WG_QUEUEING_H */

235
net/wireguard/ratelimiter.c Normal file
View File

@ -0,0 +1,235 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifdef COMPAT_CANNOT_DEPRECIATE_BH_RCU
/* We normally alias all non-_bh functions to the _bh ones in the compat layer,
* but that's not appropriate here, where we actually do want non-_bh ones.
*/
#undef synchronize_rcu
#define synchronize_rcu old_synchronize_rcu
#undef call_rcu
#define call_rcu old_call_rcu
#undef rcu_barrier
#define rcu_barrier old_rcu_barrier
#endif
#include "ratelimiter.h"
#include <linux/siphash.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>
static struct kmem_cache *entry_cache;
static hsiphash_key_t key;
static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
static DEFINE_MUTEX(init_lock);
static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
static atomic_t total_entries = ATOMIC_INIT(0);
static unsigned int max_entries, table_size;
static void wg_ratelimiter_gc_entries(struct work_struct *);
static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
static struct hlist_head *table_v4;
#if IS_ENABLED(CONFIG_IPV6)
static struct hlist_head *table_v6;
#endif
struct ratelimiter_entry {
u64 last_time_ns, tokens, ip;
void *net;
spinlock_t lock;
struct hlist_node hash;
struct rcu_head rcu;
};
enum {
PACKETS_PER_SECOND = 20,
PACKETS_BURSTABLE = 5,
PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
};
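/* With the constants above, PACKET_COST = NSEC_PER_SEC / 20 = 50,000,000 ns
 * per packet and TOKEN_MAX = 250,000,000 ns, i.e. a token bucket allowing a
 * sustained 20 packets per second per (net, ip) pair with bursts of up to 5.
 */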
static void entry_free(struct rcu_head *rcu)
{
kmem_cache_free(entry_cache,
container_of(rcu, struct ratelimiter_entry, rcu));
atomic_dec(&total_entries);
}
static void entry_uninit(struct ratelimiter_entry *entry)
{
hlist_del_rcu(&entry->hash);
call_rcu(&entry->rcu, entry_free);
}
/* Calling this function with a NULL work uninits all entries. */
static void wg_ratelimiter_gc_entries(struct work_struct *work)
{
const u64 now = ktime_get_coarse_boottime_ns();
struct ratelimiter_entry *entry;
struct hlist_node *temp;
unsigned int i;
for (i = 0; i < table_size; ++i) {
spin_lock(&table_lock);
hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
if (unlikely(!work) ||
now - entry->last_time_ns > NSEC_PER_SEC)
entry_uninit(entry);
}
#if IS_ENABLED(CONFIG_IPV6)
hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
if (unlikely(!work) ||
now - entry->last_time_ns > NSEC_PER_SEC)
entry_uninit(entry);
}
#endif
spin_unlock(&table_lock);
if (likely(work))
cond_resched();
}
if (likely(work))
queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
}
bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
{
/* We only take the bottom half of the net pointer, so that we can hash
* 3 words in the end. This way, siphash's len param fits into the final
* u32, and we don't incur an extra round.
*/
const u32 net_word = (unsigned long)net;
struct ratelimiter_entry *entry;
struct hlist_head *bucket;
u64 ip;
if (skb->protocol == htons(ETH_P_IP)) {
ip = (u64 __force)ip_hdr(skb)->saddr;
bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
(table_size - 1)];
}
#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
/* Only use 64 bits, so as to ratelimit the whole /64. */
memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
(table_size - 1)];
}
#endif
else
return false;
rcu_read_lock();
hlist_for_each_entry_rcu(entry, bucket, hash) {
if (entry->net == net && entry->ip == ip) {
u64 now, tokens;
bool ret;
/* Quasi-inspired by nft_limit.c, but this is actually a
* slightly different algorithm. Namely, we incorporate
* the burst as part of the maximum tokens, rather than
* as part of the rate.
*/
spin_lock(&entry->lock);
now = ktime_get_coarse_boottime_ns();
tokens = min_t(u64, TOKEN_MAX,
entry->tokens + now -
entry->last_time_ns);
entry->last_time_ns = now;
ret = tokens >= PACKET_COST;
entry->tokens = ret ? tokens - PACKET_COST : tokens;
spin_unlock(&entry->lock);
rcu_read_unlock();
return ret;
}
}
rcu_read_unlock();
if (atomic_inc_return(&total_entries) > max_entries)
goto err_oom;
entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
if (unlikely(!entry))
goto err_oom;
entry->net = net;
entry->ip = ip;
INIT_HLIST_NODE(&entry->hash);
spin_lock_init(&entry->lock);
entry->last_time_ns = ktime_get_coarse_boottime_ns();
entry->tokens = TOKEN_MAX - PACKET_COST;
spin_lock(&table_lock);
hlist_add_head_rcu(&entry->hash, bucket);
spin_unlock(&table_lock);
return true;
err_oom:
atomic_dec(&total_entries);
return false;
}
int wg_ratelimiter_init(void)
{
mutex_lock(&init_lock);
if (++init_refcnt != 1)
goto out;
entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
if (!entry_cache)
goto err;
/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
* but what it shares in common is that it uses a massive hashtable. So,
* we borrow their wisdom about good table sizes on different systems
* dependent on RAM. This calculation here comes from there.
*/
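/* For example, assuming 4 KiB pages and 8-byte hlist heads, a 512 MiB
* machine gets roundup_pow_of_two(2^29 / 2^14 / 8) = 4096 buckets (and
* thus max_entries = 32768), while anything above 1 GiB of RAM is
* clamped to 8192 buckets.
*/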
table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
max_t(unsigned long, 16, roundup_pow_of_two(
(totalram_pages() << PAGE_SHIFT) /
(1U << 14) / sizeof(struct hlist_head)));
max_entries = table_size * 8;
table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
if (unlikely(!table_v4))
goto err_kmemcache;
#if IS_ENABLED(CONFIG_IPV6)
table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
if (unlikely(!table_v6)) {
kvfree(table_v4);
goto err_kmemcache;
}
#endif
queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
get_random_bytes(&key, sizeof(key));
out:
mutex_unlock(&init_lock);
return 0;
err_kmemcache:
kmem_cache_destroy(entry_cache);
err:
--init_refcnt;
mutex_unlock(&init_lock);
return -ENOMEM;
}
void wg_ratelimiter_uninit(void)
{
mutex_lock(&init_lock);
if (!init_refcnt || --init_refcnt)
goto out;
cancel_delayed_work_sync(&gc_work);
wg_ratelimiter_gc_entries(NULL);
rcu_barrier();
kvfree(table_v4);
#if IS_ENABLED(CONFIG_IPV6)
kvfree(table_v6);
#endif
kmem_cache_destroy(entry_cache);
out:
mutex_unlock(&init_lock);
}
#include "selftest/ratelimiter.c"

19
net/wireguard/ratelimiter.h Normal file
View File
19
net/wireguard/ratelimiter.h Normal file
View File

@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifndef _WG_RATELIMITER_H
#define _WG_RATELIMITER_H
#include <linux/skbuff.h>
int wg_ratelimiter_init(void);
void wg_ratelimiter_uninit(void);
bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);
#ifdef DEBUG
bool wg_ratelimiter_selftest(void);
#endif
#endif /* _WG_RATELIMITER_H */

599
net/wireguard/receive.c Normal file
View File

@ -0,0 +1,599 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "queueing.h"
#include "device.h"
#include "peer.h"
#include "timers.h"
#include "messages.h"
#include "cookie.h"
#include "socket.h"
#include <linux/simd.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <net/ip_tunnels.h>
/* Must be called with bh disabled. */
static void update_rx_stats(struct wg_peer *peer, size_t len)
{
struct pcpu_sw_netstats *tstats =
get_cpu_ptr(peer->device->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
++tstats->rx_packets;
tstats->rx_bytes += len;
peer->rx_bytes += len;
u64_stats_update_end(&tstats->syncp);
put_cpu_ptr(tstats);
}
#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
static size_t validate_header_len(struct sk_buff *skb)
{
if (unlikely(skb->len < sizeof(struct message_header)))
return 0;
if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
skb->len >= MESSAGE_MINIMUM_LENGTH)
return sizeof(struct message_data);
if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
skb->len == sizeof(struct message_handshake_initiation))
return sizeof(struct message_handshake_initiation);
if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
skb->len == sizeof(struct message_handshake_response))
return sizeof(struct message_handshake_response);
if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
skb->len == sizeof(struct message_handshake_cookie))
return sizeof(struct message_handshake_cookie);
return 0;
}
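/* On success the skb has been trimmed to the UDP payload, skb->data
* points at the WireGuard message header, and the full fixed-size
* header for that message type is linear in the skb.
*/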
static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
{
size_t data_offset, data_len, header_len;
struct udphdr *udp;
if (unlikely(!wg_check_packet_protocol(skb) ||
skb_transport_header(skb) < skb->head ||
(skb_transport_header(skb) + sizeof(struct udphdr)) >
skb_tail_pointer(skb)))
return -EINVAL; /* Bogus IP header */
udp = udp_hdr(skb);
data_offset = (u8 *)udp - skb->data;
if (unlikely(data_offset > U16_MAX ||
data_offset + sizeof(struct udphdr) > skb->len))
/* Packet has offset at impossible location or isn't big enough
* to have UDP fields.
*/
return -EINVAL;
data_len = ntohs(udp->len);
if (unlikely(data_len < sizeof(struct udphdr) ||
data_len > skb->len - data_offset))
/* UDP packet is reporting too small of a size or lying about
* its size.
*/
return -EINVAL;
data_len -= sizeof(struct udphdr);
data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
if (unlikely(!pskb_may_pull(skb,
data_offset + sizeof(struct message_header)) ||
pskb_trim(skb, data_len + data_offset) < 0))
return -EINVAL;
skb_pull(skb, data_offset);
if (unlikely(skb->len != data_len))
/* Final len does not agree with calculated len */
return -EINVAL;
header_len = validate_header_len(skb);
if (unlikely(!header_len))
return -EINVAL;
__skb_push(skb, data_offset);
if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
return -EINVAL;
__skb_pull(skb, data_offset);
return 0;
}
static void wg_receive_handshake_packet(struct wg_device *wg,
struct sk_buff *skb)
{
enum cookie_mac_state mac_state;
struct wg_peer *peer = NULL;
/* This is global, so that our load calculation applies to the whole
* system. We don't care about races with it at all.
*/
static u64 last_under_load;
bool packet_needs_cookie;
bool under_load;
if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
wg->dev->name, skb);
wg_cookie_message_consume(
(struct message_handshake_cookie *)skb->data, wg);
return;
}
under_load = skb_queue_len(&wg->incoming_handshakes) >=
MAX_QUEUED_INCOMING_HANDSHAKES / 8;
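/* "Under load" means the handshake queue is at least one eighth full
* (512 entries if MAX_QUEUED_INCOMING_HANDSHAKES is the usual 4096 from
* device.h); the state is then held for a further second below so it
* doesn't flap.
*/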
if (under_load) {
last_under_load = ktime_get_coarse_boottime_ns();
} else if (last_under_load) {
under_load = !wg_birthdate_has_expired(last_under_load, 1);
if (!under_load)
last_under_load = 0;
}
mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
under_load);
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
(!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
packet_needs_cookie = false;
} else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
packet_needs_cookie = true;
} else {
net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
wg->dev->name, skb);
return;
}
switch (SKB_TYPE_LE32(skb)) {
case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
struct message_handshake_initiation *message =
(struct message_handshake_initiation *)skb->data;
if (packet_needs_cookie) {
wg_packet_send_handshake_cookie(wg, skb,
message->sender_index);
return;
}
peer = wg_noise_handshake_consume_initiation(message, wg);
if (unlikely(!peer)) {
net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
wg->dev->name, skb);
return;
}
wg_socket_set_peer_endpoint_from_skb(peer, skb);
net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
wg->dev->name, peer->internal_id,
&peer->endpoint.addr);
wg_packet_send_handshake_response(peer);
break;
}
case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
struct message_handshake_response *message =
(struct message_handshake_response *)skb->data;
if (packet_needs_cookie) {
wg_packet_send_handshake_cookie(wg, skb,
message->sender_index);
return;
}
peer = wg_noise_handshake_consume_response(message, wg);
if (unlikely(!peer)) {
net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
wg->dev->name, skb);
return;
}
wg_socket_set_peer_endpoint_from_skb(peer, skb);
net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
wg->dev->name, peer->internal_id,
&peer->endpoint.addr);
if (wg_noise_handshake_begin_session(&peer->handshake,
&peer->keypairs)) {
wg_timers_session_derived(peer);
wg_timers_handshake_complete(peer);
/* Calling this function will either send any existing
* packets in the queue and not send a keepalive, which
* is the best case. Or, if there's nothing in the
* queue, it will send a keepalive, in order to give
* immediate confirmation of the session.
*/
wg_packet_send_keepalive(peer);
}
break;
}
}
if (unlikely(!peer)) {
WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
return;
}
local_bh_disable();
update_rx_stats(peer, skb->len);
local_bh_enable();
wg_timers_any_authenticated_packet_received(peer);
wg_timers_any_authenticated_packet_traversal(peer);
wg_peer_put(peer);
}
void wg_packet_handshake_receive_worker(struct work_struct *work)
{
struct wg_device *wg = container_of(work, struct multicore_worker,
work)->ptr;
struct sk_buff *skb;
while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
wg_receive_handshake_packet(wg, skb);
dev_kfree_skb(skb);
cond_resched();
}
}
static void keep_key_fresh(struct wg_peer *peer)
{
struct noise_keypair *keypair;
bool send;
if (peer->sent_lastminute_handshake)
return;
rcu_read_lock_bh();
keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
send = keypair && READ_ONCE(keypair->sending.is_valid) &&
keypair->i_am_the_initiator &&
wg_birthdate_has_expired(keypair->sending.birthdate,
REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT);
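/* With WireGuard's standard protocol constants (REJECT_AFTER_TIME 180 s,
* KEEPALIVE_TIMEOUT 10 s, REKEY_TIMEOUT 5 s) this makes the initiator
* start a fresh handshake once its sending key is older than 165 s.
*/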
rcu_read_unlock_bh();
if (unlikely(send)) {
peer->sent_lastminute_handshake = true;
wg_packet_send_queued_handshake_initiation(peer, false);
}
}
static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
simd_context_t *simd_context)
{
struct scatterlist sg[MAX_SKB_FRAGS + 8];
struct sk_buff *trailer;
unsigned int offset;
int num_frags;
if (unlikely(!keypair))
return false;
if (unlikely(!READ_ONCE(keypair->receiving.is_valid) ||
wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) ||
keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) {
WRITE_ONCE(keypair->receiving.is_valid, false);
return false;
}
PACKET_CB(skb)->nonce =
le64_to_cpu(((struct message_data *)skb->data)->counter);
/* We ensure that the network header is part of the packet before we
* call skb_cow_data, so that there's no chance that data is removed
* from the skb, which lets us extract the original endpoint later.
*/
offset = skb->data - skb_network_header(skb);
skb_push(skb, offset);
num_frags = skb_cow_data(skb, 0, &trailer);
offset += sizeof(struct message_data);
skb_pull(skb, offset);
if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
return false;
sg_init_table(sg, num_frags);
if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
return false;
if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
PACKET_CB(skb)->nonce,
keypair->receiving.key,
simd_context))
return false;
/* Another ugly situation of pushing and pulling the header so as to
* keep endpoint information intact.
*/
skb_push(skb, offset);
if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
return false;
skb_pull(skb, offset);
return true;
}
/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
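/* The backtrack[] bitmap tracks the last COUNTER_WINDOW_SIZE counter
* values, one bit each, grouped into machine words. A counter newer than
* anything seen so far slides the window forward by zeroing the words it
* skips over; a counter older than the window is rejected outright; a
* counter inside the window is accepted only if its bit was not already
* set, which is what catches replays.
*/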
static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter)
{
unsigned long index, index_current, top, i;
bool ret = false;
spin_lock_bh(&counter->lock);
if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 ||
their_counter >= REJECT_AFTER_MESSAGES))
goto out;
++their_counter;
if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
counter->counter))
goto out;
index = their_counter >> ilog2(BITS_PER_LONG);
if (likely(their_counter > counter->counter)) {
index_current = counter->counter >> ilog2(BITS_PER_LONG);
top = min_t(unsigned long, index - index_current,
COUNTER_BITS_TOTAL / BITS_PER_LONG);
for (i = 1; i <= top; ++i)
counter->backtrack[(i + index_current) &
((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
counter->counter = their_counter;
}
index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
&counter->backtrack[index]);
out:
spin_unlock_bh(&counter->lock);
return ret;
}
#include "selftest/counter.c"
static void wg_packet_consume_data_done(struct wg_peer *peer,
struct sk_buff *skb,
struct endpoint *endpoint)
{
struct net_device *dev = peer->device->dev;
unsigned int len, len_before_trim;
struct wg_peer *routed_peer;
wg_socket_set_peer_endpoint(peer, endpoint);
if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
PACKET_CB(skb)->keypair))) {
wg_timers_handshake_complete(peer);
wg_packet_send_staged_packets(peer);
}
keep_key_fresh(peer);
wg_timers_any_authenticated_packet_received(peer);
wg_timers_any_authenticated_packet_traversal(peer);
/* A packet with length 0 is a keepalive packet */
if (unlikely(!skb->len)) {
update_rx_stats(peer, message_data_len(0));
net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
dev->name, peer->internal_id,
&peer->endpoint.addr);
goto packet_processed;
}
wg_timers_data_received(peer);
if (unlikely(skb_network_header(skb) < skb->head))
goto dishonest_packet_size;
if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
(ip_hdr(skb)->version == 4 ||
(ip_hdr(skb)->version == 6 &&
pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
goto dishonest_packet_type;
skb->dev = dev;
/* We've already verified the Poly1305 auth tag, which means this packet
* was not modified in transit. We can therefore tell the networking
* stack that all checksums of every layer of encapsulation have already
* been checked "by the hardware", so there is no need to check them
* again in software.
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
#ifndef COMPAT_CANNOT_USE_CSUM_LEVEL
skb->csum_level = ~0; /* All levels */
#endif
skb->protocol = ip_tunnel_parse_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
goto dishonest_packet_size;
INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
len = ntohs(ipv6_hdr(skb)->payload_len) +
sizeof(struct ipv6hdr);
INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
} else {
goto dishonest_packet_type;
}
if (unlikely(len > skb->len))
goto dishonest_packet_size;
len_before_trim = skb->len;
if (unlikely(pskb_trim(skb, len)))
goto packet_processed;
routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
skb);
wg_peer_put(routed_peer); /* We don't need the extra reference. */
if (unlikely(routed_peer != peer))
goto dishonest_packet_peer;
napi_gro_receive(&peer->napi, skb);
update_rx_stats(peer, message_data_len(len_before_trim));
return;
dishonest_packet_peer:
net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
dev->name, skb, peer->internal_id,
&peer->endpoint.addr);
++dev->stats.rx_errors;
++dev->stats.rx_frame_errors;
goto packet_processed;
dishonest_packet_type:
net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
dev->name, peer->internal_id, &peer->endpoint.addr);
++dev->stats.rx_errors;
++dev->stats.rx_frame_errors;
goto packet_processed;
dishonest_packet_size:
net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
dev->name, peer->internal_id, &peer->endpoint.addr);
++dev->stats.rx_errors;
++dev->stats.rx_length_errors;
goto packet_processed;
packet_processed:
dev_kfree_skb(skb);
}
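/* Decrypted (or dead) packets are drained from the per-peer rx ring in
* order by this NAPI handler: an entry is only consumed once its state
* has moved past PACKET_STATE_UNCRYPTED, so delivery order is preserved
* even though decryption itself runs out of order on multiple CPUs.
*/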
int wg_packet_rx_poll(struct napi_struct *napi, int budget)
{
struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
struct crypt_queue *queue = &peer->rx_queue;
struct noise_keypair *keypair;
struct endpoint endpoint;
enum packet_state state;
struct sk_buff *skb;
int work_done = 0;
bool free;
if (unlikely(budget <= 0))
return 0;
while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
(state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
PACKET_STATE_UNCRYPTED) {
__ptr_ring_discard_one(&queue->ring);
peer = PACKET_PEER(skb);
keypair = PACKET_CB(skb)->keypair;
free = true;
if (unlikely(state != PACKET_STATE_CRYPTED))
goto next;
if (unlikely(!counter_validate(&keypair->receiving_counter,
PACKET_CB(skb)->nonce))) {
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
peer->device->dev->name,
PACKET_CB(skb)->nonce,
keypair->receiving_counter.counter);
goto next;
}
if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
goto next;
wg_reset_packet(skb, false);
wg_packet_consume_data_done(peer, skb, &endpoint);
free = false;
next:
wg_noise_keypair_put(keypair, false);
wg_peer_put(peer);
if (unlikely(free))
dev_kfree_skb(skb);
if (++work_done >= budget)
break;
}
if (work_done < budget)
napi_complete_done(napi, work_done);
return work_done;
}
void wg_packet_decrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker,
work)->ptr;
simd_context_t simd_context;
struct sk_buff *skb;
simd_get(&simd_context);
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state =
likely(decrypt_packet(skb, PACKET_CB(skb)->keypair,
&simd_context)) ?
PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
wg_queue_enqueue_per_peer_napi(skb, state);
simd_relax(&simd_context);
}
simd_put(&simd_context);
}
static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
{
__le32 idx = ((struct message_data *)skb->data)->key_idx;
struct wg_peer *peer = NULL;
int ret;
rcu_read_lock_bh();
PACKET_CB(skb)->keypair =
(struct noise_keypair *)wg_index_hashtable_lookup(
wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
&peer);
if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
goto err_keypair;
if (unlikely(READ_ONCE(peer->is_dead)))
goto err;
ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
&peer->rx_queue, skb,
wg->packet_crypt_wq,
&wg->decrypt_queue.last_cpu);
if (unlikely(ret == -EPIPE))
wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
if (likely(!ret || ret == -EPIPE)) {
rcu_read_unlock_bh();
return;
}
err:
wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
err_keypair:
rcu_read_unlock_bh();
wg_peer_put(peer);
dev_kfree_skb(skb);
}
void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
{
if (unlikely(prepare_skb_header(skb, wg) < 0))
goto err;
switch (SKB_TYPE_LE32(skb)) {
case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
int cpu;
if (skb_queue_len(&wg->incoming_handshakes) >
MAX_QUEUED_INCOMING_HANDSHAKES ||
unlikely(!rng_is_initialized())) {
net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
wg->dev->name, skb);
goto err;
}
skb_queue_tail(&wg->incoming_handshakes, skb);
/* Queues up a call to wg_packet_handshake_receive_worker():
*/
cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
queue_work_on(cpu, wg->handshake_receive_wq,
&per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
break;
}
case cpu_to_le32(MESSAGE_DATA):
PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
wg_packet_consume_data(wg, skb);
break;
default:
WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
goto err;
}
return;
err:
dev_kfree_skb(skb);
}

683
net/wireguard/selftest/allowedips.c Normal file
View File

@ -0,0 +1,683 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*
* This contains some basic static unit tests for the allowedips data structure.
* It also has two additional modes that are disabled and meant to be used by
* folks directly playing with this file. If you define the macro
* DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
* memory, it will be printed out as KERN_DEBUG in a format that can be passed
* to graphviz (the dot command) to visualize it. If you define the macro
* DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
* randomized tests done against a trivial implementation, which may take
* upwards of a half-hour to complete. There's no set of users who should be
* enabling these, and the only developers that should go anywhere near these
* knobs are the ones who are reading this comment.
*/
#ifdef DEBUG
#include <linux/siphash.h>
static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
u8 cidr)
{
swap_endian(dst, src, bits);
memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
if (cidr)
dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
}
static __init void print_node(struct allowedips_node *node, u8 bits)
{
char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
char *fmt_declaration = KERN_DEBUG
"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
char *style = "dotted";
u8 ip1[16], ip2[16];
u32 color = 0;
if (bits == 32) {
fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
fmt_declaration = KERN_DEBUG
"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
} else if (bits == 128) {
fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
fmt_declaration = KERN_DEBUG
"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
}
if (node->peer) {
hsiphash_key_t key = { { 0 } };
memcpy(&key, &node->peer, sizeof(node->peer));
color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
hsiphash_1u32(0xabad1dea, &key) % 200;
style = "bold";
}
swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
printk(fmt_declaration, ip1, node->cidr, style, color);
if (node->bit[0]) {
swap_endian_and_apply_cidr(ip2,
rcu_dereference_raw(node->bit[0])->bits, bits,
node->cidr);
printk(fmt_connection, ip1, node->cidr, ip2,
rcu_dereference_raw(node->bit[0])->cidr);
print_node(rcu_dereference_raw(node->bit[0]), bits);
}
if (node->bit[1]) {
swap_endian_and_apply_cidr(ip2,
rcu_dereference_raw(node->bit[1])->bits,
bits, node->cidr);
printk(fmt_connection, ip1, node->cidr, ip2,
rcu_dereference_raw(node->bit[1])->cidr);
print_node(rcu_dereference_raw(node->bit[1]), bits);
}
}
static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
{
printk(KERN_DEBUG "digraph trie {\n");
print_node(rcu_dereference_raw(top), bits);
printk(KERN_DEBUG "}\n");
}
enum {
NUM_PEERS = 2000,
NUM_RAND_ROUTES = 400,
NUM_MUTATED_ROUTES = 100,
NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
};
struct horrible_allowedips {
struct hlist_head head;
};
struct horrible_allowedips_node {
struct hlist_node table;
union nf_inet_addr ip;
union nf_inet_addr mask;
u8 ip_version;
void *value;
};
static __init void horrible_allowedips_init(struct horrible_allowedips *table)
{
INIT_HLIST_HEAD(&table->head);
}
static __init void horrible_allowedips_free(struct horrible_allowedips *table)
{
struct horrible_allowedips_node *node;
struct hlist_node *h;
hlist_for_each_entry_safe(node, h, &table->head, table) {
hlist_del(&node->table);
kfree(node);
}
}
static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
{
union nf_inet_addr mask;
memset(&mask, 0x00, 128 / 8);
memset(&mask, 0xff, cidr / 8);
if (cidr % 32)
mask.all[cidr / 32] = (__force u32)htonl(
(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
return mask;
}
static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
{
return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
hweight32(subnet.all[2]) + hweight32(subnet.all[3]);
}
static __init inline void
horrible_mask_self(struct horrible_allowedips_node *node)
{
if (node->ip_version == 4) {
node->ip.ip &= node->mask.ip;
} else if (node->ip_version == 6) {
node->ip.ip6[0] &= node->mask.ip6[0];
node->ip.ip6[1] &= node->mask.ip6[1];
node->ip.ip6[2] &= node->mask.ip6[2];
node->ip.ip6[3] &= node->mask.ip6[3];
}
}
static __init inline bool
horrible_match_v4(const struct horrible_allowedips_node *node,
struct in_addr *ip)
{
return (ip->s_addr & node->mask.ip) == node->ip.ip;
}
static __init inline bool
horrible_match_v6(const struct horrible_allowedips_node *node,
struct in6_addr *ip)
{
return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
node->ip.ip6[0] &&
(ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
node->ip.ip6[1] &&
(ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
node->ip.ip6[2] &&
(ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
}
static __init void
horrible_insert_ordered(struct horrible_allowedips *table,
struct horrible_allowedips_node *node)
{
struct horrible_allowedips_node *other = NULL, *where = NULL;
u8 my_cidr = horrible_mask_to_cidr(node->mask);
hlist_for_each_entry(other, &table->head, table) {
if (!memcmp(&other->mask, &node->mask,
sizeof(union nf_inet_addr)) &&
!memcmp(&other->ip, &node->ip,
sizeof(union nf_inet_addr)) &&
other->ip_version == node->ip_version) {
other->value = node->value;
kfree(node);
return;
}
where = other;
if (horrible_mask_to_cidr(other->mask) <= my_cidr)
break;
}
if (!other && !where)
hlist_add_head(&node->table, &table->head);
else if (!other)
hlist_add_behind(&node->table, &where->table);
else
hlist_add_before(&node->table, &where->table);
}
static __init int
horrible_allowedips_insert_v4(struct horrible_allowedips *table,
struct in_addr *ip, u8 cidr, void *value)
{
struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
GFP_KERNEL);
if (unlikely(!node))
return -ENOMEM;
node->ip.in = *ip;
node->mask = horrible_cidr_to_mask(cidr);
node->ip_version = 4;
node->value = value;
horrible_mask_self(node);
horrible_insert_ordered(table, node);
return 0;
}
static __init int
horrible_allowedips_insert_v6(struct horrible_allowedips *table,
struct in6_addr *ip, u8 cidr, void *value)
{
struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
GFP_KERNEL);
if (unlikely(!node))
return -ENOMEM;
node->ip.in6 = *ip;
node->mask = horrible_cidr_to_mask(cidr);
node->ip_version = 6;
node->value = value;
horrible_mask_self(node);
horrible_insert_ordered(table, node);
return 0;
}
static __init void *
horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
struct in_addr *ip)
{
struct horrible_allowedips_node *node;
void *ret = NULL;
hlist_for_each_entry(node, &table->head, table) {
if (node->ip_version != 4)
continue;
if (horrible_match_v4(node, ip)) {
ret = node->value;
break;
}
}
return ret;
}
static __init void *
horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
struct in6_addr *ip)
{
struct horrible_allowedips_node *node;
void *ret = NULL;
hlist_for_each_entry(node, &table->head, table) {
if (node->ip_version != 6)
continue;
if (horrible_match_v6(node, ip)) {
ret = node->value;
break;
}
}
return ret;
}
static __init bool randomized_test(void)
{
unsigned int i, j, k, mutate_amount, cidr;
u8 ip[16], mutate_mask[16], mutated[16];
struct wg_peer **peers, *peer;
struct horrible_allowedips h;
DEFINE_MUTEX(mutex);
struct allowedips t;
bool ret = false;
mutex_init(&mutex);
wg_allowedips_init(&t);
horrible_allowedips_init(&h);
peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
if (unlikely(!peers)) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free;
}
for (i = 0; i < NUM_PEERS; ++i) {
peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
if (unlikely(!peers[i])) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free;
}
kref_init(&peers[i]->refcount);
}
mutex_lock(&mutex);
for (i = 0; i < NUM_RAND_ROUTES; ++i) {
prandom_bytes(ip, 4);
cidr = prandom_u32_max(32) + 1;
peer = peers[prandom_u32_max(NUM_PEERS)];
if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
peer, &mutex) < 0) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
cidr, peer) < 0) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
memcpy(mutated, ip, 4);
prandom_bytes(mutate_mask, 4);
mutate_amount = prandom_u32_max(32);
for (k = 0; k < mutate_amount / 8; ++k)
mutate_mask[k] = 0xff;
mutate_mask[k] = 0xff
<< ((8 - (mutate_amount % 8)) % 8);
for (; k < 4; ++k)
mutate_mask[k] = 0;
for (k = 0; k < 4; ++k)
mutated[k] = (mutated[k] & mutate_mask[k]) |
(~mutate_mask[k] &
prandom_u32_max(256));
cidr = prandom_u32_max(32) + 1;
peer = peers[prandom_u32_max(NUM_PEERS)];
if (wg_allowedips_insert_v4(&t,
(struct in_addr *)mutated,
cidr, peer, &mutex) < 0) {
pr_err("allowedips random malloc: FAIL\n");
goto free_locked;
}
if (horrible_allowedips_insert_v4(&h,
(struct in_addr *)mutated, cidr, peer)) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
}
}
for (i = 0; i < NUM_RAND_ROUTES; ++i) {
prandom_bytes(ip, 16);
cidr = prandom_u32_max(128) + 1;
peer = peers[prandom_u32_max(NUM_PEERS)];
if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
peer, &mutex) < 0) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
cidr, peer) < 0) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
memcpy(mutated, ip, 16);
prandom_bytes(mutate_mask, 16);
mutate_amount = prandom_u32_max(128);
for (k = 0; k < mutate_amount / 8; ++k)
mutate_mask[k] = 0xff;
mutate_mask[k] = 0xff
<< ((8 - (mutate_amount % 8)) % 8);
for (; k < 4; ++k)
mutate_mask[k] = 0;
for (k = 0; k < 4; ++k)
mutated[k] = (mutated[k] & mutate_mask[k]) |
(~mutate_mask[k] &
prandom_u32_max(256));
cidr = prandom_u32_max(128) + 1;
peer = peers[prandom_u32_max(NUM_PEERS)];
if (wg_allowedips_insert_v6(&t,
(struct in6_addr *)mutated,
cidr, peer, &mutex) < 0) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
if (horrible_allowedips_insert_v6(
&h, (struct in6_addr *)mutated, cidr,
peer)) {
pr_err("allowedips random self-test malloc: FAIL\n");
goto free_locked;
}
}
}
mutex_unlock(&mutex);
if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
print_tree(t.root4, 32);
print_tree(t.root6, 128);
}
for (i = 0; i < NUM_QUERIES; ++i) {
prandom_bytes(ip, 4);
if (lookup(t.root4, 32, ip) !=
horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
pr_err("allowedips random self-test: FAIL\n");
goto free;
}
}
for (i = 0; i < NUM_QUERIES; ++i) {
prandom_bytes(ip, 16);
if (lookup(t.root6, 128, ip) !=
horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
pr_err("allowedips random self-test: FAIL\n");
goto free;
}
}
ret = true;
free:
mutex_lock(&mutex);
free_locked:
wg_allowedips_free(&t, &mutex);
mutex_unlock(&mutex);
horrible_allowedips_free(&h);
if (peers) {
for (i = 0; i < NUM_PEERS; ++i)
kfree(peers[i]);
}
kfree(peers);
return ret;
}
static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
{
static struct in_addr ip;
u8 *split = (u8 *)&ip;
split[0] = a;
split[1] = b;
split[2] = c;
split[3] = d;
return &ip;
}
static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
{
static struct in6_addr ip;
__be32 *split = (__be32 *)&ip;
split[0] = cpu_to_be32(a);
split[1] = cpu_to_be32(b);
split[2] = cpu_to_be32(c);
split[3] = cpu_to_be32(d);
return &ip;
}
static __init struct wg_peer *init_peer(void)
{
struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);
if (!peer)
return NULL;
kref_init(&peer->refcount);
INIT_LIST_HEAD(&peer->allowedips_list);
return peer;
}
#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
cidr, mem, &mutex)
#define maybe_fail() do { \
++i; \
if (!_s) { \
pr_info("allowedips self-test %zu: FAIL\n", i); \
success = false; \
} \
} while (0)
#define test(version, mem, ipa, ipb, ipc, ipd) do { \
bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
ip##version(ipa, ipb, ipc, ipd)) == (mem); \
maybe_fail(); \
} while (0)
#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
ip##version(ipa, ipb, ipc, ipd)) != (mem); \
maybe_fail(); \
} while (0)
#define test_boolean(cond) do { \
bool _s = (cond); \
maybe_fail(); \
} while (0)
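/* test() looks an address up in the trie and counts a failure if the
* returned peer is not the expected one, test_negative() expects the
* lookup not to return that peer, and test_boolean() checks an arbitrary
* condition; each bumps the running test index i via maybe_fail().
*/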
bool __init wg_allowedips_selftest(void)
{
bool found_a = false, found_b = false, found_c = false, found_d = false,
found_e = false, found_other = false;
struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
*d = init_peer(), *e = init_peer(), *f = init_peer(),
*g = init_peer(), *h = init_peer();
struct allowedips_node *iter_node;
bool success = false;
struct allowedips t;
DEFINE_MUTEX(mutex);
struct in6_addr ip;
size_t i = 0, count = 0;
__be64 part;
mutex_init(&mutex);
mutex_lock(&mutex);
wg_allowedips_init(&t);
if (!a || !b || !c || !d || !e || !f || !g || !h) {
pr_err("allowedips self-test malloc: FAIL\n");
goto free;
}
insert(4, a, 192, 168, 4, 0, 24);
insert(4, b, 192, 168, 4, 4, 32);
insert(4, c, 192, 168, 0, 0, 16);
insert(4, d, 192, 95, 5, 64, 27);
/* replaces previous entry, and maskself is required */
insert(4, c, 192, 95, 5, 65, 27);
insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
insert(4, e, 0, 0, 0, 0, 0);
insert(6, e, 0, 0, 0, 0, 0);
/* replaces previous entry */
insert(6, f, 0, 0, 0, 0, 0);
insert(6, g, 0x24046800, 0, 0, 0, 32);
/* maskself is required */
insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
insert(4, g, 64, 15, 112, 0, 20);
/* maskself is required */
insert(4, h, 64, 15, 123, 211, 25);
insert(4, a, 10, 0, 0, 0, 25);
insert(4, b, 10, 0, 0, 128, 25);
insert(4, a, 10, 1, 0, 0, 30);
insert(4, b, 10, 1, 0, 4, 30);
insert(4, c, 10, 1, 0, 8, 29);
insert(4, d, 10, 1, 0, 16, 29);
if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
print_tree(t.root4, 32);
print_tree(t.root6, 128);
}
success = true;
test(4, a, 192, 168, 4, 20);
test(4, a, 192, 168, 4, 0);
test(4, b, 192, 168, 4, 4);
test(4, c, 192, 168, 200, 182);
test(4, c, 192, 95, 5, 68);
test(4, e, 192, 95, 5, 96);
test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
test(6, f, 0x26075300, 0x60006b01, 0, 0);
test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
test(6, h, 0x24046800, 0x40040800, 0, 0);
test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
test(4, g, 64, 15, 116, 26);
test(4, g, 64, 15, 127, 3);
test(4, g, 64, 15, 123, 1);
test(4, h, 64, 15, 123, 128);
test(4, h, 64, 15, 123, 129);
test(4, a, 10, 0, 0, 52);
test(4, b, 10, 0, 0, 220);
test(4, a, 10, 1, 0, 2);
test(4, b, 10, 1, 0, 6);
test(4, c, 10, 1, 0, 10);
test(4, d, 10, 1, 0, 20);
insert(4, a, 1, 0, 0, 0, 32);
insert(4, a, 64, 0, 0, 0, 32);
insert(4, a, 128, 0, 0, 0, 32);
insert(4, a, 192, 0, 0, 0, 32);
insert(4, a, 255, 0, 0, 0, 32);
wg_allowedips_remove_by_peer(&t, a, &mutex);
test_negative(4, a, 1, 0, 0, 0);
test_negative(4, a, 64, 0, 0, 0);
test_negative(4, a, 128, 0, 0, 0);
test_negative(4, a, 192, 0, 0, 0);
test_negative(4, a, 255, 0, 0, 0);
wg_allowedips_free(&t, &mutex);
wg_allowedips_init(&t);
insert(4, a, 192, 168, 0, 0, 16);
insert(4, a, 192, 168, 0, 0, 24);
wg_allowedips_remove_by_peer(&t, a, &mutex);
test_negative(4, a, 192, 168, 0, 1);
/* These will hit the WARN_ON(len >= 128) in free_node if something
* goes wrong.
*/
for (i = 0; i < 128; ++i) {
part = cpu_to_be64(~(1LLU << (i % 64)));
memset(&ip, 0xff, 16);
memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
}
wg_allowedips_free(&t, &mutex);
wg_allowedips_init(&t);
insert(4, a, 192, 95, 5, 93, 27);
insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
insert(4, a, 10, 1, 0, 20, 29);
insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
u8 cidr, ip[16] __aligned(__alignof(u64));
int family = wg_allowedips_read_node(iter_node, ip, &cidr);
count++;
if (cidr == 27 && family == AF_INET &&
!memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
found_a = true;
else if (cidr == 128 && family == AF_INET6 &&
!memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
sizeof(struct in6_addr)))
found_b = true;
else if (cidr == 29 && family == AF_INET &&
!memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
found_c = true;
else if (cidr == 83 && family == AF_INET6 &&
!memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
sizeof(struct in6_addr)))
found_d = true;
else if (cidr == 21 && family == AF_INET6 &&
!memcmp(ip, ip6(0x26075000, 0, 0, 0),
sizeof(struct in6_addr)))
found_e = true;
else
found_other = true;
}
test_boolean(count == 5);
test_boolean(found_a);
test_boolean(found_b);
test_boolean(found_c);
test_boolean(found_d);
test_boolean(found_e);
test_boolean(!found_other);
if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
success = randomized_test();
if (success)
pr_info("allowedips self-tests: pass\n");
free:
wg_allowedips_free(&t, &mutex);
kfree(a);
kfree(b);
kfree(c);
kfree(d);
kfree(e);
kfree(f);
kfree(g);
kfree(h);
mutex_unlock(&mutex);
return success;
}
#undef test_negative
#undef test
#undef remove
#undef insert
#undef init_peer
#endif

111
net/wireguard/selftest/counter.c Normal file
View File

@ -0,0 +1,111 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifdef DEBUG
bool __init wg_packet_counter_selftest(void)
{
struct noise_replay_counter *counter;
unsigned int test_num = 0, i;
bool success = true;
counter = kmalloc(sizeof(*counter), GFP_KERNEL);
if (unlikely(!counter)) {
pr_err("nonce counter self-test malloc: FAIL\n");
return false;
}
#define T_INIT do { \
memset(counter, 0, sizeof(*counter)); \
spin_lock_init(&counter->lock); \
} while (0)
#define T_LIM (COUNTER_WINDOW_SIZE + 1)
#define T(n, v) do { \
++test_num; \
if (counter_validate(counter, n) != (v)) { \
pr_err("nonce counter self-test %u: FAIL\n", \
test_num); \
success = false; \
} \
} while (0)
T_INIT;
/* 1 */ T(0, true);
/* 2 */ T(1, true);
/* 3 */ T(1, false);
/* 4 */ T(9, true);
/* 5 */ T(8, true);
/* 6 */ T(7, true);
/* 7 */ T(7, false);
/* 8 */ T(T_LIM, true);
/* 9 */ T(T_LIM - 1, true);
/* 10 */ T(T_LIM - 1, false);
/* 11 */ T(T_LIM - 2, true);
/* 12 */ T(2, true);
/* 13 */ T(2, false);
/* 14 */ T(T_LIM + 16, true);
/* 15 */ T(3, false);
/* 16 */ T(T_LIM + 16, false);
/* 17 */ T(T_LIM * 4, true);
/* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
/* 19 */ T(10, false);
/* 20 */ T(T_LIM * 4 - T_LIM, false);
/* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
/* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
/* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
/* 24 */ T(0, false);
/* 25 */ T(REJECT_AFTER_MESSAGES, false);
/* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
/* 27 */ T(REJECT_AFTER_MESSAGES, false);
/* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
/* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
/* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
/* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
/* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
/* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
/* 34 */ T(0, false);
T_INIT;
for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
T(i, true);
T(0, true);
T(0, false);
T_INIT;
for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
T(i, true);
T(1, true);
T(0, false);
T_INIT;
for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
T(i, true);
T_INIT;
for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
T(i, true);
T(0, false);
T_INIT;
for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
T(i, true);
T(COUNTER_WINDOW_SIZE + 1, true);
T(0, false);
T_INIT;
for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
T(i, true);
T(0, true);
T(COUNTER_WINDOW_SIZE + 1, true);
#undef T
#undef T_LIM
#undef T_INIT
if (success)
pr_info("nonce counter self-tests: pass\n");
kfree(counter);
return success;
}
#endif

226
net/wireguard/selftest/ratelimiter.c Normal file
View File

@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#ifdef DEBUG
#include <linux/jiffies.h>
static const struct {
bool result;
unsigned int msec_to_sleep_before;
} expected_results[] __initconst = {
[0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
[PACKETS_BURSTABLE] = { false, 0 },
[PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
[PACKETS_BURSTABLE + 2] = { false, 0 },
[PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
[PACKETS_BURSTABLE + 4] = { true, 0 },
[PACKETS_BURSTABLE + 5] = { false, 0 }
};
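/* This schedule encodes the token bucket from ratelimiter.c: the first
* PACKETS_BURSTABLE (5) packets pass immediately, the 6th is dropped,
* then sleeping MSEC_PER_SEC / PACKETS_PER_SECOND (50 ms) earns exactly
* one more token, sleeping 100 ms earns two, and so on.
*/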
static __init unsigned int maximum_jiffies_at_index(int index)
{
unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
int i;
for (i = 0; i <= index; ++i)
total_msecs += expected_results[i].msec_to_sleep_before;
return msecs_to_jiffies(total_msecs);
}
static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
struct sk_buff *skb6, struct ipv6hdr *hdr6,
int *test)
{
unsigned long loop_start_time;
int i;
wg_ratelimiter_gc_entries(NULL);
rcu_barrier();
loop_start_time = jiffies;
for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
if (expected_results[i].msec_to_sleep_before)
msleep(expected_results[i].msec_to_sleep_before);
if (time_is_before_jiffies(loop_start_time +
maximum_jiffies_at_index(i)))
return -ETIMEDOUT;
if (wg_ratelimiter_allow(skb4, &init_net) !=
expected_results[i].result)
return -EXFULL;
++(*test);
hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
if (time_is_before_jiffies(loop_start_time +
maximum_jiffies_at_index(i)))
return -ETIMEDOUT;
if (!wg_ratelimiter_allow(skb4, &init_net))
return -EXFULL;
++(*test);
hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
#if IS_ENABLED(CONFIG_IPV6)
hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
if (time_is_before_jiffies(loop_start_time +
maximum_jiffies_at_index(i)))
return -ETIMEDOUT;
if (wg_ratelimiter_allow(skb6, &init_net) !=
expected_results[i].result)
return -EXFULL;
++(*test);
hdr6->saddr.in6_u.u6_addr32[0] =
htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
if (time_is_before_jiffies(loop_start_time +
maximum_jiffies_at_index(i)))
return -ETIMEDOUT;
if (!wg_ratelimiter_allow(skb6, &init_net))
return -EXFULL;
++(*test);
hdr6->saddr.in6_u.u6_addr32[0] =
htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
if (time_is_before_jiffies(loop_start_time +
maximum_jiffies_at_index(i)))
return -ETIMEDOUT;
#endif
}
return 0;
}
static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
int *test)
{
int i;
wg_ratelimiter_gc_entries(NULL);
rcu_barrier();
if (atomic_read(&total_entries))
return -EXFULL;
++(*test);
for (i = 0; i <= max_entries; ++i) {
hdr4->saddr = htonl(i);
if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries))
return -EXFULL;
++(*test);
}
return 0;
}
bool __init wg_ratelimiter_selftest(void)
{
enum { TRIALS_BEFORE_GIVING_UP = 5000 };
bool success = false;
int test = 0, trials;
struct sk_buff *skb4, *skb6 = NULL;
struct iphdr *hdr4;
struct ipv6hdr *hdr6 = NULL;
if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
return true;
BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
if (wg_ratelimiter_init())
goto out;
++test;
if (wg_ratelimiter_init()) {
wg_ratelimiter_uninit();
goto out;
}
++test;
if (wg_ratelimiter_init()) {
wg_ratelimiter_uninit();
wg_ratelimiter_uninit();
goto out;
}
++test;
skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
if (unlikely(!skb4))
goto err_nofree;
skb4->protocol = htons(ETH_P_IP);
hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
hdr4->saddr = htonl(8182);
skb_reset_network_header(skb4);
++test;
#if IS_ENABLED(CONFIG_IPV6)
skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
if (unlikely(!skb6)) {
kfree_skb(skb4);
goto err_nofree;
}
skb6->protocol = htons(ETH_P_IPV6);
hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
skb_reset_network_header(skb6);
++test;
#endif
for (trials = TRIALS_BEFORE_GIVING_UP;;) {
int test_count = 0, ret;
ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
if (ret == -ETIMEDOUT) {
if (!trials--) {
test += test_count;
goto err;
}
msleep(500);
continue;
} else if (ret < 0) {
test += test_count;
goto err;
} else {
test += test_count;
break;
}
}
for (trials = TRIALS_BEFORE_GIVING_UP;;) {
int test_count = 0;
if (capacity_test(skb4, hdr4, &test_count) < 0) {
if (!trials--) {
test += test_count;
goto err;
}
msleep(50);
continue;
}
test += test_count;
break;
}
success = true;
err:
kfree_skb(skb4);
#if IS_ENABLED(CONFIG_IPV6)
kfree_skb(skb6);
#endif
err_nofree:
wg_ratelimiter_uninit();
wg_ratelimiter_uninit();
wg_ratelimiter_uninit();
/* Uninit one extra time to check underflow detection. */
wg_ratelimiter_uninit();
out:
if (success)
pr_info("ratelimiter self-tests: pass\n");
else
pr_err("ratelimiter self-test %d: FAIL\n", test);
return success;
}
#endif

429
net/wireguard/send.c Normal file
View File

@ -0,0 +1,429 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "queueing.h"
#include "timers.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "messages.h"
#include "cookie.h"
#include <linux/simd.h>
#include <linux/uio.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/sock.h>
static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
{
struct message_handshake_initiation packet;
if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
REKEY_TIMEOUT))
return; /* This function is rate limited. */
atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
peer->device->dev->name, peer->internal_id,
&peer->endpoint.addr);
if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
wg_timers_any_authenticated_packet_traversal(peer);
wg_timers_any_authenticated_packet_sent(peer);
atomic64_set(&peer->last_sent_handshake,
ktime_get_coarse_boottime_ns());
wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
HANDSHAKE_DSCP);
wg_timers_handshake_initiated(peer);
}
}
void wg_packet_handshake_send_worker(struct work_struct *work)
{
struct wg_peer *peer = container_of(work, struct wg_peer,
transmit_handshake_work);
wg_packet_send_handshake_initiation(peer);
wg_peer_put(peer);
}
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
bool is_retry)
{
if (!is_retry)
peer->timer_handshake_attempts = 0;
rcu_read_lock_bh();
/* We check last_sent_handshake here in addition to the actual function
* we're queueing up, so that we don't queue things if not strictly
* necessary:
*/
if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
REKEY_TIMEOUT) ||
unlikely(READ_ONCE(peer->is_dead)))
goto out;
wg_peer_get(peer);
/* Queues up calling wg_packet_handshake_send_worker(peer), where we do
* a wg_peer_put(peer) after:
*/
if (!queue_work(peer->device->handshake_send_wq,
&peer->transmit_handshake_work))
/* If the work was already queued, we want to drop the
* extra reference:
*/
wg_peer_put(peer);
out:
rcu_read_unlock_bh();
}
void wg_packet_send_handshake_response(struct wg_peer *peer)
{
struct message_handshake_response packet;
atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
peer->device->dev->name, peer->internal_id,
&peer->endpoint.addr);
if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
if (wg_noise_handshake_begin_session(&peer->handshake,
&peer->keypairs)) {
wg_timers_session_derived(peer);
wg_timers_any_authenticated_packet_traversal(peer);
wg_timers_any_authenticated_packet_sent(peer);
atomic64_set(&peer->last_sent_handshake,
ktime_get_coarse_boottime_ns());
wg_socket_send_buffer_to_peer(peer, &packet,
sizeof(packet),
HANDSHAKE_DSCP);
}
}
}
void wg_packet_send_handshake_cookie(struct wg_device *wg,
struct sk_buff *initiating_skb,
__le32 sender_index)
{
struct message_handshake_cookie packet;
net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
wg->dev->name, initiating_skb);
wg_cookie_message_create(&packet, initiating_skb, sender_index,
&wg->cookie_checker);
wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
sizeof(packet));
}
static void keep_key_fresh(struct wg_peer *peer)
{
struct noise_keypair *keypair;
bool send;
rcu_read_lock_bh();
keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
send = keypair && READ_ONCE(keypair->sending.is_valid) &&
(atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
(keypair->i_am_the_initiator &&
wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
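/* With the standard protocol constants this fires after more than
* REKEY_AFTER_MESSAGES (2^60) packets have been sent under the key, or,
* for the initiator only, once the key is older than REKEY_AFTER_TIME
* (120 s).
*/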
rcu_read_unlock_bh();
if (unlikely(send))
wg_packet_send_queued_handshake_initiation(peer, false);
}
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
unsigned int padded_size, last_unit = skb->len;
if (unlikely(!PACKET_CB(skb)->mtu))
return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
/* We do this modulo business with the MTU, just in case the networking
* layer gives us a packet that's bigger than the MTU. In that case, we
* wouldn't want the final subtraction to overflow in the case of the
* padded_size being clamped. Fortunately, that's very rarely the case,
* so we optimize for that not happening.
*/
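/* For instance, with the usual 16-byte MESSAGE_PADDING_MULTIPLE and an
* MTU of 1420, a 1400-byte packet is padded up to 1408 (8 bytes), while
* a 1419-byte packet gets only 1 byte, since the padded size is clamped
* so it never exceeds the MTU.
*/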
if (unlikely(last_unit > PACKET_CB(skb)->mtu))
last_unit %= PACKET_CB(skb)->mtu;
padded_size = min(PACKET_CB(skb)->mtu,
ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
return padded_size - last_unit;
}
static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
simd_context_t *simd_context)
{
unsigned int padding_len, plaintext_len, trailer_len;
struct scatterlist sg[MAX_SKB_FRAGS + 8];
struct message_data *header;
struct sk_buff *trailer;
int num_frags;
/* Force hash calculation before encryption so that flow analysis is
* consistent over the inner packet.
*/
skb_get_hash(skb);
/* Calculate lengths. */
padding_len = calculate_skb_padding(skb);
trailer_len = padding_len + noise_encrypted_len(0);
plaintext_len = skb->len + padding_len;
/* Expand data section to have room for padding and auth tag. */
num_frags = skb_cow_data(skb, trailer_len, &trailer);
if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
return false;
/* Set the padding to zeros, and make sure it and the auth tag are part
* of the skb.
*/
memset(skb_tail_pointer(trailer), 0, padding_len);
/* Expand head section to have room for our header and the network
* stack's headers.
*/
if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
return false;
/* Finalize checksum calculation for the inner packet, if required. */
if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(skb)))
return false;
/* Only after checksumming can we safely add on the padding at the end
* and the header.
*/
skb_set_inner_network_header(skb, 0);
header = (struct message_data *)skb_push(skb, sizeof(*header));
header->header.type = cpu_to_le32(MESSAGE_DATA);
header->key_idx = keypair->remote_index;
header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
pskb_put(skb, trailer, trailer_len);
/* Now we can encrypt the scattergather segments */
sg_init_table(sg, num_frags);
if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
noise_encrypted_len(plaintext_len)) <= 0)
return false;
return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
PACKET_CB(skb)->nonce,
keypair->sending.key,
simd_context);
}
void wg_packet_send_keepalive(struct wg_peer *peer)
{
struct sk_buff *skb;
if (skb_queue_empty(&peer->staged_packet_queue)) {
skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
GFP_ATOMIC);
if (unlikely(!skb))
return;
skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
skb->dev = peer->device->dev;
PACKET_CB(skb)->mtu = skb->dev->mtu;
skb_queue_tail(&peer->staged_packet_queue, skb);
net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
peer->device->dev->name, peer->internal_id,
&peer->endpoint.addr);
}
wg_packet_send_staged_packets(peer);
}
static void wg_packet_create_data_done(struct sk_buff *first,
struct wg_peer *peer)
{
struct sk_buff *skb, *next;
bool is_keepalive, data_sent = false;
wg_timers_any_authenticated_packet_traversal(peer);
wg_timers_any_authenticated_packet_sent(peer);
skb_list_walk_safe(first, skb, next) {
is_keepalive = skb->len == message_data_len(0);
if (likely(!wg_socket_send_skb_to_peer(peer, skb,
PACKET_CB(skb)->ds) && !is_keepalive))
data_sent = true;
}
if (likely(data_sent))
wg_timers_data_sent(peer);
keep_key_fresh(peer);
}
void wg_packet_tx_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct crypt_queue,
work);
struct noise_keypair *keypair;
enum packet_state state;
struct sk_buff *first;
struct wg_peer *peer;
while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
(state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
PACKET_STATE_UNCRYPTED) {
__ptr_ring_discard_one(&queue->ring);
peer = PACKET_PEER(first);
keypair = PACKET_CB(first)->keypair;
if (likely(state == PACKET_STATE_CRYPTED))
wg_packet_create_data_done(first, peer);
else
kfree_skb_list(first);
wg_noise_keypair_put(keypair, false);
wg_peer_put(peer);
if (need_resched())
cond_resched();
}
}
void wg_packet_encrypt_worker(struct work_struct *work)
{
struct crypt_queue *queue = container_of(work, struct multicore_worker,
work)->ptr;
struct sk_buff *first, *skb, *next;
simd_context_t simd_context;
simd_get(&simd_context);
while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = PACKET_STATE_CRYPTED;
skb_list_walk_safe(first, skb, next) {
if (likely(encrypt_packet(skb,
PACKET_CB(first)->keypair,
&simd_context))) {
wg_reset_packet(skb, true);
} else {
state = PACKET_STATE_DEAD;
break;
}
}
wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
state);
simd_relax(&simd_context);
}
simd_put(&simd_context);
}
static void wg_packet_create_data(struct sk_buff *first)
{
struct wg_peer *peer = PACKET_PEER(first);
struct wg_device *wg = peer->device;
int ret = -EINVAL;
rcu_read_lock_bh();
if (unlikely(READ_ONCE(peer->is_dead)))
goto err;
ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
&peer->tx_queue, first,
wg->packet_crypt_wq,
&wg->encrypt_queue.last_cpu);
if (unlikely(ret == -EPIPE))
wg_queue_enqueue_per_peer(&peer->tx_queue, first,
PACKET_STATE_DEAD);
err:
rcu_read_unlock_bh();
if (likely(!ret || ret == -EPIPE))
return;
wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
wg_peer_put(peer);
kfree_skb_list(first);
}
void wg_packet_purge_staged_packets(struct wg_peer *peer)
{
spin_lock_bh(&peer->staged_packet_queue.lock);
peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
__skb_queue_purge(&peer->staged_packet_queue);
spin_unlock_bh(&peer->staged_packet_queue.lock);
}
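/* Main transmit path: steal everything currently staged, check that we
 * hold a valid and unexpired sending key, assign a nonce to every
 * packet, and pass the bundle on for encryption. On any key problem the
 * packets are re-staged and a handshake is initiated instead.
 */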
void wg_packet_send_staged_packets(struct wg_peer *peer)
{
struct noise_keypair *keypair;
struct sk_buff_head packets;
struct sk_buff *skb;
/* Steal the current queue into our local one. */
__skb_queue_head_init(&packets);
spin_lock_bh(&peer->staged_packet_queue.lock);
skb_queue_splice_init(&peer->staged_packet_queue, &packets);
spin_unlock_bh(&peer->staged_packet_queue.lock);
if (unlikely(skb_queue_empty(&packets)))
return;
/* First we make sure we have a valid reference to a valid key. */
rcu_read_lock_bh();
keypair = wg_noise_keypair_get(
rcu_dereference_bh(peer->keypairs.current_keypair));
rcu_read_unlock_bh();
if (unlikely(!keypair))
goto out_nokey;
if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
goto out_nokey;
if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
REJECT_AFTER_TIME)))
goto out_invalid;
/* After we know we have a somewhat valid key, we now try to assign
* nonces to all of the packets in the queue. If we can't assign nonces
* for all of them, we just consider it a failure and wait for the next
* handshake.
*/
skb_queue_walk(&packets, skb) {
/* 0 for no outer TOS: no leak. TODO: at some later point, we
* might consider using flowi->tos as outer instead.
*/
PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
PACKET_CB(skb)->nonce =
atomic64_inc_return(&keypair->sending_counter) - 1;
if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
goto out_invalid;
}
packets.prev->next = NULL;
wg_peer_get(keypair->entry.peer);
PACKET_CB(packets.next)->keypair = keypair;
wg_packet_create_data(packets.next);
return;
out_invalid:
WRITE_ONCE(keypair->sending.is_valid, false);
out_nokey:
wg_noise_keypair_put(keypair, false);
/* We orphan the packets if we're waiting on a handshake, so that they
* don't block a socket's pool.
*/
skb_queue_walk(&packets, skb)
skb_orphan(skb);
/* Then we put them back on the top of the queue. We're not too
* concerned about accidentally getting things a little out of order if
* packets are being added really fast, because this queue is for before
* packets can even be sent and it's small anyway.
*/
spin_lock_bh(&peer->staged_packet_queue.lock);
skb_queue_splice(&packets, &peer->staged_packet_queue);
spin_unlock_bh(&peer->staged_packet_queue.lock);
/* If we're exiting because there's something wrong with the key, it
* means we should initiate a new handshake.
*/
wg_packet_send_queued_handshake_initiation(peer, false);
}

436
net/wireguard/socket.c Normal file
View File

@ -0,0 +1,436 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "queueing.h"
#include "messages.h"
#include <linux/ctype.h>
#include <linux/net.h>
#include <linux/if_vlan.h>
#include <linux/if_ether.h>
#include <linux/inetdevice.h>
#include <net/udp_tunnel.h>
#include <net/ipv6.h>
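/* Route and transmit one skb to the peer's IPv4 endpoint through the
 * device's UDP socket. The optional dst_cache avoids a route lookup per
 * packet; if the cached source address or interface no longer matches,
 * it is cleared and the lookup is redone.
 */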
static int send4(struct wg_device *wg, struct sk_buff *skb,
struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
{
struct flowi4 fl = {
.saddr = endpoint->src4.s_addr,
.daddr = endpoint->addr4.sin_addr.s_addr,
.fl4_dport = endpoint->addr4.sin_port,
.flowi4_mark = wg->fwmark,
.flowi4_proto = IPPROTO_UDP
};
struct rtable *rt = NULL;
struct sock *sock;
int ret = 0;
skb_mark_not_on_list(skb);
skb->dev = wg->dev;
skb->mark = wg->fwmark;
rcu_read_lock_bh();
sock = rcu_dereference_bh(wg->sock4);
if (unlikely(!sock)) {
ret = -ENONET;
goto err;
}
fl.fl4_sport = inet_sk(sock)->inet_sport;
if (cache)
rt = dst_cache_get_ip4(cache, &fl.saddr);
if (!rt) {
security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
fl.saddr, RT_SCOPE_HOST))) {
endpoint->src4.s_addr = 0;
*(__force __be32 *)&endpoint->src_if4 = 0;
fl.saddr = 0;
if (cache)
dst_cache_reset(cache);
}
rt = ip_route_output_flow(sock_net(sock), &fl, sock);
if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
rt->dst.dev->ifindex != endpoint->src_if4)))) {
endpoint->src4.s_addr = 0;
*(__force __be32 *)&endpoint->src_if4 = 0;
fl.saddr = 0;
if (cache)
dst_cache_reset(cache);
if (!IS_ERR(rt))
ip_rt_put(rt);
rt = ip_route_output_flow(sock_net(sock), &fl, sock);
}
if (unlikely(IS_ERR(rt))) {
ret = PTR_ERR(rt);
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret);
goto err;
}
if (cache)
dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
}
skb->ignore_df = 1;
udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
fl.fl4_dport, false, false);
goto out;
err:
kfree_skb(skb);
out:
rcu_read_unlock_bh();
return ret;
}
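/* IPv6 counterpart of send4(); returns -EAFNOSUPPORT when the kernel is
 * built without IPv6 support.
 */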
static int send6(struct wg_device *wg, struct sk_buff *skb,
struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
{
#if IS_ENABLED(CONFIG_IPV6)
struct flowi6 fl = {
.saddr = endpoint->src6,
.daddr = endpoint->addr6.sin6_addr,
.fl6_dport = endpoint->addr6.sin6_port,
.flowi6_mark = wg->fwmark,
.flowi6_oif = endpoint->addr6.sin6_scope_id,
.flowi6_proto = IPPROTO_UDP
/* TODO: addr->sin6_flowinfo */
};
struct dst_entry *dst = NULL;
struct sock *sock;
int ret = 0;
skb_mark_not_on_list(skb);
skb->dev = wg->dev;
skb->mark = wg->fwmark;
rcu_read_lock_bh();
sock = rcu_dereference_bh(wg->sock6);
if (unlikely(!sock)) {
ret = -ENONET;
goto err;
}
fl.fl6_sport = inet_sk(sock)->inet_sport;
if (cache)
dst = dst_cache_get_ip6(cache, &fl.saddr);
if (!dst) {
security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
if (unlikely(!ipv6_addr_any(&fl.saddr) &&
!ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
endpoint->src6 = fl.saddr = in6addr_any;
if (cache)
dst_cache_reset(cache);
}
dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
NULL);
if (unlikely(IS_ERR(dst))) {
ret = PTR_ERR(dst);
net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
wg->dev->name, &endpoint->addr, ret);
goto err;
}
if (cache)
dst_cache_set_ip6(cache, dst, &fl.saddr);
}
skb->ignore_df = 1;
udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
fl.fl6_dport, false);
goto out;
err:
kfree_skb(skb);
out:
rcu_read_unlock_bh();
return ret;
#else
return -EAFNOSUPPORT;
#endif
}
int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
{
size_t skb_len = skb->len;
int ret = -EAFNOSUPPORT;
read_lock_bh(&peer->endpoint_lock);
if (peer->endpoint.addr.sa_family == AF_INET)
ret = send4(peer->device, skb, &peer->endpoint, ds,
&peer->endpoint_cache);
else if (peer->endpoint.addr.sa_family == AF_INET6)
ret = send6(peer->device, skb, &peer->endpoint, ds,
&peer->endpoint_cache);
else
dev_kfree_skb(skb);
if (likely(!ret))
peer->tx_bytes += skb_len;
read_unlock_bh(&peer->endpoint_lock);
return ret;
}
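/* Copy a small buffer into a freshly allocated skb and send it to the
 * peer's current endpoint.
 */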
int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
size_t len, u8 ds)
{
struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
if (unlikely(!skb))
return -ENOMEM;
skb_reserve(skb, SKB_HEADER_LEN);
skb_set_inner_network_header(skb, 0);
skb_put_data(skb, buffer, len);
return wg_socket_send_skb_to_peer(peer, skb, ds);
}
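/* Same as above, but without needing a peer object: the destination is
 * taken from the source of the incoming skb being replied to.
 */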
int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
struct sk_buff *in_skb, void *buffer,
size_t len)
{
int ret = 0;
struct sk_buff *skb;
struct endpoint endpoint;
if (unlikely(!in_skb))
return -EINVAL;
ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
if (unlikely(ret < 0))
return ret;
skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
if (unlikely(!skb))
return -ENOMEM;
skb_reserve(skb, SKB_HEADER_LEN);
skb_set_inner_network_header(skb, 0);
skb_put_data(skb, buffer, len);
if (endpoint.addr.sa_family == AF_INET)
ret = send4(wg, skb, &endpoint, 0, NULL);
else if (endpoint.addr.sa_family == AF_INET6)
ret = send6(wg, skb, &endpoint, 0, NULL);
/* No other possibilities if the endpoint is valid, which it is,
* as we checked above.
*/
return ret;
}
int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
const struct sk_buff *skb)
{
memset(endpoint, 0, sizeof(*endpoint));
if (skb->protocol == htons(ETH_P_IP)) {
endpoint->addr4.sin_family = AF_INET;
endpoint->addr4.sin_port = udp_hdr(skb)->source;
endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
endpoint->src4.s_addr = ip_hdr(skb)->daddr;
endpoint->src_if4 = skb->skb_iif;
} else if (skb->protocol == htons(ETH_P_IPV6)) {
endpoint->addr6.sin6_family = AF_INET6;
endpoint->addr6.sin6_port = udp_hdr(skb)->source;
endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
&ipv6_hdr(skb)->saddr, skb->skb_iif);
endpoint->src6 = ipv6_hdr(skb)->daddr;
} else {
return -EINVAL;
}
return 0;
}
static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
{
return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
a->addr4.sin_port == b->addr4.sin_port &&
a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
(a->addr.sa_family == AF_INET6 &&
b->addr.sa_family == AF_INET6 &&
a->addr6.sin6_port == b->addr6.sin6_port &&
ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
ipv6_addr_equal(&a->src6, &b->src6)) ||
unlikely(!a->addr.sa_family && !b->addr.sa_family);
}
void wg_socket_set_peer_endpoint(struct wg_peer *peer,
const struct endpoint *endpoint)
{
/* First we check unlocked, in order to optimize, since it's pretty rare
* that an endpoint will change. If we happen to be mid-write, and two
* CPUs wind up writing the same thing or something slightly different,
* it doesn't really matter much either.
*/
if (endpoint_eq(endpoint, &peer->endpoint))
return;
write_lock_bh(&peer->endpoint_lock);
if (endpoint->addr.sa_family == AF_INET) {
peer->endpoint.addr4 = endpoint->addr4;
peer->endpoint.src4 = endpoint->src4;
peer->endpoint.src_if4 = endpoint->src_if4;
} else if (endpoint->addr.sa_family == AF_INET6) {
peer->endpoint.addr6 = endpoint->addr6;
peer->endpoint.src6 = endpoint->src6;
} else {
goto out;
}
dst_cache_reset(&peer->endpoint_cache);
out:
write_unlock_bh(&peer->endpoint_lock);
}
void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
const struct sk_buff *skb)
{
struct endpoint endpoint;
if (!wg_socket_endpoint_from_skb(&endpoint, skb))
wg_socket_set_peer_endpoint(peer, &endpoint);
}
void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
{
write_lock_bh(&peer->endpoint_lock);
memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
dst_cache_reset(&peer->endpoint_cache);
write_unlock_bh(&peer->endpoint_lock);
}
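/* encap_rcv hook installed on the UDP tunnel sockets below; every
 * datagram arriving on the WireGuard port is handed to
 * wg_packet_receive().
 */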
static int wg_receive(struct sock *sk, struct sk_buff *skb)
{
struct wg_device *wg;
if (unlikely(!sk))
goto err;
wg = sk->sk_user_data;
if (unlikely(!wg))
goto err;
skb_mark_not_on_list(skb);
wg_packet_receive(wg, skb);
return 0;
err:
kfree_skb(skb);
return 0;
}
static void sock_free(struct sock *sock)
{
if (unlikely(!sock))
return;
sk_clear_memalloc(sock);
udp_tunnel_sock_release(sock->sk_socket);
}
static void set_sock_opts(struct socket *sock)
{
sock->sk->sk_allocation = GFP_ATOMIC;
sock->sk->sk_sndbuf = INT_MAX;
sk_set_memalloc(sock->sk);
}
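/* Create the underlying IPv4 (and, where available, IPv6) UDP sockets.
 * Both must share the same port; when an ephemeral port was requested
 * (port == 0) and the IPv6 bind loses a race for it, both sockets are
 * torn down and the pair is retried, up to 100 times.
 */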
int wg_socket_init(struct wg_device *wg, u16 port)
{
struct net *net;
int ret;
struct udp_tunnel_sock_cfg cfg = {
.sk_user_data = wg,
.encap_type = 1,
.encap_rcv = wg_receive
};
struct socket *new4 = NULL, *new6 = NULL;
struct udp_port_cfg port4 = {
.family = AF_INET,
.local_ip.s_addr = htonl(INADDR_ANY),
.local_udp_port = htons(port),
.use_udp_checksums = true
};
#if IS_ENABLED(CONFIG_IPV6)
int retries = 0;
struct udp_port_cfg port6 = {
.family = AF_INET6,
.local_ip6 = IN6ADDR_ANY_INIT,
.use_udp6_tx_checksums = true,
.use_udp6_rx_checksums = true,
.ipv6_v6only = true
};
#endif
rcu_read_lock();
net = rcu_dereference(wg->creating_net);
net = net ? maybe_get_net(net) : NULL;
rcu_read_unlock();
if (unlikely(!net))
return -ENONET;
#if IS_ENABLED(CONFIG_IPV6)
retry:
#endif
ret = udp_sock_create(net, &port4, &new4);
if (ret < 0) {
pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
goto out;
}
set_sock_opts(new4);
setup_udp_tunnel_sock(net, new4, &cfg);
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6_mod_enabled()) {
port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
ret = udp_sock_create(net, &port6, &new6);
if (ret < 0) {
udp_tunnel_sock_release(new4);
if (ret == -EADDRINUSE && !port && retries++ < 100)
goto retry;
pr_err("%s: Could not create IPv6 socket\n",
wg->dev->name);
goto out;
}
set_sock_opts(new6);
setup_udp_tunnel_sock(net, new6, &cfg);
}
#endif
wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
ret = 0;
out:
put_net(net);
return ret;
}
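/* Publish the new UDP sockets under socket_update_lock, wait for
 * existing RCU readers of the old pointers to finish, then release the
 * old sockets.
 */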
void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
struct sock *new6)
{
struct sock *old4, *old6;
mutex_lock(&wg->socket_update_lock);
old4 = rcu_dereference_protected(wg->sock4,
lockdep_is_held(&wg->socket_update_lock));
old6 = rcu_dereference_protected(wg->sock6,
lockdep_is_held(&wg->socket_update_lock));
rcu_assign_pointer(wg->sock4, new4);
rcu_assign_pointer(wg->sock6, new6);
if (new4)
wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
mutex_unlock(&wg->socket_update_lock);
synchronize_rcu();
sock_free(old4);
sock_free(old6);
}

Some files were not shown because too many files have changed in this diff.