Mirror of https://github.com/Qortal/Brooklyn.git (synced 2025-01-30 14:52:17 +00:00)
Update / Redo the entire stack to juice up like no other was juiced before.

commit 442d22459d
parent 4dac4c855c
certs/.gitignore (vendored) | 3 lines deleted
@@ -1,3 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
/x509_certificate_list
/x509_revocation_list
init/do_mounts_md.c (new file) | 304 lines
@@ -0,0 +1,304 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/delay.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>

#include "do_mounts.h"

/*
 * When md (and any required personalities) are compiled into the kernel
 * (not a module), arrays can be assembled at boot time with AUTODETECT,
 * where specially marked partitions are registered with md_autodetect_dev(),
 * and with MD_BOOT where devices to be collected are given on the boot line
 * with md=.....
 * The code for that is here.
 */

#ifdef CONFIG_MD_AUTODETECT
static int __initdata raid_noautodetect;
#else
static int __initdata raid_noautodetect = 1;
#endif
static int __initdata raid_autopart;

static struct {
	int minor;
	int partitioned;
	int level;
	int chunk;
	char *device_names;
} md_setup_args[256] __initdata;

static int md_setup_ents __initdata;

/*
 * Parse the command-line parameters given to our kernel, but do not
 * actually try to invoke the MD device now; that is handled by
 * md_setup_drive after the low-level disk drivers have initialised.
 *
 * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
 *             assigns the task of parsing integer arguments to the
 *             invoked program now). Added ability to initialise all
 *             the MD devices (by specifying multiple "md=" lines)
 *             instead of just one. -- KTK
 * 18May2000:  Added support for persistent-superblock arrays:
 *             md=n,0,factor,fault,device-list uses RAID0 for device n
 *             md=n,-1,factor,fault,device-list uses LINEAR for device n
 *             md=n,device-list reads a RAID superblock from the devices
 *             elements in device-list are read by name_to_kdev_t so can be
 *             a hex number or something like /dev/hda1 /dev/sdb
 * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
 *             Shifted name_to_kdev_t() and related operations to md_set_drive()
 *             for later execution. Rewrote section to make devfs compatible.
 */
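/*
 * Illustrative examples of the syntax documented above (not part of the
 * original file); the device names are placeholders:
 *
 *	md=0,0,4,0,/dev/sdb1,/dev/sdc1	(RAID0, chunk = 1 << (4+12) = 64 KiB)
 *	md=1,/dev/sdd1,/dev/sde1	(level read from the RAID superblocks)
 *	md=d0,/dev/sdf1,/dev/sdg1	(partitionable array, /dev/md_d0)
 */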
static int __init md_setup(char *str)
{
	int minor, level, factor, fault, partitioned = 0;
	char *pername = "";
	char *str1;
	int ent;

	if (*str == 'd') {
		partitioned = 1;
		str++;
	}
	if (get_option(&str, &minor) != 2) {	/* MD Number */
		printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
		return 0;
	}
	str1 = str;
	for (ent = 0; ent < md_setup_ents; ent++)
		if (md_setup_args[ent].minor == minor &&
		    md_setup_args[ent].partitioned == partitioned) {
			printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
			       "Replacing previous definition.\n", partitioned ? "d" : "", minor);
			break;
		}
	if (ent >= ARRAY_SIZE(md_setup_args)) {
		printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned ? "d" : "", minor);
		return 0;
	}
	if (ent >= md_setup_ents)
		md_setup_ents++;
	switch (get_option(&str, &level)) {	/* RAID level */
	case 2: /* could be 0 or -1.. */
		if (level == 0 || level == LEVEL_LINEAR) {
			if (get_option(&str, &factor) != 2 ||	/* Chunk Size */
			    get_option(&str, &fault) != 2) {
				printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
				return 0;
			}
			md_setup_args[ent].level = level;
			md_setup_args[ent].chunk = 1 << (factor + 12);
			if (level == LEVEL_LINEAR)
				pername = "linear";
			else
				pername = "raid0";
			break;
		}
		/* FALL THROUGH */
	case 1: /* the first device is numeric */
		str = str1;
		/* FALL THROUGH */
	case 0:
		md_setup_args[ent].level = LEVEL_NONE;
		pername = "super-block";
	}

	printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
	       minor, pername, str);
	md_setup_args[ent].device_names = str;
	md_setup_args[ent].partitioned = partitioned;
	md_setup_args[ent].minor = minor;

	return 1;
}

static void __init md_setup_drive(void)
{
	int minor, i, ent, partitioned;
	dev_t dev;
	dev_t devices[MD_SB_DISKS + 1];

	for (ent = 0; ent < md_setup_ents; ent++) {
		int fd;
		int err = 0;
		char *devname;
		mdu_disk_info_t dinfo;
		char name[16];

		minor = md_setup_args[ent].minor;
		partitioned = md_setup_args[ent].partitioned;
		devname = md_setup_args[ent].device_names;

		sprintf(name, "/dev/md%s%d", partitioned ? "_d" : "", minor);
		if (partitioned)
			dev = MKDEV(mdp_major, minor << MdpMinorShift);
		else
			dev = MKDEV(MD_MAJOR, minor);
		create_dev(name, dev);
		for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
			char *p;
			char comp_name[64];
			u32 rdev;

			p = strchr(devname, ',');
			if (p)
				*p++ = 0;

			dev = name_to_dev_t(devname);
			if (strncmp(devname, "/dev/", 5) == 0)
				devname += 5;
			snprintf(comp_name, 63, "/dev/%s", devname);
			rdev = bstat(comp_name);
			if (rdev)
				dev = new_decode_dev(rdev);
			if (!dev) {
				printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
				break;
			}

			devices[i] = dev;

			devname = p;
		}
		devices[i] = 0;

		if (!i)
			continue;

		printk(KERN_INFO "md: Loading md%s%d: %s\n",
		       partitioned ? "_d" : "", minor,
		       md_setup_args[ent].device_names);

		fd = ksys_open(name, 0, 0);
		if (fd < 0) {
			printk(KERN_ERR "md: open failed - cannot start "
			       "array %s\n", name);
			continue;
		}
		if (ksys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
			printk(KERN_WARNING
			       "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
			       minor);
			ksys_close(fd);
			continue;
		}

		if (md_setup_args[ent].level != LEVEL_NONE) {
			/* non-persistent */
			mdu_array_info_t ainfo;
			ainfo.level = md_setup_args[ent].level;
			ainfo.size = 0;
			ainfo.nr_disks = 0;
			ainfo.raid_disks = 0;
			while (devices[ainfo.raid_disks])
				ainfo.raid_disks++;
			ainfo.md_minor = minor;
			ainfo.not_persistent = 1;

			ainfo.state = (1 << MD_SB_CLEAN);
			ainfo.layout = 0;
			ainfo.chunk_size = md_setup_args[ent].chunk;
			err = ksys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
			for (i = 0; !err && i <= MD_SB_DISKS; i++) {
				dev = devices[i];
				if (!dev)
					break;
				dinfo.number = i;
				dinfo.raid_disk = i;
				dinfo.state = (1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC);
				dinfo.major = MAJOR(dev);
				dinfo.minor = MINOR(dev);
				err = ksys_ioctl(fd, ADD_NEW_DISK,
						 (long)&dinfo);
			}
		} else {
			/* persistent */
			for (i = 0; i <= MD_SB_DISKS; i++) {
				dev = devices[i];
				if (!dev)
					break;
				dinfo.major = MAJOR(dev);
				dinfo.minor = MINOR(dev);
				ksys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
			}
		}
		if (!err)
			err = ksys_ioctl(fd, RUN_ARRAY, 0);
		if (err)
			printk(KERN_WARNING "md: starting md%d failed\n", minor);
		else {
			/* reread the partition table.
			 * I (neilb) am not sure why this is needed, but I cannot
			 * boot a kernel with devfs compiled in from partitioned md
			 * array without it
			 */
			ksys_close(fd);
			fd = ksys_open(name, 0, 0);
			ksys_ioctl(fd, BLKRRPART, 0);
		}
		ksys_close(fd);
	}
}

static int __init raid_setup(char *str)
{
	int len, pos;

	len = strlen(str) + 1;
	pos = 0;

	while (pos < len) {
		char *comma = strchr(str + pos, ',');
		int wlen;
		if (comma)
			wlen = (comma - str) - pos;
		else
			wlen = (len - 1) - pos;

		if (!strncmp(str, "noautodetect", wlen))
			raid_noautodetect = 1;
		if (!strncmp(str, "autodetect", wlen))
			raid_noautodetect = 0;
		if (strncmp(str, "partitionable", wlen) == 0)
			raid_autopart = 1;
		if (strncmp(str, "part", wlen) == 0)
			raid_autopart = 1;
		pos += wlen + 1;
	}
	return 1;
}

__setup("raid=", raid_setup);
__setup("md=", md_setup);

static void __init autodetect_raid(void)
{
	int fd;

	/*
	 * Since we don't want to detect and use half a raid array, we need to
	 * wait for the known devices to complete their probing
	 */
	printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n");
	printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");

	wait_for_device_probe();

	fd = ksys_open("/dev/md0", 0, 0);
	if (fd >= 0) {
		ksys_ioctl(fd, RAID_AUTORUN, raid_autopart);
		ksys_close(fd);
	}
}

void __init md_run_setup(void)
{
	create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));

	if (raid_noautodetect)
		printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
	else
		autodetect_raid();
	md_setup_drive();
}
net/ceph/ceph_fs.c (new file) | 104 lines
@@ -0,0 +1,104 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Some non-inline ceph helpers
 */
#include <linux/module.h>
#include <linux/ceph/types.h>

/*
 * return true if @layout appears to be valid
 */
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout)
{
	__u32 su = layout->stripe_unit;
	__u32 sc = layout->stripe_count;
	__u32 os = layout->object_size;

	/* stripe unit, object size must be non-zero, 64k increment */
	if (!su || (su & (CEPH_MIN_STRIPE_UNIT - 1)))
		return 0;
	if (!os || (os & (CEPH_MIN_STRIPE_UNIT - 1)))
		return 0;
	/* object size must be a multiple of stripe unit */
	if (os < su || os % su)
		return 0;
	/* stripe count must be non-zero */
	if (!sc)
		return 0;
	return 1;
}
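/*
 * Illustrative values (not from the original file), assuming the usual
 * CEPH_MIN_STRIPE_UNIT of 64 KiB: stripe_unit = 65536, stripe_count = 1 and
 * object_size = 4194304 (64 stripe units) pass every check above, while
 * stripe_unit = 4096 fails the 64k-increment test.
 */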

void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
				  struct ceph_file_layout_legacy *legacy)
{
	fl->stripe_unit = le32_to_cpu(legacy->fl_stripe_unit);
	fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
	fl->object_size = le32_to_cpu(legacy->fl_object_size);
	fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
	if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
	    fl->stripe_count == 0 && fl->object_size == 0)
		fl->pool_id = -1;
}
EXPORT_SYMBOL(ceph_file_layout_from_legacy);

void ceph_file_layout_to_legacy(struct ceph_file_layout *fl,
				struct ceph_file_layout_legacy *legacy)
{
	legacy->fl_stripe_unit = cpu_to_le32(fl->stripe_unit);
	legacy->fl_stripe_count = cpu_to_le32(fl->stripe_count);
	legacy->fl_object_size = cpu_to_le32(fl->object_size);
	if (fl->pool_id >= 0)
		legacy->fl_pg_pool = cpu_to_le32(fl->pool_id);
	else
		legacy->fl_pg_pool = 0;
}
EXPORT_SYMBOL(ceph_file_layout_to_legacy);

int ceph_flags_to_mode(int flags)
{
	int mode;

#ifdef O_DIRECTORY	/* fixme */
	if ((flags & O_DIRECTORY) == O_DIRECTORY)
		return CEPH_FILE_MODE_PIN;
#endif

	switch (flags & O_ACCMODE) {
	case O_WRONLY:
		mode = CEPH_FILE_MODE_WR;
		break;
	case O_RDONLY:
		mode = CEPH_FILE_MODE_RD;
		break;
	case O_RDWR:
	case O_ACCMODE: /* this is what the VFS does */
		mode = CEPH_FILE_MODE_RDWR;
		break;
	}
#ifdef O_LAZY
	if (flags & O_LAZY)
		mode |= CEPH_FILE_MODE_LAZY;
#endif

	return mode;
}
EXPORT_SYMBOL(ceph_flags_to_mode);

int ceph_caps_for_mode(int mode)
{
	int caps = CEPH_CAP_PIN;

	if (mode & CEPH_FILE_MODE_RD)
		caps |= CEPH_CAP_FILE_SHARED |
			CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
	if (mode & CEPH_FILE_MODE_WR)
		caps |= CEPH_CAP_FILE_EXCL |
			CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |
			CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL |
			CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL;
	if (mode & CEPH_FILE_MODE_LAZY)
		caps |= CEPH_CAP_FILE_LAZYIO;

	return caps;
}
EXPORT_SYMBOL(ceph_caps_for_mode);
net/core/ethtool.c (new file) | 3116 lines
(file diff suppressed because it is too large)
net/ipv4/udp_tunnel.c (new file) | 226 lines
@@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>

int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
		     struct socket **sockp)
{
	int err;
	struct socket *sock = NULL;
	struct sockaddr_in udp_addr;

	err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
	if (err < 0)
		goto error;

	if (cfg->bind_ifindex) {
		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
					(void *)&cfg->bind_ifindex,
					sizeof(cfg->bind_ifindex));
		if (err < 0)
			goto error;
	}

	udp_addr.sin_family = AF_INET;
	udp_addr.sin_addr = cfg->local_ip;
	udp_addr.sin_port = cfg->local_udp_port;
	err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
			  sizeof(udp_addr));
	if (err < 0)
		goto error;

	if (cfg->peer_udp_port) {
		udp_addr.sin_family = AF_INET;
		udp_addr.sin_addr = cfg->peer_ip;
		udp_addr.sin_port = cfg->peer_udp_port;
		err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
				     sizeof(udp_addr), 0);
		if (err < 0)
			goto error;
	}

	sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;

	*sockp = sock;
	return 0;

error:
	if (sock) {
		kernel_sock_shutdown(sock, SHUT_RDWR);
		sock_release(sock);
	}
	*sockp = NULL;
	return err;
}
EXPORT_SYMBOL(udp_sock_create4);

void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
			   struct udp_tunnel_sock_cfg *cfg)
{
	struct sock *sk = sock->sk;

	/* Disable multicast loopback */
	inet_sk(sk)->mc_loop = 0;

	/* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */
	inet_inc_convert_csum(sk);

	rcu_assign_sk_user_data(sk, cfg->sk_user_data);

	udp_sk(sk)->encap_type = cfg->encap_type;
	udp_sk(sk)->encap_rcv = cfg->encap_rcv;
	udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
	udp_sk(sk)->encap_destroy = cfg->encap_destroy;
	udp_sk(sk)->gro_receive = cfg->gro_receive;
	udp_sk(sk)->gro_complete = cfg->gro_complete;

	udp_tunnel_encap_enable(sock);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
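/*
 * Illustrative sketch (not part of this commit): a tunnel driver typically
 * pairs udp_sock_create4() with setup_udp_tunnel_sock(). The function, the
 * port number and the encap_rcv callback named here are hypothetical, so
 * the block is guarded out of compilation.
 */
#if 0	/* example only */
static int example_tunnel_open(struct net *net, struct socket **sockp)
{
	struct udp_port_cfg port_cfg = {
		.family = AF_INET,
		.local_udp_port = htons(4789),		/* assumed example port */
	};
	struct udp_tunnel_sock_cfg tunnel_cfg = {
		.encap_type = 1,			/* UDP_ENCAP-style value */
		.encap_rcv = example_encap_recv,	/* hypothetical callback */
	};
	int err = udp_sock_create4(net, &port_cfg, sockp);

	if (err < 0)
		return err;
	setup_udp_tunnel_sock(net, *sockp, &tunnel_cfg);
	return 0;
}
#endif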

void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
			     unsigned short type)
{
	struct sock *sk = sock->sk;
	struct udp_tunnel_info ti;

	if (!dev->netdev_ops->ndo_udp_tunnel_add ||
	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
		return;

	ti.type = type;
	ti.sa_family = sk->sk_family;
	ti.port = inet_sk(sk)->inet_sport;

	dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);

void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
			     unsigned short type)
{
	struct sock *sk = sock->sk;
	struct udp_tunnel_info ti;

	if (!dev->netdev_ops->ndo_udp_tunnel_del ||
	    !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
		return;

	ti.type = type;
	ti.sa_family = sk->sk_family;
	ti.port = inet_sk(sk)->inet_sport;

	dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
}
EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);

/* Notify netdevs that UDP port started listening */
void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct udp_tunnel_info ti;
	struct net_device *dev;

	ti.type = type;
	ti.sa_family = sk->sk_family;
	ti.port = inet_sk(sk)->inet_sport;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (!dev->netdev_ops->ndo_udp_tunnel_add)
			continue;
		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
			continue;
		dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port);

/* Notify netdevs that UDP port is no longer listening */
void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct udp_tunnel_info ti;
	struct net_device *dev;

	ti.type = type;
	ti.sa_family = sk->sk_family;
	ti.port = inet_sk(sk)->inet_sport;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (!dev->netdev_ops->ndo_udp_tunnel_del)
			continue;
		if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
			continue;
		dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port);

void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
			 __be16 df, __be16 src_port, __be16 dst_port,
			 bool xnet, bool nocheck)
{
	struct udphdr *uh;

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;
	uh->len = htons(skb->len);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	udp_set_csum(nocheck, skb, src, dst, skb->len);

	iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
}
EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);

void udp_tunnel_sock_release(struct socket *sock)
{
	rcu_assign_sk_user_data(sock->sk, NULL);
	kernel_sock_shutdown(sock, SHUT_RDWR);
	sock_release(sock);
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);

struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
				    __be16 flags, __be64 tunnel_id, int md_size)
{
	struct metadata_dst *tun_dst;
	struct ip_tunnel_info *info;

	if (family == AF_INET)
		tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size);
	else
		tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size);
	if (!tun_dst)
		return NULL;

	info = &tun_dst->u.tun_info;
	info->key.tp_src = udp_hdr(skb)->source;
	info->key.tp_dst = udp_hdr(skb)->dest;
	if (udp_hdr(skb)->check)
		info->key.tun_flags |= TUNNEL_CSUM;
	return tun_dst;
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);

MODULE_LICENSE("GPL");
net/netfilter/nf_tables_set_core.c (new file) | 29 lines
@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/module.h>
#include <net/netfilter/nf_tables_core.h>

static int __init nf_tables_set_module_init(void)
{
	nft_register_set(&nft_set_hash_fast_type);
	nft_register_set(&nft_set_hash_type);
	nft_register_set(&nft_set_rhash_type);
	nft_register_set(&nft_set_bitmap_type);
	nft_register_set(&nft_set_rbtree_type);

	return 0;
}

static void __exit nf_tables_set_module_exit(void)
{
	nft_unregister_set(&nft_set_rbtree_type);
	nft_unregister_set(&nft_set_bitmap_type);
	nft_unregister_set(&nft_set_rhash_type);
	nft_unregister_set(&nft_set_hash_type);
	nft_unregister_set(&nft_set_hash_fast_type);
}

module_init(nf_tables_set_module_init);
module_exit(nf_tables_set_module_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NFT_SET();
net/rds/ib_fmr.c (new file) | 269 lines
@@ -0,0 +1,269 @@
/*
 * Copyright (c) 2016 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ib_mr.h"

struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
{
	struct rds_ib_mr_pool *pool;
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_fmr *fmr;
	int err = 0;

	if (npages <= RDS_MR_8K_MSG_SIZE)
		pool = rds_ibdev->mr_8k_pool;
	else
		pool = rds_ibdev->mr_1m_pool;

	if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);

	/* Switch pools if one of the pools is reaching its upper limit */
	if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
		if (pool->pool_type == RDS_IB_MR_8K_POOL)
			pool = rds_ibdev->mr_1m_pool;
		else
			pool = rds_ibdev->mr_8k_pool;
	}

	ibmr = rds_ib_try_reuse_ibmr(pool);
	if (ibmr)
		return ibmr;

	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
			    rdsibdev_to_node(rds_ibdev));
	if (!ibmr) {
		err = -ENOMEM;
		goto out_no_cigar;
	}

	fmr = &ibmr->u.fmr;
	fmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
				(IB_ACCESS_LOCAL_WRITE |
				 IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE |
				 IB_ACCESS_REMOTE_ATOMIC),
				&pool->fmr_attr);
	if (IS_ERR(fmr->fmr)) {
		err = PTR_ERR(fmr->fmr);
		fmr->fmr = NULL;
		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err);
		goto out_no_cigar;
	}

	ibmr->pool = pool;
	if (pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

	return ibmr;

out_no_cigar:
	kfree(ibmr);
	atomic_dec(&pool->item_count);

	return ERR_PTR(err);
}

static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
			  struct rds_ib_mr *ibmr, struct scatterlist *sg,
			  unsigned int nents)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_fmr *fmr = &ibmr->u.fmr;
	struct scatterlist *scat = sg;
	u64 io_addr = 0;
	u64 *dma_pages;
	u32 len;
	int page_cnt, sg_dma_len;
	int i, j;
	int ret;

	sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
	if (unlikely(!sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	len = 0;
	page_cnt = 0;

	for (i = 0; i < sg_dma_len; ++i) {
		unsigned int dma_len = sg_dma_len(&scat[i]);
		u64 dma_addr = sg_dma_address(&scat[i]);

		if (dma_addr & ~PAGE_MASK) {
			if (i > 0) {
				ib_dma_unmap_sg(dev, sg, nents,
						DMA_BIDIRECTIONAL);
				return -EINVAL;
			} else {
				++page_cnt;
			}
		}
		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < sg_dma_len - 1) {
				ib_dma_unmap_sg(dev, sg, nents,
						DMA_BIDIRECTIONAL);
				return -EINVAL;
			} else {
				++page_cnt;
			}
		}

		len += dma_len;
	}

	page_cnt += len >> PAGE_SHIFT;
	if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
		ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
		return -EINVAL;
	}

	dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
				       rdsibdev_to_node(rds_ibdev));
	if (!dma_pages) {
		ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
		return -ENOMEM;
	}

	page_cnt = 0;
	for (i = 0; i < sg_dma_len; ++i) {
		unsigned int dma_len = sg_dma_len(&scat[i]);
		u64 dma_addr = sg_dma_address(&scat[i]);

		for (j = 0; j < dma_len; j += PAGE_SIZE)
			dma_pages[page_cnt++] =
				(dma_addr & PAGE_MASK) + j;
	}

	ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
	if (ret) {
		ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
		goto out;
	}

	/* Success - we successfully remapped the MR, so we can
	 * safely tear down the old mapping.
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = scat;
	ibmr->sg_len = nents;
	ibmr->sg_dma_len = sg_dma_len;
	ibmr->remap_count++;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
	ret = 0;

out:
	kfree(dma_pages);

	return ret;
}

struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
				 struct scatterlist *sg,
				 unsigned long nents,
				 u32 *key)
{
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_fmr *fmr;
	int ret;

	ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
	if (IS_ERR(ibmr))
		return ibmr;

	ibmr->device = rds_ibdev;
	fmr = &ibmr->u.fmr;
	ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
	if (ret == 0)
		*key = fmr->fmr->rkey;
	else
		rds_ib_free_mr(ibmr, 0);

	return ibmr;
}

void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
		      unsigned long *unpinned, unsigned int goal)
{
	struct rds_ib_mr *ibmr, *next;
	struct rds_ib_fmr *fmr;
	LIST_HEAD(fmr_list);
	int ret = 0;
	unsigned int freed = *nfreed;

	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
	list_for_each_entry(ibmr, list, unmap_list) {
		fmr = &ibmr->u.fmr;
		list_add(&fmr->fmr->list, &fmr_list);
	}

	ret = ib_unmap_fmr(&fmr_list);
	if (ret)
		pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);

	/* Now we can destroy the DMA mapping and unpin any pages */
	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
		fmr = &ibmr->u.fmr;
		*unpinned += ibmr->sg_len;
		__rds_ib_teardown_mr(ibmr);
		if (freed < goal ||
		    ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
			else
				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
			list_del(&ibmr->unmap_list);
			ib_dealloc_fmr(fmr->fmr);
			kfree(ibmr);
			freed++;
		}
	}
	*nfreed = freed;
}

void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
{
	struct rds_ib_mr_pool *pool = ibmr->pool;

	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
		llist_add(&ibmr->llnode, &pool->drop_list);
	else
		llist_add(&ibmr->llnode, &pool->free_list);
}
net/wireguard/Kbuild (new file) | 16 lines
@@ -0,0 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.

ccflags-y := -O3
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
ccflags-y += -Wframe-larger-than=2048
ccflags-$(if $(WIREGUARD_VERSION),y,) += -D'WIREGUARD_VERSION="$(WIREGUARD_VERSION)"'

wireguard-y := main.o noise.o device.o peer.o timers.o queueing.o send.o receive.o socket.o peerlookup.o allowedips.o ratelimiter.o cookie.o netlink.o

include $(src)/crypto/Kbuild.include
include $(src)/compat/Kbuild.include

obj-$(if $(KBUILD_EXTMOD),m,$(CONFIG_WIREGUARD)) := wireguard.o
net/wireguard/Kconfig (new file) | 33 lines
@@ -0,0 +1,33 @@
config WIREGUARD
	tristate "IP: WireGuard secure network tunnel"
	depends on NET && INET
	depends on IPV6 || !IPV6
	select NET_UDP_TUNNEL
	select DST_CACHE
	select CRYPTO
	select CRYPTO_ALGAPI
	select VFP
	select VFPv3 if CPU_V7
	select NEON if CPU_V7
	select KERNEL_MODE_NEON if CPU_V7
	default m
	help
	  WireGuard is a secure, fast, and easy to use replacement for IPsec
	  that uses modern cryptography and clever networking tricks. It's
	  designed to be fairly general purpose and abstract enough to fit most
	  use cases, while at the same time remaining extremely simple to
	  configure. See www.wireguard.com for more info.

	  It's safe to say Y or M here, as the driver is very lightweight and
	  is only in use when an administrator chooses to add an interface.

config WIREGUARD_DEBUG
	bool "Debugging checks and verbose messages"
	depends on WIREGUARD
	help
	  This will write log messages for handshake and other events
	  that occur for a WireGuard interface. It will also perform some
	  extra validation checks and unit tests at various points. This is
	  only useful for debugging.

	  Say N here unless you know what you're doing.
net/wireguard/Makefile (new file) | 59 lines
@@ -0,0 +1,59 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.

KERNELRELEASE ?= $(shell uname -r)
KERNELDIR ?= /lib/modules/$(KERNELRELEASE)/build
PREFIX ?= /usr
DESTDIR ?=
SRCDIR ?= $(PREFIX)/src
DKMSDIR ?= $(SRCDIR)/wireguard
DEPMOD ?= depmod
DEPMODBASEDIR ?= /

PWD := $(shell pwd)

all: module
debug: module-debug

ifneq ($(V),1)
MAKEFLAGS += --no-print-directory
endif

WIREGUARD_VERSION = $(patsubst v%,%,$(shell GIT_CEILING_DIRECTORIES="$(PWD)/../.." git describe --dirty 2>/dev/null))

module:
	@$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules

module-debug:
	@$(MAKE) -C $(KERNELDIR) M=$(PWD) V=1 CONFIG_WIREGUARD_DEBUG=y WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules

clean:
	@$(MAKE) -C $(KERNELDIR) M=$(PWD) clean

module-install:
	@$(MAKE) -C $(KERNELDIR) M=$(PWD) WIREGUARD_VERSION="$(WIREGUARD_VERSION)" modules_install
	$(DEPMOD) -b "$(DEPMODBASEDIR)" -a $(KERNELRELEASE)

install: module-install

rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
DKMS_SOURCES := version.h Makefile Kbuild Kconfig dkms.conf $(filter-out version.h wireguard.mod.c tests/%,$(call rwildcard,,*.c *.h *.S *.pl *.include))
dkms-install: $(DKMS_SOURCES)
	@$(foreach f,$(DKMS_SOURCES),install -v -m0644 -D $(f) $(DESTDIR)$(DKMSDIR)/$(f);)

style:
	$(KERNELDIR)/scripts/checkpatch.pl -f --max-line-length=4000 --codespell --color=always $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h) $(wildcard selftest/*.c)

check: clean
	scan-build --html-title=wireguard-linux-compat -maxloop 100 --view --keep-going $(MAKE) module CONFIG_WIREGUARD_DEBUG=y C=2 CF="-D__CHECK_ENDIAN__"

coccicheck: clean
	@$(MAKE) -C $(KERNELDIR) M=$(PWD) CONFIG_WIREGUARD_DEBUG=y coccicheck MODE=report

cloc:
	@cloc --skip-uniqueness --by-file --extract-with="$$(readlink -f ../kernel-tree-scripts/filter-compat-defines.sh) >FILE< > \$$(basename >FILE<)" $(filter-out wireguard.mod.c,$(wildcard *.c)) $(wildcard *.h)

-include tests/debug.mk

.PHONY: all module module-debug module-install install dkms-install clean cloc check style
net/wireguard/allowedips.c (new file) | 382 lines
@@ -0,0 +1,382 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "allowedips.h"
#include "peer.h"

static void swap_endian(u8 *dst, const u8 *src, u8 bits)
{
	if (bits == 32) {
		*(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
	} else if (bits == 128) {
		((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
		((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
	}
}

static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
				 u8 cidr, u8 bits)
{
	node->cidr = cidr;
	node->bit_at_a = cidr / 8U;
#ifdef __LITTLE_ENDIAN
	node->bit_at_a ^= (bits / 8U - 1U) % 8U;
#endif
	node->bit_at_b = 7U - (cidr % 8U);
	node->bitlen = bits;
	memcpy(node->bits, src, bits / 8U);
}
#define CHOOSE_NODE(parent, key) \
	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
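/*
 * Note (illustrative, not from the original file): bit_at_a/bit_at_b are
 * precomputed by copy_and_assign_cidr() so that CHOOSE_NODE() extracts bit
 * number 'cidr' of the (endian-swapped) key, counting from the most
 * significant bit of the address, i.e. the first bit beyond the node's own
 * prefix, and uses it to select the child subtree.
 */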

static void node_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct allowedips_node, rcu));
}

static void push_rcu(struct allowedips_node **stack,
		     struct allowedips_node __rcu *p, unsigned int *len)
{
	if (rcu_access_pointer(p)) {
		WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
		stack[(*len)++] = rcu_dereference_raw(p);
	}
}

static void root_free_rcu(struct rcu_head *rcu)
{
	struct allowedips_node *node, *stack[128] = {
		container_of(rcu, struct allowedips_node, rcu) };
	unsigned int len = 1;

	while (len > 0 && (node = stack[--len])) {
		push_rcu(stack, node->bit[0], &len);
		push_rcu(stack, node->bit[1], &len);
		kfree(node);
	}
}

static void root_remove_peer_lists(struct allowedips_node *root)
{
	struct allowedips_node *node, *stack[128] = { root };
	unsigned int len = 1;

	while (len > 0 && (node = stack[--len])) {
		push_rcu(stack, node->bit[0], &len);
		push_rcu(stack, node->bit[1], &len);
		if (rcu_access_pointer(node->peer))
			list_del(&node->peer_list);
	}
}

static void walk_remove_by_peer(struct allowedips_node __rcu **top,
				struct wg_peer *peer, struct mutex *lock)
{
#define REF(p) rcu_access_pointer(p)
#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
#define PUSH(p) ({ \
		WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
		stack[len++] = p; \
	})

	struct allowedips_node __rcu **stack[128], **nptr;
	struct allowedips_node *node, *prev;
	unsigned int len;

	if (unlikely(!peer || !REF(*top)))
		return;

	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
		nptr = stack[len - 1];
		node = DEREF(nptr);
		if (!node) {
			--len;
			continue;
		}
		if (!prev || REF(prev->bit[0]) == node ||
		    REF(prev->bit[1]) == node) {
			if (REF(node->bit[0]))
				PUSH(&node->bit[0]);
			else if (REF(node->bit[1]))
				PUSH(&node->bit[1]);
		} else if (REF(node->bit[0]) == prev) {
			if (REF(node->bit[1]))
				PUSH(&node->bit[1]);
		} else {
			if (rcu_dereference_protected(node->peer,
				lockdep_is_held(lock)) == peer) {
				RCU_INIT_POINTER(node->peer, NULL);
				list_del_init(&node->peer_list);
				if (!node->bit[0] || !node->bit[1]) {
					rcu_assign_pointer(*nptr, DEREF(
					       &node->bit[!REF(node->bit[0])]));
					call_rcu(&node->rcu, node_free_rcu);
					node = DEREF(nptr);
				}
			}
			--len;
		}
	}

#undef REF
#undef DEREF
#undef PUSH
}

static unsigned int fls128(u64 a, u64 b)
{
	return a ? fls64(a) + 64U : fls64(b);
}

static u8 common_bits(const struct allowedips_node *node, const u8 *key,
		      u8 bits)
{
	if (bits == 32)
		return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
	else if (bits == 128)
		return 128U - fls128(
			*(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
			*(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
	return 0;
}

static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
			   u8 bits)
{
	/* This could be much faster if it actually just compared the common
	 * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
	 * the rest, but it turns out that common_bits is already super fast on
	 * modern processors, even taking into account the unfortunate bswap.
	 * So, we just inline it like this instead.
	 */
	return common_bits(node, key, bits) >= node->cidr;
}

static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
					 const u8 *key)
{
	struct allowedips_node *node = trie, *found = NULL;

	while (node && prefix_matches(node, key, bits)) {
		if (rcu_access_pointer(node->peer))
			found = node;
		if (node->cidr == bits)
			break;
		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
	}
	return found;
}

/* Returns a strong reference to a peer */
static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
			      const void *be_ip)
{
	/* Aligned so it can be passed to fls/fls64 */
	u8 ip[16] __aligned(__alignof(u64));
	struct allowedips_node *node;
	struct wg_peer *peer = NULL;

	swap_endian(ip, be_ip, bits);

	rcu_read_lock_bh();
retry:
	node = find_node(rcu_dereference_bh(root), bits, ip);
	if (node) {
		peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
		if (!peer)
			goto retry;
	}
	rcu_read_unlock_bh();
	return peer;
}

static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
			   u8 cidr, u8 bits, struct allowedips_node **rnode,
			   struct mutex *lock)
{
	struct allowedips_node *node = rcu_dereference_protected(trie,
						lockdep_is_held(lock));
	struct allowedips_node *parent = NULL;
	bool exact = false;

	while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
		parent = node;
		if (parent->cidr == cidr) {
			exact = true;
			break;
		}
		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
						 lockdep_is_held(lock));
	}
	*rnode = parent;
	return exact;
}

static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	struct allowedips_node *node, *parent, *down, *newnode;

	if (unlikely(cidr > bits || !peer))
		return -EINVAL;

	if (!rcu_access_pointer(*trie)) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (unlikely(!node))
			return -ENOMEM;
		RCU_INIT_POINTER(node->peer, peer);
		list_add_tail(&node->peer_list, &peer->allowedips_list);
		copy_and_assign_cidr(node, key, cidr, bits);
		rcu_assign_pointer(*trie, node);
		return 0;
	}
	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
		rcu_assign_pointer(node->peer, peer);
		list_move_tail(&node->peer_list, &peer->allowedips_list);
		return 0;
	}

	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
	if (unlikely(!newnode))
		return -ENOMEM;
	RCU_INIT_POINTER(newnode->peer, peer);
	list_add_tail(&newnode->peer_list, &peer->allowedips_list);
	copy_and_assign_cidr(newnode, key, cidr, bits);

	if (!node) {
		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
	} else {
		down = rcu_dereference_protected(CHOOSE_NODE(node, key),
						 lockdep_is_held(lock));
		if (!down) {
			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
			return 0;
		}
	}
	cidr = min(cidr, common_bits(down, key, bits));
	parent = node;

	if (newnode->cidr == cidr) {
		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
		if (!parent)
			rcu_assign_pointer(*trie, newnode);
		else
			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
					   newnode);
	} else {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (unlikely(!node)) {
			list_del(&newnode->peer_list);
			kfree(newnode);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&node->peer_list);
		copy_and_assign_cidr(node, newnode->bits, cidr, bits);

		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
		if (!parent)
			rcu_assign_pointer(*trie, node);
		else
			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
					   node);
	}
	return 0;
}

void wg_allowedips_init(struct allowedips *table)
{
	table->root4 = table->root6 = NULL;
	table->seq = 1;
}

void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
{
	struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;

	++table->seq;
	RCU_INIT_POINTER(table->root4, NULL);
	RCU_INIT_POINTER(table->root6, NULL);
	if (rcu_access_pointer(old4)) {
		struct allowedips_node *node = rcu_dereference_protected(old4,
							lockdep_is_held(lock));

		root_remove_peer_lists(node);
		call_rcu(&node->rcu, root_free_rcu);
	}
	if (rcu_access_pointer(old6)) {
		struct allowedips_node *node = rcu_dereference_protected(old6,
							lockdep_is_held(lock));

		root_remove_peer_lists(node);
		call_rcu(&node->rcu, root_free_rcu);
	}
}

int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	/* Aligned so it can be passed to fls */
	u8 key[4] __aligned(__alignof(u32));

	++table->seq;
	swap_endian(key, (const u8 *)ip, 32);
	return add(&table->root4, 32, key, cidr, peer, lock);
}

int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock)
{
	/* Aligned so it can be passed to fls64 */
	u8 key[16] __aligned(__alignof(u64));

	++table->seq;
	swap_endian(key, (const u8 *)ip, 128);
	return add(&table->root6, 128, key, cidr, peer, lock);
}

void wg_allowedips_remove_by_peer(struct allowedips *table,
				  struct wg_peer *peer, struct mutex *lock)
{
	++table->seq;
	walk_remove_by_peer(&table->root4, peer, lock);
	walk_remove_by_peer(&table->root6, peer, lock);
}

int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
{
	const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);

	swap_endian(ip, node->bits, node->bitlen);
	memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
	if (node->cidr)
		ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);

	*cidr = node->cidr;
	return node->bitlen == 32 ? AF_INET : AF_INET6;
}

/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
					 struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
	else if (skb->protocol == htons(ETH_P_IPV6))
		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
	return NULL;
}

/* Returns a strong reference to a peer */
struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
					 struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
	else if (skb->protocol == htons(ETH_P_IPV6))
		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
	return NULL;
}

#include "selftest/allowedips.c"
net/wireguard/allowedips.h (new file) | 59 lines
@@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_ALLOWEDIPS_H
#define _WG_ALLOWEDIPS_H

#include <linux/mutex.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

struct wg_peer;

struct allowedips_node {
	struct wg_peer __rcu *peer;
	struct allowedips_node __rcu *bit[2];
	/* While it may seem scandalous that we waste space for v4,
	 * we're alloc'ing to the nearest power of 2 anyway, so this
	 * doesn't actually make a difference.
	 */
	u8 bits[16] __aligned(__alignof(u64));
	u8 cidr, bit_at_a, bit_at_b, bitlen;

	/* Keep rarely used list at bottom to be beyond cache line. */
	union {
		struct list_head peer_list;
		struct rcu_head rcu;
	};
};

struct allowedips {
	struct allowedips_node __rcu *root4;
	struct allowedips_node __rcu *root6;
	u64 seq;
};

void wg_allowedips_init(struct allowedips *table);
void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock);
int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
			    u8 cidr, struct wg_peer *peer, struct mutex *lock);
void wg_allowedips_remove_by_peer(struct allowedips *table,
				  struct wg_peer *peer, struct mutex *lock);
/* The ip input pointer should be __aligned(__alignof(u64)) */
int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);

/* These return a strong reference to a peer: */
struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
					 struct sk_buff *skb);
struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
					 struct sk_buff *skb);

#ifdef DEBUG
bool wg_allowedips_selftest(void);
#endif

#endif /* _WG_ALLOWEDIPS_H */
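A minimal usage sketch of this interface (illustrative only, not part of the
commit; the table, peer and lock are stand-ins for the driver's real objects):

static int example_set_allowed_ip(struct allowedips *table,
				  struct wg_peer *peer, struct mutex *lock)
{
	struct in_addr net = { .s_addr = htonl(0xc0a80100) };	/* 192.168.1.0 */
	int ret;

	mutex_lock(lock);
	ret = wg_allowedips_insert_v4(table, &net, 24, peer, lock);	/* /24 */
	mutex_unlock(lock);
	return ret;
}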
net/wireguard/compat/Kbuild.include (new file) | 102 lines
@@ -0,0 +1,102 @@
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.

kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))

ccflags-y += -include $(kbuild-dir)/compat/compat.h
asflags-y += -include $(kbuild-dir)/compat/compat-asm.h

ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),)
ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include
endif

ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
ccflags-y += -I$(kbuild-dir)/compat/siphash/include
wireguard-y += compat/siphash/siphash.o
endif

ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),)
ccflags-y += -I$(kbuild-dir)/compat/dst_cache/include
wireguard-y += compat/dst_cache/dst_cache.o
endif

ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h)$(CONFIG_X86),y)
ccflags-y += -I$(kbuild-dir)/compat/intel-family-x86/include
endif

ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h)$(CONFIG_X86),y)
ccflags-y += -I$(kbuild-dir)/compat/fpu-x86/include
endif

ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/simd.h)$(shell grep -s -F "generic-y += simd.h" "$(srctree)/arch/$(SRCARCH)/Kbuild" "$(srctree)/arch/$(SRCARCH)/Makefile"),)
ccflags-y += -I$(kbuild-dir)/compat/simd-asm/include
endif

ifeq ($(wildcard $(srctree)/include/linux/simd.h),)
ccflags-y += -I$(kbuild-dir)/compat/simd/include
endif

ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),)
ccflags-y += -I$(kbuild-dir)/compat/udp_tunnel/include
wireguard-y += compat/udp_tunnel/udp_tunnel.o
endif

ifeq ($(shell grep -s -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),)
ccflags-y += -include $(kbuild-dir)/compat/memneq/include.h
wireguard-y += compat/memneq/memneq.o
endif

ifeq ($(shell grep -s -F "addr_gen_mode" "$(srctree)/include/linux/ipv6.h"),)
ccflags-y += -DCOMPAT_CANNOT_USE_DEV_CNF
endif

ifdef CONFIG_HZ
ifeq ($(wildcard $(CURDIR)/include/generated/timeconst.h),)
ccflags-y += $(shell bash -c '((a=$(CONFIG_HZ), b=1000000)); while ((b > 0)); do ((t=b, b=a%b, a=t)); done; echo "-DHZ_TO_USEC_NUM=$$((1000000/a)) -DHZ_TO_USEC_DEN=$$(($(CONFIG_HZ)/a))";')
endif
endif
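# The shell snippet above computes gcd(CONFIG_HZ, 1000000) with Euclid's
# algorithm, then defines HZ_TO_USEC_NUM = 1000000/gcd and
# HZ_TO_USEC_DEN = CONFIG_HZ/gcd, so jiffies/usec conversions can use exact
# integer ratios when the generated timeconst.h is not available.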

ifeq ($(wildcard $(srctree)/arch/arm/include/asm/neon.h)$(CONFIG_ARM),y)
ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
endif
ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y)
ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include
endif

ifeq ($(CONFIG_X86_64),y)
ifeq ($(ssse3_instr),)
ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
ccflags-y += $(ssse3_instr)
asflags-y += $(ssse3_instr)
endif
ifeq ($(avx_instr),)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
ccflags-y += $(avx_instr)
asflags-y += $(avx_instr)
endif
ifeq ($(avx2_instr),)
avx2_instr := $(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
ccflags-y += $(avx2_instr)
asflags-y += $(avx2_instr)
endif
ifeq ($(avx512_instr),)
avx512_instr := $(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
ccflags-y += $(avx512_instr)
asflags-y += $(avx512_instr)
endif
ifeq ($(bmi2_instr),)
bmi2_instr := $(call as-instr,mulx %rax$(comma)%rax$(comma)%rax,-DCONFIG_AS_BMI2=1)
ccflags-y += $(bmi2_instr)
asflags-y += $(bmi2_instr)
endif
ifeq ($(adx_instr),)
adx_instr := $(call as-instr,adcx %rax$(comma)%rax,-DCONFIG_AS_ADX=1)
ccflags-y += $(adx_instr)
asflags-y += $(adx_instr)
endif
endif

ifneq ($(shell grep -s -F "\#define LINUX_PACKAGE_ID \" Debian " "$(CURDIR)/include/generated/package.h"),)
ccflags-y += -DISDEBIAN
endif
net/wireguard/compat/checksum/checksum_partial_compat.h (new file) | 208 lines
@@ -0,0 +1,208 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <net/route.h>
|
||||
#include <net/esp.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ipv6.h>
|
||||
#include <net/ip6_checksum.h>
|
||||
|
||||
#define IP6_MF 0x0001
|
||||
#define IP6_OFFSET 0xFFF8
|
||||
static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max)
|
||||
{
|
||||
if (skb_headlen(skb) >= len)
|
||||
return 0;
|
||||
if (max > skb->len)
|
||||
max = skb->len;
|
||||
if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
|
||||
return -ENOMEM;
|
||||
if (skb_headlen(skb) < len)
|
||||
return -EPROTO;
|
||||
return 0;
|
||||
}
|
||||
#define MAX_IP_HDR_LEN 128
|
||||
static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
|
||||
{
|
||||
unsigned int off;
|
||||
bool fragment;
|
||||
int err;
|
||||
fragment = false;
|
||||
err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
|
||||
fragment = true;
|
||||
off = ip_hdrlen(skb);
|
||||
err = -EPROTO;
|
||||
if (fragment)
|
||||
goto out;
|
||||
switch (ip_hdr(skb)->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
err = skb_maybe_pull_tail(skb,
|
||||
off + sizeof(struct tcphdr),
|
||||
MAX_IP_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
if (!skb_partial_csum_set(skb, off,
|
||||
offsetof(struct tcphdr, check))) {
|
||||
err = -EPROTO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (recalculate)
|
||||
tcp_hdr(skb)->check =
|
||||
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
|
||||
ip_hdr(skb)->daddr,
|
||||
skb->len - off,
|
||||
IPPROTO_TCP, 0);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
err = skb_maybe_pull_tail(skb,
|
||||
off + sizeof(struct udphdr),
|
||||
MAX_IP_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
if (!skb_partial_csum_set(skb, off,
|
||||
offsetof(struct udphdr, check))) {
|
||||
err = -EPROTO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (recalculate)
|
||||
udp_hdr(skb)->check =
|
||||
~csum_tcpudp_magic(ip_hdr(skb)->saddr,
|
||||
ip_hdr(skb)->daddr,
|
||||
skb->len - off,
|
||||
IPPROTO_UDP, 0);
|
||||
break;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
err = 0;
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
#define MAX_IPV6_HDR_LEN 256
|
||||
#define OPT_HDR(type, skb, off) \
|
||||
(type *)(skb_network_header(skb) + (off))
|
||||
static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
|
||||
{
|
||||
int err;
|
||||
u8 nexthdr;
|
||||
unsigned int off;
|
||||
unsigned int len;
|
||||
bool fragment;
|
||||
bool done;
|
||||
fragment = false;
|
||||
done = false;
|
||||
off = sizeof(struct ipv6hdr);
|
||||
err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
nexthdr = ipv6_hdr(skb)->nexthdr;
|
||||
len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
|
||||
while (off <= len && !done) {
|
||||
switch (nexthdr) {
|
||||
case IPPROTO_DSTOPTS:
|
||||
case IPPROTO_HOPOPTS:
|
||||
case IPPROTO_ROUTING: {
|
||||
struct ipv6_opt_hdr *hp;
|
||||
|
||||
err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
|
||||
nexthdr = hp->nexthdr;
|
||||
off += ipv6_optlen(hp);
|
||||
break;
|
||||
}
|
||||
case IPPROTO_FRAGMENT: {
|
||||
struct frag_hdr *hp;
|
||||
err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
hp = OPT_HDR(struct frag_hdr, skb, off);
|
||||
if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
|
||||
fragment = true;
|
||||
nexthdr = hp->nexthdr;
|
||||
off += sizeof(struct frag_hdr);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
err = -EPROTO;
|
||||
if (!done || fragment)
|
||||
goto out;
|
||||
switch (nexthdr) {
|
||||
case IPPROTO_TCP:
|
||||
err = skb_maybe_pull_tail(skb,
|
||||
off + sizeof(struct tcphdr),
|
||||
MAX_IPV6_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
if (!skb_partial_csum_set(skb, off,
|
||||
offsetof(struct tcphdr, check))) {
|
||||
err = -EPROTO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (recalculate)
|
||||
tcp_hdr(skb)->check =
|
||||
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
|
||||
&ipv6_hdr(skb)->daddr,
|
||||
skb->len - off,
|
||||
IPPROTO_TCP, 0);
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
err = skb_maybe_pull_tail(skb,
|
||||
off + sizeof(struct udphdr),
|
||||
MAX_IPV6_HDR_LEN);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
if (!skb_partial_csum_set(skb, off,
|
||||
offsetof(struct udphdr, check))) {
|
||||
err = -EPROTO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (recalculate)
|
||||
udp_hdr(skb)->check =
|
||||
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
|
||||
&ipv6_hdr(skb)->daddr,
|
||||
skb->len - off,
|
||||
IPPROTO_UDP, 0);
|
||||
break;
|
||||
default:
|
||||
goto out;
|
||||
}
|
||||
err = 0;
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
|
||||
{
|
||||
int err;
|
||||
switch (skb->protocol) {
|
||||
case htons(ETH_P_IP):
|
||||
err = skb_checksum_setup_ip(skb, recalculate);
|
||||
break;
|
||||
|
||||
case htons(ETH_P_IPV6):
|
||||
err = skb_checksum_setup_ipv6(skb, recalculate);
|
||||
break;
|
||||
default:
|
||||
err = -EPROTO;
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
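skb_checksum_setup() rebuilds the partial-checksum state (offset plus pseudo-header seed) for TCP/UDP packets whose checksum was left incomplete, e.g. by a guest or tunnel peer. A hedged usage sketch on a receive path; `my_rx` is a hypothetical hook, not part of the file above:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical receive hook, for illustration only. */
static int my_rx(struct sk_buff *skb)
{
	int err;

	/* Re-derive the pseudo-header checksum so the stack can finish it. */
	err = skb_checksum_setup(skb, true);
	if (err)
		return err; /* -EPROTO: not TCP/UDP, fragmented, or malformed */
	netif_rx(skb);
	return 0;
}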
78
net/wireguard/compat/compat-asm.h
Normal file
@ -0,0 +1,78 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_COMPATASM_H
#define _WG_COMPATASM_H

#include <linux/linkage.h>
#include <linux/kconfig.h>
#include <linux/version.h>

/* PaX compatibility */
#if defined(RAP_PLUGIN)
#undef ENTRY
#define ENTRY RAP_ENTRY
#endif

#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro ret\c, reg
#if __LINUX_ARM_ARCH__ < 6
	mov\c pc, \reg
#else
	.ifeqs "\reg", "lr"
	bx\c \reg
	.else
	mov\c pc, \reg
	.endif
#endif
.endm
.endr
#endif

#if defined(__LINUX_ARM_ARCH__) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
#include <asm/assembler.h>
#define lspush push
#define lspull pull
#undef push
#undef pull
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 5, 0)
#define SYM_FUNC_START ENTRY
#define SYM_FUNC_END ENDPROC
#endif

#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)
#define blake2s_compress_ssse3 zinc_blake2s_compress_ssse3
#define blake2s_compress_avx512 zinc_blake2s_compress_avx512
#define poly1305_init_arm zinc_poly1305_init_arm
#define poly1305_blocks_arm zinc_poly1305_blocks_arm
#define poly1305_emit_arm zinc_poly1305_emit_arm
#define poly1305_blocks_neon zinc_poly1305_blocks_neon
#define poly1305_emit_neon zinc_poly1305_emit_neon
#define poly1305_init_mips zinc_poly1305_init_mips
#define poly1305_blocks_mips zinc_poly1305_blocks_mips
#define poly1305_emit_mips zinc_poly1305_emit_mips
#define poly1305_init_x86_64 zinc_poly1305_init_x86_64
#define poly1305_blocks_x86_64 zinc_poly1305_blocks_x86_64
#define poly1305_emit_x86_64 zinc_poly1305_emit_x86_64
#define poly1305_emit_avx zinc_poly1305_emit_avx
#define poly1305_blocks_avx zinc_poly1305_blocks_avx
#define poly1305_blocks_avx2 zinc_poly1305_blocks_avx2
#define poly1305_blocks_avx512 zinc_poly1305_blocks_avx512
#define curve25519_neon zinc_curve25519_neon
#define hchacha20_ssse3 zinc_hchacha20_ssse3
#define chacha20_ssse3 zinc_chacha20_ssse3
#define chacha20_avx2 zinc_chacha20_avx2
#define chacha20_avx512 zinc_chacha20_avx512
#define chacha20_avx512vl zinc_chacha20_avx512vl
#define chacha20_mips zinc_chacha20_mips
#define chacha20_arm zinc_chacha20_arm
#define hchacha20_arm zinc_hchacha20_arm
#define chacha20_neon zinc_chacha20_neon
#endif

#endif /* _WG_COMPATASM_H */
1118
net/wireguard/compat/compat.h
Normal file
File diff suppressed because it is too large
175
net/wireguard/compat/dst_cache/dst_cache.c
Normal file
@ -0,0 +1,175 @@
/*
 * net/core/dst_cache.c - dst entry cache
 *
 * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <net/dst_cache.h>
#include <net/route.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 50)
static inline u32 rt6_get_cookie(const struct rt6_info *rt)
{
	if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
		rt = (struct rt6_info *)(rt->dst.from);

	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
}
#endif
#endif
#include <uapi/linux/in.h>

struct dst_cache_pcpu {
	unsigned long refresh_ts;
	struct dst_entry *dst;
	u32 cookie;
	union {
		struct in_addr in_saddr;
		struct in6_addr in6_saddr;
	};
};

static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
				      struct dst_entry *dst, u32 cookie)
{
	dst_release(dst_cache->dst);
	if (dst)
		dst_hold(dst);

	dst_cache->cookie = cookie;
	dst_cache->dst = dst;
}

static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
					       struct dst_cache_pcpu *idst)
{
	struct dst_entry *dst;

	dst = idst->dst;
	if (!dst)
		goto fail;

	/* the cache already holds a dst reference; it can't go away */
	dst_hold(dst);

	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
		dst_cache_per_cpu_dst_set(idst, NULL, 0);
		dst_release(dst);
		goto fail;
	}
	return dst;

fail:
	idst->refresh_ts = jiffies;
	return NULL;
}

struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
{
	if (!dst_cache->cache)
		return NULL;

	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
}

struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
{
	struct dst_cache_pcpu *idst;
	struct dst_entry *dst;

	if (!dst_cache->cache)
		return NULL;

	idst = this_cpu_ptr(dst_cache->cache);
	dst = dst_cache_per_cpu_get(dst_cache, idst);
	if (!dst)
		return NULL;

	*saddr = idst->in_saddr.s_addr;
	return container_of(dst, struct rtable, dst);
}

void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
		       __be32 saddr)
{
	struct dst_cache_pcpu *idst;

	if (!dst_cache->cache)
		return;

	idst = this_cpu_ptr(dst_cache->cache);
	dst_cache_per_cpu_dst_set(idst, dst, 0);
	idst->in_saddr.s_addr = saddr;
}

#if IS_ENABLED(CONFIG_IPV6)
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
		       const struct in6_addr *addr)
{
	struct dst_cache_pcpu *idst;

	if (!dst_cache->cache)
		return;

	idst = this_cpu_ptr(dst_cache->cache);
	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
				  rt6_get_cookie((struct rt6_info *)dst));
	idst->in6_saddr = *addr;
}

struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
				    struct in6_addr *saddr)
{
	struct dst_cache_pcpu *idst;
	struct dst_entry *dst;

	if (!dst_cache->cache)
		return NULL;

	idst = this_cpu_ptr(dst_cache->cache);
	dst = dst_cache_per_cpu_get(dst_cache, idst);
	if (!dst)
		return NULL;

	*saddr = idst->in6_saddr;
	return dst;
}
#endif

int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
	BUG_ON(gfp & GFP_ATOMIC);
	dst_cache->cache = alloc_percpu(struct dst_cache_pcpu);
#else
	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
					    gfp | __GFP_ZERO);
#endif
	if (!dst_cache->cache)
		return -ENOMEM;

	dst_cache_reset(dst_cache);
	return 0;
}

void dst_cache_destroy(struct dst_cache *dst_cache)
{
	int i;

	if (!dst_cache->cache)
		return;

	for_each_possible_cpu(i)
		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);

	free_percpu(dst_cache->cache);
}
97
net/wireguard/compat/dst_cache/include/net/dst_cache.h
Normal file
@ -0,0 +1,97 @@
#ifndef _WG_NET_DST_CACHE_H
#define _WG_NET_DST_CACHE_H

#include <linux/jiffies.h>
#include <net/dst.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif

struct dst_cache {
	struct dst_cache_pcpu __percpu *cache;
	unsigned long reset_ts;
};

/**
 * dst_cache_get - perform cache lookup
 * @dst_cache: the cache
 *
 * The caller should use dst_cache_get_ip4() if it needs to retrieve the
 * source address to be used when xmitting to the cached dst.
 * local BH must be disabled.
 */
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);

/**
 * dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
 * @dst_cache: the cache
 * @saddr: return value for the retrieved source address
 *
 * local BH must be disabled.
 */
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);

/**
 * dst_cache_set_ip4 - store the ipv4 dst into the cache
 * @dst_cache: the cache
 * @dst: the entry to be cached
 * @saddr: the source address to be stored inside the cache
 *
 * local BH must be disabled.
 */
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
		       __be32 saddr);

#if IS_ENABLED(CONFIG_IPV6)

/**
 * dst_cache_set_ip6 - store the ipv6 dst into the cache
 * @dst_cache: the cache
 * @dst: the entry to be cached
 * @addr: the source address to be stored inside the cache
 *
 * local BH must be disabled.
 */
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
		       const struct in6_addr *addr);

/**
 * dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
 * @dst_cache: the cache
 * @saddr: return value for the retrieved source address
 *
 * local BH must be disabled.
 */
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
				    struct in6_addr *saddr);
#endif

/**
 * dst_cache_reset - invalidate the cache contents
 * @dst_cache: the cache
 *
 * This does not free the cached dst, to avoid races and contentions.
 * The dst will be freed on a later cache lookup.
 */
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
	dst_cache->reset_ts = jiffies;
}

/**
 * dst_cache_init - initialize the cache, allocating the required storage
 * @dst_cache: the cache
 * @gfp: allocation flags
 */
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);

/**
 * dst_cache_destroy - empty the cache and free the allocated storage
 * @dst_cache: the cache
 *
 * No synchronization is enforced: it must be called only when the cache
 * is unused.
 */
void dst_cache_destroy(struct dst_cache *dst_cache);

#endif /* _WG_NET_DST_CACHE_H */
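Taken together, the API caches one fully resolved route per CPU and revalidates it lazily on lookup. A hedged sketch of the typical tunnel transmit pattern, assuming a device private struct that embeds a `struct dst_cache`; `my_tunnel` and `my_tunnel_route` are illustrative names only:

#include <net/dst_cache.h>
#include <net/route.h>

/* Illustrative only: a tunnel-style route lookup using the dst_cache API. */
struct my_tunnel {
	struct dst_cache route_cache; /* set up once with dst_cache_init() */
};

static struct rtable *my_tunnel_route(struct my_tunnel *tun, struct flowi4 *fl4)
{
	struct rtable *rt;
	__be32 saddr;

	local_bh_disable(); /* the cache is per-CPU */
	rt = dst_cache_get_ip4(&tun->route_cache, &saddr);
	if (!rt) {
		rt = ip_route_output_key(&init_net, fl4); /* slow path */
		if (!IS_ERR(rt))
			dst_cache_set_ip4(&tun->route_cache, &rt->dst,
					  fl4->saddr);
	}
	local_bh_enable();
	return rt;
}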
1
net/wireguard/compat/fpu-x86/include/asm/fpu/api.h
Normal file
@ -0,0 +1 @@
#include <asm/i387.h>
@ -0,0 +1,73 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_INTEL_FAMILY_H
#define _ASM_X86_INTEL_FAMILY_H

/*
 * "Big Core" Processors (Branded as Core, Xeon, etc...)
 *
 * The "_X" parts are generally the EP and EX Xeons, or the
 * "Extreme" ones, like Broadwell-E.
 *
 * Things ending in "2" are usually because we have no better
 * name for them. There's no processor called "SILVERMONT2".
 */

#define INTEL_FAM6_CORE_YONAH		0x0E

#define INTEL_FAM6_CORE2_MEROM		0x0F
#define INTEL_FAM6_CORE2_MEROM_L	0x16
#define INTEL_FAM6_CORE2_PENRYN		0x17
#define INTEL_FAM6_CORE2_DUNNINGTON	0x1D

#define INTEL_FAM6_NEHALEM		0x1E
#define INTEL_FAM6_NEHALEM_G		0x1F /* Auburndale / Havendale */
#define INTEL_FAM6_NEHALEM_EP		0x1A
#define INTEL_FAM6_NEHALEM_EX		0x2E

#define INTEL_FAM6_WESTMERE		0x25
#define INTEL_FAM6_WESTMERE_EP		0x2C
#define INTEL_FAM6_WESTMERE_EX		0x2F

#define INTEL_FAM6_SANDYBRIDGE		0x2A
#define INTEL_FAM6_SANDYBRIDGE_X	0x2D
#define INTEL_FAM6_IVYBRIDGE		0x3A
#define INTEL_FAM6_IVYBRIDGE_X		0x3E

#define INTEL_FAM6_HASWELL_CORE		0x3C
#define INTEL_FAM6_HASWELL_X		0x3F
#define INTEL_FAM6_HASWELL_ULT		0x45
#define INTEL_FAM6_HASWELL_GT3E		0x46

#define INTEL_FAM6_BROADWELL_CORE	0x3D
#define INTEL_FAM6_BROADWELL_GT3E	0x47
#define INTEL_FAM6_BROADWELL_X		0x4F
#define INTEL_FAM6_BROADWELL_XEON_D	0x56

#define INTEL_FAM6_SKYLAKE_MOBILE	0x4E
#define INTEL_FAM6_SKYLAKE_DESKTOP	0x5E
#define INTEL_FAM6_SKYLAKE_X		0x55
#define INTEL_FAM6_KABYLAKE_MOBILE	0x8E
#define INTEL_FAM6_KABYLAKE_DESKTOP	0x9E

/* "Small Core" Processors (Atom) */

#define INTEL_FAM6_ATOM_PINEVIEW	0x1C
#define INTEL_FAM6_ATOM_LINCROFT	0x26
#define INTEL_FAM6_ATOM_PENWELL		0x27
#define INTEL_FAM6_ATOM_CLOVERVIEW	0x35
#define INTEL_FAM6_ATOM_CEDARVIEW	0x36
#define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT	0x5C
#define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
#define INTEL_FAM6_ATOM_GEMINI_LAKE	0x7A

/* Xeon Phi */

#define INTEL_FAM6_XEON_PHI_KNL		0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM		0x85 /* Knights Mill */

#endif /* _ASM_X86_INTEL_FAMILY_H */
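These constants are family-6 model numbers, so they are only meaningful after confirming the vendor and family. A minimal hedged sketch of a runtime check; `is_skylake_server` is an illustrative helper, not part of the header:

#include <asm/processor.h>
#include <asm/intel-family.h>

/* Illustrative only: gate a quirk on one specific microarchitecture. */
static bool is_skylake_server(void)
{
	return boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	       boot_cpu_data.x86 == 6 &&
	       boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X;
}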
5
net/wireguard/compat/memneq/include.h
Normal file
@ -0,0 +1,5 @@
extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
static inline int crypto_memneq(const void *a, const void *b, size_t size)
{
	return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
}
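crypto_memneq() returns nonzero when the buffers differ, in time that depends only on the length, never on where the first mismatch sits, which is exactly what comparing secrets requires. A hedged sketch; `check_tag` is a hypothetical helper:

#include <crypto/algapi.h>
#include <linux/types.h>

/* Hypothetical authenticator check, for illustration only. */
static bool check_tag(const u8 computed[16], const u8 received[16])
{
	/*
	 * memcmp() would return at the first differing byte and leak,
	 * via timing, how much of the tag an attacker has guessed right.
	 */
	return crypto_memneq(computed, received, 16) == 0;
}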
170
net/wireguard/compat/memneq/memneq.c
Normal file
@ -0,0 +1,170 @@
/*
 * Constant-time equality testing of memory regions.
 *
 * Authors:
 *
 *   James Yonan <james@openvpn.net>
 *   Daniel Borkmann <dborkman@redhat.com>
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 * The full GNU General Public License is included in this distribution
 * in the file called LICENSE.GPL.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of OpenVPN Technologies nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <crypto/algapi.h>

/* Make the optimizer believe the variable can be manipulated arbitrarily. */
#define COMPILER_OPTIMIZER_HIDE_VAR(var) asm("" : "=r" (var) : "0" (var))

#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ

/* Generic path for arbitrary size */
static inline unsigned long
__crypto_memneq_generic(const void *a, const void *b, size_t size)
{
	unsigned long neq = 0;

#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
	while (size >= sizeof(unsigned long)) {
		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		a += sizeof(unsigned long);
		b += sizeof(unsigned long);
		size -= sizeof(unsigned long);
	}
#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
	while (size > 0) {
		neq |= *(unsigned char *)a ^ *(unsigned char *)b;
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		a += 1;
		b += 1;
		size -= 1;
	}
	return neq;
}

/* Loop-free fast-path for frequently used 16-byte size */
static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
{
	unsigned long neq = 0;

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	if (sizeof(unsigned long) == 8) {
		neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
	} else if (sizeof(unsigned int) == 4) {
		neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
	} else
#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
	{
		neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
		neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
		COMPILER_OPTIMIZER_HIDE_VAR(neq);
	}

	return neq;
}

/* Compare two areas of memory without leaking timing information,
 * and with special optimizations for common sizes.  Users should
 * not call this function directly, but should instead use
 * crypto_memneq defined in crypto/algapi.h.
 */
noinline unsigned long __crypto_memneq(const void *a, const void *b,
				       size_t size)
{
	switch (size) {
	case 16:
		return __crypto_memneq_16(a, b);
	default:
		return __crypto_memneq_generic(a, b, size);
	}
}

#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
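The `asm` barrier matters because, without it, the compiler may notice that `neq` only ever accumulates bits and legally insert an early exit once it becomes nonzero, quietly reintroducing the timing leak. A sketch of the failure mode the macro defends against, written out explicitly for illustration:

#include <linux/types.h>

/* What the optimizer could legally turn the loop into without the barrier: */
static unsigned long leaky_memneq(const u8 *a, const u8 *b, size_t size)
{
	unsigned long neq = 0;
	size_t i;

	for (i = 0; i < size; i++) {
		neq |= a[i] ^ b[i];
		if (neq)	/* early exit: runtime now reveals the
				 * position of the first differing byte */
			return neq;
	}
	return neq;
}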
7
net/wireguard/compat/neon-arm/include/asm/neon.h
Normal file
@ -0,0 +1,7 @@
#ifndef _ARCH_ARM_ASM_NEON
#define _ARCH_ARM_ASM_NEON
#define kernel_neon_begin() \
	BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
#define kernel_neon_end() \
	BUILD_BUG_ON_MSG(1, "This kernel does not support ARM NEON")
#endif
674
net/wireguard/compat/ptr_ring/include/linux/ptr_ring.h
Normal file
@ -0,0 +1,674 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Definitions for the 'struct ptr_ring' datastructure.
 *
 * Author:
 *	Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * This is a limited-size FIFO maintaining pointers in FIFO order, with
 * one CPU producing entries and another consuming entries from a FIFO.
 *
 * This implementation tries to minimize cache-contention when there is a
 * single producer and a single consumer CPU.
 */

#ifndef _LINUX_PTR_RING_H
#define _LINUX_PTR_RING_H 1

#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/errno.h>
#endif

struct ptr_ring {
	int producer ____cacheline_aligned_in_smp;
	spinlock_t producer_lock;
	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
	int consumer_tail; /* next entry to invalidate */
	spinlock_t consumer_lock;
	/* Shared consumer/producer data */
	/* Read-only by both the producer and the consumer */
	int size ____cacheline_aligned_in_smp; /* max entries in queue */
	int batch; /* number of entries to consume in a batch */
	void **queue;
};

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 *
 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
 * see e.g. ptr_ring_full.
 */
static inline bool __ptr_ring_full(struct ptr_ring *r)
{
	return r->queue[r->producer];
}

static inline bool ptr_ring_full(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_full(r);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline bool ptr_ring_full_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax(). Callers must hold producer_lock.
 * Callers are responsible for making sure the pointer that is being queued
 * points to valid data.
 */
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	if (unlikely(!r->size) || r->queue[r->producer])
		return -ENOSPC;

	/* Make sure the pointer we are storing points to valid data. */
	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
	smp_wmb();

	WRITE_ONCE(r->queue[r->producer++], ptr);
	if (unlikely(r->producer >= r->size))
		r->producer = 0;
	return 0;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * consume in interrupt or BH context, you must disable interrupts/BH when
 * calling this.
 */
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
	if (likely(r->size))
		return READ_ONCE(r->queue[r->consumer_head]);
	return NULL;
}

/*
 * Test ring empty status without taking any locks.
 *
 * NB: This is only safe to call if ring is never resized.
 *
 * However, if some other CPU consumes ring entries at the same time, the value
 * returned is not guaranteed to be correct.
 *
 * In this case - to avoid incorrectly detecting the ring
 * as empty - the CPU consuming the ring entries is responsible
 * for either consuming all ring entries until the ring is empty,
 * or synchronizing with some other CPU and causing it to
 * re-test __ptr_ring_empty and/or consume the ring entries
 * after the synchronization point.
 *
 * Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 */
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
	if (likely(r->size))
		return !r->queue[READ_ONCE(r->consumer_head)];
	return true;
}

static inline bool ptr_ring_empty(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_empty(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
	/* Fundamentally, what we want to do is update consumer
	 * index and zero out the entry so producer can reuse it.
	 * Doing it naively at each consume would be as simple as:
	 *       consumer = r->consumer;
	 *       r->queue[consumer++] = NULL;
	 *       if (unlikely(consumer >= r->size))
	 *               consumer = 0;
	 *       r->consumer = consumer;
	 * but that is suboptimal when the ring is full as producer is writing
	 * out new entries in the same cache line. Defer these updates until a
	 * batch of entries has been consumed.
	 */
	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
	 * to work correctly.
	 */
	int consumer_head = r->consumer_head;
	int head = consumer_head++;

	/* Once we have processed enough entries invalidate them in
	 * the ring all at once so producer can reuse their space in the ring.
	 * We also do this when we reach end of the ring - not mandatory
	 * but helps keep the implementation simple.
	 */
	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
		     consumer_head >= r->size)) {
		/* Zero out entries in the reverse order: this way we touch the
		 * cache line that the producer might currently be reading last;
		 * the producer won't make progress and touch other cache lines
		 * besides the first one until we write out all entries.
		 */
		while (likely(head >= r->consumer_tail))
			r->queue[head--] = NULL;
		r->consumer_tail = consumer_head;
	}
	if (unlikely(consumer_head >= r->size)) {
		consumer_head = 0;
		r->consumer_tail = 0;
	}
	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
	WRITE_ONCE(r->consumer_head, consumer_head);
}

static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 15, 0)
	/* The READ_ONCE in __ptr_ring_peek doesn't imply a barrier on old kernels. */
	smp_read_barrier_depends();
#endif

	return ptr;
}

static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	void *ptr;
	int i;

	for (i = 0; i < n; i++) {
		ptr = __ptr_ring_consume(r);
		if (!ptr)
			break;
		array[i] = ptr;
	}

	return i;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * call this in interrupt or BH context, you must disable interrupts/BH when
 * producing.
 */
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	spin_lock(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_irq(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irq(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
	unsigned long flags;
	void *ptr;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ptr;
}

static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_bh(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_bh(&r->consumer_lock);

	return ptr;
}

static inline int ptr_ring_consume_batched(struct ptr_ring *r,
					   void **array, int n)
{
	int ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
					       void **array, int n)
{
	int ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
					       void **array, int n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
					      void **array, int n)
{
	int ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Cast to structure type and call a function without discarding from FIFO.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))

#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f;\
	\
	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})

/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
 * documentation for vmalloc for which of them are legal.
 */
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
		return NULL;
	return kvmalloc(size * sizeof(void *), gfp | __GFP_ZERO);
}

static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
	r->size = size;
	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
	/* We need to set batch at least to 1 to make logic
	 * in __ptr_ring_discard_one work correctly.
	 * Batching too much (because ring is small) would cause a lot of
	 * burstiness. Needs tuning, for now disable batching.
	 */
	if (r->batch > r->size / 2 || !r->batch)
		r->batch = 1;
}

static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
	if (!r->queue)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);
	r->producer = r->consumer_head = r->consumer_tail = 0;
	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}

/*
 * Return entries into ring. Destroy entries that don't fit.
 *
 * Note: this is expected to be a rare slow path operation.
 *
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
				      void (*destroy)(void *))
{
	unsigned long flags;
	int head;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	if (!r->size)
		goto done;

	/*
	 * Clean out buffered entries (for simplicity). This way following code
	 * can test entries for NULL and if not assume they are valid.
	 */
	head = r->consumer_head - 1;
	while (likely(head >= r->consumer_tail))
		r->queue[head--] = NULL;
	r->consumer_tail = r->consumer_head;

	/*
	 * Go over entries in batch, start moving head back and copy entries.
	 * Stop when we run into previously unconsumed entries.
	 */
	while (n) {
		head = r->consumer_head - 1;
		if (head < 0)
			head = r->size - 1;
		if (r->queue[head]) {
			/* This batch entry will have to be destroyed. */
			goto done;
		}
		r->queue[head] = batch[--n];
		r->consumer_tail = head;
		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
		WRITE_ONCE(r->consumer_head, head);
	}

done:
	/* Destroy all entries left in the batch. */
	while (n)
		destroy(batch[--n]);
	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);
}

static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	int producer = 0;
	void **old;
	void *ptr;

	while ((ptr = __ptr_ring_consume(r)))
		if (producer < size)
			queue[producer++] = ptr;
		else if (destroy)
			destroy(ptr);

	if (producer >= size)
		producer = 0;
	__ptr_ring_set_size(r, size);
	r->producer = producer;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	old = r->queue;
	r->queue = queue;

	return old;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
				  void (*destroy)(void *))
{
	unsigned long flags;
	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
	void **old;

	if (!queue)
		return -ENOMEM;

	spin_lock_irqsave(&(r)->consumer_lock, flags);
	spin_lock(&(r)->producer_lock);

	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);

	spin_unlock(&(r)->producer_lock);
	spin_unlock_irqrestore(&(r)->consumer_lock, flags);

	kvfree(old);

	return 0;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
					   unsigned int nrings,
					   int size,
					   gfp_t gfp, void (*destroy)(void *))
{
	unsigned long flags;
	void ***queues;
	int i;

	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
	if (!queues)
		goto noqueues;

	for (i = 0; i < nrings; ++i) {
		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
		if (!queues[i])
			goto nomem;
	}

	for (i = 0; i < nrings; ++i) {
		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
		spin_lock(&(rings[i])->producer_lock);
		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
						  size, gfp, destroy);
		spin_unlock(&(rings[i])->producer_lock);
		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
	}

	for (i = 0; i < nrings; ++i)
		kvfree(queues[i]);

	kfree(queues);

	return 0;

nomem:
	while (--i >= 0)
		kvfree(queues[i]);

	kfree(queues);

noqueues:
	return -ENOMEM;
}

static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
	void *ptr;

	if (destroy)
		while ((ptr = ptr_ring_consume(r)))
			destroy(ptr);
	kvfree(r->queue);
}

#endif /* _LINUX_PTR_RING_H */
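A hedged usage sketch of the ring as a bounded single-producer/single-consumer queue, process context only so the plain lock variants suffice; `work_item` and `demo` are illustrative names:

#include <linux/ptr_ring.h>
#include <linux/slab.h>

/* Illustrative only: pass heap objects through a 256-slot FIFO. */
struct work_item { int id; };

static int demo(void)
{
	struct ptr_ring ring;
	struct work_item *item, *out;
	int err;

	err = ptr_ring_init(&ring, 256, GFP_KERNEL);
	if (err)
		return err;

	item = kmalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		goto cleanup;
	item->id = 1;

	if (ptr_ring_produce(&ring, item))	/* -ENOSPC when full */
		kfree(item);

	while ((out = ptr_ring_consume(&ring)))	/* NULL when empty */
		kfree(out);

cleanup:
	ptr_ring_cleanup(&ring, NULL);	/* ring already drained above */
	return 0;
}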
21
net/wireguard/compat/simd-asm/include/asm/simd.h
Normal file
@ -0,0 +1,21 @@
#ifndef _COMPAT_ASM_SIMD_H
#define _COMPAT_ASM_SIMD_H

#if defined(CONFIG_X86_64)
#include <asm/fpu/api.h>
#endif

static __must_check inline bool may_use_simd(void)
{
#if defined(CONFIG_X86_64)
	return irq_fpu_usable();
#elif defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON)
	return true;
#elif defined(CONFIG_ARM) && defined(CONFIG_KERNEL_MODE_NEON)
	return !in_nmi() && !in_irq() && !in_serving_softirq();
#else
	return false;
#endif
}

#endif
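may_use_simd() answers one question: is it safe to touch FPU/NEON state from the current context? An x86-flavored hedged sketch of the guard it enables; `transform_simd` and `transform_generic` are hypothetical routines, not part of the header:

#include <linux/types.h>
#include <asm/simd.h>
#include <asm/fpu/api.h>

/* Hypothetical routines, for illustration only. */
void transform_simd(u8 *data, size_t len);
void transform_generic(u8 *data, size_t len);

static void do_transform(u8 *data, size_t len)
{
	if (may_use_simd()) {
		kernel_fpu_begin();	/* claim the FPU for kernel use */
		transform_simd(data, len);
		kernel_fpu_end();
	} else {
		transform_generic(data, len);	/* scalar fallback */
	}
}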
70
net/wireguard/compat/simd/include/linux/simd.h
Normal file
@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_SIMD_H
#define _WG_SIMD_H

#include <linux/sched.h>
#include <asm/simd.h>
#if defined(CONFIG_X86_64)
#include <linux/version.h>
#include <asm/fpu/api.h>
#elif defined(CONFIG_KERNEL_MODE_NEON)
#include <asm/neon.h>
#endif

typedef enum {
	HAVE_NO_SIMD = 1 << 0,
	HAVE_FULL_SIMD = 1 << 1,
	HAVE_SIMD_IN_USE = 1 << 31
} simd_context_t;

#define DONT_USE_SIMD ((simd_context_t []){ HAVE_NO_SIMD })

static inline void simd_get(simd_context_t *ctx)
{
	*ctx = !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
}

static inline void simd_put(simd_context_t *ctx)
{
#if defined(CONFIG_X86_64)
	if (*ctx & HAVE_SIMD_IN_USE)
		kernel_fpu_end();
#elif defined(CONFIG_KERNEL_MODE_NEON)
	if (*ctx & HAVE_SIMD_IN_USE)
		kernel_neon_end();
#endif
	*ctx = HAVE_NO_SIMD;
}

static inline bool simd_relax(simd_context_t *ctx)
{
#ifdef CONFIG_PREEMPT
	if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
		simd_put(ctx);
		simd_get(ctx);
		return true;
	}
#endif
	return false;
}

static __must_check inline bool simd_use(simd_context_t *ctx)
{
	if (!(*ctx & HAVE_FULL_SIMD))
		return false;
	if (*ctx & HAVE_SIMD_IN_USE)
		return true;
#if defined(CONFIG_X86_64)
	kernel_fpu_begin();
#elif defined(CONFIG_KERNEL_MODE_NEON)
	kernel_neon_begin();
#endif
	*ctx |= HAVE_SIMD_IN_USE;
	return true;
}

#endif /* _WG_SIMD_H */
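The context object lets long-running crypto batch many blocks under a single FPU save/restore while still bounding scheduling latency: simd_relax() drops and re-acquires the unit when a reschedule is due. A hedged sketch of the intended call pattern; `process_block` is a hypothetical routine that takes a "SIMD available" flag:

#include <linux/simd.h>
#include <linux/types.h>

/* Hypothetical per-block routine, for illustration only. */
void process_block(u8 *block, bool have_simd);

static void process_all(u8 *blocks, size_t nblocks, size_t blksize)
{
	simd_context_t simd_context;
	size_t i;

	simd_get(&simd_context);
	for (i = 0; i < nblocks; ++i) {
		process_block(blocks + i * blksize, simd_use(&simd_context));
		simd_relax(&simd_context); /* yields the FPU if need_resched() */
	}
	simd_put(&simd_context);
}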
140
net/wireguard/compat/siphash/include/linux/siphash.h
Normal file
@ -0,0 +1,140 @@
/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This file is provided under a dual BSD/GPLv2 license.
 *
 * SipHash: a fast short-input PRF
 * https://131002.net/siphash/
 *
 * This implementation is specifically for SipHash2-4 for a secure PRF
 * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
 * hashtables.
 */

#ifndef _WG_LINUX_SIPHASH_H
#define _WG_LINUX_SIPHASH_H

#include <linux/types.h>
#include <linux/kernel.h>

#define SIPHASH_ALIGNMENT __alignof__(u64)
typedef struct {
	u64 key[2];
} siphash_key_t;

u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
#endif

u64 siphash_1u64(const u64 a, const siphash_key_t *key);
u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
		 const siphash_key_t *key);
u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
		 const siphash_key_t *key);
u64 siphash_1u32(const u32 a, const siphash_key_t *key);
u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
		 const siphash_key_t *key);

static inline u64 siphash_2u32(const u32 a, const u32 b,
			       const siphash_key_t *key)
{
	return siphash_1u64((u64)b << 32 | a, key);
}
static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
			       const u32 d, const siphash_key_t *key)
{
	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
}

static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
				     const siphash_key_t *key)
{
	if (__builtin_constant_p(len) && len == 4)
		return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
	if (__builtin_constant_p(len) && len == 8)
		return siphash_1u64(le64_to_cpu(data[0]), key);
	if (__builtin_constant_p(len) && len == 16)
		return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
				    key);
	if (__builtin_constant_p(len) && len == 24)
		return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
				    le64_to_cpu(data[2]), key);
	if (__builtin_constant_p(len) && len == 32)
		return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
				    le64_to_cpu(data[2]), le64_to_cpu(data[3]),
				    key);
	return __siphash_aligned(data, len, key);
}

/**
 * siphash - compute 64-bit siphash PRF value
 * @data: buffer to hash
 * @len: size of @data
 * @key: the siphash key
 */
static inline u64 siphash(const void *data, size_t len,
			  const siphash_key_t *key)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
		return __siphash_unaligned(data, len, key);
#endif
	return ___siphash_aligned(data, len, key);
}

#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
typedef struct {
	unsigned long key[2];
} hsiphash_key_t;

u32 __hsiphash_aligned(const void *data, size_t len,
		       const hsiphash_key_t *key);
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
			 const hsiphash_key_t *key);
#endif

u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
		  const hsiphash_key_t *key);
u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
		  const hsiphash_key_t *key);

static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
				      const hsiphash_key_t *key)
{
	if (__builtin_constant_p(len) && len == 4)
		return hsiphash_1u32(le32_to_cpu(data[0]), key);
	if (__builtin_constant_p(len) && len == 8)
		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
				     key);
	if (__builtin_constant_p(len) && len == 12)
		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
				     le32_to_cpu(data[2]), key);
	if (__builtin_constant_p(len) && len == 16)
		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
				     key);
	return __hsiphash_aligned(data, len, key);
}

/**
 * hsiphash - compute 32-bit hsiphash PRF value
 * @data: buffer to hash
 * @len: size of @data
 * @key: the hsiphash key
 */
static inline u32 hsiphash(const void *data, size_t len,
			   const hsiphash_key_t *key)
{
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
		return __hsiphash_unaligned(data, len, key);
#endif
	return ___hsiphash_aligned(data, len, key);
}

#endif /* _WG_LINUX_SIPHASH_H */
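A hedged usage sketch: hashing a flow tuple with a per-boot random key, the keyed-hashtable use the header describes. The struct and names are illustrative, and the key must be filled with random bytes once before the first lookup:

#include <linux/siphash.h>
#include <linux/random.h>

/* Illustrative only: index a flow into a hashtable bucket. */
struct flow_key {
	__be32 saddr, daddr;
	__be16 sport, dport;
} __aligned(SIPHASH_ALIGNMENT);

static siphash_key_t flow_hash_key __read_mostly;

static u32 flow_bucket(const struct flow_key *key, u32 nbuckets)
{
	/* Initialize once at boot, e.g.:
	 * get_random_bytes(&flow_hash_key, sizeof(flow_hash_key));
	 */
	return (u32)siphash(key, sizeof(*key), &flow_hash_key) % nbuckets;
}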
539
net/wireguard/compat/siphash/siphash.c
Normal file
539
net/wireguard/compat/siphash/siphash.c
Normal file
@ -0,0 +1,539 @@
|
||||
/* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license.
|
||||
*
|
||||
* SipHash: a fast short-input PRF
|
||||
* https://131002.net/siphash/
|
||||
*
|
||||
* This implementation is specifically for SipHash2-4 for a secure PRF
|
||||
* and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
|
||||
* hashtables.
|
||||
*/
|
||||
|
||||
#include <linux/siphash.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
|
||||
#ifdef __LITTLE_ENDIAN
|
||||
#define bytemask_from_count(cnt) (~(~0ul << (cnt)*8))
|
||||
#else
|
||||
#define bytemask_from_count(cnt) (~(~0ul >> (cnt)*8))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
|
||||
#include <linux/dcache.h>
|
||||
#include <asm/word-at-a-time.h>
|
||||
#endif
|
||||
|
||||
#define SIPROUND \
	do { \
	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
	} while (0)

#define PREAMBLE(len) \
	u64 v0 = 0x736f6d6570736575ULL; \
	u64 v1 = 0x646f72616e646f6dULL; \
	u64 v2 = 0x6c7967656e657261ULL; \
	u64 v3 = 0x7465646279746573ULL; \
	u64 b = ((u64)(len)) << 56; \
	v3 ^= key->key[1]; \
	v2 ^= key->key[0]; \
	v1 ^= key->key[1]; \
	v0 ^= key->key[0];

#define POSTAMBLE \
	v3 ^= b; \
	SIPROUND; \
	SIPROUND; \
	v0 ^= b; \
	v2 ^= 0xff; \
	SIPROUND; \
	SIPROUND; \
	SIPROUND; \
	SIPROUND; \
	return (v0 ^ v1) ^ (v2 ^ v3);

u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u64));
	const u8 left = len & (sizeof(u64) - 1);
	u64 m;
	PREAMBLE(len)
	for (; data != end; data += sizeof(u64)) {
		m = le64_to_cpup(data);
		v3 ^= m;
		SIPROUND;
		SIPROUND;
		v0 ^= m;
	}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
	if (left)
		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
						  bytemask_from_count(left)));
#else
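	/* Cases 7..5 and 3 fall through deliberately to gather the
	 * remaining tail bytes.
	 */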
	switch (left) {
	case 7: b |= ((u64)end[6]) << 48;
	case 6: b |= ((u64)end[5]) << 40;
	case 5: b |= ((u64)end[4]) << 32;
	case 4: b |= le32_to_cpup(data); break;
	case 3: b |= ((u64)end[2]) << 16;
	case 2: b |= le16_to_cpup(data); break;
	case 1: b |= end[0];
	}
#endif
	POSTAMBLE
}

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u64));
	const u8 left = len & (sizeof(u64) - 1);
	u64 m;
	PREAMBLE(len)
	for (; data != end; data += sizeof(u64)) {
		m = get_unaligned_le64(data);
		v3 ^= m;
		SIPROUND;
		SIPROUND;
		v0 ^= m;
	}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
	if (left)
		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
						  bytemask_from_count(left)));
#else
	switch (left) {
	case 7: b |= ((u64)end[6]) << 48;
	case 6: b |= ((u64)end[5]) << 40;
	case 5: b |= ((u64)end[4]) << 32;
	case 4: b |= get_unaligned_le32(end); break;
	case 3: b |= ((u64)end[2]) << 16;
	case 2: b |= get_unaligned_le16(end); break;
	case 1: b |= end[0];
	}
#endif
	POSTAMBLE
}
#endif

/**
 * siphash_1u64 - compute 64-bit siphash PRF value of a u64
 * @first: first u64
 * @key: the siphash key
 */
u64 siphash_1u64(const u64 first, const siphash_key_t *key)
{
	PREAMBLE(8)
	v3 ^= first;
	SIPROUND;
	SIPROUND;
	v0 ^= first;
	POSTAMBLE
}

/**
 * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
 * @first: first u64
 * @second: second u64
 * @key: the siphash key
 */
u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
{
	PREAMBLE(16)
	v3 ^= first;
	SIPROUND;
	SIPROUND;
	v0 ^= first;
	v3 ^= second;
	SIPROUND;
	SIPROUND;
	v0 ^= second;
	POSTAMBLE
}

/**
 * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
 * @first: first u64
 * @second: second u64
 * @third: third u64
 * @key: the siphash key
 */
u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
		 const siphash_key_t *key)
{
	PREAMBLE(24)
	v3 ^= first;
	SIPROUND;
	SIPROUND;
	v0 ^= first;
	v3 ^= second;
	SIPROUND;
	SIPROUND;
	v0 ^= second;
	v3 ^= third;
	SIPROUND;
	SIPROUND;
	v0 ^= third;
	POSTAMBLE
}

/**
 * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
 * @first: first u64
 * @second: second u64
 * @third: third u64
 * @forth: fourth u64
 * @key: the siphash key
 */
u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
		 const u64 forth, const siphash_key_t *key)
{
	PREAMBLE(32)
	v3 ^= first;
	SIPROUND;
	SIPROUND;
	v0 ^= first;
	v3 ^= second;
	SIPROUND;
	SIPROUND;
	v0 ^= second;
	v3 ^= third;
	SIPROUND;
	SIPROUND;
	v0 ^= third;
	v3 ^= forth;
	SIPROUND;
	SIPROUND;
	v0 ^= forth;
	POSTAMBLE
}

u64 siphash_1u32(const u32 first, const siphash_key_t *key)
{
	PREAMBLE(4)
	b |= first;
	POSTAMBLE
}

u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
		 const siphash_key_t *key)
{
	u64 combined = (u64)second << 32 | first;
	PREAMBLE(12)
	v3 ^= combined;
	SIPROUND;
	SIPROUND;
	v0 ^= combined;
	b |= third;
	POSTAMBLE
}

#if BITS_PER_LONG == 64
/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
 * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
 */

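/* HSIPROUND below reuses the 64-bit SIPROUND; HPOSTAMBLE runs one
 * compression round plus three finalization rounds, i.e. SipHash1-3.
 */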
#define HSIPROUND SIPROUND
#define HPREAMBLE(len) PREAMBLE(len)
#define HPOSTAMBLE \
	v3 ^= b; \
	HSIPROUND; \
	v0 ^= b; \
	v2 ^= 0xff; \
	HSIPROUND; \
	HSIPROUND; \
	HSIPROUND; \
	return (v0 ^ v1) ^ (v2 ^ v3);

u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u64));
	const u8 left = len & (sizeof(u64) - 1);
	u64 m;
	HPREAMBLE(len)
	for (; data != end; data += sizeof(u64)) {
		m = le64_to_cpup(data);
		v3 ^= m;
		HSIPROUND;
		v0 ^= m;
	}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
	if (left)
		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
						  bytemask_from_count(left)));
#else
	switch (left) {
	case 7: b |= ((u64)end[6]) << 48;
	case 6: b |= ((u64)end[5]) << 40;
	case 5: b |= ((u64)end[4]) << 32;
	case 4: b |= le32_to_cpup(data); break;
	case 3: b |= ((u64)end[2]) << 16;
	case 2: b |= le16_to_cpup(data); break;
	case 1: b |= end[0];
	}
#endif
	HPOSTAMBLE
}

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
			 const hsiphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u64));
	const u8 left = len & (sizeof(u64) - 1);
	u64 m;
	HPREAMBLE(len)
	for (; data != end; data += sizeof(u64)) {
		m = get_unaligned_le64(data);
		v3 ^= m;
		HSIPROUND;
		v0 ^= m;
	}
#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
	if (left)
		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
						  bytemask_from_count(left)));
#else
	switch (left) {
	case 7: b |= ((u64)end[6]) << 48;
	case 6: b |= ((u64)end[5]) << 40;
	case 5: b |= ((u64)end[4]) << 32;
	case 4: b |= get_unaligned_le32(end); break;
	case 3: b |= ((u64)end[2]) << 16;
	case 2: b |= get_unaligned_le16(end); break;
	case 1: b |= end[0];
	}
#endif
	HPOSTAMBLE
}
#endif

/**
 * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
 * @first: first u32
 * @key: the hsiphash key
 */
u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
{
	HPREAMBLE(4)
	b |= first;
	HPOSTAMBLE
}

/**
 * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
 * @first: first u32
 * @second: second u32
 * @key: the hsiphash key
 */
u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
{
	u64 combined = (u64)second << 32 | first;
	HPREAMBLE(8)
	v3 ^= combined;
	HSIPROUND;
	v0 ^= combined;
	HPOSTAMBLE
}

/**
 * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
 * @first: first u32
 * @second: second u32
 * @third: third u32
 * @key: the hsiphash key
 */
u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
		  const hsiphash_key_t *key)
{
	u64 combined = (u64)second << 32 | first;
	HPREAMBLE(12)
	v3 ^= combined;
	HSIPROUND;
	v0 ^= combined;
	b |= third;
	HPOSTAMBLE
}

/**
 * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
 * @first: first u32
 * @second: second u32
 * @third: third u32
 * @forth: fourth u32
 * @key: the hsiphash key
 */
u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
		  const u32 forth, const hsiphash_key_t *key)
{
	u64 combined = (u64)second << 32 | first;
	HPREAMBLE(16)
	v3 ^= combined;
	HSIPROUND;
	v0 ^= combined;
	combined = (u64)forth << 32 | third;
	v3 ^= combined;
	HSIPROUND;
	v0 ^= combined;
	HPOSTAMBLE
}
#else
#define HSIPROUND \
	do { \
	v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
	v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
	v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
	v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
	} while (0)

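/* 32-bit HalfSipHash initialization vector; the v2/v3 constants are the
 * ASCII words "lyge" and "tedb", the high halves of the corresponding
 * 64-bit SipHash constants.
 */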
#define HPREAMBLE(len) \
	u32 v0 = 0; \
	u32 v1 = 0; \
	u32 v2 = 0x6c796765U; \
	u32 v3 = 0x74656462U; \
	u32 b = ((u32)(len)) << 24; \
	v3 ^= key->key[1]; \
	v2 ^= key->key[0]; \
	v1 ^= key->key[1]; \
	v0 ^= key->key[0];

#define HPOSTAMBLE \
	v3 ^= b; \
	HSIPROUND; \
	v0 ^= b; \
	v2 ^= 0xff; \
	HSIPROUND; \
	HSIPROUND; \
	HSIPROUND; \
	return v1 ^ v3;

u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u32));
	const u8 left = len & (sizeof(u32) - 1);
	u32 m;
	HPREAMBLE(len)
	for (; data != end; data += sizeof(u32)) {
		m = le32_to_cpup(data);
		v3 ^= m;
		HSIPROUND;
		v0 ^= m;
	}
	switch (left) {
	case 3: b |= ((u32)end[2]) << 16;
	case 2: b |= le16_to_cpup(data); break;
	case 1: b |= end[0];
	}
	HPOSTAMBLE
}

#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
u32 __hsiphash_unaligned(const void *data, size_t len,
			 const hsiphash_key_t *key)
{
	const u8 *end = data + len - (len % sizeof(u32));
	const u8 left = len & (sizeof(u32) - 1);
	u32 m;
	HPREAMBLE(len)
	for (; data != end; data += sizeof(u32)) {
		m = get_unaligned_le32(data);
		v3 ^= m;
		HSIPROUND;
		v0 ^= m;
	}
	switch (left) {
	case 3: b |= ((u32)end[2]) << 16;
	case 2: b |= get_unaligned_le16(end); break;
	case 1: b |= end[0];
	}
	HPOSTAMBLE
}
#endif

/**
 * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
 * @first: first u32
 * @key: the hsiphash key
 */
u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
{
	HPREAMBLE(4)
	v3 ^= first;
	HSIPROUND;
	v0 ^= first;
	HPOSTAMBLE
}

/**
 * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
 * @first: first u32
 * @second: second u32
 * @key: the hsiphash key
 */
u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
{
	HPREAMBLE(8)
	v3 ^= first;
	HSIPROUND;
	v0 ^= first;
	v3 ^= second;
	HSIPROUND;
	v0 ^= second;
	HPOSTAMBLE
}

/**
 * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
 * @first: first u32
 * @second: second u32
 * @third: third u32
 * @key: the hsiphash key
 */
u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
		  const hsiphash_key_t *key)
{
	HPREAMBLE(12)
	v3 ^= first;
	HSIPROUND;
	v0 ^= first;
	v3 ^= second;
	HSIPROUND;
	v0 ^= second;
	v3 ^= third;
	HSIPROUND;
	v0 ^= third;
	HPOSTAMBLE
}

/**
 * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
 * @first: first u32
 * @second: second u32
 * @third: third u32
 * @forth: fourth u32
 * @key: the hsiphash key
 */
u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
		  const u32 forth, const hsiphash_key_t *key)
{
	HPREAMBLE(16)
	v3 ^= first;
	HSIPROUND;
	v0 ^= first;
	v3 ^= second;
	HSIPROUND;
	v0 ^= second;
	v3 ^= third;
	HSIPROUND;
	v0 ^= third;
	v3 ^= forth;
	HSIPROUND;
	v0 ^= forth;
	HPOSTAMBLE
}
#endif
94
net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h
Normal file
@ -0,0 +1,94 @@
#ifndef _WG_NET_UDP_TUNNEL_H
#define _WG_NET_UDP_TUNNEL_H

#include <net/ip_tunnels.h>
#include <net/udp.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
#endif

struct udp_port_cfg {
	u8 family;

	/* Used only for kernel-created sockets */
	union {
		struct in_addr local_ip;
#if IS_ENABLED(CONFIG_IPV6)
		struct in6_addr local_ip6;
#endif
	};

	union {
		struct in_addr peer_ip;
#if IS_ENABLED(CONFIG_IPV6)
		struct in6_addr peer_ip6;
#endif
	};

	__be16 local_udp_port;
	__be16 peer_udp_port;
	unsigned int use_udp_checksums:1,
		     use_udp6_tx_checksums:1,
		     use_udp6_rx_checksums:1,
		     ipv6_v6only:1;
};

int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
		     struct socket **sockp);

#if IS_ENABLED(CONFIG_IPV6)
int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
		     struct socket **sockp);
#else
static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
				   struct socket **sockp)
{
	return 0;
}
#endif

static inline int udp_sock_create(struct net *net,
				  struct udp_port_cfg *cfg,
				  struct socket **sockp)
{
	if (cfg->family == AF_INET)
		return udp_sock_create4(net, cfg, sockp);

	if (cfg->family == AF_INET6)
		return udp_sock_create6(net, cfg, sockp);

	return -EPFNOSUPPORT;
}

typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);

struct udp_tunnel_sock_cfg {
	void *sk_user_data;
	__u8 encap_type;
	udp_tunnel_encap_rcv_t encap_rcv;
};

/* Set up the given (UDP) sock to receive UDP encapsulated packets */
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
			   struct udp_tunnel_sock_cfg *sock_cfg);

/* Transmit the skb using UDP encapsulation. */
void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
			 __be16 df, __be16 src_port, __be16 dst_port,
			 bool xnet, bool nocheck);

#if IS_ENABLED(CONFIG_IPV6)
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
			 struct sk_buff *skb,
			 struct net_device *dev, struct in6_addr *saddr,
			 struct in6_addr *daddr,
			 __u8 prio, __u8 ttl, __be32 label,
			 __be16 src_port, __be16 dst_port, bool nocheck);
#endif

void udp_tunnel_sock_release(struct socket *sock);

#endif /* _WG_NET_UDP_TUNNEL_H */
394
net/wireguard/compat/udp_tunnel/udp_tunnel.c
Normal file
@ -0,0 +1,394 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/net_namespace.h>
#include <net/inet_common.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
#endif

/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
static udp_tunnel_encap_rcv_t encap_rcv = NULL;
static void __compat_sk_data_ready(struct sock *sk
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
				   ,int unused_vulnerable_length_param
#endif
				   )
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		skb_orphan(skb);
		sk_mem_reclaim(sk);
		encap_rcv(sk, skb);
	}
}

int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
		     struct socket **sockp)
{
	int err;
	struct socket *sock = NULL;
	struct sockaddr_in udp_addr;

	err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
	if (err < 0)
		goto error;

	udp_addr.sin_family = AF_INET;
	udp_addr.sin_addr = cfg->local_ip;
	udp_addr.sin_port = cfg->local_udp_port;
	err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
			  sizeof(udp_addr));
	if (err < 0)
		goto error;

	if (cfg->peer_udp_port) {
		udp_addr.sin_family = AF_INET;
		udp_addr.sin_addr = cfg->peer_ip;
		udp_addr.sin_port = cfg->peer_udp_port;
		err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
				     sizeof(udp_addr), 0);
		if (err < 0)
			goto error;
	}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
	sock->sk->sk_no_check = !cfg->use_udp_checksums;
#else
	sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
#endif

	*sockp = sock;
	return 0;

error:
	if (sock) {
		kernel_sock_shutdown(sock, SHUT_RDWR);
		sock_release(sock);
	}
	*sockp = NULL;
	return err;
}

void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
			   struct udp_tunnel_sock_cfg *cfg)
{
	inet_sk(sock->sk)->mc_loop = 0;
	encap_rcv = cfg->encap_rcv;
	rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data);
	/* We force the cast in this awful way, due to various Android kernels
	 * backporting things stupidly. */
	*(void **)&sock->sk->sk_data_ready = (void *)__compat_sk_data_ready;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
static inline __sum16 udp_v4_check(int len, __be32 saddr,
				   __be32 daddr, __wsum base)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
}

static void udp_set_csum(bool nocheck, struct sk_buff *skb,
			 __be32 saddr, __be32 daddr, int len)
{
	struct udphdr *uh = udp_hdr(skb);

	if (nocheck)
		uh->check = 0;
	else if (skb_is_gso(skb))
		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
	else if (skb_dst(skb) && skb_dst(skb)->dev &&
		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {

		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
	} else {
		__wsum csum;

		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);

		uh->check = 0;
		csum = skb_checksum(skb, 0, len, 0);
		uh->check = udp_v4_check(len, saddr, daddr, csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
}

#endif

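/* No-op destructor: assigned below so locally built skbs carry a
 * non-NULL skb->destructor alongside the socket reference; presumably
 * this keeps the stack from applying socket accounting to them.
 */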
static void __compat_fake_destructor(struct sk_buff *skb)
{
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
static void __compat_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
				   __be32 src, __be32 dst, __u8 proto,
				   __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
	struct iphdr *iph;
	struct pcpu_tstats *tstats = this_cpu_ptr(skb->dev->tstats);

	skb_scrub_packet(skb, xnet);

	skb->rxhash = 0;
	skb_dst_set(skb, &rt->dst);
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);

	iph->version = 4;
	iph->ihl = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = proto;
	iph->tos = tos;
	iph->daddr = dst;
	iph->saddr = src;
	iph->ttl = ttl;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53)
	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
#else
	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
#endif

	iptunnel_xmit(skb, skb->dev);
	u64_stats_update_begin(&tstats->syncp);
	tstats->tx_bytes -= 8;
	u64_stats_update_end(&tstats->syncp);
}
#define iptunnel_xmit __compat_iptunnel_xmit
#endif

void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
			 __be16 df, __be16 src_port, __be16 dst_port,
			 bool xnet, bool nocheck)
{
	struct udphdr *uh;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
	struct net_device *dev = skb->dev;
	int ret;
#endif

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;
	uh->len = htons(skb->len);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	udp_set_csum(nocheck, skb, src, dst, skb->len);

	if (!skb->sk)
		skb->sk = sk;
	if (!skb->destructor)
		skb->destructor = __compat_fake_destructor;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
	ret =
#endif
	iptunnel_xmit(
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)
		      sk,
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
		      dev_net(dev),
#endif
		      rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0)
		      , xnet
#endif
		      );
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)
	if (ret)
		iptunnel_xmit_stats(ret - 8, &dev->stats, dev->tstats);
#endif
}

void udp_tunnel_sock_release(struct socket *sock)
{
	rcu_assign_sk_user_data(sock->sk, NULL);
	kernel_sock_shutdown(sock, SHUT_RDWR);
	sock_release(sock);
}

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/in6.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/ip6_tunnel.h>
#include <net/ip6_checksum.h>

int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
		     struct socket **sockp)
{
	struct sockaddr_in6 udp6_addr;
	int err;
	struct socket *sock = NULL;

	err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
	if (err < 0)
		goto error;

	if (cfg->ipv6_v6only) {
		int val = 1;

		err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
					(char *) &val, sizeof(val));
		if (err < 0)
			goto error;
	}

	udp6_addr.sin6_family = AF_INET6;
	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
	       sizeof(udp6_addr.sin6_addr));
	udp6_addr.sin6_port = cfg->local_udp_port;
	err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
			  sizeof(udp6_addr));
	if (err < 0)
		goto error;

	if (cfg->peer_udp_port) {
		udp6_addr.sin6_family = AF_INET6;
		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
		       sizeof(udp6_addr.sin6_addr));
		udp6_addr.sin6_port = cfg->peer_udp_port;
		err = kernel_connect(sock,
				     (struct sockaddr *)&udp6_addr,
				     sizeof(udp6_addr), 0);
	}
	if (err < 0)
		goto error;

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
	sock->sk->sk_no_check = !cfg->use_udp_checksums;
#else
	udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
	udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
#endif

	*sockp = sock;
	return 0;

error:
	if (sock) {
		kernel_sock_shutdown(sock, SHUT_RDWR);
		sock_release(sock);
	}
	*sockp = NULL;
	return err;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
static inline __sum16 udp_v6_check(int len,
				   const struct in6_addr *saddr,
				   const struct in6_addr *daddr,
				   __wsum base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
}
static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
			  const struct in6_addr *saddr,
			  const struct in6_addr *daddr, int len)
{
	struct udphdr *uh = udp_hdr(skb);

	if (nocheck)
		uh->check = 0;
	else if (skb_is_gso(skb))
		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
	else if (skb_dst(skb) && skb_dst(skb)->dev &&
		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {

		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
	} else {
		__wsum csum;

		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);

		uh->check = 0;
		csum = skb_checksum(skb, 0, len, 0);
		uh->check = udp_v6_check(len, saddr, daddr, csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
}
#endif

int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
			 struct sk_buff *skb,
			 struct net_device *dev, struct in6_addr *saddr,
			 struct in6_addr *daddr,
			 __u8 prio, __u8 ttl, __be32 label,
			 __be16 src_port, __be16 dst_port, bool nocheck)
{
	struct udphdr *uh;
	struct ipv6hdr *ip6h;

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;

	uh->len = htons(skb->len);

	skb_dst_set(skb, dst);

	udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);

	__skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);
	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, prio, label);
	ip6h->payload_len = htons(skb->len);
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;

	if (!skb->sk)
		skb->sk = sk;
	if (!skb->destructor)
		skb->destructor = __compat_fake_destructor;

	ip6tunnel_xmit(skb, dev);
	return 0;
}
#endif
226
net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h
Normal file
@ -0,0 +1,226 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
#define udp_sock_create4 udp_sock_create
#define udp_sock_create6 udp_sock_create
#include <linux/socket.h>
#include <linux/if.h>
#include <linux/in.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/inet_common.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <linux/in6.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#endif
static inline void __compat_fake_destructor(struct sk_buff *skb)
{
}
typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
struct udp_tunnel_sock_cfg {
	void *sk_user_data;
	__u8 encap_type;
	udp_tunnel_encap_rcv_t encap_rcv;
};
/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
static udp_tunnel_encap_rcv_t encap_rcv = NULL;
static void __compat_sk_data_ready(struct sock *sk)
{
	struct sk_buff *skb;
	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		skb_orphan(skb);
		sk_mem_reclaim(sk);
		encap_rcv(sk, skb);
	}
}
static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
					 struct udp_tunnel_sock_cfg *cfg)
{
	struct sock *sk = sock->sk;
	inet_sk(sk)->mc_loop = 0;
	encap_rcv = cfg->encap_rcv;
	rcu_assign_sk_user_data(sk, cfg->sk_user_data);
	sk->sk_data_ready = __compat_sk_data_ready;
}
static inline void udp_tunnel_sock_release(struct socket *sock)
{
	rcu_assign_sk_user_data(sock->sk, NULL);
	kernel_sock_shutdown(sock, SHUT_RDWR);
	sk_release_kernel(sock->sk);
}
static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
				      struct sk_buff *skb, __be32 src, __be32 dst,
				      __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
				      __be16 dst_port, bool xnet)
{
	struct udphdr *uh;
	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);
	uh->dest = dst_port;
	uh->source = src_port;
	uh->len = htons(skb->len);
	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len);
	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
			     tos, ttl, df, xnet);
}
#if IS_ENABLED(CONFIG_IPV6)
static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
				       struct sk_buff *skb, struct net_device *dev,
				       struct in6_addr *saddr, struct in6_addr *daddr,
				       __u8 prio, __u8 ttl, __be16 src_port,
				       __be16 dst_port)
{
	struct udphdr *uh;
	struct ipv6hdr *ip6h;
	struct sock *sk = sock->sk;
	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);
	uh->dest = dst_port;
	uh->source = src_port;
	uh->len = htons(skb->len);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
			      | IPSKB_REROUTED);
	skb_dst_set(skb, dst);
	udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr,
		      &sk->sk_v6_daddr, skb->len);
	__skb_push(skb, sizeof(*ip6h));
	skb_reset_network_header(skb);
	ip6h = ipv6_hdr(skb);
	ip6_flow_hdr(ip6h, prio, htonl(0));
	ip6h->payload_len = htons(skb->len);
	ip6h->nexthdr = IPPROTO_UDP;
	ip6h->hop_limit = ttl;
	ip6h->daddr = *daddr;
	ip6h->saddr = *saddr;
	ip6tunnel_xmit(skb, dev);
	return 0;
}
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/udp.h>
#include <linux/skbuff.h>
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#if IS_ENABLED(CONFIG_IPV6)
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k);
#endif
#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
static inline void __compat_fake_destructor(struct sk_buff *skb)
{
}
#endif
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#if IS_ENABLED(CONFIG_IPV6)
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = __compat_fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0)
#endif
#else

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); if (ret__) iptunnel_xmit_stats(ret__ - 8, &dev__->stats, dev__->tstats); } while (0)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/if.h>
#include <net/udp_tunnel.h>
#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l)
#endif

#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
#include <linux/skbuff.h>
#include <linux/if.h>
#include <net/udp_tunnel.h>
struct __compat_udp_port_cfg {
	u8 family;
	union {
		struct in_addr local_ip;
#if IS_ENABLED(CONFIG_IPV6)
		struct in6_addr local_ip6;
#endif
	};
	union {
		struct in_addr peer_ip;
#if IS_ENABLED(CONFIG_IPV6)
		struct in6_addr peer_ip6;
#endif
	};
	__be16 local_udp_port;
	__be16 peer_udp_port;
	unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1;
};
static inline int __maybe_unused __compat_udp_sock_create(struct net *net, struct __compat_udp_port_cfg *cfg, struct socket **sockp)
{
	struct udp_port_cfg old_cfg = {
		.family = cfg->family,
		.local_ip = cfg->local_ip,
#if IS_ENABLED(CONFIG_IPV6)
		.local_ip6 = cfg->local_ip6,
#endif
		.peer_ip = cfg->peer_ip,
#if IS_ENABLED(CONFIG_IPV6)
		.peer_ip6 = cfg->peer_ip6,
#endif
		.local_udp_port = cfg->local_udp_port,
		.peer_udp_port = cfg->peer_udp_port,
		.use_udp_checksums = cfg->use_udp_checksums,
		.use_udp6_tx_checksums = cfg->use_udp6_tx_checksums,
		.use_udp6_rx_checksums = cfg->use_udp6_rx_checksums
	};
	if (cfg->family == AF_INET)
		return udp_sock_create4(net, &old_cfg, sockp);

#if IS_ENABLED(CONFIG_IPV6)
	if (cfg->family == AF_INET6) {
		int ret;
		int old_bindv6only;
		struct net *nobns;

		if (cfg->ipv6_v6only) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
			nobns = &init_net;
#else
			nobns = net;
#endif
			/* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible
			 * hack here and set the sysctl variable temporarily to something that will
			 * set the right option for us in sock_create. It's super racy! */
			old_bindv6only = nobns->ipv6.sysctl.bindv6only;
			nobns->ipv6.sysctl.bindv6only = 1;
		}
		ret = udp_sock_create6(net, &old_cfg, sockp);
		if (cfg->ipv6_v6only)
			nobns->ipv6.sysctl.bindv6only = old_bindv6only;
		return ret;
	}
#endif
	return -EPFNOSUPPORT;
}
#define udp_port_cfg __compat_udp_port_cfg
#define udp_sock_create(a, b, c) __compat_udp_sock_create(a, b, c)
#endif
236
net/wireguard/cookie.c
Normal file
@ -0,0 +1,236 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "cookie.h"
#include "peer.h"
#include "device.h"
#include "messages.h"
#include "ratelimiter.h"
#include "timers.h"

#include <zinc/blake2s.h>
#include <zinc/chacha20poly1305.h>

#include <net/ipv6.h>
#include <crypto/algapi.h>

void wg_cookie_checker_init(struct cookie_checker *checker,
			    struct wg_device *wg)
{
	init_rwsem(&checker->secret_lock);
	checker->secret_birthdate = ktime_get_coarse_boottime_ns();
	get_random_bytes(checker->secret, NOISE_HASH_LEN);
	checker->device = wg;
}

enum { COOKIE_KEY_LABEL_LEN = 8 };
static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";

static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
			   const u8 label[COOKIE_KEY_LABEL_LEN])
{
	struct blake2s_state blake;

	blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
	blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
	blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
	blake2s_final(&blake, key);
}

/* Must hold peer->handshake.static_identity->lock */
void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
{
	if (likely(checker->device->static_identity.has_identity)) {
		precompute_key(checker->cookie_encryption_key,
			       checker->device->static_identity.static_public,
			       cookie_key_label);
		precompute_key(checker->message_mac1_key,
			       checker->device->static_identity.static_public,
			       mac1_key_label);
	} else {
		memset(checker->cookie_encryption_key, 0,
		       NOISE_SYMMETRIC_KEY_LEN);
		memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
	}
}

void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
{
	precompute_key(peer->latest_cookie.cookie_decryption_key,
		       peer->handshake.remote_static, cookie_key_label);
	precompute_key(peer->latest_cookie.message_mac1_key,
		       peer->handshake.remote_static, mac1_key_label);
}

void wg_cookie_init(struct cookie *cookie)
{
	memset(cookie, 0, sizeof(*cookie));
	init_rwsem(&cookie->lock);
}

static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
			 const u8 key[NOISE_SYMMETRIC_KEY_LEN])
{
	len = len - sizeof(struct message_macs) +
	      offsetof(struct message_macs, mac1);
	blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
}

static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
			 const u8 cookie[COOKIE_LEN])
{
	len = len - sizeof(struct message_macs) +
	      offsetof(struct message_macs, mac2);
	blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
}

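/* The cookie is a keyed BLAKE2s over the sender's source IP address and
 * UDP source port, under a random secret that is regenerated once it is
 * older than COOKIE_SECRET_MAX_AGE.
 */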
static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
			struct cookie_checker *checker)
{
	struct blake2s_state state;

	if (wg_birthdate_has_expired(checker->secret_birthdate,
				     COOKIE_SECRET_MAX_AGE)) {
		down_write(&checker->secret_lock);
		checker->secret_birthdate = ktime_get_coarse_boottime_ns();
		get_random_bytes(checker->secret, NOISE_HASH_LEN);
		up_write(&checker->secret_lock);
	}

	down_read(&checker->secret_lock);

	blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
	if (skb->protocol == htons(ETH_P_IP))
		blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
			       sizeof(struct in_addr));
	else if (skb->protocol == htons(ETH_P_IPV6))
		blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
			       sizeof(struct in6_addr));
	blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
	blake2s_final(&state, cookie);

	up_read(&checker->secret_lock);
}

enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
						struct sk_buff *skb,
						bool check_cookie)
{
	struct message_macs *macs = (struct message_macs *)
		(skb->data + skb->len - sizeof(*macs));
	enum cookie_mac_state ret;
	u8 computed_mac[COOKIE_LEN];
	u8 cookie[COOKIE_LEN];

	ret = INVALID_MAC;
	compute_mac1(computed_mac, skb->data, skb->len,
		     checker->message_mac1_key);
	if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
		goto out;

	ret = VALID_MAC_BUT_NO_COOKIE;

	if (!check_cookie)
		goto out;

	make_cookie(cookie, skb, checker);

	compute_mac2(computed_mac, skb->data, skb->len, cookie);
	if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
		goto out;

	ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
	if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
		goto out;

	ret = VALID_MAC_WITH_COOKIE;

out:
	return ret;
}

void wg_cookie_add_mac_to_packet(void *message, size_t len,
				 struct wg_peer *peer)
{
	struct message_macs *macs = (struct message_macs *)
		((u8 *)message + len - sizeof(*macs));

	down_write(&peer->latest_cookie.lock);
	compute_mac1(macs->mac1, message, len,
		     peer->latest_cookie.message_mac1_key);
	memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
	peer->latest_cookie.have_sent_mac1 = true;
	up_write(&peer->latest_cookie.lock);

	down_read(&peer->latest_cookie.lock);
	if (peer->latest_cookie.is_valid &&
	    !wg_birthdate_has_expired(peer->latest_cookie.birthdate,
				      COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
		compute_mac2(macs->mac2, message, len,
			     peer->latest_cookie.cookie);
	else
		memset(macs->mac2, 0, COOKIE_LEN);
	up_read(&peer->latest_cookie.lock);
}

void wg_cookie_message_create(struct message_handshake_cookie *dst,
			      struct sk_buff *skb, __le32 index,
			      struct cookie_checker *checker)
{
	struct message_macs *macs = (struct message_macs *)
		((u8 *)skb->data + skb->len - sizeof(*macs));
	u8 cookie[COOKIE_LEN];

	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
	dst->receiver_index = index;
	get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);

	make_cookie(cookie, skb, checker);
	xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
				  macs->mac1, COOKIE_LEN, dst->nonce,
				  checker->cookie_encryption_key);
}

void wg_cookie_message_consume(struct message_handshake_cookie *src,
			       struct wg_device *wg)
{
	struct wg_peer *peer = NULL;
	u8 cookie[COOKIE_LEN];
	bool ret;

	if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
						INDEX_HASHTABLE_HANDSHAKE |
						INDEX_HASHTABLE_KEYPAIR,
						src->receiver_index, &peer)))
		return;

	down_read(&peer->latest_cookie.lock);
	if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
		up_read(&peer->latest_cookie.lock);
		goto out;
	}
	ret = xchacha20poly1305_decrypt(
		cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
		peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
		peer->latest_cookie.cookie_decryption_key);
	up_read(&peer->latest_cookie.lock);

	if (ret) {
		down_write(&peer->latest_cookie.lock);
		memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
		peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
		peer->latest_cookie.is_valid = true;
		peer->latest_cookie.have_sent_mac1 = false;
		up_write(&peer->latest_cookie.lock);
	} else {
		net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
				    wg->dev->name);
	}

out:
	wg_peer_put(peer);
}
59
net/wireguard/cookie.h
Normal file
@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_COOKIE_H
#define _WG_COOKIE_H

#include "messages.h"
#include <linux/rwsem.h>

struct wg_peer;

struct cookie_checker {
	u8 secret[NOISE_HASH_LEN];
	u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
	u64 secret_birthdate;
	struct rw_semaphore secret_lock;
	struct wg_device *device;
};

struct cookie {
	u64 birthdate;
	bool is_valid;
	u8 cookie[COOKIE_LEN];
	bool have_sent_mac1;
	u8 last_mac1_sent[COOKIE_LEN];
	u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
	struct rw_semaphore lock;
};

enum cookie_mac_state {
	INVALID_MAC,
	VALID_MAC_BUT_NO_COOKIE,
	VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
	VALID_MAC_WITH_COOKIE
};

void wg_cookie_checker_init(struct cookie_checker *checker,
			    struct wg_device *wg);
void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
void wg_cookie_init(struct cookie *cookie);

enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
						struct sk_buff *skb,
						bool check_cookie);
void wg_cookie_add_mac_to_packet(void *message, size_t len,
				 struct wg_peer *peer);

void wg_cookie_message_create(struct message_handshake_cookie *dst,
			      struct sk_buff *skb, __le32 index,
			      struct cookie_checker *checker);
void wg_cookie_message_consume(struct message_handshake_cookie *src,
			       struct wg_device *wg);

#endif /* _WG_COOKIE_H */
57
net/wireguard/crypto/Kbuild.include
Normal file
@ -0,0 +1,57 @@
ifeq ($(CONFIG_X86_64)$(if $(CONFIG_UML),y,n),yn)
CONFIG_ZINC_ARCH_X86_64 := y
endif
ifeq ($(CONFIG_ARM)$(if $(CONFIG_CPU_32v3),y,n),yn)
CONFIG_ZINC_ARCH_ARM := y
endif
ifeq ($(CONFIG_ARM64),y)
CONFIG_ZINC_ARCH_ARM64 := y
endif
ifeq ($(CONFIG_MIPS)$(CONFIG_CPU_MIPS32_R2),yy)
CONFIG_ZINC_ARCH_MIPS := y
endif
ifeq ($(CONFIG_MIPS)$(CONFIG_64BIT),yy)
CONFIG_ZINC_ARCH_MIPS64 := y
endif

zinc-y += chacha20/chacha20.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += chacha20/chacha20-x86_64.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += chacha20/chacha20-arm.o chacha20/chacha20-unrolled-arm.o
zinc-$(CONFIG_ZINC_ARCH_ARM64) += chacha20/chacha20-arm64.o
zinc-$(CONFIG_ZINC_ARCH_MIPS) += chacha20/chacha20-mips.o
AFLAGS_chacha20-mips.o += -O2 # This is required to fill the branch delay slots

zinc-y += poly1305/poly1305.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += poly1305/poly1305-x86_64.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += poly1305/poly1305-arm.o
zinc-$(CONFIG_ZINC_ARCH_ARM64) += poly1305/poly1305-arm64.o
zinc-$(CONFIG_ZINC_ARCH_MIPS) += poly1305/poly1305-mips.o
AFLAGS_poly1305-mips.o += -O2 # This is required to fill the branch delay slots
zinc-$(CONFIG_ZINC_ARCH_MIPS64) += poly1305/poly1305-mips64.o

zinc-y += chacha20poly1305.o

zinc-y += blake2s/blake2s.o
zinc-$(CONFIG_ZINC_ARCH_X86_64) += blake2s/blake2s-x86_64.o

zinc-y += curve25519/curve25519.o
zinc-$(CONFIG_ZINC_ARCH_ARM) += curve25519/curve25519-arm.o

quiet_cmd_perlasm = PERLASM $@
      cmd_perlasm = $(PERL) $< > $@
$(obj)/%.S: $(src)/%.pl FORCE
	$(call if_changed,perlasm)
kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
targets := $(patsubst $(kbuild-dir)/%.pl,%.S,$(wildcard $(patsubst %.o,$(kbuild-dir)/crypto/zinc/%.pl,$(zinc-y) $(zinc-m) $(zinc-))))

# Old kernels don't set this, which causes trouble.
.SECONDARY:

wireguard-y += $(addprefix crypto/zinc/,$(zinc-y))
ccflags-y += -I$(kbuild-dir)/crypto/include
ccflags-$(CONFIG_ZINC_ARCH_X86_64) += -DCONFIG_ZINC_ARCH_X86_64
ccflags-$(CONFIG_ZINC_ARCH_ARM) += -DCONFIG_ZINC_ARCH_ARM
ccflags-$(CONFIG_ZINC_ARCH_ARM64) += -DCONFIG_ZINC_ARCH_ARM64
ccflags-$(CONFIG_ZINC_ARCH_MIPS) += -DCONFIG_ZINC_ARCH_MIPS
ccflags-$(CONFIG_ZINC_ARCH_MIPS64) += -DCONFIG_ZINC_ARCH_MIPS64
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DCONFIG_ZINC_SELFTEST
56
net/wireguard/crypto/include/zinc/blake2s.h
Normal file
@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _ZINC_BLAKE2S_H
#define _ZINC_BLAKE2S_H

#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/bug.h>

enum blake2s_lengths {
	BLAKE2S_BLOCK_SIZE = 64,
	BLAKE2S_HASH_SIZE = 32,
	BLAKE2S_KEY_SIZE = 32
};

struct blake2s_state {
	u32 h[8];
	u32 t[2];
	u32 f[2];
	u8 buf[BLAKE2S_BLOCK_SIZE];
	unsigned int buflen;
	unsigned int outlen;
};

void blake2s_init(struct blake2s_state *state, const size_t outlen);
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
		      const void *key, const size_t keylen);
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, u8 *out);

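/* One-shot convenience wrapper: keyed init when keylen is nonzero,
 * unkeyed otherwise, followed by a single update/final pass.
 */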
static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
			   const size_t outlen, const size_t inlen,
			   const size_t keylen)
{
	struct blake2s_state state;

	WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
		outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
		(!key && keylen)));

	if (keylen)
		blake2s_init_key(&state, outlen, key, keylen);
	else
		blake2s_init(&state, outlen);

	blake2s_update(&state, in, inlen);
	blake2s_final(&state, out);
}

void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
		  const size_t inlen, const size_t keylen);

#endif /* _ZINC_BLAKE2S_H */
70
net/wireguard/crypto/include/zinc/chacha20.h
Normal file
@ -0,0 +1,70 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _ZINC_CHACHA20_H
#define _ZINC_CHACHA20_H

#include <asm/unaligned.h>
#include <linux/simd.h>
#include <linux/kernel.h>
#include <linux/types.h>

enum chacha20_lengths {
	CHACHA20_NONCE_SIZE = 16,
	CHACHA20_KEY_SIZE = 32,
	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(u32),
	CHACHA20_BLOCK_SIZE = 64,
	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(u32),
	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
};

enum chacha20_constants { /* expand 32-byte k */
	CHACHA20_CONSTANT_EXPA = 0x61707865U,
	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
	CHACHA20_CONSTANT_TE_K = 0x6b206574U
};

struct chacha20_ctx {
	union {
		u32 state[16];
		struct {
			u32 constant[4];
			u32 key[8];
			u32 counter[4];
		};
	};
};

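/* counter[0..1] form the 64-bit block counter, starting at zero;
 * counter[2..3] carry the 64-bit nonce, split into two little-endian
 * 32-bit words.
 */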
static inline void chacha20_init(struct chacha20_ctx *ctx,
				 const u8 key[CHACHA20_KEY_SIZE],
				 const u64 nonce)
{
	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
	ctx->key[0] = get_unaligned_le32(key + 0);
	ctx->key[1] = get_unaligned_le32(key + 4);
	ctx->key[2] = get_unaligned_le32(key + 8);
	ctx->key[3] = get_unaligned_le32(key + 12);
	ctx->key[4] = get_unaligned_le32(key + 16);
	ctx->key[5] = get_unaligned_le32(key + 20);
	ctx->key[6] = get_unaligned_le32(key + 24);
	ctx->key[7] = get_unaligned_le32(key + 28);
	ctx->counter[0] = 0;
	ctx->counter[1] = 0;
	ctx->counter[2] = nonce & U32_MAX;
	ctx->counter[3] = nonce >> 32;
}
void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
	      simd_context_t *simd_context);

void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
	       const u8 nonce[HCHACHA20_NONCE_SIZE],
	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context);

#endif /* _ZINC_CHACHA20_H */
50
net/wireguard/crypto/include/zinc/chacha20poly1305.h
Normal file
@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_CHACHA20POLY1305_H
|
||||
#define _ZINC_CHACHA20POLY1305_H
|
||||
|
||||
#include <linux/simd.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct scatterlist;
|
||||
|
||||
enum chacha20poly1305_lengths {
|
||||
XCHACHA20POLY1305_NONCE_SIZE = 24,
|
||||
CHACHA20POLY1305_KEY_SIZE = 32,
|
||||
CHACHA20POLY1305_AUTHTAG_SIZE = 16
|
||||
};
|
||||
|
||||
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool __must_check chacha20poly1305_encrypt_sg_inplace(
|
||||
struct scatterlist *src, const size_t src_len, const u8 *ad,
|
||||
const size_t ad_len, const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
|
||||
|
||||
bool __must_check
|
||||
chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len, const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool __must_check chacha20poly1305_decrypt_sg_inplace(
|
||||
struct scatterlist *src, size_t src_len, const u8 *ad,
|
||||
const size_t ad_len, const u64 nonce,
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE], simd_context_t *simd_context);
|
||||
|
||||
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
|
||||
const u8 *ad, const size_t ad_len,
|
||||
const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
bool __must_check xchacha20poly1305_decrypt(
|
||||
u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
|
||||
const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
|
||||
const u8 key[CHACHA20POLY1305_KEY_SIZE]);
|
||||
|
||||
#endif /* _ZINC_CHACHA20POLY1305_H */
|
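A minimal round-trip sketch of the non-scatterlist AEAD entry points above; the key handling and sizes are illustrative. Ciphertext is src_len plus the 16-byte tag, and decryption must be checked:

	u8 key[CHACHA20POLY1305_KEY_SIZE];	/* illustrative: provisioned elsewhere */
	u8 buf[64 + CHACHA20POLY1305_AUTHTAG_SIZE];
	u64 nonce = 1;	/* must never repeat under the same key */

	/* Encrypts 64 bytes in place and appends the 16-byte tag. */
	chacha20poly1305_encrypt(buf, buf, 64, NULL, 0, nonce, key);

	/* Decryption returns false if the tag does not verify. */
	if (!chacha20poly1305_decrypt(buf, buf, sizeof(buf), NULL, 0, nonce, key))
		; /* forged or corrupted: reject */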
28
net/wireguard/crypto/include/zinc/curve25519.h
Normal file
@ -0,0 +1,28 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _ZINC_CURVE25519_H
#define _ZINC_CURVE25519_H

#include <linux/types.h>

enum curve25519_lengths {
	CURVE25519_KEY_SIZE = 32
};

bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
			     const u8 secret[CURVE25519_KEY_SIZE],
			     const u8 basepoint[CURVE25519_KEY_SIZE]);
void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE]);
bool __must_check curve25519_generate_public(
	u8 pub[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE]);

static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
{
	secret[0] &= 248;
	secret[31] = (secret[31] & 127) | 64;
}

#endif /* _ZINC_CURVE25519_H */
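The clamp above is the standard RFC 7748 scalar conditioning: clearing the low three bits makes the scalar a multiple of the cofactor 8, and fixing bit 254 pins its high end. A keypair/shared-secret sketch (peer_pub is assumed to arrive from the remote side):

	u8 secret[CURVE25519_KEY_SIZE], pub[CURVE25519_KEY_SIZE];
	u8 shared[CURVE25519_KEY_SIZE];

	curve25519_generate_secret(secret);	/* random and pre-clamped */
	if (!curve25519_generate_public(pub, secret))
		; /* degenerate secret: regenerate */
	if (!curve25519(shared, secret, peer_pub))
		; /* all-zero output: reject the peer's key */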
31
net/wireguard/crypto/include/zinc/poly1305.h
Normal file
@ -0,0 +1,31 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _ZINC_POLY1305_H
#define _ZINC_POLY1305_H

#include <linux/simd.h>
#include <linux/types.h>

enum poly1305_lengths {
	POLY1305_BLOCK_SIZE = 16,
	POLY1305_KEY_SIZE = 32,
	POLY1305_MAC_SIZE = 16
};

struct poly1305_ctx {
	u8 opaque[24 * sizeof(u64)];
	u32 nonce[4];
	u8 data[POLY1305_BLOCK_SIZE];
	size_t num;
} __aligned(8);

void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]);
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
		     simd_context_t *simd_context);
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
		    simd_context_t *simd_context);

#endif /* _ZINC_POLY1305_H */
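A one-shot MAC sketch over the streaming interface above; key, data and data_len are placeholders. Poly1305 keys are one-time: a (key, message) pair must never be reused for a second message:

	struct poly1305_ctx ctx;
	u8 mac[POLY1305_MAC_SIZE];
	simd_context_t simd_context;

	simd_get(&simd_context);
	poly1305_init(&ctx, key);	/* 32-byte one-time key */
	poly1305_update(&ctx, data, data_len, &simd_context);
	poly1305_final(&ctx, mac, &simd_context);
	simd_put(&simd_context);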
15
net/wireguard/crypto/zinc.h
Normal file
@ -0,0 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_ZINC_H
#define _WG_ZINC_H

int chacha20_mod_init(void);
int poly1305_mod_init(void);
int chacha20poly1305_mod_init(void);
int blake2s_mod_init(void);
int curve25519_mod_init(void);

#endif
72
net/wireguard/crypto/zinc/blake2s/blake2s-x86_64-glue.c
Normal file
@ -0,0 +1,72 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/simd.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/fpu/api.h>

asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
				       const u8 *block, const size_t nblocks,
				       const u32 inc);
asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
					const u8 *block, const size_t nblocks,
					const u32 inc);

static bool blake2s_use_ssse3 __ro_after_init;
static bool blake2s_use_avx512 __ro_after_init;
static bool *const blake2s_nobs[] __initconst = { &blake2s_use_ssse3,
						  &blake2s_use_avx512 };

static void __init blake2s_fpu_init(void)
{
	blake2s_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
#ifndef COMPAT_CANNOT_USE_AVX512
	blake2s_use_avx512 =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL);
#endif
}

static inline bool blake2s_compress_arch(struct blake2s_state *state,
					 const u8 *block, size_t nblocks,
					 const u32 inc)
{
	simd_context_t simd_context;
	bool used_arch = false;

	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);

	simd_get(&simd_context);

	if (!IS_ENABLED(CONFIG_AS_SSSE3) || !blake2s_use_ssse3 ||
	    !simd_use(&simd_context))
		goto out;
	used_arch = true;

	for (;;) {
		const size_t blocks = min_t(size_t, nblocks,
					    PAGE_SIZE / BLAKE2S_BLOCK_SIZE);

		if (IS_ENABLED(CONFIG_AS_AVX512) && blake2s_use_avx512)
			blake2s_compress_avx512(state, block, blocks, inc);
		else
			blake2s_compress_ssse3(state, block, blocks, inc);

		nblocks -= blocks;
		if (!nblocks)
			break;
		block += blocks * BLAKE2S_BLOCK_SIZE;
		simd_relax(&simd_context);
	}
out:
	simd_put(&simd_context);
	return used_arch;
}
258
net/wireguard/crypto/zinc/blake2s/blake2s-x86_64.S
Normal file
@ -0,0 +1,258 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
 */

#include <linux/linkage.h>

.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
.align 32
IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
	.octa 0x5BE0CD191F83D9AB9B05688C510E527F
.section .rodata.cst16.ROT16, "aM", @progbits, 16
.align 16
ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302
.section .rodata.cst16.ROR328, "aM", @progbits, 16
.align 16
ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201
.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
.align 64
SIGMA:
.byte  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.byte 14,  4,  9, 13, 10,  8, 15,  6,  5,  1,  0, 11,  3, 12,  2,  7
.byte 11, 12,  5, 15,  8,  0,  2, 13,  9, 10,  3,  7,  4, 14,  6,  1
.byte  7,  3, 13, 11,  9,  1, 12, 14, 15,  2,  5,  4,  8,  6, 10,  0
.byte  9,  5,  2, 10,  0,  7,  4, 15,  3, 14, 11,  6, 13,  1, 12,  8
.byte  2,  6,  0,  8, 12, 10, 11,  3,  1,  4,  7, 15,  9, 13,  5, 14
.byte 12,  1, 14,  4,  5, 15, 13, 10,  8,  0,  6,  9, 11,  7,  3,  2
.byte 13,  7, 12,  3, 11, 14,  1,  9,  2,  5, 15,  8, 10,  0,  4,  6
.byte  6, 14, 11,  0, 15,  9,  3,  8, 10, 12, 13,  1,  5,  2,  7,  4
.byte 10,  8,  7,  1,  2,  4,  6,  5, 13, 15,  9,  3,  0, 11, 14, 12
#ifdef CONFIG_AS_AVX512
.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
.align 64
SIGMA2:
.long  0,  2,  4,  6,  1,  3,  5,  7, 14,  8, 10, 12, 15,  9, 11, 13
.long  8,  2, 13, 15, 10,  9, 12,  3,  6,  4,  0, 14,  5, 11,  1,  7
.long 11, 13,  8,  6,  5, 10, 14,  3,  2,  4, 12, 15,  1,  0,  7,  9
.long 11, 10,  7,  0,  8, 15,  1, 13,  3,  6,  2, 12,  4, 14,  9,  5
.long  4, 10,  9, 14, 15,  0, 11,  8,  1,  7,  3, 13,  2,  5,  6, 12
.long  2, 11,  4, 15, 14,  3, 10,  8, 13,  6,  5,  7,  0, 12,  1,  9
.long  4,  8, 15,  9, 14, 11, 13,  5,  3,  2,  1, 12,  6, 10,  7,  0
.long  6, 13,  0, 14, 12,  2,  1, 11, 15,  4,  5,  8,  7,  9,  3, 10
.long 15,  5,  4, 13, 10,  7,  3, 11, 12,  2,  0,  6,  9,  8,  1, 14
.long  8,  7, 14, 11, 13, 15,  0, 12, 10,  4,  5,  6,  3,  2,  1,  9
#endif /* CONFIG_AS_AVX512 */

.text
#ifdef CONFIG_AS_SSSE3
SYM_FUNC_START(blake2s_compress_ssse3)
	testq %rdx,%rdx
	je .Lendofloop
	movdqu (%rdi),%xmm0
	movdqu 0x10(%rdi),%xmm1
	movdqa ROT16(%rip),%xmm12
	movdqa ROR328(%rip),%xmm13
	movdqu 0x20(%rdi),%xmm14
	movq %rcx,%xmm15
	leaq SIGMA+0xa0(%rip),%r8
	jmp .Lbeginofloop
	.align 32
.Lbeginofloop:
	movdqa %xmm0,%xmm10
	movdqa %xmm1,%xmm11
	paddq %xmm15,%xmm14
	movdqa IV(%rip),%xmm2
	movdqa %xmm14,%xmm3
	pxor IV+0x10(%rip),%xmm3
	leaq SIGMA(%rip),%rcx
.Lroundloop:
	movzbl (%rcx),%eax
	movd (%rsi,%rax,4),%xmm4
	movzbl 0x1(%rcx),%eax
	movd (%rsi,%rax,4),%xmm5
	movzbl 0x2(%rcx),%eax
	movd (%rsi,%rax,4),%xmm6
	movzbl 0x3(%rcx),%eax
	movd (%rsi,%rax,4),%xmm7
	punpckldq %xmm5,%xmm4
	punpckldq %xmm7,%xmm6
	punpcklqdq %xmm6,%xmm4
	paddd %xmm4,%xmm0
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
	pshufb %xmm12,%xmm3
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm8
	psrld $0xc,%xmm1
	pslld $0x14,%xmm8
	por %xmm8,%xmm1
	movzbl 0x4(%rcx),%eax
	movd (%rsi,%rax,4),%xmm5
	movzbl 0x5(%rcx),%eax
	movd (%rsi,%rax,4),%xmm6
	movzbl 0x6(%rcx),%eax
	movd (%rsi,%rax,4),%xmm7
	movzbl 0x7(%rcx),%eax
	movd (%rsi,%rax,4),%xmm4
	punpckldq %xmm6,%xmm5
	punpckldq %xmm4,%xmm7
	punpcklqdq %xmm7,%xmm5
	paddd %xmm5,%xmm0
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
	pshufb %xmm13,%xmm3
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm8
	psrld $0x7,%xmm1
	pslld $0x19,%xmm8
	por %xmm8,%xmm1
	pshufd $0x93,%xmm0,%xmm0
	pshufd $0x4e,%xmm3,%xmm3
	pshufd $0x39,%xmm2,%xmm2
	movzbl 0x8(%rcx),%eax
	movd (%rsi,%rax,4),%xmm6
	movzbl 0x9(%rcx),%eax
	movd (%rsi,%rax,4),%xmm7
	movzbl 0xa(%rcx),%eax
	movd (%rsi,%rax,4),%xmm4
	movzbl 0xb(%rcx),%eax
	movd (%rsi,%rax,4),%xmm5
	punpckldq %xmm7,%xmm6
	punpckldq %xmm5,%xmm4
	punpcklqdq %xmm4,%xmm6
	paddd %xmm6,%xmm0
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
	pshufb %xmm12,%xmm3
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm8
	psrld $0xc,%xmm1
	pslld $0x14,%xmm8
	por %xmm8,%xmm1
	movzbl 0xc(%rcx),%eax
	movd (%rsi,%rax,4),%xmm7
	movzbl 0xd(%rcx),%eax
	movd (%rsi,%rax,4),%xmm4
	movzbl 0xe(%rcx),%eax
	movd (%rsi,%rax,4),%xmm5
	movzbl 0xf(%rcx),%eax
	movd (%rsi,%rax,4),%xmm6
	punpckldq %xmm4,%xmm7
	punpckldq %xmm6,%xmm5
	punpcklqdq %xmm5,%xmm7
	paddd %xmm7,%xmm0
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
	pshufb %xmm13,%xmm3
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm8
	psrld $0x7,%xmm1
	pslld $0x19,%xmm8
	por %xmm8,%xmm1
	pshufd $0x39,%xmm0,%xmm0
	pshufd $0x4e,%xmm3,%xmm3
	pshufd $0x93,%xmm2,%xmm2
	addq $0x10,%rcx
	cmpq %r8,%rcx
	jnz .Lroundloop
	pxor %xmm2,%xmm0
	pxor %xmm3,%xmm1
	pxor %xmm10,%xmm0
	pxor %xmm11,%xmm1
	addq $0x40,%rsi
	decq %rdx
	jnz .Lbeginofloop
	movdqu %xmm0,(%rdi)
	movdqu %xmm1,0x10(%rdi)
	movdqu %xmm14,0x20(%rdi)
.Lendofloop:
	ret
SYM_FUNC_END(blake2s_compress_ssse3)
#endif /* CONFIG_AS_SSSE3 */

#ifdef CONFIG_AS_AVX512
SYM_FUNC_START(blake2s_compress_avx512)
	vmovdqu (%rdi),%xmm0
	vmovdqu 0x10(%rdi),%xmm1
	vmovdqu 0x20(%rdi),%xmm4
	vmovq %rcx,%xmm5
	vmovdqa IV(%rip),%xmm14
	vmovdqa IV+16(%rip),%xmm15
	jmp .Lblake2s_compress_avx512_mainloop
.align 32
.Lblake2s_compress_avx512_mainloop:
	vmovdqa %xmm0,%xmm10
	vmovdqa %xmm1,%xmm11
	vpaddq %xmm5,%xmm4,%xmm4
	vmovdqa %xmm14,%xmm2
	vpxor %xmm15,%xmm4,%xmm3
	vmovdqu (%rsi),%ymm6
	vmovdqu 0x20(%rsi),%ymm7
	addq $0x40,%rsi
	leaq SIGMA2(%rip),%rax
	movb $0xa,%cl
.Lblake2s_compress_avx512_roundloop:
	addq $0x40,%rax
	vmovdqa -0x40(%rax),%ymm8
	vmovdqa -0x20(%rax),%ymm9
	vpermi2d %ymm7,%ymm6,%ymm8
	vpermi2d %ymm7,%ymm6,%ymm9
	vmovdqa %ymm8,%ymm6
	vmovdqa %ymm9,%ymm7
	vpaddd %xmm8,%xmm0,%xmm0
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
	vprord $0x10,%xmm3,%xmm3
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
	vprord $0xc,%xmm1,%xmm1
	vextracti128 $0x1,%ymm8,%xmm8
	vpaddd %xmm8,%xmm0,%xmm0
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
	vprord $0x8,%xmm3,%xmm3
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
	vprord $0x7,%xmm1,%xmm1
	vpshufd $0x93,%xmm0,%xmm0
	vpshufd $0x4e,%xmm3,%xmm3
	vpshufd $0x39,%xmm2,%xmm2
	vpaddd %xmm9,%xmm0,%xmm0
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
	vprord $0x10,%xmm3,%xmm3
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
	vprord $0xc,%xmm1,%xmm1
	vextracti128 $0x1,%ymm9,%xmm9
	vpaddd %xmm9,%xmm0,%xmm0
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
	vprord $0x8,%xmm3,%xmm3
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
	vprord $0x7,%xmm1,%xmm1
	vpshufd $0x39,%xmm0,%xmm0
	vpshufd $0x4e,%xmm3,%xmm3
	vpshufd $0x93,%xmm2,%xmm2
	decb %cl
	jne .Lblake2s_compress_avx512_roundloop
	vpxor %xmm10,%xmm0,%xmm0
	vpxor %xmm11,%xmm1,%xmm1
	vpxor %xmm2,%xmm0,%xmm0
	vpxor %xmm3,%xmm1,%xmm1
	decq %rdx
	jne .Lblake2s_compress_avx512_mainloop
	vmovdqu %xmm0,(%rdi)
	vmovdqu %xmm1,0x10(%rdi)
	vmovdqu %xmm4,0x20(%rdi)
	vzeroupper
	retq
SYM_FUNC_END(blake2s_compress_avx512)
#endif /* CONFIG_AS_AVX512 */
271
net/wireguard/crypto/zinc/blake2s/blake2s.c
Normal file
@ -0,0 +1,271 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is an implementation of the BLAKE2s hash and PRF functions.
 *
 * Information: https://blake2.net/
 *
 */

#include <zinc/blake2s.h>
#include "../selftest/run.h"

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bug.h>
#include <asm/unaligned.h>

static const u32 blake2s_iv[8] = {
	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};

static const u8 blake2s_sigma[10][16] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};

static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
	state->f[0] = -1;
}

static inline void blake2s_increment_counter(struct blake2s_state *state,
					     const u32 inc)
{
	state->t[0] += inc;
	state->t[1] += (state->t[0] < inc);
}

static inline void blake2s_init_param(struct blake2s_state *state,
				      const u32 param)
{
	int i;

	memset(state, 0, sizeof(*state));
	for (i = 0; i < 8; ++i)
		state->h[i] = blake2s_iv[i];
	state->h[0] ^= param;
}

void blake2s_init(struct blake2s_state *state, const size_t outlen)
{
	WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE));
	blake2s_init_param(state, 0x01010000 | outlen);
	state->outlen = outlen;
}

void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
		      const void *key, const size_t keylen)
{
	u8 block[BLAKE2S_BLOCK_SIZE] = { 0 };

	WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
		!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
	blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
	state->outlen = outlen;
	memcpy(block, key, keylen);
	blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
	memzero_explicit(block, BLAKE2S_BLOCK_SIZE);
}

#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "blake2s-x86_64-glue.c"
#else
static bool *const blake2s_nobs[] __initconst = { };
static void __init blake2s_fpu_init(void)
{
}
static inline bool blake2s_compress_arch(struct blake2s_state *state,
					 const u8 *block, size_t nblocks,
					 const u32 inc)
{
	return false;
}
#endif

static inline void blake2s_compress(struct blake2s_state *state,
				    const u8 *block, size_t nblocks,
				    const u32 inc)
{
	u32 m[16];
	u32 v[16];
	int i;

	WARN_ON(IS_ENABLED(DEBUG) &&
		(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));

	if (blake2s_compress_arch(state, block, nblocks, inc))
		return;

	while (nblocks > 0) {
		blake2s_increment_counter(state, inc);
		memcpy(m, block, BLAKE2S_BLOCK_SIZE);
		le32_to_cpu_array(m, ARRAY_SIZE(m));
		memcpy(v, state->h, 32);
		v[ 8] = blake2s_iv[0];
		v[ 9] = blake2s_iv[1];
		v[10] = blake2s_iv[2];
		v[11] = blake2s_iv[3];
		v[12] = blake2s_iv[4] ^ state->t[0];
		v[13] = blake2s_iv[5] ^ state->t[1];
		v[14] = blake2s_iv[6] ^ state->f[0];
		v[15] = blake2s_iv[7] ^ state->f[1];

#define G(r, i, a, b, c, d) do { \
	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
	d = ror32(d ^ a, 16); \
	c += d; \
	b = ror32(b ^ c, 12); \
	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
	d = ror32(d ^ a, 8); \
	c += d; \
	b = ror32(b ^ c, 7); \
} while (0)

#define ROUND(r) do { \
	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
	G(r, 2, v[2], v[ 6], v[10], v[14]); \
	G(r, 3, v[3], v[ 7], v[11], v[15]); \
	G(r, 4, v[0], v[ 5], v[10], v[15]); \
	G(r, 5, v[1], v[ 6], v[11], v[12]); \
	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
} while (0)
		ROUND(0);
		ROUND(1);
		ROUND(2);
		ROUND(3);
		ROUND(4);
		ROUND(5);
		ROUND(6);
		ROUND(7);
		ROUND(8);
		ROUND(9);

#undef G
#undef ROUND

		for (i = 0; i < 8; ++i)
			state->h[i] ^= v[i] ^ v[i + 8];

		block += BLAKE2S_BLOCK_SIZE;
		--nblocks;
	}
}

void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
{
	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;

	if (unlikely(!inlen))
		return;
	if (inlen > fill) {
		memcpy(state->buf + state->buflen, in, fill);
		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
		state->buflen = 0;
		in += fill;
		inlen -= fill;
	}
	if (inlen > BLAKE2S_BLOCK_SIZE) {
		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
		/* Hash one less (full) block than strictly possible */
		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
	}
	memcpy(state->buf + state->buflen, in, inlen);
	state->buflen += inlen;
}
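A note on the "one less (full) block" logic above: blake2s_update() deliberately never compresses the final full block, so at least one byte always remains buffered. That is because blake2s_final() must set the last-block flag (f[0]) before the closing compression, so the last block has to be compressed there, not here. For example, with an empty buffer and inlen == 2 * BLAKE2S_BLOCK_SIZE, exactly one block is compressed and the remaining 64 bytes stay in state->buf for the finalizer.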

void blake2s_final(struct blake2s_state *state, u8 *out)
{
	WARN_ON(IS_ENABLED(DEBUG) && !out);
	blake2s_set_lastblock(state);
	memset(state->buf + state->buflen, 0,
	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
	blake2s_compress(state, state->buf, 1, state->buflen);
	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
	memcpy(out, state->h, state->outlen);
	memzero_explicit(state, sizeof(*state));
}

void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const size_t outlen,
		  const size_t inlen, const size_t keylen)
{
	struct blake2s_state state;
	u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
	u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
	int i;

	if (keylen > BLAKE2S_BLOCK_SIZE) {
		blake2s_init(&state, BLAKE2S_HASH_SIZE);
		blake2s_update(&state, key, keylen);
		blake2s_final(&state, x_key);
	} else
		memcpy(x_key, key, keylen);

	for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
		x_key[i] ^= 0x36;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);
	blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
	blake2s_update(&state, in, inlen);
	blake2s_final(&state, i_hash);

	for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
		x_key[i] ^= 0x5c ^ 0x36;

	blake2s_init(&state, BLAKE2S_HASH_SIZE);
	blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
	blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
	blake2s_final(&state, i_hash);

	memcpy(out, i_hash, outlen);
	memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
	memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
}
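blake2s_hmac() above is the generic HMAC construction instantiated with BLAKE2s rather than a BLAKE2s-specific keyed mode:

	HMAC(K, m) = H((K' ^ opad) || H((K' ^ ipad) || m)),  ipad = 0x36.., opad = 0x5c..

where K' is the key, hashed down first if it exceeds the 64-byte block size. The in-place `x_key[i] ^= 0x5c ^ 0x36` step works because XOR-ing out the ipad and XOR-ing in the opad compose into a single constant.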

#include "../selftest/blake2s.c"

static bool nosimd __initdata = false;

#ifndef COMPAT_ZINC_IS_A_MODULE
int __init blake2s_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
	if (!nosimd)
		blake2s_fpu_init();
	if (!selftest_run("blake2s", blake2s_selftest, blake2s_nobs,
			  ARRAY_SIZE(blake2s_nobs)))
		return -ENOTRECOVERABLE;
	return 0;
}

#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}

module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("BLAKE2s hash function");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif
98
net/wireguard/crypto/zinc/chacha20/chacha20-arm-glue.c
Normal file
@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#if defined(CONFIG_ZINC_ARCH_ARM)
#include <asm/system_info.h>
#include <asm/cputype.h>
#endif

asmlinkage void chacha20_arm(u8 *out, const u8 *in, const size_t len,
			     const u32 key[8], const u32 counter[4]);
asmlinkage void hchacha20_arm(const u32 state[16], u32 out[8]);
asmlinkage void chacha20_neon(u8 *out, const u8 *in, const size_t len,
			      const u32 key[8], const u32 counter[4]);

static bool chacha20_use_neon __ro_after_init;
static bool *const chacha20_nobs[] __initconst = { &chacha20_use_neon };
static void __init chacha20_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
	chacha20_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
	switch (read_cpuid_part()) {
	case ARM_CPU_PART_CORTEX_A7:
	case ARM_CPU_PART_CORTEX_A5:
		/* The Cortex-A7 and Cortex-A5 do not perform well with the
		 * NEON implementation but do incredibly well with the scalar
		 * one, while also using less power.
		 */
		break;
	default:
		chacha20_use_neon = elf_hwcap & HWCAP_NEON;
	}
#endif
}

static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
				 const u8 *src, size_t len,
				 simd_context_t *simd_context)
{
	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
		     PAGE_SIZE % CHACHA20_BLOCK_SIZE);

	for (;;) {
		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && chacha20_use_neon &&
		    len >= CHACHA20_BLOCK_SIZE * 3 && simd_use(simd_context)) {
			const size_t bytes = min_t(size_t, len, PAGE_SIZE);

			chacha20_neon(dst, src, bytes, ctx->key, ctx->counter);
			ctx->counter[0] += (bytes + 63) / 64;
			len -= bytes;
			if (!len)
				break;
			dst += bytes;
			src += bytes;
			simd_relax(simd_context);
		} else {
			chacha20_arm(dst, src, len, ctx->key, ctx->counter);
			ctx->counter[0] += (len + 63) / 64;
			break;
		}
	}

	return true;
}

static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
				  const u8 nonce[HCHACHA20_NONCE_SIZE],
				  const u8 key[HCHACHA20_KEY_SIZE],
				  simd_context_t *simd_context)
{
	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM)) {
		u32 x[] = { CHACHA20_CONSTANT_EXPA,
			    CHACHA20_CONSTANT_ND_3,
			    CHACHA20_CONSTANT_2_BY,
			    CHACHA20_CONSTANT_TE_K,
			    get_unaligned_le32(key + 0),
			    get_unaligned_le32(key + 4),
			    get_unaligned_le32(key + 8),
			    get_unaligned_le32(key + 12),
			    get_unaligned_le32(key + 16),
			    get_unaligned_le32(key + 20),
			    get_unaligned_le32(key + 24),
			    get_unaligned_le32(key + 28),
			    get_unaligned_le32(nonce + 0),
			    get_unaligned_le32(nonce + 4),
			    get_unaligned_le32(nonce + 8),
			    get_unaligned_le32(nonce + 12)
		};
		hchacha20_arm(x, derived_key);
		return true;
	}
	return false;
}
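The x[] layout above is the usual ChaCha state: four constants, eight key words, then the 16-byte HChaCha20 nonce in the slots where counter and nonce normally live. For reference, a rough sketch of how callers typically build XChaCha20 on top of hchacha20() (mirroring the pattern used by the xchacha20poly1305 code; the variable names here are illustrative):

	u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);
	struct chacha20_ctx ctx;

	/* The first 16 nonce bytes derive a subkey; the last 8 become the nonce. */
	hchacha20(derived_key, nonce24, key, &simd_context);
	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
	chacha20_init(&ctx, (u8 *)derived_key, get_unaligned_le64(nonce24 + 16));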
1227
net/wireguard/crypto/zinc/chacha20/chacha20-arm.pl
Normal file
File diff suppressed because it is too large
1163
net/wireguard/crypto/zinc/chacha20/chacha20-arm64.pl
Normal file
File diff suppressed because it is too large
27
net/wireguard/crypto/zinc/chacha20/chacha20-mips-glue.c
Normal file
@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

asmlinkage void chacha20_mips(u32 state[16], u8 *out, const u8 *in,
			      const size_t len);
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}

static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
				 const u8 *src, size_t len,
				 simd_context_t *simd_context)
{
	chacha20_mips(ctx->state, dst, src, len);
	return true;
}

static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
				  const u8 nonce[HCHACHA20_NONCE_SIZE],
				  const u8 key[HCHACHA20_KEY_SIZE],
				  simd_context_t *simd_context)
{
	return false;
}
424
net/wireguard/crypto/zinc/chacha20/chacha20-mips.S
Normal file
@ -0,0 +1,424 @@
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#define MASK_U32		0x3c
#define CHACHA20_BLOCK_SIZE	64
#define STACK_SIZE		32

#define X0	$t0
#define X1	$t1
#define X2	$t2
#define X3	$t3
#define X4	$t4
#define X5	$t5
#define X6	$t6
#define X7	$t7
#define X8	$t8
#define X9	$t9
#define X10	$v1
#define X11	$s6
#define X12	$s5
#define X13	$s4
#define X14	$s3
#define X15	$s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0	$s1
#define T1	$s0
#define T(n)	T ## n
#define X(n)	X ## n

/* Input arguments */
#define STATE	$a0
#define OUT	$a1
#define IN	$a2
#define BYTES	$a3

/* Output argument */
/* NONCE[0] is kept in a register and not in memory.
 * We don't want to touch the original value in memory.
 * It must be incremented on every loop iteration.
 */
#define NONCE_0	$v0

/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so we don't leak clear data.
 * They are used for handling the last bytes, which are not a multiple of 4.
 */
#define SAVED_X		X15
#define SAVED_CA	$s7

#define IS_UNALIGNED	$s7

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define	CPU_TO_LE32(n) \
	wsbh	n; \
	rotr	n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif

#define FOR_EACH_WORD(x) \
	x( 0); \
	x( 1); \
	x( 2); \
	x( 3); \
	x( 4); \
	x( 5); \
	x( 6); \
	x( 7); \
	x( 8); \
	x( 9); \
	x(10); \
	x(11); \
	x(12); \
	x(13); \
	x(14); \
	x(15);

#define FOR_EACH_WORD_REV(x) \
	x(15); \
	x(14); \
	x(13); \
	x(12); \
	x(11); \
	x(10); \
	x( 9); \
	x( 8); \
	x( 7); \
	x( 6); \
	x( 5); \
	x( 4); \
	x( 3); \
	x( 2); \
	x( 1); \
	x( 0);

#define PLUS_ONE_0	1
#define PLUS_ONE_1	2
#define PLUS_ONE_2	3
#define PLUS_ONE_3	4
#define PLUS_ONE_4	5
#define PLUS_ONE_5	6
#define PLUS_ONE_6	7
#define PLUS_ONE_7	8
#define PLUS_ONE_8	9
#define PLUS_ONE_9	10
#define PLUS_ONE_10	11
#define PLUS_ONE_11	12
#define PLUS_ONE_12	13
#define PLUS_ONE_13	14
#define PLUS_ONE_14	15
#define PLUS_ONE_15	16
#define PLUS_ONE(x)	PLUS_ONE_ ## x
#define _CONCAT3(a,b,c)	a ## b ## c
#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)

#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lwl	T1, (x*4)+MSB ## (IN); \
	lwr	T1, (x*4)+LSB ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	swl	X ## x, (x*4)+MSB ## (OUT); \
	swr	X ## x, (x*4)+LSB ## (OUT);

#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lw	T1, (x*4) ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	sw	X ## x, (x*4) ## (OUT);

/* Jump table macro.
 * Used for setup and for handling the last bytes, which are not a multiple of 4.
 * X15 is free to store Xn.
 * Every jump-table entry must be equal in size.
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha20_mips_xor_aligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

#define JMPTBL_UNALIGNED(x) \
.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha20_mips_xor_unaligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z, S) \
	addu	X(A), X(K); \
	addu	X(B), X(L); \
	addu	X(C), X(M); \
	addu	X(D), X(N); \
	xor	X(V), X(A); \
	xor	X(W), X(B); \
	xor	X(Y), X(C); \
	xor	X(Z), X(D); \
	rotl	X(V), S; \
	rotl	X(W), S; \
	rotl	X(Y), S; \
	rotl	X(Z), S;

.text
.set	reorder
.set	noat
.globl	chacha20_mips
.ent	chacha20_mips
chacha20_mips:
	.frame	$sp, STACK_SIZE, $ra

	addiu	$sp, -STACK_SIZE

	/* Return if BYTES == 0. */
	beqz	BYTES, .Lchacha20_mips_end

	lw	NONCE_0, 48(STATE)

	/* Save s0-s7 */
	sw	$s0,  0($sp)
	sw	$s1,  4($sp)
	sw	$s2,  8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)
	sw	$s6, 24($sp)
	sw	$s7, 28($sp)

	/* Test whether IN or OUT is unaligned.
	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
	 */
	or	IS_UNALIGNED, IN, OUT
	andi	IS_UNALIGNED, 0x3

	/* Set number of rounds */
	li	$at, 20

	b	.Lchacha20_rounds_start

.align 4
.Loop_chacha20_rounds:
	addiu	IN,  CHACHA20_BLOCK_SIZE
	addiu	OUT, CHACHA20_BLOCK_SIZE
	addiu	NONCE_0, 1

.Lchacha20_rounds_start:
	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)

	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)

	move	X12, NONCE_0
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_chacha20_xor_rounds:
	addiu	$at, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$at, .Loop_chacha20_xor_rounds

	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)

	/* If src/dst are unaligned, jump to the unaligned path. */
	bnez	IS_UNALIGNED, .Loop_chacha20_unaligned

	/* Set number of rounds here, to fill the delay slot. */
	li	$at, 20

	/* BYTES < 0 means there is no full block left. */
	bltz	BYTES, .Lchacha20_mips_no_full_block_aligned

	FOR_EACH_WORD_REV(STORE_ALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha20_rounds

	/* Place this here to fill the delay slot */
	addiu	NONCE_0, 1

	/* BYTES < 0? Handle the last bytes */
	bltz	BYTES, .Lchacha20_mips_xor_bytes

.Lchacha20_mips_xor_done:
	/* Restore used registers */
	lw	$s0,  0($sp)
	lw	$s1,  4($sp)
	lw	$s2,  8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)
	lw	$s6, 24($sp)
	lw	$s7, 28($sp)

	/* Write NONCE_0 back to the right location in state */
	sw	NONCE_0, 48(STATE)

.Lchacha20_mips_end:
	addiu	$sp, STACK_SIZE
	jr	$ra

.Lchacha20_mips_no_full_block_aligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)

	/* Calculate lower half jump table offset */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)

	/* Read value from STATE */
	lw	SAVED_CA, 0(T1)

	/* Store remaining byte counter as a negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_ALIGNED)


.Loop_chacha20_unaligned:
	/* Set number of rounds here, to fill the delay slot. */
	li	$at, 20

	/* BYTES < 0 means there is no full block left. */
	bltz	BYTES, .Lchacha20_mips_no_full_block_unaligned

	FOR_EACH_WORD_REV(STORE_UNALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha20_rounds

	/* Write NONCE_0 back to the right location in state */
	sw	NONCE_0, 48(STATE)

	.set noreorder
	/* Fall through to byte handling */
	bgez	BYTES, .Lchacha20_mips_xor_done
.Lchacha20_mips_xor_unaligned_0_b:
.Lchacha20_mips_xor_aligned_0_b:
	/* Place this here to fill the delay slot */
	addiu	NONCE_0, 1
	.set reorder

.Lchacha20_mips_xor_bytes:
	addu	IN, $at
	addu	OUT, $at
	/* First byte */
	lbu	T1, 0(IN)
	addiu	$at, BYTES, 1
	CPU_TO_LE32(SAVED_X)
	ROTR(SAVED_X)
	xor	T1, SAVED_X
	sb	T1, 0(OUT)
	beqz	$at, .Lchacha20_mips_xor_done
	/* Second byte */
	lbu	T1, 1(IN)
	addiu	$at, BYTES, 2
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 1(OUT)
	beqz	$at, .Lchacha20_mips_xor_done
	/* Third byte */
	lbu	T1, 2(IN)
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 2(OUT)
	b	.Lchacha20_mips_xor_done

.Lchacha20_mips_no_full_block_unaligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)

	/* Calculate lower half jump table offset */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)

	/* Read value from STATE */
	lw	SAVED_CA, 0(T1)

	/* Store remaining byte counter as a negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha20_mips
.set at
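A brief note on the jump-table arithmetic above, since it is easy to misread: `andi $at, BYTES, MASK_U32` leaves the number of full words times 4, and each JMPTBL entry is exactly two 4-byte instructions, i.e. 8 bytes. `ins T0, $at, 1, 6` writes $at's low bits into T0 shifted left by one, so the computed offset is (words * 4) * 2 = words * 8, selecting the matching entry. This is also why every jump-table entry must be equal in size.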
461
net/wireguard/crypto/zinc/chacha20/chacha20-unrolled-arm.S
Normal file
@ -0,0 +1,461 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2018 Google, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * Design notes:
 *
 * 16 registers would be needed to hold the state matrix, but only 14 are
 * available because 'sp' and 'pc' cannot be used. So we spill the elements
 * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
 * 'ldrd' and one 'strd' instruction per round.
 *
 * All rotates are performed using the implicit rotate operand accepted by the
 * 'add' and 'eor' instructions. This is faster than using explicit rotate
 * instructions. To make this work, we allow the values in the second and last
 * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
 * wrong rotation amount. The rotation amount is then fixed up just in time
 * when the values are used. 'brot' is the number of bits the values in row 'b'
 * need to be rotated right to arrive at the correct values, and 'drot'
 * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
 * that they end up as (25, 24) after every round.
 */

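The deferred-rotation trick in the notes above relies on rotations composing: ror32(ror32(x, r1), r2) == ror32(x, (r1 + r2) % 32). So instead of rotating a value immediately, the code tracks the pending right-rotation amount (brot or drot) and folds it into the next instruction's shifted-operand form, e.g. `add \a1, \a1, \b1, ror #brot`, costing zero extra instructions on most cores.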
// ChaCha state registers
X0	.req	r0
X1	.req	r1
X2	.req	r2
X3	.req	r3
X4	.req	r4
X5	.req	r5
X6	.req	r6
X7	.req	r7
X8_X10	.req	r8	// shared by x8 and x10
X9_X11	.req	r9	// shared by x9 and x11
X12	.req	r10
X13	.req	r11
X14	.req	r12
X15	.req	r14

.Lexpand_32byte_k:
	// "expand 32-byte k"
	.word	0x61707865, 0x3320646e, 0x79622d32, 0x6b206574

#ifdef __thumb2__
#  define adrl adr
#endif

.macro __rev		out, in,  t0, t1, t2
.if __LINUX_ARM_ARCH__ >= 6
	rev		\out, \in
.else
	lsl		\t0, \in, #24
	and		\t1, \in, #0xff00
	and		\t2, \in, #0xff0000
	orr		\out, \t0, \in, lsr #24
	orr		\out, \out, \t1, lsl #8
	orr		\out, \out, \t2, lsr #8
.endif
.endm

.macro _le32_bswap	x,  t0, t1, t2
#ifdef __ARMEB__
	__rev		\x, \x,  \t0, \t1, \t2
#endif
.endm

.macro _le32_bswap_4x	a, b, c, d,  t0, t1, t2
	_le32_bswap	\a,  \t0, \t1, \t2
	_le32_bswap	\b,  \t0, \t1, \t2
	_le32_bswap	\c,  \t0, \t1, \t2
	_le32_bswap	\d,  \t0, \t1, \t2
.endm

.macro __ldrd		a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
	ldrd		\a, \b, [\src, #\offset]
#else
	ldr		\a, [\src, #\offset]
	ldr		\b, [\src, #\offset + 4]
#endif
.endm

.macro __strd		a, b, dst, offset
#if __LINUX_ARM_ARCH__ >= 6
	strd		\a, \b, [\dst, #\offset]
#else
	str		\a, [\dst, #\offset]
	str		\b, [\dst, #\offset + 4]
#endif
.endm

.macro _halfround	a1, b1, c1, d1,  a2, b2, c2, d2

	// a += b; d ^= a; d = rol(d, 16);
	add		\a1, \a1, \b1, ror #brot
	add		\a2, \a2, \b2, ror #brot
	eor		\d1, \a1, \d1, ror #drot
	eor		\d2, \a2, \d2, ror #drot
	// drot == 32 - 16 == 16

	// c += d; b ^= c; b = rol(b, 12);
	add		\c1, \c1, \d1, ror #16
	add		\c2, \c2, \d2, ror #16
	eor		\b1, \c1, \b1, ror #brot
	eor		\b2, \c2, \b2, ror #brot
	// brot == 32 - 12 == 20

	// a += b; d ^= a; d = rol(d, 8);
	add		\a1, \a1, \b1, ror #20
	add		\a2, \a2, \b2, ror #20
	eor		\d1, \a1, \d1, ror #16
	eor		\d2, \a2, \d2, ror #16
	// drot == 32 - 8 == 24

	// c += d; b ^= c; b = rol(b, 7);
	add		\c1, \c1, \d1, ror #24
	add		\c2, \c2, \d2, ror #24
	eor		\b1, \c1, \b1, ror #20
	eor		\b2, \c2, \b2, ror #20
	// brot == 32 - 7 == 25
.endm

.macro _doubleround

	// column round

	// quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
	_halfround	X0, X4, X8_X10, X12,  X1, X5, X9_X11, X13

	// save (x8, x9); restore (x10, x11)
	__strd		X8_X10, X9_X11, sp, 0
	__ldrd		X8_X10, X9_X11, sp, 8

	// quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
	_halfround	X2, X6, X8_X10, X14,  X3, X7, X9_X11, X15

	.set brot, 25
	.set drot, 24

	// diagonal round

	// quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
	_halfround	X0, X5, X8_X10, X15,  X1, X6, X9_X11, X12

	// save (x10, x11); restore (x8, x9)
	__strd		X8_X10, X9_X11, sp, 8
	__ldrd		X8_X10, X9_X11, sp, 0

	// quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
	_halfround	X2, X7, X8_X10, X13,  X3, X4, X9_X11, X14
.endm

.macro _chacha_permute	nrounds
	.set brot, 0
	.set drot, 0
	.rept \nrounds / 2
	 _doubleround
	.endr
.endm

.macro _chacha		nrounds

.Lnext_block\@:
	// Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
	// Registers contain x0-x9,x12-x15.

	// Do the core ChaCha permutation to update x0-x15.
	_chacha_permute	\nrounds

	add		sp, #8
	// Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers contain x0-x9,x12-x15.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
	push		{X8_X10, X9_X11, X12, X13, X14, X15}

	// Load (OUT, IN, LEN).
	ldr		r14, [sp, #96]
	ldr		r12, [sp, #100]
	ldr		r11, [sp, #104]

	orr		r10, r14, r12

	// Use slow path if fewer than 64 bytes remain.
	cmp		r11, #64
	blt		.Lxor_slowpath\@

	// Use slow path if IN and/or OUT isn't 4-byte aligned.  Needed even on
	// ARMv6+, since ldmia and stmia (used below) still require alignment.
	tst		r10, #3
	bne		.Lxor_slowpath\@

	// Fast path: XOR 64 bytes of aligned data.

	// Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// x0-x3
	__ldrd		r8, r9, sp, 32
	__ldrd		r10, r11, sp, 40
	add		X0, X0, r8
	add		X1, X1, r9
	add		X2, X2, r10
	add		X3, X3, r11
	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
	ldmia		r12!, {r8-r11}
	eor		X0, X0, r8
	eor		X1, X1, r9
	eor		X2, X2, r10
	eor		X3, X3, r11
	stmia		r14!, {X0-X3}

	// x4-x7
	__ldrd		r8, r9, sp, 48
	__ldrd		r10, r11, sp, 56
	add		X4, r8, X4, ror #brot
	add		X5, r9, X5, ror #brot
	ldmia		r12!, {X0-X3}
	add		X6, r10, X6, ror #brot
	add		X7, r11, X7, ror #brot
	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
	eor		X4, X4, X0
	eor		X5, X5, X1
	eor		X6, X6, X2
	eor		X7, X7, X3
	stmia		r14!, {X4-X7}

	// x8-x15
	pop		{r0-r7}			// (x8-x9,x12-x15,x10-x11)
	__ldrd		r8, r9, sp, 32
	__ldrd		r10, r11, sp, 40
	add		r0, r0, r8		// x8
	add		r1, r1, r9		// x9
	add		r6, r6, r10		// x10
	add		r7, r7, r11		// x11
	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
	ldmia		r12!, {r8-r11}
	eor		r0, r0, r8		// x8
	eor		r1, r1, r9		// x9
	eor		r6, r6, r10		// x10
	eor		r7, r7, r11		// x11
	stmia		r14!, {r0,r1,r6,r7}
	ldmia		r12!, {r0,r1,r6,r7}
	__ldrd		r8, r9, sp, 48
	__ldrd		r10, r11, sp, 56
	add		r2, r8, r2, ror #drot	// x12
	add		r3, r9, r3, ror #drot	// x13
	add		r4, r10, r4, ror #drot	// x14
	add		r5, r11, r5, ror #drot	// x15
	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
	  ldr		r9, [sp, #72]		// load LEN
	eor		r2, r2, r0		// x12
	eor		r3, r3, r1		// x13
	eor		r4, r4, r6		// x14
	eor		r5, r5, r7		// x15
	  subs		r9, #64			// decrement and check LEN
	stmia		r14!, {r2-r5}

	beq		.Ldone\@

.Lprepare_for_next_block\@:

	// Stack: x0-x15 OUT IN LEN

	// Increment block counter (x12)
	add		r8, #1

	// Store updated (OUT, IN, LEN)
	str		r14, [sp, #64]
	str		r12, [sp, #68]
	str		r9, [sp, #72]

	mov		r14, sp

	// Store updated block counter (x12)
	str		r8, [sp, #48]

	sub		sp, #16

	// Reload state and do next block
	ldmia		r14!, {r0-r11}		// load x0-x11
	__strd		r10, r11, sp, 8		// store x10-x11 before state
	ldmia		r14, {r10-r12,r14}	// load x12-x15
	b		.Lnext_block\@

.Lxor_slowpath\@:
	// Slow path: < 64 bytes remaining, or unaligned input or output buffer.
	// We handle it by storing the 64 bytes of keystream to the stack, then
	// XOR-ing the needed portion with the data.

	// Allocate keystream buffer
	sub		sp, #64
	mov		r14, sp

	// Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
	// Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
	// x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.

	// Save keystream for x0-x3
	__ldrd		r8, r9, sp, 96
	__ldrd		r10, r11, sp, 104
	add		X0, X0, r8
	add		X1, X1, r9
	add		X2, X2, r10
	add		X3, X3, r11
	_le32_bswap_4x	X0, X1, X2, X3,  r8, r9, r10
	stmia		r14!, {X0-X3}

	// Save keystream for x4-x7
	__ldrd		r8, r9, sp, 112
	__ldrd		r10, r11, sp, 120
	add		X4, r8, X4, ror #brot
	add		X5, r9, X5, ror #brot
	add		X6, r10, X6, ror #brot
	add		X7, r11, X7, ror #brot
	_le32_bswap_4x	X4, X5, X6, X7,  r8, r9, r10
	  add		r8, sp, #64
	stmia		r14!, {X4-X7}

	// Save keystream for x8-x15
	ldm		r8, {r0-r7}		// (x8-x9,x12-x15,x10-x11)
	__ldrd		r8, r9, sp, 128
	__ldrd		r10, r11, sp, 136
	add		r0, r0, r8		// x8
	add		r1, r1, r9		// x9
	add		r6, r6, r10		// x10
	add		r7, r7, r11		// x11
	_le32_bswap_4x	r0, r1, r6, r7,  r8, r9, r10
	stmia		r14!, {r0,r1,r6,r7}
	__ldrd		r8, r9, sp, 144
	__ldrd		r10, r11, sp, 152
	add		r2, r8, r2, ror #drot	// x12
	add		r3, r9, r3, ror #drot	// x13
	add		r4, r10, r4, ror #drot	// x14
	add		r5, r11, r5, ror #drot	// x15
	_le32_bswap_4x	r2, r3, r4, r5,  r9, r10, r11
	stmia		r14, {r2-r5}

	// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
	// Registers: r8 is block counter, r12 is IN.

	ldr		r9, [sp, #168]		// LEN
	ldr		r14, [sp, #160]		// OUT
	cmp		r9, #64
	  mov		r0, sp
	movle		r1, r9
	movgt		r1, #64
	// r1 is number of bytes to XOR, in range [1, 64]

.if __LINUX_ARM_ARCH__ < 6
	orr		r2, r12, r14
	tst		r2, #3			// IN or OUT misaligned?
	bne		.Lxor_next_byte\@
.endif

	// XOR a word at a time
.rept 16
	subs		r1, #4
	blt		.Lxor_words_done\@
	ldr		r2, [r12], #4
	ldr		r3, [r0], #4
	eor		r2, r2, r3
	str		r2, [r14], #4
.endr
	b		.Lxor_slowpath_done\@
.Lxor_words_done\@:
	ands		r1, r1, #3
	beq		.Lxor_slowpath_done\@

	// XOR a byte at a time
.Lxor_next_byte\@:
	ldrb		r2, [r12], #1
	ldrb		r3, [r0], #1
	eor		r2, r2, r3
	strb		r2, [r14], #1
	subs		r1, #1
	bne		.Lxor_next_byte\@

.Lxor_slowpath_done\@:
	subs		r9, #64
	add		sp, #96
	bgt		.Lprepare_for_next_block\@

.Ldone\@:
.endm	// _chacha

/*
 * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
 *		     const u32 iv[4]);
 */
SYM_FUNC_START(chacha20_arm)
	cmp		r2, #0			// len == 0?
	reteq		lr

	push		{r0-r2,r4-r11,lr}

	// Push state x0-x15 onto stack.
	// Also store an extra copy of x10-x11 just before the state.

	ldr		r4, [sp, #48]		// iv
	mov		r0, sp
	sub		sp, #80

	// iv: x12-x15
	ldm		r4, {X12,X13,X14,X15}
	stmdb		r0!, {X12,X13,X14,X15}

	// key: x4-x11
	__ldrd		X8_X10, X9_X11, r3, 24
	__strd		X8_X10, X9_X11, sp, 8
	stmdb		r0!, {X8_X10, X9_X11}
	ldm		r3, {X4-X9_X11}
	stmdb		r0!, {X4-X9_X11}

	// constants: x0-x3
	adrl		X3, .Lexpand_32byte_k
	ldm		X3, {X0-X3}
	__strd		X0, X1, sp, 16
	__strd		X2, X3, sp, 24

	_chacha		20

	add		sp, #76
	pop		{r4-r11, pc}
SYM_FUNC_END(chacha20_arm)

/*
 * void hchacha20_arm(const u32 state[16], u32 out[8]);
 */
SYM_FUNC_START(hchacha20_arm)
	push		{r1,r4-r11,lr}

	mov		r14, r0
	ldmia		r14!, {r0-r11}		// load x0-x11
	push		{r10-r11}		// store x10-x11 to stack
	ldm		r14, {r10-r12,r14}	// load x12-x15
	sub		sp, #8

	_chacha_permute	20

	// Skip over (unused0-unused1, x10-x11)
	add		sp, #16

	// Fix up rotations of x12-x15
	ror		X12, X12, #drot
	ror		X13, X13, #drot
	  pop		{r4}			// load 'out'
	ror		X14, X14, #drot
	ror		X15, X15, #drot

	// Store (x0-x3,x12-x15) to 'out'
	stm		r4, {X0,X1,X2,X3,X12,X13,X14,X15}

	pop		{r4-r11,pc}
SYM_FUNC_END(hchacha20_arm)
105
net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c
Normal file
105
net/wireguard/crypto/zinc/chacha20/chacha20-x86_64-glue.c
Normal file
@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <asm/fpu/api.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/intel-family.h>

asmlinkage void hchacha20_ssse3(u32 *derived_key, const u8 *nonce,
				const u8 *key);
asmlinkage void chacha20_ssse3(u8 *out, const u8 *in, const size_t len,
			       const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx2(u8 *out, const u8 *in, const size_t len,
			      const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512(u8 *out, const u8 *in, const size_t len,
				const u32 key[8], const u32 counter[4]);
asmlinkage void chacha20_avx512vl(u8 *out, const u8 *in, const size_t len,
				  const u32 key[8], const u32 counter[4]);

static bool chacha20_use_ssse3 __ro_after_init;
static bool chacha20_use_avx2 __ro_after_init;
static bool chacha20_use_avx512 __ro_after_init;
static bool chacha20_use_avx512vl __ro_after_init;
static bool *const chacha20_nobs[] __initconst = {
	&chacha20_use_ssse3, &chacha20_use_avx2, &chacha20_use_avx512,
	&chacha20_use_avx512vl };

static void __init chacha20_fpu_init(void)
{
	chacha20_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
	chacha20_use_avx2 =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
	chacha20_use_avx512 =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL) &&
		/* Skylake downclocks unacceptably much when using zmm. */
		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
	chacha20_use_avx512vl =
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
				  XFEATURE_MASK_AVX512, NULL);
#endif
}

static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
				 const u8 *src, size_t len,
				 simd_context_t *simd_context)
{
	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(PAGE_SIZE < CHACHA20_BLOCK_SIZE ||
		     PAGE_SIZE % CHACHA20_BLOCK_SIZE);

	if (!IS_ENABLED(CONFIG_AS_SSSE3) || !chacha20_use_ssse3 ||
	    len <= CHACHA20_BLOCK_SIZE || !simd_use(simd_context))
		return false;

	for (;;) {
		const size_t bytes = min_t(size_t, len, PAGE_SIZE);

		if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512 &&
		    len >= CHACHA20_BLOCK_SIZE * 8)
			chacha20_avx512(dst, src, bytes, ctx->key, ctx->counter);
		else if (IS_ENABLED(CONFIG_AS_AVX512) && chacha20_use_avx512vl &&
			 len >= CHACHA20_BLOCK_SIZE * 4)
			chacha20_avx512vl(dst, src, bytes, ctx->key, ctx->counter);
		else if (IS_ENABLED(CONFIG_AS_AVX2) && chacha20_use_avx2 &&
			 len >= CHACHA20_BLOCK_SIZE * 4)
			chacha20_avx2(dst, src, bytes, ctx->key, ctx->counter);
		else
			chacha20_ssse3(dst, src, bytes, ctx->key, ctx->counter);
		ctx->counter[0] += (bytes + 63) / 64;
		len -= bytes;
		if (!len)
			break;
		dst += bytes;
		src += bytes;
		simd_relax(simd_context);
	}

	return true;
}

static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
				  const u8 nonce[HCHACHA20_NONCE_SIZE],
				  const u8 key[HCHACHA20_KEY_SIZE],
				  simd_context_t *simd_context)
{
	if (IS_ENABLED(CONFIG_AS_SSSE3) && chacha20_use_ssse3 &&
	    simd_use(simd_context)) {
		hchacha20_ssse3(derived_key, nonce, key);
		return true;
	}
	return false;
}
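One detail worth pulling out of the loop above: the 32-bit block counter must advance by the number of 64-byte ChaCha20 blocks actually consumed, and `(bytes + 63) / 64` is ceiling division since only the final chunk may be a partial block. A throwaway, standalone check of that arithmetic (a sketch, not part of the diff):

#include <assert.h>
#include <stddef.h>

int main(void)
{
	/* (bytes + 63) / 64 equals ceil(bytes / 64) for all bytes >= 1. */
	size_t bytes;

	for (bytes = 1; bytes <= 4096; ++bytes)
		assert((bytes + 63) / 64 ==
		       bytes / 64 + (bytes % 64 ? 1 : 0));
	return 0;
}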
4106
net/wireguard/crypto/zinc/chacha20/chacha20-x86_64.pl
Normal file
File diff suppressed because it is too large
191
net/wireguard/crypto/zinc/chacha20/chacha20.c
Normal file
@ -0,0 +1,191 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * Implementation of the ChaCha20 stream cipher.
 *
 * Information: https://cr.yp.to/chacha.html
 */

#include <zinc/chacha20.h>
#include "../selftest/run.h"

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <crypto/algapi.h> // For crypto_xor_cpy.

#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "chacha20-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
#include "chacha20-arm-glue.c"
#elif defined(CONFIG_ZINC_ARCH_MIPS)
#include "chacha20-mips-glue.c"
#else
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
}
static inline bool chacha20_arch(struct chacha20_ctx *ctx, u8 *dst,
				 const u8 *src, size_t len,
				 simd_context_t *simd_context)
{
	return false;
}
static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
				  const u8 nonce[HCHACHA20_NONCE_SIZE],
				  const u8 key[HCHACHA20_KEY_SIZE],
				  simd_context_t *simd_context)
{
	return false;
}
#endif

#define QUARTER_ROUND(x, a, b, c, d) ( \
	x[a] += x[b], \
	x[d] = rol32((x[d] ^ x[a]), 16), \
	x[c] += x[d], \
	x[b] = rol32((x[b] ^ x[c]), 12), \
	x[a] += x[b], \
	x[d] = rol32((x[d] ^ x[a]), 8), \
	x[c] += x[d], \
	x[b] = rol32((x[b] ^ x[c]), 7) \
)
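The comma-expression macro above is equivalent to the following plain function, shown only to make the data flow explicit. This is a standalone sketch: rol32_sketch re-derives the rotate rather than using the kernel's rol32 from <linux/bitops.h>.

#include <stdint.h>

static uint32_t rol32_sketch(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

/* One ChaCha quarter round over four words of the 16-word state:
 * each of the four lines is an add, an XOR, and a fixed rotation. */
static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
{
	x[a] += x[b]; x[d] = rol32_sketch(x[d] ^ x[a], 16);
	x[c] += x[d]; x[b] = rol32_sketch(x[b] ^ x[c], 12);
	x[a] += x[b]; x[d] = rol32_sketch(x[d] ^ x[a], 8);
	x[c] += x[d]; x[b] = rol32_sketch(x[b] ^ x[c], 7);
}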
#define C(i, j) (i * 4 + j)

#define DOUBLE_ROUND(x) ( \
	/* Column Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
	/* Diagonal Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
)

#define TWENTY_ROUNDS(x) ( \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x) \
)

static void chacha20_block_generic(struct chacha20_ctx *ctx, __le32 *stream)
{
	u32 x[CHACHA20_BLOCK_WORDS];
	int i;

	for (i = 0; i < ARRAY_SIZE(x); ++i)
		x[i] = ctx->state[i];

	TWENTY_ROUNDS(x);

	for (i = 0; i < ARRAY_SIZE(x); ++i)
		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);

	ctx->counter[0] += 1;
}

static void chacha20_generic(struct chacha20_ctx *ctx, u8 *out, const u8 *in,
			     u32 len)
{
	__le32 buf[CHACHA20_BLOCK_WORDS];

	while (len >= CHACHA20_BLOCK_SIZE) {
		chacha20_block_generic(ctx, buf);
		crypto_xor_cpy(out, in, (u8 *)buf, CHACHA20_BLOCK_SIZE);
		len -= CHACHA20_BLOCK_SIZE;
		out += CHACHA20_BLOCK_SIZE;
		in += CHACHA20_BLOCK_SIZE;
	}
	if (len) {
		chacha20_block_generic(ctx, buf);
		crypto_xor_cpy(out, in, (u8 *)buf, len);
	}
}

void chacha20(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 len,
	      simd_context_t *simd_context)
{
	if (!chacha20_arch(ctx, dst, src, len, simd_context))
		chacha20_generic(ctx, dst, src, len);
}

static void hchacha20_generic(u32 derived_key[CHACHA20_KEY_WORDS],
			      const u8 nonce[HCHACHA20_NONCE_SIZE],
			      const u8 key[HCHACHA20_KEY_SIZE])
{
	u32 x[] = { CHACHA20_CONSTANT_EXPA,
		    CHACHA20_CONSTANT_ND_3,
		    CHACHA20_CONSTANT_2_BY,
		    CHACHA20_CONSTANT_TE_K,
		    get_unaligned_le32(key + 0),
		    get_unaligned_le32(key + 4),
		    get_unaligned_le32(key + 8),
		    get_unaligned_le32(key + 12),
		    get_unaligned_le32(key + 16),
		    get_unaligned_le32(key + 20),
		    get_unaligned_le32(key + 24),
		    get_unaligned_le32(key + 28),
		    get_unaligned_le32(nonce + 0),
		    get_unaligned_le32(nonce + 4),
		    get_unaligned_le32(nonce + 8),
		    get_unaligned_le32(nonce + 12)
	};

	TWENTY_ROUNDS(x);

	memcpy(derived_key + 0, x + 0, sizeof(u32) * 4);
	memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
}

/* Derived key should be 32-bit aligned */
void hchacha20(u32 derived_key[CHACHA20_KEY_WORDS],
	       const u8 nonce[HCHACHA20_NONCE_SIZE],
	       const u8 key[HCHACHA20_KEY_SIZE], simd_context_t *simd_context)
{
	if (!hchacha20_arch(derived_key, nonce, key, simd_context))
		hchacha20_generic(derived_key, nonce, key);
}
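hchacha20() is what turns ChaCha20 into XChaCha20 further down in this commit: the first 16 bytes of a 24-byte extended nonce derive a fresh subkey, and the remaining 8 bytes become the ordinary 64-bit nonce. A minimal sketch of that split (the wrapper name is hypothetical; the real call sites are in chacha20poly1305.c below):

/* Hedged sketch of the XChaCha20 nonce split; xnonce is 24 bytes. */
static void xchacha_setup_sketch(u32 derived_key[CHACHA20_KEY_WORDS],
				 u64 *short_nonce,
				 const u8 xnonce[24], const u8 key[32],
				 simd_context_t *simd_context)
{
	hchacha20(derived_key, xnonce, key, simd_context); /* xnonce[0..15] */
	*short_nonce = get_unaligned_le64(xnonce + 16);    /* xnonce[16..23] */
}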
#include "../selftest/chacha20.c"

static bool nosimd __initdata = false;

#ifndef COMPAT_ZINC_IS_A_MODULE
int __init chacha20_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
	if (!nosimd)
		chacha20_fpu_init();
	if (!selftest_run("chacha20", chacha20_selftest, chacha20_nobs,
			  ARRAY_SIZE(chacha20_nobs)))
		return -ENOTRECOVERABLE;
	return 0;
}

#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}

module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20 stream cipher");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif
398
net/wireguard/crypto/zinc/chacha20poly1305.c
Normal file
@ -0,0 +1,398 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is an implementation of the ChaCha20Poly1305 AEAD construction.
 *
 * Information: https://tools.ietf.org/html/rfc8439
 */

#include <zinc/chacha20poly1305.h>
#include <zinc/chacha20.h>
#include <zinc/poly1305.h>
#include "selftest/run.h"

#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <crypto/scatterwalk.h> // For blkcipher_walk.

static const u8 pad0[CHACHA20_BLOCK_SIZE] = { 0 };

static inline void
__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
			   const u8 *ad, const size_t ad_len, const u64 nonce,
			   const u8 key[CHACHA20POLY1305_KEY_SIZE],
			   simd_context_t *simd_context)
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	union {
		u8 block0[POLY1305_KEY_SIZE];
		__le64 lens[2];
	} b = { { 0 } };

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
		 simd_context);
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
			simd_context);

	chacha20(&chacha20_state, dst, src, src_len, simd_context);

	poly1305_update(&poly1305_state, dst, src_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
			simd_context);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(src_len);
	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
			simd_context);

	poly1305_final(&poly1305_state, dst + src_len, simd_context);

	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
	memzero_explicit(&b, sizeof(b));
}
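Note the output layout implied by poly1305_final writing to dst + src_len: the caller must size dst for the ciphertext plus the 16-byte Poly1305 tag. A hedged usage sketch of the public wrapper that follows (the wrapper name and buffer names here are illustrative):

/* Sketch only: on return, dst holds ciphertext || 16-byte tag. */
static void encrypt_message_sketch(u8 *dst /* msg_len + POLY1305_MAC_SIZE */,
				   const u8 *msg, size_t msg_len,
				   const u8 *ad, size_t ad_len, u64 nonce,
				   const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	chacha20poly1305_encrypt(dst, msg, msg_len, ad, ad_len, nonce, key);
}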
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
			      const u8 *ad, const size_t ad_len,
			      const u64 nonce,
			      const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	simd_context_t simd_context;

	simd_get(&simd_context);
	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key,
				   &simd_context);
	simd_put(&simd_context);
}

bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src,
					 const size_t src_len,
					 const u8 *ad, const size_t ad_len,
					 const u64 nonce,
					 const u8 key[CHACHA20POLY1305_KEY_SIZE],
					 simd_context_t *simd_context)
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	struct sg_mapping_iter miter;
	size_t partial = 0;
	ssize_t sl;
	union {
		u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
		u8 block0[POLY1305_KEY_SIZE];
		u8 mac[POLY1305_MAC_SIZE];
		__le64 lens[2];
	} b __aligned(16) = { { 0 } };

	if (WARN_ON(src_len > INT_MAX))
		return false;

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
		 simd_context);
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
			simd_context);

	sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
	for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
		u8 *addr = miter.addr;
		size_t length = min_t(size_t, sl, miter.length);

		if (unlikely(partial)) {
			size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);

			crypto_xor(addr, b.chacha20_stream + partial, l);
			partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);

			addr += l;
			length -= l;
		}

		if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
			size_t l = length;

			if (unlikely(length < sl))
				l &= ~(CHACHA20_BLOCK_SIZE - 1);
			chacha20(&chacha20_state, addr, addr, l, simd_context);
			addr += l;
			length -= l;
		}

		if (unlikely(length > 0)) {
			chacha20(&chacha20_state, b.chacha20_stream, pad0,
				 CHACHA20_BLOCK_SIZE, simd_context);
			crypto_xor(addr, b.chacha20_stream, length);
			partial = length;
		}

		poly1305_update(&poly1305_state, miter.addr,
				min_t(size_t, sl, miter.length), simd_context);

		simd_relax(simd_context);
	}

	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
			simd_context);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(src_len);
	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
			simd_context);

	if (likely(sl <= -POLY1305_MAC_SIZE))
		poly1305_final(&poly1305_state, miter.addr + miter.length + sl,
			       simd_context);

	sg_miter_stop(&miter);

	if (unlikely(sl > -POLY1305_MAC_SIZE)) {
		poly1305_final(&poly1305_state, b.mac, simd_context);
		scatterwalk_map_and_copy(b.mac, src, src_len, sizeof(b.mac), 1);
	}

	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
	memzero_explicit(&b, sizeof(b));
	return true;
}
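Because scatterlist segments need not be multiples of 64 bytes, the loop above keeps up to one block of unused keystream in b.chacha20_stream and resumes XORing from offset partial in the next segment. Below is a simplified, self-contained model of just that carry logic; it is not the kernel code (the keystream source is stubbed, and the real loop ciphers whole aligned spans directly):

#include <stddef.h>
#include <string.h>

#define BLOCK 64

/* Stub: in the real code this would be one fresh ChaCha20 keystream block. */
static void next_block(unsigned char ks[BLOCK]) { memset(ks, 0xAA, BLOCK); }

/* XOR a stream over arbitrarily sized chunks, carrying leftover keystream. */
static void xor_chunk(unsigned char *p, size_t len,
		      unsigned char ks[BLOCK], size_t *partial)
{
	size_t i;

	while (len) {
		if (*partial == 0)
			next_block(ks);
		for (i = *partial; i < BLOCK && len; ++i, --len)
			*p++ ^= ks[i];
		*partial = i % BLOCK; /* 0 means the block is used up */
	}
}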
static inline bool
__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
			   const u8 *ad, const size_t ad_len, const u64 nonce,
			   const u8 key[CHACHA20POLY1305_KEY_SIZE],
			   simd_context_t *simd_context)
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	int ret;
	size_t dst_len;
	union {
		u8 block0[POLY1305_KEY_SIZE];
		u8 mac[POLY1305_MAC_SIZE];
		__le64 lens[2];
	} b = { { 0 } };

	if (unlikely(src_len < POLY1305_MAC_SIZE))
		return false;

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
		 simd_context);
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
			simd_context);

	dst_len = src_len - POLY1305_MAC_SIZE;
	poly1305_update(&poly1305_state, src, dst_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf,
			simd_context);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(dst_len);
	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
			simd_context);

	poly1305_final(&poly1305_state, b.mac, simd_context);

	ret = crypto_memneq(b.mac, src + dst_len, POLY1305_MAC_SIZE);
	if (likely(!ret))
		chacha20(&chacha20_state, dst, src, dst_len, simd_context);

	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
	memzero_explicit(&b, sizeof(b));

	return !ret;
}

bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
			      const u8 *ad, const size_t ad_len,
			      const u64 nonce,
			      const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	simd_context_t simd_context;
	bool ret;

	simd_get(&simd_context);
	ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
					 key, &simd_context);
	simd_put(&simd_context);
	return ret;
}
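The tag is verified with crypto_memneq (a constant-time compare) before any plaintext is emitted, so callers only need to honor the boolean result. A hedged usage sketch (the wrapper name is illustrative):

/* Sketch: src holds ciphertext || tag; dst receives src_len - 16 bytes. */
static bool decrypt_message_sketch(u8 *dst, const u8 *src, size_t src_len,
				   const u8 *ad, size_t ad_len, u64 nonce,
				   const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	if (!chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce,
				      key))
		return false; /* forged or corrupted; dst was not written */
	return true;
}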
bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src,
					 size_t src_len,
					 const u8 *ad, const size_t ad_len,
					 const u64 nonce,
					 const u8 key[CHACHA20POLY1305_KEY_SIZE],
					 simd_context_t *simd_context)
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	struct sg_mapping_iter miter;
	size_t partial = 0;
	ssize_t sl;
	union {
		u8 chacha20_stream[CHACHA20_BLOCK_SIZE];
		u8 block0[POLY1305_KEY_SIZE];
		struct {
			u8 read_mac[POLY1305_MAC_SIZE];
			u8 computed_mac[POLY1305_MAC_SIZE];
		};
		__le64 lens[2];
	} b __aligned(16) = { { 0 } };
	bool ret = false;

	if (unlikely(src_len < POLY1305_MAC_SIZE || WARN_ON(src_len > INT_MAX)))
		return ret;
	src_len -= POLY1305_MAC_SIZE;

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0),
		 simd_context);
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len, simd_context);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf,
			simd_context);

	sg_miter_start(&miter, src, sg_nents(src), SG_MITER_TO_SG | SG_MITER_ATOMIC);
	for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
		u8 *addr = miter.addr;
		size_t length = min_t(size_t, sl, miter.length);

		poly1305_update(&poly1305_state, addr, length, simd_context);

		if (unlikely(partial)) {
			size_t l = min(length, CHACHA20_BLOCK_SIZE - partial);

			crypto_xor(addr, b.chacha20_stream + partial, l);
			partial = (partial + l) & (CHACHA20_BLOCK_SIZE - 1);

			addr += l;
			length -= l;
		}

		if (likely(length >= CHACHA20_BLOCK_SIZE || length == sl)) {
			size_t l = length;

			if (unlikely(length < sl))
				l &= ~(CHACHA20_BLOCK_SIZE - 1);
			chacha20(&chacha20_state, addr, addr, l, simd_context);
			addr += l;
			length -= l;
		}

		if (unlikely(length > 0)) {
			chacha20(&chacha20_state, b.chacha20_stream, pad0,
				 CHACHA20_BLOCK_SIZE, simd_context);
			crypto_xor(addr, b.chacha20_stream, length);
			partial = length;
		}

		simd_relax(simd_context);
	}

	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf,
			simd_context);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(src_len);
	poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens),
			simd_context);

	if (likely(sl <= -POLY1305_MAC_SIZE)) {
		poly1305_final(&poly1305_state, b.computed_mac, simd_context);
		ret = !crypto_memneq(b.computed_mac,
				     miter.addr + miter.length + sl,
				     POLY1305_MAC_SIZE);
	}

	sg_miter_stop(&miter);

	if (unlikely(sl > -POLY1305_MAC_SIZE)) {
		poly1305_final(&poly1305_state, b.computed_mac, simd_context);
		scatterwalk_map_and_copy(b.read_mac, src, src_len,
					 sizeof(b.read_mac), 0);
		ret = !crypto_memneq(b.read_mac, b.computed_mac,
				     POLY1305_MAC_SIZE);
	}

	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
	memzero_explicit(&b, sizeof(b));
	return ret;
}

void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
			       const u8 *ad, const size_t ad_len,
			       const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
			       const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	simd_context_t simd_context;
	u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);

	simd_get(&simd_context);
	hchacha20(derived_key, nonce, key, &simd_context);
	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
				   get_unaligned_le64(nonce + 16),
				   (u8 *)derived_key, &simd_context);
	memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
	simd_put(&simd_context);
}

bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
			       const u8 *ad, const size_t ad_len,
			       const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
			       const u8 key[CHACHA20POLY1305_KEY_SIZE])
{
	bool ret;
	simd_context_t simd_context;
	u32 derived_key[CHACHA20_KEY_WORDS] __aligned(16);

	simd_get(&simd_context);
	hchacha20(derived_key, nonce, key, &simd_context);
	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
	ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
					 get_unaligned_le64(nonce + 16),
					 (u8 *)derived_key, &simd_context);
	memzero_explicit(derived_key, CHACHA20POLY1305_KEY_SIZE);
	simd_put(&simd_context);
	return ret;
}

#include "selftest/chacha20poly1305.c"

#ifndef COMPAT_ZINC_IS_A_MODULE
int __init chacha20poly1305_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
	if (!selftest_run("chacha20poly1305", chacha20poly1305_selftest,
			  NULL, 0))
		return -ENOTRECOVERABLE;
	return 0;
}

#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}

module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif
43
net/wireguard/crypto/zinc/curve25519/curve25519-arm-glue.c
Normal file
@ -0,0 +1,43 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/simd.h>
#include <asm/hwcap.h>
#include <asm/neon.h>

asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
				const u8 secret[CURVE25519_KEY_SIZE],
				const u8 basepoint[CURVE25519_KEY_SIZE]);

static bool curve25519_use_neon __ro_after_init;
static bool *const curve25519_nobs[] __initconst = { &curve25519_use_neon };
static void __init curve25519_fpu_init(void)
{
	curve25519_use_neon = elf_hwcap & HWCAP_NEON;
}

static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
				   const u8 secret[CURVE25519_KEY_SIZE],
				   const u8 basepoint[CURVE25519_KEY_SIZE])
{
	simd_context_t simd_context;
	bool used_arch = false;

	simd_get(&simd_context);
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    !IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && curve25519_use_neon &&
	    simd_use(&simd_context)) {
		curve25519_neon(mypublic, secret, basepoint);
		used_arch = true;
	}
	simd_put(&simd_context);
	return used_arch;
}

static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
					const u8 secret[CURVE25519_KEY_SIZE])
{
	return false;
}
2064
net/wireguard/crypto/zinc/curve25519/curve25519-arm.S
Normal file
File diff suppressed because it is too large
860
net/wireguard/crypto/zinc/curve25519/curve25519-fiat32.c
Normal file
@ -0,0 +1,860 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2016 The fiat-crypto Authors.
 * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is a machine-generated formally verified implementation of Curve25519
 * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
 * machine generated, it has been tweaked to be suitable for use in the kernel.
 * It is optimized for 32-bit machines and machines that cannot work efficiently
 * with 128-bit integer types.
 */

/* fe means field element. Here the field is \Z/(2^255-19). An element t,
 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
 * t[3]+2^102 t[4]+...+2^230 t[9].
 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
 * Multiplication and carrying produce fe from fe_loose.
 */
typedef struct fe { u32 v[10]; } fe;

/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
 * Addition and subtraction produce fe_loose from (fe, fe).
 */
typedef struct fe_loose { u32 v[10]; } fe_loose;

static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
{
	/* Ignores top bit of s. */
	u32 a0 = get_unaligned_le32(s);
	u32 a1 = get_unaligned_le32(s+4);
	u32 a2 = get_unaligned_le32(s+8);
	u32 a3 = get_unaligned_le32(s+12);
	u32 a4 = get_unaligned_le32(s+16);
	u32 a5 = get_unaligned_le32(s+20);
	u32 a6 = get_unaligned_le32(s+24);
	u32 a7 = get_unaligned_le32(s+28);
	h[0] = a0&((1<<26)-1);                    /* 26 used, 32-26 left.   26 */
	h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 =  6+19 = 25 */
	h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
	h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) +  6 = 19+ 6 = 25 */
	h[4] = (a3>> 6);                          /* (32- 6)              = 26 */
	h[5] = a4&((1<<25)-1);                    /*                        25 */
	h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 =  7+19 = 26 */
	h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
	h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) +  6 = 20+ 6 = 26 */
	h[9] = (a7>> 6)&((1<<25)-1);              /*                        25 */
}
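Spelling out the representation from the header comment: a ten-limb element h encodes the integer below, with limb widths alternating 26 and 25 bits, so each exponent is the running sum of the widths before it.

\[
h = h_0 + 2^{26} h_1 + 2^{51} h_2 + 2^{77} h_3 + 2^{102} h_4
  + 2^{128} h_5 + 2^{153} h_6 + 2^{179} h_7 + 2^{204} h_8 + 2^{230} h_9
  \pmod{2^{255}-19}
\]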
static __always_inline void fe_frombytes(fe *h, const u8 *s)
{
	fe_frombytes_impl(h->v, s);
}

static __always_inline u8 /*bool*/
addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 25 bits of result and 1 bit of carry
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a + b + c;
	*low = x & ((1 << 25) - 1);
	return (x >> 25) & 1;
}

static __always_inline u8 /*bool*/
addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 26 bits of result and 1 bit of carry
	 * (27 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a + b + c;
	*low = x & ((1 << 26) - 1);
	return (x >> 26) & 1;
}

static __always_inline u8 /*bool*/
subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 25 bits of result and 1 bit of borrow
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a - b - c;
	*low = x & ((1 << 25) - 1);
	return x >> 31;
}

static __always_inline u8 /*bool*/
subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
{
	/* This function extracts 26 bits of result and 1 bit of borrow
	 * (27 total), so a 32-bit intermediate is sufficient.
	 */
	u32 x = a - b - c;
	*low = x & ((1 << 26) - 1);
	return x >> 31;
}

static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
{
	t = -!!t; /* all set if nonzero, 0 if 0 */
	return (t&nz) | ((~t)&z);
}

static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
{
	{ const u32 x17 = in1[9];
	{ const u32 x18 = in1[8];
	{ const u32 x16 = in1[7];
	{ const u32 x14 = in1[6];
	{ const u32 x12 = in1[5];
	{ const u32 x10 = in1[4];
	{ const u32 x8 = in1[3];
	{ const u32 x6 = in1[2];
	{ const u32 x4 = in1[1];
	{ const u32 x2 = in1[0];
	{ u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
	{ u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
	{ u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
	{ u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
	{ u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
	{ u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
	{ u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
	{ u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
	{ u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
	{ u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
	{ u32 x49 = cmovznz32(x48, 0x0, 0xffffffff);
	{ u32 x50 = (x49 & 0x3ffffed);
	{ u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
	{ u32 x54 = (x49 & 0x1ffffff);
	{ u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
	{ u32 x58 = (x49 & 0x3ffffff);
	{ u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
	{ u32 x62 = (x49 & 0x1ffffff);
	{ u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
	{ u32 x66 = (x49 & 0x3ffffff);
	{ u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
	{ u32 x70 = (x49 & 0x1ffffff);
	{ u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
	{ u32 x74 = (x49 & 0x3ffffff);
	{ u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
	{ u32 x78 = (x49 & 0x1ffffff);
	{ u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
	{ u32 x82 = (x49 & 0x3ffffff);
	{ u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
	{ u32 x86 = (x49 & 0x1ffffff);
	{ u32 x88; addcarryx_u25(x85, x47, x86, &x88);
	out[0] = x52;
	out[1] = x56;
	out[2] = x60;
	out[3] = x64;
	out[4] = x68;
	out[5] = x72;
	out[6] = x76;
	out[7] = x80;
	out[8] = x84;
	out[9] = x88;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_tobytes(u8 s[32], const fe *f)
{
	u32 h[10];
	fe_freeze(h, f->v);
	s[0] = h[0] >> 0;
	s[1] = h[0] >> 8;
	s[2] = h[0] >> 16;
	s[3] = (h[0] >> 24) | (h[1] << 2);
	s[4] = h[1] >> 6;
	s[5] = h[1] >> 14;
	s[6] = (h[1] >> 22) | (h[2] << 3);
	s[7] = h[2] >> 5;
	s[8] = h[2] >> 13;
	s[9] = (h[2] >> 21) | (h[3] << 5);
	s[10] = h[3] >> 3;
	s[11] = h[3] >> 11;
	s[12] = (h[3] >> 19) | (h[4] << 6);
	s[13] = h[4] >> 2;
	s[14] = h[4] >> 10;
	s[15] = h[4] >> 18;
	s[16] = h[5] >> 0;
	s[17] = h[5] >> 8;
	s[18] = h[5] >> 16;
	s[19] = (h[5] >> 24) | (h[6] << 1);
	s[20] = h[6] >> 7;
	s[21] = h[6] >> 15;
	s[22] = (h[6] >> 23) | (h[7] << 3);
	s[23] = h[7] >> 5;
	s[24] = h[7] >> 13;
	s[25] = (h[7] >> 21) | (h[8] << 4);
	s[26] = h[8] >> 4;
	s[27] = h[8] >> 12;
	s[28] = (h[8] >> 20) | (h[9] << 6);
	s[29] = h[9] >> 2;
	s[30] = h[9] >> 10;
	s[31] = h[9] >> 18;
}

/* h = f */
static __always_inline void fe_copy(fe *h, const fe *f)
{
	memmove(h, f, sizeof(u32) * 10);
}

static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
{
	memmove(h, f, sizeof(u32) * 10);
}

/* h = 0 */
static __always_inline void fe_0(fe *h)
{
	memset(h, 0, sizeof(u32) * 10);
}

/* h = 1 */
static __always_inline void fe_1(fe *h)
{
	memset(h, 0, sizeof(u32) * 10);
	h->v[0] = 1;
}

static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	out[0] = (x5 + x23);
	out[1] = (x7 + x25);
	out[2] = (x9 + x27);
	out[3] = (x11 + x29);
	out[4] = (x13 + x31);
	out[5] = (x15 + x33);
	out[6] = (x17 + x35);
	out[7] = (x19 + x37);
	out[8] = (x21 + x39);
	out[9] = (x20 + x38);
	}}}}}}}}}}}}}}}}}}}}
}

/* h = f + g
 * Can overlap h with f or g.
 */
static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
	fe_add_impl(h->v, f->v, g->v);
}

static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	out[0] = ((0x7ffffda + x5) - x23);
	out[1] = ((0x3fffffe + x7) - x25);
	out[2] = ((0x7fffffe + x9) - x27);
	out[3] = ((0x3fffffe + x11) - x29);
	out[4] = ((0x7fffffe + x13) - x31);
	out[5] = ((0x3fffffe + x15) - x33);
	out[6] = ((0x7fffffe + x17) - x35);
	out[7] = ((0x3fffffe + x19) - x37);
	out[8] = ((0x7fffffe + x21) - x39);
	out[9] = ((0x3fffffe + x20) - x38);
	}}}}}}}}}}}}}}}}}}}}
}

/* h = f - g
 * Can overlap h with f or g.
 */
static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
	fe_sub_impl(h->v, f->v, g->v);
}

static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
{
	{ const u32 x20 = in1[9];
	{ const u32 x21 = in1[8];
	{ const u32 x19 = in1[7];
	{ const u32 x17 = in1[6];
	{ const u32 x15 = in1[5];
	{ const u32 x13 = in1[4];
	{ const u32 x11 = in1[3];
	{ const u32 x9 = in1[2];
	{ const u32 x7 = in1[1];
	{ const u32 x5 = in1[0];
	{ const u32 x38 = in2[9];
	{ const u32 x39 = in2[8];
	{ const u32 x37 = in2[7];
	{ const u32 x35 = in2[6];
	{ const u32 x33 = in2[5];
	{ const u32 x31 = in2[4];
	{ const u32 x29 = in2[3];
	{ const u32 x27 = in2[2];
	{ const u32 x25 = in2[1];
	{ const u32 x23 = in2[0];
	{ u64 x40 = ((u64)x23 * x5);
	{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
	{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
	{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
	{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
	{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
	{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
	{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
	{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
	{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
	{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
	{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
	{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
	{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
	{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
	{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
	{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
	{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
	{ u64 x58 = ((u64)(0x2 * x38) * x20);
	{ u64 x59 = (x48 + (x58 << 0x4));
	{ u64 x60 = (x59 + (x58 << 0x1));
	{ u64 x61 = (x60 + x58);
	{ u64 x62 = (x47 + (x57 << 0x4));
	{ u64 x63 = (x62 + (x57 << 0x1));
	{ u64 x64 = (x63 + x57);
	{ u64 x65 = (x46 + (x56 << 0x4));
	{ u64 x66 = (x65 + (x56 << 0x1));
	{ u64 x67 = (x66 + x56);
	{ u64 x68 = (x45 + (x55 << 0x4));
	{ u64 x69 = (x68 + (x55 << 0x1));
	{ u64 x70 = (x69 + x55);
	{ u64 x71 = (x44 + (x54 << 0x4));
	{ u64 x72 = (x71 + (x54 << 0x1));
	{ u64 x73 = (x72 + x54);
	{ u64 x74 = (x43 + (x53 << 0x4));
	{ u64 x75 = (x74 + (x53 << 0x1));
	{ u64 x76 = (x75 + x53);
	{ u64 x77 = (x42 + (x52 << 0x4));
	{ u64 x78 = (x77 + (x52 << 0x1));
	{ u64 x79 = (x78 + x52);
	{ u64 x80 = (x41 + (x51 << 0x4));
	{ u64 x81 = (x80 + (x51 << 0x1));
	{ u64 x82 = (x81 + x51);
	{ u64 x83 = (x40 + (x50 << 0x4));
	{ u64 x84 = (x83 + (x50 << 0x1));
	{ u64 x85 = (x84 + x50);
	{ u64 x86 = (x85 >> 0x1a);
	{ u32 x87 = ((u32)x85 & 0x3ffffff);
	{ u64 x88 = (x86 + x82);
	{ u64 x89 = (x88 >> 0x19);
	{ u32 x90 = ((u32)x88 & 0x1ffffff);
	{ u64 x91 = (x89 + x79);
	{ u64 x92 = (x91 >> 0x1a);
	{ u32 x93 = ((u32)x91 & 0x3ffffff);
	{ u64 x94 = (x92 + x76);
	{ u64 x95 = (x94 >> 0x19);
	{ u32 x96 = ((u32)x94 & 0x1ffffff);
	{ u64 x97 = (x95 + x73);
	{ u64 x98 = (x97 >> 0x1a);
	{ u32 x99 = ((u32)x97 & 0x3ffffff);
	{ u64 x100 = (x98 + x70);
	{ u64 x101 = (x100 >> 0x19);
	{ u32 x102 = ((u32)x100 & 0x1ffffff);
	{ u64 x103 = (x101 + x67);
	{ u64 x104 = (x103 >> 0x1a);
	{ u32 x105 = ((u32)x103 & 0x3ffffff);
	{ u64 x106 = (x104 + x64);
	{ u64 x107 = (x106 >> 0x19);
	{ u32 x108 = ((u32)x106 & 0x1ffffff);
	{ u64 x109 = (x107 + x61);
	{ u64 x110 = (x109 >> 0x1a);
	{ u32 x111 = ((u32)x109 & 0x3ffffff);
	{ u64 x112 = (x110 + x49);
	{ u64 x113 = (x112 >> 0x19);
	{ u32 x114 = ((u32)x112 & 0x1ffffff);
	{ u64 x115 = (x87 + (0x13 * x113));
	{ u32 x116 = (u32) (x115 >> 0x1a);
	{ u32 x117 = ((u32)x115 & 0x3ffffff);
	{ u32 x118 = (x116 + x90);
	{ u32 x119 = (x118 >> 0x19);
	{ u32 x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static __always_inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static void fe_sqr_impl(u32 out[10], const u32 in1[10])
{
	{ const u32 x17 = in1[9];
	{ const u32 x18 = in1[8];
	{ const u32 x16 = in1[7];
	{ const u32 x14 = in1[6];
	{ const u32 x12 = in1[5];
	{ const u32 x10 = in1[4];
	{ const u32 x8 = in1[3];
	{ const u32 x6 = in1[2];
	{ const u32 x4 = in1[1];
	{ const u32 x2 = in1[0];
	{ u64 x19 = ((u64)x2 * x2);
	{ u64 x20 = ((u64)(0x2 * x2) * x4);
	{ u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
	{ u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
	{ u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
	{ u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
	{ u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
	{ u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
	{ u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
	{ u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
	{ u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
	{ u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
	{ u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
	{ u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
	{ u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
	{ u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
	{ u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
	{ u64 x36 = ((u64)(0x2 * x18) * x17);
	{ u64 x37 = ((u64)(0x2 * x17) * x17);
	{ u64 x38 = (x27 + (x37 << 0x4));
	{ u64 x39 = (x38 + (x37 << 0x1));
	{ u64 x40 = (x39 + x37);
	{ u64 x41 = (x26 + (x36 << 0x4));
	{ u64 x42 = (x41 + (x36 << 0x1));
	{ u64 x43 = (x42 + x36);
	{ u64 x44 = (x25 + (x35 << 0x4));
	{ u64 x45 = (x44 + (x35 << 0x1));
	{ u64 x46 = (x45 + x35);
	{ u64 x47 = (x24 + (x34 << 0x4));
	{ u64 x48 = (x47 + (x34 << 0x1));
	{ u64 x49 = (x48 + x34);
	{ u64 x50 = (x23 + (x33 << 0x4));
	{ u64 x51 = (x50 + (x33 << 0x1));
	{ u64 x52 = (x51 + x33);
	{ u64 x53 = (x22 + (x32 << 0x4));
	{ u64 x54 = (x53 + (x32 << 0x1));
	{ u64 x55 = (x54 + x32);
	{ u64 x56 = (x21 + (x31 << 0x4));
	{ u64 x57 = (x56 + (x31 << 0x1));
	{ u64 x58 = (x57 + x31);
	{ u64 x59 = (x20 + (x30 << 0x4));
	{ u64 x60 = (x59 + (x30 << 0x1));
	{ u64 x61 = (x60 + x30);
	{ u64 x62 = (x19 + (x29 << 0x4));
	{ u64 x63 = (x62 + (x29 << 0x1));
	{ u64 x64 = (x63 + x29);
	{ u64 x65 = (x64 >> 0x1a);
	{ u32 x66 = ((u32)x64 & 0x3ffffff);
	{ u64 x67 = (x65 + x61);
	{ u64 x68 = (x67 >> 0x19);
	{ u32 x69 = ((u32)x67 & 0x1ffffff);
	{ u64 x70 = (x68 + x58);
	{ u64 x71 = (x70 >> 0x1a);
	{ u32 x72 = ((u32)x70 & 0x3ffffff);
	{ u64 x73 = (x71 + x55);
	{ u64 x74 = (x73 >> 0x19);
	{ u32 x75 = ((u32)x73 & 0x1ffffff);
	{ u64 x76 = (x74 + x52);
	{ u64 x77 = (x76 >> 0x1a);
	{ u32 x78 = ((u32)x76 & 0x3ffffff);
	{ u64 x79 = (x77 + x49);
	{ u64 x80 = (x79 >> 0x19);
	{ u32 x81 = ((u32)x79 & 0x1ffffff);
	{ u64 x82 = (x80 + x46);
	{ u64 x83 = (x82 >> 0x1a);
	{ u32 x84 = ((u32)x82 & 0x3ffffff);
	{ u64 x85 = (x83 + x43);
	{ u64 x86 = (x85 >> 0x19);
	{ u32 x87 = ((u32)x85 & 0x1ffffff);
	{ u64 x88 = (x86 + x40);
	{ u64 x89 = (x88 >> 0x1a);
	{ u32 x90 = ((u32)x88 & 0x3ffffff);
	{ u64 x91 = (x89 + x28);
	{ u64 x92 = (x91 >> 0x19);
	{ u32 x93 = ((u32)x91 & 0x1ffffff);
	{ u64 x94 = (x66 + (0x13 * x92));
	{ u32 x95 = (u32) (x94 >> 0x1a);
	{ u32 x96 = ((u32)x94 & 0x3ffffff);
	{ u32 x97 = (x95 + x69);
	{ u32 x98 = (x97 >> 0x19);
	{ u32 x99 = (x97 & 0x1ffffff);
	out[0] = x96;
	out[1] = x99;
	out[2] = (x98 + x72);
	out[3] = x75;
	out[4] = x78;
	out[5] = x81;
	out[6] = x84;
	out[7] = x87;
	out[8] = x90;
	out[9] = x93;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
{
	fe_sqr_impl(h->v, f->v);
}

static __always_inline void fe_sq_tt(fe *h, const fe *f)
{
	fe_sqr_impl(h->v, f->v);
}

static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
{
	fe t0;
	fe t1;
	fe t2;
	fe t3;
	int i;

	fe_sq_tl(&t0, z);
	fe_sq_tt(&t1, &t0);
	for (i = 1; i < 2; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_tlt(&t1, z, &t1);
	fe_mul_ttt(&t0, &t0, &t1);
	fe_sq_tt(&t2, &t0);
	fe_mul_ttt(&t1, &t1, &t2);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 20; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 100; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t1, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_ttt(out, &t1, &t0);
}

static __always_inline void fe_invert(fe *out, const fe *z)
{
	fe_loose l;
	fe_copy_lt(&l, z);
	fe_loose_invert(out, &l);
}

/* Replace (f,g) with (g,f) if b == 1;
 * replace (f,g) with (f,g) if b == 0.
 *
 * Preconditions: b in {0,1}
 */
static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
{
	unsigned i;
	b = 0 - b;
	for (i = 0; i < 10; i++) {
		u32 x = f->v[i] ^ g->v[i];
		x &= b;
		f->v[i] ^= x;
		g->v[i] ^= x;
	}
}
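fe_cswap avoids a data-dependent branch: b is stretched into an all-zeros or all-ones mask, and the XOR trick swaps exactly when the mask is all ones, so the memory access pattern is independent of the secret bit. A tiny standalone illustration of the masking (a sketch, not kernel code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t f = 0x12345678, g = 0x9abcdef0;
	uint32_t b = 1;               /* precondition: b is 0 or 1 */
	uint32_t mask = 0 - b;        /* 0x00000000 or 0xffffffff */
	uint32_t x = (f ^ g) & mask;  /* either 0 or f^g */

	f ^= x;
	g ^= x;
	assert(f == 0x9abcdef0 && g == 0x12345678);
	return 0;
}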
|
||||
|
||||
/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
|
||||
static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
|
||||
{
|
||||
{ const u32 x20 = in1[9];
|
||||
{ const u32 x21 = in1[8];
|
||||
{ const u32 x19 = in1[7];
|
||||
{ const u32 x17 = in1[6];
|
||||
{ const u32 x15 = in1[5];
|
||||
{ const u32 x13 = in1[4];
|
||||
{ const u32 x11 = in1[3];
|
||||
{ const u32 x9 = in1[2];
|
||||
{ const u32 x7 = in1[1];
|
||||
{ const u32 x5 = in1[0];
|
||||
{ const u32 x38 = 0;
|
||||
{ const u32 x39 = 0;
|
||||
{ const u32 x37 = 0;
|
||||
{ const u32 x35 = 0;
|
||||
{ const u32 x33 = 0;
|
||||
{ const u32 x31 = 0;
|
||||
{ const u32 x29 = 0;
|
||||
{ const u32 x27 = 0;
|
||||
{ const u32 x25 = 0;
|
||||
{ const u32 x23 = 121666;
|
||||
{ u64 x40 = ((u64)x23 * x5);
|
||||
{ u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
|
||||
{ u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
|
||||
{ u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
|
||||
{ u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
|
||||
{ u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
|
||||
{ u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
|
||||
{ u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
|
||||
{ u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
|
||||
{ u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
|
||||
{ u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
|
||||
{ u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
|
||||
{ u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
|
||||
{ u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
|
||||
{ u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
|
||||
{ u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
|
||||
{ u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
|
||||
{ u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
|
||||
{ u64 x58 = ((u64)(0x2 * x38) * x20);
|
||||
{ u64 x59 = (x48 + (x58 << 0x4));
|
||||
{ u64 x60 = (x59 + (x58 << 0x1));
|
||||
{ u64 x61 = (x60 + x58);
|
||||
{ u64 x62 = (x47 + (x57 << 0x4));
|
||||
{ u64 x63 = (x62 + (x57 << 0x1));
|
||||
{ u64 x64 = (x63 + x57);
|
||||
{ u64 x65 = (x46 + (x56 << 0x4));
|
||||
{ u64 x66 = (x65 + (x56 << 0x1));
|
||||
{ u64 x67 = (x66 + x56);
|
||||
{ u64 x68 = (x45 + (x55 << 0x4));
|
||||
{ u64 x69 = (x68 + (x55 << 0x1));
|
||||
{ u64 x70 = (x69 + x55);
|
||||
{ u64 x71 = (x44 + (x54 << 0x4));
|
||||
{ u64 x72 = (x71 + (x54 << 0x1));
|
||||
{ u64 x73 = (x72 + x54);
|
||||
{ u64 x74 = (x43 + (x53 << 0x4));
|
||||
{ u64 x75 = (x74 + (x53 << 0x1));
|
||||
{ u64 x76 = (x75 + x53);
|
||||
{ u64 x77 = (x42 + (x52 << 0x4));
|
||||
{ u64 x78 = (x77 + (x52 << 0x1));
|
||||
{ u64 x79 = (x78 + x52);
|
||||
{ u64 x80 = (x41 + (x51 << 0x4));
|
||||
{ u64 x81 = (x80 + (x51 << 0x1));
|
||||
{ u64 x82 = (x81 + x51);
|
||||
{ u64 x83 = (x40 + (x50 << 0x4));
|
||||
{ u64 x84 = (x83 + (x50 << 0x1));
|
||||
{ u64 x85 = (x84 + x50);
|
||||
{ u64 x86 = (x85 >> 0x1a);
	{ u32 x87 = ((u32)x85 & 0x3ffffff);
	{ u64 x88 = (x86 + x82);
	{ u64 x89 = (x88 >> 0x19);
	{ u32 x90 = ((u32)x88 & 0x1ffffff);
	{ u64 x91 = (x89 + x79);
	{ u64 x92 = (x91 >> 0x1a);
	{ u32 x93 = ((u32)x91 & 0x3ffffff);
	{ u64 x94 = (x92 + x76);
	{ u64 x95 = (x94 >> 0x19);
	{ u32 x96 = ((u32)x94 & 0x1ffffff);
	{ u64 x97 = (x95 + x73);
	{ u64 x98 = (x97 >> 0x1a);
	{ u32 x99 = ((u32)x97 & 0x3ffffff);
	{ u64 x100 = (x98 + x70);
	{ u64 x101 = (x100 >> 0x19);
	{ u32 x102 = ((u32)x100 & 0x1ffffff);
	{ u64 x103 = (x101 + x67);
	{ u64 x104 = (x103 >> 0x1a);
	{ u32 x105 = ((u32)x103 & 0x3ffffff);
	{ u64 x106 = (x104 + x64);
	{ u64 x107 = (x106 >> 0x19);
	{ u32 x108 = ((u32)x106 & 0x1ffffff);
	{ u64 x109 = (x107 + x61);
	{ u64 x110 = (x109 >> 0x1a);
	{ u32 x111 = ((u32)x109 & 0x3ffffff);
	{ u64 x112 = (x110 + x49);
	{ u64 x113 = (x112 >> 0x19);
	{ u32 x114 = ((u32)x112 & 0x1ffffff);
	{ u64 x115 = (x87 + (0x13 * x113));
	{ u32 x116 = (u32) (x115 >> 0x1a);
	{ u32 x117 = ((u32)x115 & 0x3ffffff);
	{ u32 x118 = (x116 + x90);
	{ u32 x119 = (x118 >> 0x19);
	{ u32 x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
	}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}

static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
{
	fe_mul_121666_impl(h->v, f->v);
}

static void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
			       const u8 scalar[CURVE25519_KEY_SIZE],
			       const u8 point[CURVE25519_KEY_SIZE])
{
	fe x1, x2, z2, x3, z3;
	fe_loose x2l, z2l, x3l;
	unsigned swap = 0;
	int pos;
	u8 e[32];

	memcpy(e, scalar, 32);
	curve25519_clamp_secret(e);

	/* The following implementation was transcribed to Coq and proven to
	 * correspond to unary scalar multiplication in affine coordinates given
	 * that x1 != 0 is the x coordinate of some point on the curve. It was
	 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
	 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
	 * quantified over the underlying field, so it applies to Curve25519
	 * itself and the quadratic twist of Curve25519. It was not proven in
	 * Coq that prime-field arithmetic correctly simulates extension-field
	 * arithmetic on prime-field values. The decoding of the byte array
	 * representation of e was not considered.
	 *
	 * Specification of Montgomery curves in affine coordinates:
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
	 *
	 * Proof that these form a group that is isomorphic to a Weierstrass
	 * curve:
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
	 *
	 * Coq transcription and correctness proof of the loop
	 * (where scalarbits=255):
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
	 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
	 * fe_invert(0) = 0
	 */
	fe_frombytes(&x1, point);
	fe_1(&x2);
	fe_0(&z2);
	fe_copy(&x3, &x1);
	fe_1(&z3);

	for (pos = 254; pos >= 0; --pos) {
		fe tmp0, tmp1;
		fe_loose tmp0l, tmp1l;
		/* loop invariant as of right before the test, for the case
		 * where x1 != 0:
		 *   pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
		 *   is nonzero
		 *   let r := e >> (pos+1) in the following equalities of
		 *   projective points:
		 *   to_xz (r*P)     === if swap then (x3, z3) else (x2, z2)
		 *   to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
		 *   x1 is the nonzero x coordinate of the nonzero
		 *   point (r*P-(r+1)*P)
		 */
		unsigned b = 1 & (e[pos / 8] >> (pos & 7));
		swap ^= b;
		fe_cswap(&x2, &x3, swap);
		fe_cswap(&z2, &z3, swap);
		swap = b;
		/* Coq transcription of ladderstep formula (called from
		 * transcribed loop):
		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
		 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
		 * x1  = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
		 */
		fe_sub(&tmp0l, &x3, &z3);
		fe_sub(&tmp1l, &x2, &z2);
		fe_add(&x2l, &x2, &z2);
		fe_add(&z2l, &x3, &z3);
		fe_mul_tll(&z3, &tmp0l, &x2l);
		fe_mul_tll(&z2, &z2l, &tmp1l);
		fe_sq_tl(&tmp0, &tmp1l);
		fe_sq_tl(&tmp1, &x2l);
		fe_add(&x3l, &z3, &z2);
		fe_sub(&z2l, &z3, &z2);
		fe_mul_ttt(&x2, &tmp1, &tmp0);
		fe_sub(&tmp1l, &tmp1, &tmp0);
		fe_sq_tl(&z2, &z2l);
		fe_mul121666(&z3, &tmp1l);
		fe_sq_tl(&x3, &x3l);
		fe_add(&tmp0l, &tmp0, &z3);
		fe_mul_ttt(&z3, &x1, &z2);
		fe_mul_tll(&z2, &tmp1l, &tmp0l);
	}
	/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
	 * else (x2, z2)
	 */
	fe_cswap(&x2, &x3, swap);
	fe_cswap(&z2, &z3, swap);

	fe_invert(&z2, &z2);
	fe_mul_ttt(&x2, &x2, &z2);
	fe_tobytes(out, &x2);

	memzero_explicit(&x1, sizeof(x1));
	memzero_explicit(&x2, sizeof(x2));
	memzero_explicit(&z2, sizeof(z2));
	memzero_explicit(&x3, sizeof(x3));
	memzero_explicit(&z3, sizeof(z3));
	memzero_explicit(&x2l, sizeof(x2l));
	memzero_explicit(&z2l, sizeof(z2l));
	memzero_explicit(&x3l, sizeof(x3l));
	memzero_explicit(&e, sizeof(e));
}
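
Editor's note: curve25519_clamp_secret() is defined elsewhere in this tree and is not shown in this diff. For reference, a minimal standalone sketch of the standard RFC 7748 clamping it is assumed to perform (the helper name here is illustrative, not this file's API):

#include <stdint.h>

/* Illustrative sketch, assuming the standard X25519 scalar clamping. */
static inline void clamp_secret_sketch(uint8_t e[32])
{
	e[0] &= 248;	/* clear the 3 low bits: scalar becomes a multiple of the cofactor 8 */
	e[31] &= 127;	/* clear the top bit */
	e[31] |= 64;	/* set bit 254, so the ladder always runs the same number of steps */
}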
779
net/wireguard/crypto/zinc/curve25519/curve25519-hacl64.c
Normal file
@@ -0,0 +1,779 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2016-2017 INRIA and Microsoft Corporation.
 * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is a machine-generated formally verified implementation of Curve25519
 * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine
 * generated, it has been tweaked to be suitable for use in the kernel. It is
 * optimized for 64-bit machines that can efficiently work with 128-bit
 * integer types.
 */

typedef __uint128_t u128;

static __always_inline u64 u64_eq_mask(u64 a, u64 b)
{
	u64 x = a ^ b;
	u64 minus_x = ~x + (u64)1U;
	u64 x_or_minus_x = x | minus_x;
	u64 xnx = x_or_minus_x >> (u32)63U;
	u64 c = xnx - (u64)1U;
	return c;
}

static __always_inline u64 u64_gte_mask(u64 a, u64 b)
{
	u64 x = a;
	u64 y = b;
	u64 x_xor_y = x ^ y;
	u64 x_sub_y = x - y;
	u64 x_sub_y_xor_y = x_sub_y ^ y;
	u64 q = x_xor_y | x_sub_y_xor_y;
	u64 x_xor_q = x ^ q;
	u64 x_xor_q_ = x_xor_q >> (u32)63U;
	u64 c = x_xor_q_ - (u64)1U;
	return c;
}
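
Editor's note: the two helpers above return an all-ones word when the predicate holds and zero otherwise, without branching, which is what lets the field arithmetic below stay constant-time. A minimal standalone sketch (hypothetical names, userspace harness, not part of this file) showing how such a mask drives a branch-free select:

#include <stdint.h>
#include <stdio.h>

/* Same construction as u64_eq_mask() above: all-ones iff a == b. */
static uint64_t eq_mask_sketch(uint64_t a, uint64_t b)
{
	uint64_t x = a ^ b;			/* zero iff a == b */
	uint64_t minus_x = ~x + 1;		/* two's-complement negate */
	uint64_t xnx = (x | minus_x) >> 63;	/* 0 iff a == b, else 1 */
	return xnx - 1;				/* wraps to all-ones iff a == b */
}

int main(void)
{
	uint64_t take_a = eq_mask_sketch(5, 5);		/* all-ones */
	uint64_t r = (take_a & 111) | (~take_a & 222);	/* branch-free select */
	printf("%llu\n", (unsigned long long)r);	/* prints 111 */
	return 0;
}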

static __always_inline void modulo_carry_top(u64 *b)
{
	u64 b4 = b[4];
	u64 b0 = b[0];
	u64 b4_ = b4 & 0x7ffffffffffffLLU;
	u64 b0_ = b0 + 19 * (b4 >> 51);
	b[4] = b4_;
	b[0] = b0_;
}

static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
{
	{
		u128 xi = input[0];
		output[0] = ((u64)(xi));
	}
	{
		u128 xi = input[1];
		output[1] = ((u64)(xi));
	}
	{
		u128 xi = input[2];
		output[2] = ((u64)(xi));
	}
	{
		u128 xi = input[3];
		output[3] = ((u64)(xi));
	}
	{
		u128 xi = input[4];
		output[4] = ((u64)(xi));
	}
}

static __always_inline void
fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
{
	output[0] += (u128)input[0] * s;
	output[1] += (u128)input[1] * s;
	output[2] += (u128)input[2] * s;
	output[3] += (u128)input[3] * s;
	output[4] += (u128)input[4] * s;
}

static __always_inline void fproduct_carry_wide_(u128 *tmp)
{
	{
		u32 ctr = 0;
		u128 tctr = tmp[ctr];
		u128 tctrp1 = tmp[ctr + 1];
		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
		u128 c = ((tctr) >> (51));
		tmp[ctr] = ((u128)(r0));
		tmp[ctr + 1] = ((tctrp1) + (c));
	}
	{
		u32 ctr = 1;
		u128 tctr = tmp[ctr];
		u128 tctrp1 = tmp[ctr + 1];
		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
		u128 c = ((tctr) >> (51));
		tmp[ctr] = ((u128)(r0));
		tmp[ctr + 1] = ((tctrp1) + (c));
	}

	{
		u32 ctr = 2;
		u128 tctr = tmp[ctr];
		u128 tctrp1 = tmp[ctr + 1];
		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
		u128 c = ((tctr) >> (51));
		tmp[ctr] = ((u128)(r0));
		tmp[ctr + 1] = ((tctrp1) + (c));
	}
	{
		u32 ctr = 3;
		u128 tctr = tmp[ctr];
		u128 tctrp1 = tmp[ctr + 1];
		u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
		u128 c = ((tctr) >> (51));
		tmp[ctr] = ((u128)(r0));
		tmp[ctr + 1] = ((tctrp1) + (c));
	}
}

static __always_inline void fmul_shift_reduce(u64 *output)
{
	u64 tmp = output[4];
	u64 b0;
	{
		u32 ctr = 5 - 0 - 1;
		u64 z = output[ctr - 1];
		output[ctr] = z;
	}
	{
		u32 ctr = 5 - 1 - 1;
		u64 z = output[ctr - 1];
		output[ctr] = z;
	}
	{
		u32 ctr = 5 - 2 - 1;
		u64 z = output[ctr - 1];
		output[ctr] = z;
	}
	{
		u32 ctr = 5 - 3 - 1;
		u64 z = output[ctr - 1];
		output[ctr] = z;
	}
	output[0] = tmp;
	b0 = output[0];
	output[0] = 19 * b0;
}

static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
						   u64 *input21)
{
	u32 i;
	u64 input2i;
	{
		u64 input2i = input21[0];
		fproduct_sum_scalar_multiplication_(output, input, input2i);
		fmul_shift_reduce(input);
	}
	{
		u64 input2i = input21[1];
		fproduct_sum_scalar_multiplication_(output, input, input2i);
		fmul_shift_reduce(input);
	}
	{
		u64 input2i = input21[2];
		fproduct_sum_scalar_multiplication_(output, input, input2i);
		fmul_shift_reduce(input);
	}
	{
		u64 input2i = input21[3];
		fproduct_sum_scalar_multiplication_(output, input, input2i);
		fmul_shift_reduce(input);
	}
	i = 4;
	input2i = input21[i];
	fproduct_sum_scalar_multiplication_(output, input, input2i);
}

static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21)
{
	u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] };
	{
		u128 b4;
		u128 b0;
		u128 b4_;
		u128 b0_;
		u64 i0;
		u64 i1;
		u64 i0_;
		u64 i1_;
		u128 t[5] = { 0 };
		fmul_mul_shift_reduce_(t, tmp, input21);
		fproduct_carry_wide_(t);
		b4 = t[4];
		b0 = t[0];
		b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
		b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
		t[4] = b4_;
		t[0] = b0_;
		fproduct_copy_from_wide_(output, t);
		i0 = output[0];
		i1 = output[1];
		i0_ = i0 & 0x7ffffffffffffLLU;
		i1_ = i1 + (i0 >> 51);
		output[0] = i0_;
		output[1] = i1_;
	}
}

static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
{
	u64 r0 = output[0];
	u64 r1 = output[1];
	u64 r2 = output[2];
	u64 r3 = output[3];
	u64 r4 = output[4];
	u64 d0 = r0 * 2;
	u64 d1 = r1 * 2;
	u64 d2 = r2 * 2 * 19;
	u64 d419 = r4 * 19;
	u64 d4 = d419 * 2;
	u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
		   (((u128)(d2) * (r3))));
	u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
		   (((u128)(r3 * 19) * (r3))));
	u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
		   (((u128)(d4) * (r3))));
	u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
		   (((u128)(r4) * (d419))));
	u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
		   (((u128)(r2) * (r2))));
	tmp[0] = s0;
	tmp[1] = s1;
	tmp[2] = s2;
	tmp[3] = s3;
	tmp[4] = s4;
}

static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
{
	u128 b4;
	u128 b0;
	u128 b4_;
	u128 b0_;
	u64 i0;
	u64 i1;
	u64 i0_;
	u64 i1_;
	fsquare_fsquare__(tmp, output);
	fproduct_carry_wide_(tmp);
	b4 = tmp[4];
	b0 = tmp[0];
	b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
	b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
	tmp[4] = b4_;
	tmp[0] = b0_;
	fproduct_copy_from_wide_(output, tmp);
	i0 = output[0];
	i1 = output[1];
	i0_ = i0 & 0x7ffffffffffffLLU;
	i1_ = i1 + (i0 >> 51);
	output[0] = i0_;
	output[1] = i1_;
}

static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
						   u32 count1)
{
	u32 i;
	fsquare_fsquare_(tmp, output);
	for (i = 1; i < count1; ++i)
		fsquare_fsquare_(tmp, output);
}

static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
						  u32 count1)
{
	u128 t[5];
	memcpy(output, input, 5 * sizeof(*input));
	fsquare_fsquare_times_(output, t, count1);
}

static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
							  u32 count1)
{
	u128 t[5];
	fsquare_fsquare_times_(output, t, count1);
}

static __always_inline void crecip_crecip(u64 *out, u64 *z)
{
	u64 buf[20] = { 0 };
	u64 *a0 = buf;
	u64 *t00 = buf + 5;
	u64 *b0 = buf + 10;
	u64 *t01;
	u64 *b1;
	u64 *c0;
	u64 *a;
	u64 *t0;
	u64 *b;
	u64 *c;
	fsquare_fsquare_times(a0, z, 1);
	fsquare_fsquare_times(t00, a0, 2);
	fmul_fmul(b0, t00, z);
	fmul_fmul(a0, b0, a0);
	fsquare_fsquare_times(t00, a0, 1);
	fmul_fmul(b0, t00, b0);
	fsquare_fsquare_times(t00, b0, 5);
	t01 = buf + 5;
	b1 = buf + 10;
	c0 = buf + 15;
	fmul_fmul(b1, t01, b1);
	fsquare_fsquare_times(t01, b1, 10);
	fmul_fmul(c0, t01, b1);
	fsquare_fsquare_times(t01, c0, 20);
	fmul_fmul(t01, t01, c0);
	fsquare_fsquare_times_inplace(t01, 10);
	fmul_fmul(b1, t01, b1);
	fsquare_fsquare_times(t01, b1, 50);
	a = buf;
	t0 = buf + 5;
	b = buf + 10;
	c = buf + 15;
	fmul_fmul(c, t0, b);
	fsquare_fsquare_times(t0, c, 100);
	fmul_fmul(t0, t0, c);
	fsquare_fsquare_times_inplace(t0, 50);
	fmul_fmul(t0, t0, b);
	fsquare_fsquare_times_inplace(t0, 5);
	fmul_fmul(out, t0, a);
}

static __always_inline void fsum(u64 *a, u64 *b)
{
	a[0] += b[0];
	a[1] += b[1];
	a[2] += b[2];
	a[3] += b[3];
	a[4] += b[4];
}

static __always_inline void fdifference(u64 *a, u64 *b)
{
	u64 tmp[5] = { 0 };
	u64 b0;
	u64 b1;
	u64 b2;
	u64 b3;
	u64 b4;
	memcpy(tmp, b, 5 * sizeof(*b));
	b0 = tmp[0];
	b1 = tmp[1];
	b2 = tmp[2];
	b3 = tmp[3];
	b4 = tmp[4];
	tmp[0] = b0 + 0x3fffffffffff68LLU;
	tmp[1] = b1 + 0x3ffffffffffff8LLU;
	tmp[2] = b2 + 0x3ffffffffffff8LLU;
	tmp[3] = b3 + 0x3ffffffffffff8LLU;
	tmp[4] = b4 + 0x3ffffffffffff8LLU;
	{
		u64 xi = a[0];
		u64 yi = tmp[0];
		a[0] = yi - xi;
	}
	{
		u64 xi = a[1];
		u64 yi = tmp[1];
		a[1] = yi - xi;
	}
	{
		u64 xi = a[2];
		u64 yi = tmp[2];
		a[2] = yi - xi;
	}
	{
		u64 xi = a[3];
		u64 yi = tmp[3];
		a[3] = yi - xi;
	}
	{
		u64 xi = a[4];
		u64 yi = tmp[4];
		a[4] = yi - xi;
	}
}

static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
{
	u128 tmp[5];
	u128 b4;
	u128 b0;
	u128 b4_;
	u128 b0_;
	{
		u64 xi = b[0];
		tmp[0] = ((u128)(xi) * (s));
	}
	{
		u64 xi = b[1];
		tmp[1] = ((u128)(xi) * (s));
	}
	{
		u64 xi = b[2];
		tmp[2] = ((u128)(xi) * (s));
	}
	{
		u64 xi = b[3];
		tmp[3] = ((u128)(xi) * (s));
	}
	{
		u64 xi = b[4];
		tmp[4] = ((u128)(xi) * (s));
	}
	fproduct_carry_wide_(tmp);
	b4 = tmp[4];
	b0 = tmp[0];
	b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
	b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
	tmp[4] = b4_;
	tmp[0] = b0_;
	fproduct_copy_from_wide_(output, tmp);
}

static __always_inline void crecip(u64 *output, u64 *input)
{
	crecip_crecip(output, input);
}

static __always_inline void point_swap_conditional_step(u64 *a, u64 *b,
							u64 swap1, u32 ctr)
{
	u32 i = ctr - 1;
	u64 ai = a[i];
	u64 bi = b[i];
	u64 x = swap1 & (ai ^ bi);
	u64 ai1 = ai ^ x;
	u64 bi1 = bi ^ x;
	a[i] = ai1;
	b[i] = bi1;
}

static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1)
{
	point_swap_conditional_step(a, b, swap1, 5);
	point_swap_conditional_step(a, b, swap1, 4);
	point_swap_conditional_step(a, b, swap1, 3);
	point_swap_conditional_step(a, b, swap1, 2);
	point_swap_conditional_step(a, b, swap1, 1);
}

static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap)
{
	u64 swap1 = 0 - iswap;
	point_swap_conditional5(a, b, swap1);
	point_swap_conditional5(a + 5, b + 5, swap1);
}

static __always_inline void point_copy(u64 *output, u64 *input)
{
	memcpy(output, input, 5 * sizeof(*input));
	memcpy(output + 5, input + 5, 5 * sizeof(*input));
}

static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p,
						u64 *pq, u64 *qmqp)
{
	u64 *qx = qmqp;
	u64 *x2 = pp;
	u64 *z2 = pp + 5;
	u64 *x3 = ppq;
	u64 *z3 = ppq + 5;
	u64 *x = p;
	u64 *z = p + 5;
	u64 *xprime = pq;
	u64 *zprime = pq + 5;
	u64 buf[40] = { 0 };
	u64 *origx = buf;
	u64 *origxprime0 = buf + 5;
	u64 *xxprime0;
	u64 *zzprime0;
	u64 *origxprime;
	xxprime0 = buf + 25;
	zzprime0 = buf + 30;
	memcpy(origx, x, 5 * sizeof(*x));
	fsum(x, z);
	fdifference(z, origx);
	memcpy(origxprime0, xprime, 5 * sizeof(*xprime));
	fsum(xprime, zprime);
	fdifference(zprime, origxprime0);
	fmul_fmul(xxprime0, xprime, z);
	fmul_fmul(zzprime0, x, zprime);
	origxprime = buf + 5;
	{
		u64 *xx0;
		u64 *zz0;
		u64 *xxprime;
		u64 *zzprime;
		u64 *zzzprime;
		xx0 = buf + 15;
		zz0 = buf + 20;
		xxprime = buf + 25;
		zzprime = buf + 30;
		zzzprime = buf + 35;
		memcpy(origxprime, xxprime, 5 * sizeof(*xxprime));
		fsum(xxprime, zzprime);
		fdifference(zzprime, origxprime);
		fsquare_fsquare_times(x3, xxprime, 1);
		fsquare_fsquare_times(zzzprime, zzprime, 1);
		fmul_fmul(z3, zzzprime, qx);
		fsquare_fsquare_times(xx0, x, 1);
		fsquare_fsquare_times(zz0, z, 1);
		{
			u64 *zzz;
			u64 *xx;
			u64 *zz;
			u64 scalar;
			zzz = buf + 10;
			xx = buf + 15;
			zz = buf + 20;
			fmul_fmul(x2, xx, zz);
			fdifference(zz, xx);
			scalar = 121665;
			fscalar(zzz, zz, scalar);
			fsum(zzz, xx);
			fmul_fmul(z2, zzz, zz);
		}
	}
}

static __always_inline void
ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
				       u64 *q, u8 byt)
{
	u64 bit0 = (u64)(byt >> 7);
	u64 bit;
	point_swap_conditional(nq, nqpq, bit0);
	addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q);
	bit = (u64)(byt >> 7);
	point_swap_conditional(nq2, nqpq2, bit);
}

static __always_inline void
ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2,
					      u64 *nqpq2, u64 *q, u8 byt)
{
	u8 byt1;
	ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
	byt1 = byt << 1;
	ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
}

static __always_inline void
ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
				  u64 *q, u8 byt, u32 i)
{
	while (i--) {
		ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2,
							      nqpq2, q, byt);
		byt <<= 2;
	}
}

static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq,
							  u64 *nqpq, u64 *nq2,
							  u64 *nqpq2, u64 *q,
							  u32 i)
{
	while (i--) {
		u8 byte = n1[i];
		ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q,
						  byte, 4);
	}
}

static void ladder_cmult(u64 *result, u8 *n1, u64 *q)
{
	u64 point_buf[40] = { 0 };
	u64 *nq = point_buf;
	u64 *nqpq = point_buf + 10;
	u64 *nq2 = point_buf + 20;
	u64 *nqpq2 = point_buf + 30;
	point_copy(nqpq, q);
	nq[0] = 1;
	ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32);
	point_copy(result, nq);
}

static __always_inline void format_fexpand(u64 *output, const u8 *input)
{
	const u8 *x00 = input + 6;
	const u8 *x01 = input + 12;
	const u8 *x02 = input + 19;
	const u8 *x0 = input + 24;
	u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
	i0 = get_unaligned_le64(input);
	i1 = get_unaligned_le64(x00);
	i2 = get_unaligned_le64(x01);
	i3 = get_unaligned_le64(x02);
	i4 = get_unaligned_le64(x0);
	output0 = i0 & 0x7ffffffffffffLLU;
	output1 = i1 >> 3 & 0x7ffffffffffffLLU;
	output2 = i2 >> 6 & 0x7ffffffffffffLLU;
	output3 = i3 >> 1 & 0x7ffffffffffffLLU;
	output4 = i4 >> 12 & 0x7ffffffffffffLLU;
	output[0] = output0;
	output[1] = output1;
	output[2] = output2;
	output[3] = output3;
	output[4] = output4;
}

static __always_inline void format_fcontract_first_carry_pass(u64 *input)
{
	u64 t0 = input[0];
	u64 t1 = input[1];
	u64 t2 = input[2];
	u64 t3 = input[3];
	u64 t4 = input[4];
	u64 t1_ = t1 + (t0 >> 51);
	u64 t0_ = t0 & 0x7ffffffffffffLLU;
	u64 t2_ = t2 + (t1_ >> 51);
	u64 t1__ = t1_ & 0x7ffffffffffffLLU;
	u64 t3_ = t3 + (t2_ >> 51);
	u64 t2__ = t2_ & 0x7ffffffffffffLLU;
	u64 t4_ = t4 + (t3_ >> 51);
	u64 t3__ = t3_ & 0x7ffffffffffffLLU;
	input[0] = t0_;
	input[1] = t1__;
	input[2] = t2__;
	input[3] = t3__;
	input[4] = t4_;
}

static __always_inline void format_fcontract_first_carry_full(u64 *input)
{
	format_fcontract_first_carry_pass(input);
	modulo_carry_top(input);
}

static __always_inline void format_fcontract_second_carry_pass(u64 *input)
{
	u64 t0 = input[0];
	u64 t1 = input[1];
	u64 t2 = input[2];
	u64 t3 = input[3];
	u64 t4 = input[4];
	u64 t1_ = t1 + (t0 >> 51);
	u64 t0_ = t0 & 0x7ffffffffffffLLU;
	u64 t2_ = t2 + (t1_ >> 51);
	u64 t1__ = t1_ & 0x7ffffffffffffLLU;
	u64 t3_ = t3 + (t2_ >> 51);
	u64 t2__ = t2_ & 0x7ffffffffffffLLU;
	u64 t4_ = t4 + (t3_ >> 51);
	u64 t3__ = t3_ & 0x7ffffffffffffLLU;
	input[0] = t0_;
	input[1] = t1__;
	input[2] = t2__;
	input[3] = t3__;
	input[4] = t4_;
}

static __always_inline void format_fcontract_second_carry_full(u64 *input)
{
	u64 i0;
	u64 i1;
	u64 i0_;
	u64 i1_;
	format_fcontract_second_carry_pass(input);
	modulo_carry_top(input);
	i0 = input[0];
	i1 = input[1];
	i0_ = i0 & 0x7ffffffffffffLLU;
	i1_ = i1 + (i0 >> 51);
	input[0] = i0_;
	input[1] = i1_;
}

static __always_inline void format_fcontract_trim(u64 *input)
{
	u64 a0 = input[0];
	u64 a1 = input[1];
	u64 a2 = input[2];
	u64 a3 = input[3];
	u64 a4 = input[4];
	u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU);
	u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU);
	u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU);
	u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU);
	u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU);
	u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
	u64 a0_ = a0 - (0x7ffffffffffedLLU & mask);
	u64 a1_ = a1 - (0x7ffffffffffffLLU & mask);
	u64 a2_ = a2 - (0x7ffffffffffffLLU & mask);
	u64 a3_ = a3 - (0x7ffffffffffffLLU & mask);
	u64 a4_ = a4 - (0x7ffffffffffffLLU & mask);
	input[0] = a0_;
	input[1] = a1_;
	input[2] = a2_;
	input[3] = a3_;
	input[4] = a4_;
}

static __always_inline void format_fcontract_store(u8 *output, u64 *input)
{
	u64 t0 = input[0];
	u64 t1 = input[1];
	u64 t2 = input[2];
	u64 t3 = input[3];
	u64 t4 = input[4];
	u64 o0 = t1 << 51 | t0;
	u64 o1 = t2 << 38 | t1 >> 13;
	u64 o2 = t3 << 25 | t2 >> 26;
	u64 o3 = t4 << 12 | t3 >> 39;
	u8 *b0 = output;
	u8 *b1 = output + 8;
	u8 *b2 = output + 16;
	u8 *b3 = output + 24;
	put_unaligned_le64(o0, b0);
	put_unaligned_le64(o1, b1);
	put_unaligned_le64(o2, b2);
	put_unaligned_le64(o3, b3);
}

static __always_inline void format_fcontract(u8 *output, u64 *input)
{
	format_fcontract_first_carry_full(input);
	format_fcontract_second_carry_full(input);
	format_fcontract_trim(input);
	format_fcontract_store(output, input);
}

static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point)
{
	u64 *x = point;
	u64 *z = point + 5;
	u64 buf[10] __aligned(32) = { 0 };
	u64 *zmone = buf;
	u64 *sc = buf + 5;
	crecip(zmone, z);
	fmul_fmul(sc, x, zmone);
	format_fcontract(scalar, sc);
}

static void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE],
			       const u8 secret[CURVE25519_KEY_SIZE],
			       const u8 basepoint[CURVE25519_KEY_SIZE])
{
	u64 buf0[10] __aligned(32) = { 0 };
	u64 *x0 = buf0;
	u64 *z = buf0 + 5;
	u64 *q;
	format_fexpand(x0, basepoint);
	z[0] = 1;
	q = buf0;
	{
		u8 e[32] __aligned(32) = { 0 };
		u8 *scalar;
		memcpy(e, secret, 32);
		curve25519_clamp_secret(e);
		scalar = e;
		{
			u64 buf[15] = { 0 };
			u64 *nq = buf;
			u64 *x = nq;
			x[0] = 1;
			ladder_cmult(nq, scalar, q);
			format_scalar_of_point(mypublic, nq);
			memzero_explicit(buf, sizeof(buf));
		}
		memzero_explicit(e, sizeof(e));
	}
	memzero_explicit(buf0, sizeof(buf0));
}
@@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <asm/cpufeature.h>
#include <asm/processor.h>

#include "curve25519-x86_64.c"

static bool curve25519_use_bmi2_adx __ro_after_init;
static bool *const curve25519_nobs[] __initconst = {
	&curve25519_use_bmi2_adx };

static void __init curve25519_fpu_init(void)
{
	curve25519_use_bmi2_adx = IS_ENABLED(CONFIG_AS_BMI2) &&
				  IS_ENABLED(CONFIG_AS_ADX) &&
				  boot_cpu_has(X86_FEATURE_BMI2) &&
				  boot_cpu_has(X86_FEATURE_ADX);
}

static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
				   const u8 secret[CURVE25519_KEY_SIZE],
				   const u8 basepoint[CURVE25519_KEY_SIZE])
{
	if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
	    curve25519_use_bmi2_adx) {
		curve25519_ever64(mypublic, secret, basepoint);
		return true;
	}
	return false;
}

static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
					const u8 secret[CURVE25519_KEY_SIZE])
{
	if (IS_ENABLED(CONFIG_AS_ADX) && IS_ENABLED(CONFIG_AS_BMI2) &&
	    curve25519_use_bmi2_adx) {
		curve25519_ever64_base(pub, secret);
		return true;
	}
	return false;
}
1369
net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c
Normal file
File diff suppressed because it is too large
110
net/wireguard/crypto/zinc/curve25519/curve25519.c
Normal file
@@ -0,0 +1,110 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is an implementation of the Curve25519 ECDH algorithm, using either
 * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
 * depending on what is supported by the target compiler.
 *
 * Information: https://cr.yp.to/ecdh.html
 */

#include <zinc/curve25519.h>
#include "../selftest/run.h"

#include <asm/unaligned.h>
#include <linux/version.h>
#include <linux/string.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/init.h>
#include <crypto/algapi.h> // For crypto_memneq.

#if defined(CONFIG_ZINC_ARCH_X86_64)
#include "curve25519-x86_64-glue.c"
#elif defined(CONFIG_ZINC_ARCH_ARM)
#include "curve25519-arm-glue.c"
#else
static bool *const curve25519_nobs[] __initconst = { };
static void __init curve25519_fpu_init(void)
{
}
static inline bool curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
				   const u8 secret[CURVE25519_KEY_SIZE],
				   const u8 basepoint[CURVE25519_KEY_SIZE])
{
	return false;
}
static inline bool curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
					const u8 secret[CURVE25519_KEY_SIZE])
{
	return false;
}
#endif

#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
#include "curve25519-hacl64.c"
#else
#include "curve25519-fiat32.c"
#endif

static const u8 null_point[CURVE25519_KEY_SIZE] = { 0 };

bool curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
		const u8 secret[CURVE25519_KEY_SIZE],
		const u8 basepoint[CURVE25519_KEY_SIZE])
{
	if (!curve25519_arch(mypublic, secret, basepoint))
		curve25519_generic(mypublic, secret, basepoint);
	return crypto_memneq(mypublic, null_point, CURVE25519_KEY_SIZE);
}

bool curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
				const u8 secret[CURVE25519_KEY_SIZE])
{
	static const u8 basepoint[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };

	if (unlikely(!crypto_memneq(secret, null_point, CURVE25519_KEY_SIZE)))
		return false;

	if (curve25519_base_arch(pub, secret))
		return crypto_memneq(pub, null_point, CURVE25519_KEY_SIZE);
	return curve25519(pub, secret, basepoint);
}

void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
{
	get_random_bytes_wait(secret, CURVE25519_KEY_SIZE);
	curve25519_clamp_secret(secret);
}
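
Editor's note: a minimal usage sketch of the three entry points above, performing an X25519 key agreement between two parties. The sketch function itself is hypothetical and not part of this file; it assumes the usual kernel error codes and a context where these symbols are visible:

/* Illustrative only: how the API above is meant to be driven. */
static int __maybe_unused curve25519_usage_sketch(void)
{
	u8 alice_secret[CURVE25519_KEY_SIZE], alice_public[CURVE25519_KEY_SIZE];
	u8 bob_secret[CURVE25519_KEY_SIZE], bob_public[CURVE25519_KEY_SIZE];
	u8 shared_a[CURVE25519_KEY_SIZE], shared_b[CURVE25519_KEY_SIZE];

	curve25519_generate_secret(alice_secret);
	curve25519_generate_secret(bob_secret);
	if (!curve25519_generate_public(alice_public, alice_secret) ||
	    !curve25519_generate_public(bob_public, bob_secret))
		return -EINVAL;	/* all-zero secret; vanishingly unlikely */

	/* Each side combines its own secret with the peer's public key;
	 * both calls return false only for degenerate (all-zero) outputs.
	 */
	if (!curve25519(shared_a, alice_secret, bob_public) ||
	    !curve25519(shared_b, bob_secret, alice_public))
		return -EINVAL;
	return crypto_memneq(shared_a, shared_b, CURVE25519_KEY_SIZE) ?
	       -EBADMSG : 0;	/* the two shared secrets must agree */
}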

#include "../selftest/curve25519.c"

static bool nosimd __initdata = false;

#ifndef COMPAT_ZINC_IS_A_MODULE
int __init curve25519_mod_init(void)
#else
static int __init mod_init(void)
#endif
{
	if (!nosimd)
		curve25519_fpu_init();
	if (!selftest_run("curve25519", curve25519_selftest, curve25519_nobs,
			  ARRAY_SIZE(curve25519_nobs)))
		return -ENOTRECOVERABLE;
	return 0;
}

#ifdef COMPAT_ZINC_IS_A_MODULE
static void __exit mod_exit(void)
{
}

module_param(nosimd, bool, 0);
module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Curve25519 scalar multiplication");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
#endif
140
net/wireguard/crypto/zinc/poly1305/poly1305-arm-glue.c
Normal file
@@ -0,0 +1,140 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>

asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
				    const u32 padbit);
asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
				     const u32 padbit);
asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);

static bool poly1305_use_neon __ro_after_init;
static bool *const poly1305_nobs[] __initconst = { &poly1305_use_neon };

static void __init poly1305_fpu_init(void)
{
#if defined(CONFIG_ZINC_ARCH_ARM64)
	poly1305_use_neon = cpu_have_named_feature(ASIMD);
#elif defined(CONFIG_ZINC_ARCH_ARM)
	poly1305_use_neon = elf_hwcap & HWCAP_NEON;
#endif
}

#if defined(CONFIG_ZINC_ARCH_ARM64)
struct poly1305_arch_internal {
	union {
		u32 h[5];
		struct {
			u64 h0, h1, h2;
		};
	};
	u64 is_base2_26;
	u64 r[2];
};
#elif defined(CONFIG_ZINC_ARCH_ARM)
struct poly1305_arch_internal {
	union {
		u32 h[5];
		struct {
			u64 h0, h1;
			u32 h2;
		} __packed;
	};
	u32 r[4];
	u32 is_base2_26;
};
#endif

/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
 * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
 * and then having to go back to scalar -- because the user is silly and has
 * called the update function from two separate contexts -- then we need to
 * convert back to the original base before proceeding. The below function is
 * written for 64-bit integers, and so we have to swap words at the end on
 * big-endian 32-bit. It is possible to reason that the initial reduction below
 * is sufficient given the implementation invariants. However, for an avoidance
 * of doubt and because this is not performance critical, we do the full
 * reduction anyway.
 */
static void convert_to_base2_64(void *ctx)
{
	struct poly1305_arch_internal *state = ctx;
	u32 cy;

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
		return;

	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
	state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
	state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
	state->h2 = state->h[4] >> 24;
	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
		state->h0 = rol64(state->h0, 32);
		state->h1 = rol64(state->h1, 32);
	}
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
	cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
	state->h2 &= 3;
	state->h0 += cy;
	state->h1 += (cy = ULT(state->h0, cy));
	state->h2 += ULT(state->h1, cy);
#undef ULT
	state->is_base2_26 = 0;
}
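
Editor's note: a standalone sketch checking the limb repacking done by convert_to_base2_64() above, i.e. that five base-2^26 limbs and the packed 64-bit words denote the same integer h = h0 + h1*2^26 + h2*2^52 + h3*2^78 + h4*2^104. This is a hypothetical userspace test harness, not kernel code; h4 is kept under 2^24 so the reference value fits in __int128:

#include <stdint.h>
#include <assert.h>

int main(void)
{
	/* five base-2^26 limbs (h4 < 2^24 so the sum fits in 128 bits) */
	uint32_t h[5] = { 0x1abcdef, 0x2345678, 0x3ffffff, 0x0123456, 0x00dcba9 };
	unsigned __int128 n = 0;
	int i;

	for (i = 4; i >= 0; --i)	/* reference value of h */
		n = (n << 26) | h[i];

	/* the same shifts convert_to_base2_64() applies */
	uint64_t w0 = ((uint64_t)h[2] << 52) | ((uint64_t)h[1] << 26) | h[0];
	uint64_t w1 = ((uint64_t)h[4] << 40) | ((uint64_t)h[3] << 14) | (h[2] >> 12);
	uint64_t w2 = h[4] >> 24;

	assert((uint64_t)n == w0);		/* low 64 bits match */
	assert((uint64_t)(n >> 64) == w1);	/* next 64 bits match */
	assert(w2 == 0);			/* top word empty for h4 < 2^24 */
	return 0;
}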

static inline bool poly1305_init_arch(void *ctx,
				      const u8 key[POLY1305_KEY_SIZE])
{
	poly1305_init_arm(ctx, key);
	return true;
}

static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
					size_t len, const u32 padbit,
					simd_context_t *simd_context)
{
	/* SIMD disables preemption, so relax after processing each page. */
	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
		     PAGE_SIZE % POLY1305_BLOCK_SIZE);

	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
	    !simd_use(simd_context)) {
		convert_to_base2_64(ctx);
		poly1305_blocks_arm(ctx, inp, len, padbit);
		return true;
	}

	for (;;) {
		const size_t bytes = min_t(size_t, len, PAGE_SIZE);

		poly1305_blocks_neon(ctx, inp, bytes, padbit);
		len -= bytes;
		if (!len)
			break;
		inp += bytes;
		simd_relax(simd_context);
	}
	return true;
}

static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
				      const u32 nonce[4],
				      simd_context_t *simd_context)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
	    !simd_use(simd_context)) {
		convert_to_base2_64(ctx);
		poly1305_emit_arm(ctx, mac, nonce);
	} else
		poly1305_emit_neon(ctx, mac, nonce);
	return true;
}
1276
net/wireguard/crypto/zinc/poly1305/poly1305-arm.pl
Normal file
File diff suppressed because it is too large
974
net/wireguard/crypto/zinc/poly1305/poly1305-arm64.pl
Normal file
@@ -0,0 +1,974 @@
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
#
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
# has relicensed it under the licenses specified in the SPDX header above.
# The original headers, including the original license headers, are
# included below for completeness.
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for ARMv8.
#
# June 2015
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
#		IALU/gcc-4.9	NEON
#
# Apple A7	1.86/+5%	0.72
# Cortex-A53	2.69/+58%	1.47
# Cortex-A57	2.70/+7%	1.14
# Denver	1.64/+50%	1.18(*)
# X-Gene	2.13/+68%	2.27
# Mongoose	1.77/+75%	1.12
# Kryo		2.70/+55%	1.13
#
# (*)	estimate based on resources availability is less than 1.0,
#	i.e. measured result is worse than expected, presumably binary
#	translator is not almighty;

$flavour=shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    open STDOUT,"| \"$^X\" $xlate $flavour $output";
} else {
    open STDOUT,">$output";
}

my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
my ($mac,$nonce)=($inp,$len);

my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));

$code.=<<___;
#ifndef __KERNEL__
# include "arm_arch.h"
.extern	OPENSSL_armcap_P
#else
# define poly1305_init   poly1305_init_arm
# define poly1305_blocks poly1305_blocks_arm
# define poly1305_emit   poly1305_emit_arm
#endif

.text

// forward "declarations" are required for Apple
.globl	poly1305_blocks
.globl	poly1305_emit
.globl	poly1305_init
.type	poly1305_init,%function
.align	5
poly1305_init:
	cmp	$inp,xzr
	stp	xzr,xzr,[$ctx]		// zero hash value
	stp	xzr,xzr,[$ctx,#16]	// [along with is_base2_26]

	csel	x0,xzr,x0,eq
	b.eq	.Lno_key

#ifndef	__KERNEL__
# ifdef	__ILP32__
	ldrsw	$t1,.LOPENSSL_armcap_P
# else
	ldr	$t1,.LOPENSSL_armcap_P
# endif
	adr	$t0,.LOPENSSL_armcap_P
	ldr	w17,[$t0,$t1]
#endif

	ldp	$r0,$r1,[$inp]		// load key
	mov	$s1,#0xfffffffc0fffffff
	movk	$s1,#0x0fff,lsl#48
#ifdef	__AARCH64EB__
	rev	$r0,$r0			// flip bytes
	rev	$r1,$r1
#endif
	and	$r0,$r0,$s1		// &=0ffffffc0fffffff
	and	$s1,$s1,#-4
	and	$r1,$r1,$s1		// &=0ffffffc0ffffffc
	stp	$r0,$r1,[$ctx,#32]	// save key value

#ifndef	__KERNEL__
	tst	w17,#ARMV7_NEON

	adr	$d0,poly1305_blocks
	adr	$r0,poly1305_blocks_neon
	adr	$d1,poly1305_emit
	adr	$r1,poly1305_emit_neon

	csel	$d0,$d0,$r0,eq
	csel	$d1,$d1,$r1,eq

# ifdef	__ILP32__
	stp	w12,w13,[$len]
# else
	stp	$d0,$d1,[$len]
# endif

	mov	x0,#1
#else
	mov	x0,#0
#endif
.Lno_key:
	ret
.size	poly1305_init,.-poly1305_init

.type	poly1305_blocks,%function
.align	5
poly1305_blocks:
	ands	$len,$len,#-16
	b.eq	.Lno_data

	ldp	$h0,$h1,[$ctx]		// load hash value
	ldp	$r0,$r1,[$ctx,#32]	// load key value
	ldr	$h2,[$ctx,#16]
	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
	b	.Loop

.align	5
.Loop:
	ldp	$t0,$t1,[$inp],#16	// load input
	sub	$len,$len,#16
#ifdef	__AARCH64EB__
	rev	$t0,$t0
	rev	$t1,$t1
#endif
	adds	$h0,$h0,$t0		// accumulate input
	adcs	$h1,$h1,$t1

	mul	$d0,$h0,$r0		// h0*r0
	adc	$h2,$h2,$padbit
	umulh	$d1,$h0,$r0

	mul	$t0,$h1,$s1		// h1*5*r1
	umulh	$t1,$h1,$s1

	adds	$d0,$d0,$t0
	mul	$t0,$h0,$r1		// h0*r1
	adc	$d1,$d1,$t1
	umulh	$d2,$h0,$r1

	adds	$d1,$d1,$t0
	mul	$t0,$h1,$r0		// h1*r0
	adc	$d2,$d2,xzr
	umulh	$t1,$h1,$r0

	adds	$d1,$d1,$t0
	mul	$t0,$h2,$s1		// h2*5*r1
	adc	$d2,$d2,$t1
	mul	$t1,$h2,$r0		// h2*r0

	adds	$d1,$d1,$t0
	adc	$d2,$d2,$t1

	and	$t0,$d2,#-4		// final reduction
	and	$h2,$d2,#3
	add	$t0,$t0,$d2,lsr#2
	adds	$h0,$d0,$t0
	adcs	$h1,$d1,xzr
	adc	$h2,$h2,xzr

	cbnz	$len,.Loop

	stp	$h0,$h1,[$ctx]		// store hash value
	str	$h2,[$ctx,#16]

.Lno_data:
	ret
.size	poly1305_blocks,.-poly1305_blocks

.type	poly1305_emit,%function
.align	5
poly1305_emit:
	ldp	$h0,$h1,[$ctx]		// load hash base 2^64
	ldr	$h2,[$ctx,#16]
	ldp	$t0,$t1,[$nonce]	// load nonce

	adds	$d0,$h0,#5		// compare to modulus
	adcs	$d1,$h1,xzr
	adc	$d2,$h2,xzr

	tst	$d2,#-4			// see if it's carried/borrowed

	csel	$h0,$h0,$d0,eq
	csel	$h1,$h1,$d1,eq

#ifdef	__AARCH64EB__
	ror	$t0,$t0,#32		// flip nonce words
	ror	$t1,$t1,#32
#endif
	adds	$h0,$h0,$t0		// accumulate nonce
	adc	$h1,$h1,$t1
#ifdef	__AARCH64EB__
	rev	$h0,$h0			// flip output bytes
	rev	$h1,$h1
#endif
	stp	$h0,$h1,[$mac]		// write result

	ret
.size	poly1305_emit,.-poly1305_emit
___
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
my ($T0,$T1,$MASK) = map("v$_",(29..31));

my ($in2,$zeros)=("x16","x17");
my $is_base2_26 = $zeros;		# borrow

$code.=<<___;
.type	__poly1305_mult,%function
.align	5
__poly1305_mult:
	mul	$d0,$h0,$r0		// h0*r0
	umulh	$d1,$h0,$r0

	mul	$t0,$h1,$s1		// h1*5*r1
	umulh	$t1,$h1,$s1

	adds	$d0,$d0,$t0
	mul	$t0,$h0,$r1		// h0*r1
	adc	$d1,$d1,$t1
	umulh	$d2,$h0,$r1

	adds	$d1,$d1,$t0
	mul	$t0,$h1,$r0		// h1*r0
	adc	$d2,$d2,xzr
	umulh	$t1,$h1,$r0

	adds	$d1,$d1,$t0
	mul	$t0,$h2,$s1		// h2*5*r1
	adc	$d2,$d2,$t1
	mul	$t1,$h2,$r0		// h2*r0

	adds	$d1,$d1,$t0
	adc	$d2,$d2,$t1

	and	$t0,$d2,#-4		// final reduction
	and	$h2,$d2,#3
	add	$t0,$t0,$d2,lsr#2
	adds	$h0,$d0,$t0
	adcs	$h1,$d1,xzr
	adc	$h2,$h2,xzr

	ret
.size	__poly1305_mult,.-__poly1305_mult

.type	__poly1305_splat,%function
.align	5
__poly1305_splat:
	and	x12,$h0,#0x03ffffff	// base 2^64 -> base 2^26
	ubfx	x13,$h0,#26,#26
	extr	x14,$h1,$h0,#52
	and	x14,x14,#0x03ffffff
	ubfx	x15,$h1,#14,#26
	extr	x16,$h2,$h1,#40

	str	w12,[$ctx,#16*0]	// r0
	add	w12,w13,w13,lsl#2	// r1*5
	str	w13,[$ctx,#16*1]	// r1
	add	w13,w14,w14,lsl#2	// r2*5
	str	w12,[$ctx,#16*2]	// s1
	str	w14,[$ctx,#16*3]	// r2
	add	w14,w15,w15,lsl#2	// r3*5
	str	w13,[$ctx,#16*4]	// s2
	str	w15,[$ctx,#16*5]	// r3
	add	w15,w16,w16,lsl#2	// r4*5
	str	w14,[$ctx,#16*6]	// s3
	str	w16,[$ctx,#16*7]	// r4
	str	w15,[$ctx,#16*8]	// s4

	ret
.size	__poly1305_splat,.-__poly1305_splat

#if !defined(__KERNEL__) || defined(CONFIG_KERNEL_MODE_NEON)
#ifdef	__KERNEL__
.globl	poly1305_blocks_neon
.globl	poly1305_emit_neon
#endif

.type	poly1305_blocks_neon,%function
.align	5
poly1305_blocks_neon:
	ldr	$is_base2_26,[$ctx,#24]
	cmp	$len,#128
	b.hs	.Lblocks_neon
	cbz	$is_base2_26,poly1305_blocks

.Lblocks_neon:
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0

	ands	$len,$len,#-16
	b.eq	.Lno_data_neon

	cbz	$is_base2_26,.Lbase2_64_neon

	ldp	w10,w11,[$ctx]		// load hash value base 2^26
	ldp	w12,w13,[$ctx,#8]
	ldr	w14,[$ctx,#16]

	tst	$len,#31
	b.eq	.Leven_neon

	ldp	$r0,$r1,[$ctx,#32]	// load key value

	add	$h0,x10,x11,lsl#26	// base 2^26 -> base 2^64
	lsr	$h1,x12,#12
	adds	$h0,$h0,x12,lsl#52
	add	$h1,$h1,x13,lsl#14
	adc	$h1,$h1,xzr
	lsr	$h2,x14,#24
	adds	$h1,$h1,x14,lsl#40
	adc	$d2,$h2,xzr		// can be partially reduced...

	ldp	$d0,$d1,[$inp],#16	// load input
	sub	$len,$len,#16
	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)

	and	$t0,$d2,#-4		// ... so reduce
	and	$h2,$d2,#3
	add	$t0,$t0,$d2,lsr#2
	adds	$h0,$h0,$t0
	adcs	$h1,$h1,xzr
	adc	$h2,$h2,xzr

#ifdef	__AARCH64EB__
	rev	$d0,$d0
	rev	$d1,$d1
#endif
	adds	$h0,$h0,$d0		// accumulate input
	adcs	$h1,$h1,$d1
	adc	$h2,$h2,$padbit

	bl	__poly1305_mult
	ldr	x30,[sp,#8]

	cbz	$padbit,.Lstore_base2_64_neon

	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
	ubfx	x11,$h0,#26,#26
	extr	x12,$h1,$h0,#52
	and	x12,x12,#0x03ffffff
	ubfx	x13,$h1,#14,#26
	extr	x14,$h2,$h1,#40

	cbnz	$len,.Leven_neon

	stp	w10,w11,[$ctx]		// store hash value base 2^26
	stp	w12,w13,[$ctx,#8]
	str	w14,[$ctx,#16]
	b	.Lno_data_neon

.align	4
.Lstore_base2_64_neon:
	stp	$h0,$h1,[$ctx]		// store hash value base 2^64
	stp	$h2,xzr,[$ctx,#16]	// note that is_base2_26 is zeroed
	b	.Lno_data_neon

.align	4
.Lbase2_64_neon:
	ldp	$r0,$r1,[$ctx,#32]	// load key value

	ldp	$h0,$h1,[$ctx]		// load hash value base 2^64
	ldr	$h2,[$ctx,#16]

	tst	$len,#31
	b.eq	.Linit_neon

	ldp	$d0,$d1,[$inp],#16	// load input
	sub	$len,$len,#16
	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
#ifdef	__AARCH64EB__
	rev	$d0,$d0
	rev	$d1,$d1
#endif
	adds	$h0,$h0,$d0		// accumulate input
	adcs	$h1,$h1,$d1
	adc	$h2,$h2,$padbit

	bl	__poly1305_mult

.Linit_neon:
	and	x10,$h0,#0x03ffffff	// base 2^64 -> base 2^26
	ubfx	x11,$h0,#26,#26
	extr	x12,$h1,$h0,#52
	and	x12,x12,#0x03ffffff
	ubfx	x13,$h1,#14,#26
	extr	x14,$h2,$h1,#40

	stp	d8,d9,[sp,#16]		// meet ABI requirements
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]

	fmov	${H0},x10
	fmov	${H1},x11
	fmov	${H2},x12
	fmov	${H3},x13
	fmov	${H4},x14

	////////////////////////////////// initialize r^n table
	mov	$h0,$r0			// r^1
	add	$s1,$r1,$r1,lsr#2	// s1 = r1 + (r1 >> 2)
	mov	$h1,$r1
	mov	$h2,xzr
	add	$ctx,$ctx,#48+12
	bl	__poly1305_splat

	bl	__poly1305_mult		// r^2
	sub	$ctx,$ctx,#4
	bl	__poly1305_splat

	bl	__poly1305_mult		// r^3
	sub	$ctx,$ctx,#4
	bl	__poly1305_splat

	bl	__poly1305_mult		// r^4
	sub	$ctx,$ctx,#4
	bl	__poly1305_splat
	ldr	x30,[sp,#8]

	add	$in2,$inp,#32
	adr	$zeros,.Lzeros
	subs	$len,$len,#64
	csel	$in2,$zeros,$in2,lo

	mov	x4,#1
	str	x4,[$ctx,#-24]		// set is_base2_26
	sub	$ctx,$ctx,#48		// restore original $ctx
	b	.Ldo_neon

.align	4
.Leven_neon:
	add	$in2,$inp,#32
	adr	$zeros,.Lzeros
	subs	$len,$len,#64
	csel	$in2,$zeros,$in2,lo

	stp	d8,d9,[sp,#16]		// meet ABI requirements
	stp	d10,d11,[sp,#32]
	stp	d12,d13,[sp,#48]
	stp	d14,d15,[sp,#64]

	fmov	${H0},x10
	fmov	${H1},x11
	fmov	${H2},x12
	fmov	${H3},x13
	fmov	${H4},x14

.Ldo_neon:
	ldp	x8,x12,[$in2],#16	// inp[2:3] (or zero)
	ldp	x9,x13,[$in2],#48

	lsl	$padbit,$padbit,#24
	add	x15,$ctx,#48

#ifdef	__AARCH64EB__
	rev	x8,x8
	rev	x12,x12
	rev	x9,x9
	rev	x13,x13
#endif
	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
	and	x5,x9,#0x03ffffff
	ubfx	x6,x8,#26,#26
	ubfx	x7,x9,#26,#26
	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
	extr	x8,x12,x8,#52
	extr	x9,x13,x9,#52
	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
	fmov	$IN23_0,x4
	and	x8,x8,#0x03ffffff
	and	x9,x9,#0x03ffffff
	ubfx	x10,x12,#14,#26
	ubfx	x11,x13,#14,#26
	add	x12,$padbit,x12,lsr#40
	add	x13,$padbit,x13,lsr#40
	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
	fmov	$IN23_1,x6
	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
	fmov	$IN23_2,x8
	fmov	$IN23_3,x10
	fmov	$IN23_4,x12

	ldp	x8,x12,[$inp],#16	// inp[0:1]
	ldp	x9,x13,[$inp],#48

	ld1	{$R0,$R1,$S1,$R2},[x15],#64
	ld1	{$S2,$R3,$S3,$R4},[x15],#64
	ld1	{$S4},[x15]

#ifdef	__AARCH64EB__
	rev	x8,x8
	rev	x12,x12
	rev	x9,x9
	rev	x13,x13
#endif
	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
	and	x5,x9,#0x03ffffff
	ubfx	x6,x8,#26,#26
	ubfx	x7,x9,#26,#26
	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
	extr	x8,x12,x8,#52
	extr	x9,x13,x9,#52
	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
	fmov	$IN01_0,x4
	and	x8,x8,#0x03ffffff
	and	x9,x9,#0x03ffffff
	ubfx	x10,x12,#14,#26
	ubfx	x11,x13,#14,#26
	add	x12,$padbit,x12,lsr#40
	add	x13,$padbit,x13,lsr#40
	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
	fmov	$IN01_1,x6
	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
	movi	$MASK.2d,#-1
	fmov	$IN01_2,x8
	fmov	$IN01_3,x10
	fmov	$IN01_4,x12
	ushr	$MASK.2d,$MASK.2d,#38

	b.ls	.Lskip_loop

.align	4
.Loop_neon:
	////////////////////////////////////////////////////////////////
	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
	//   \___________________/
	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
	//   \___________________/ \____________________/
	//
	// Note that we start with inp[2:3]*r^2. This is because it
	// doesn't depend on reduction in previous iteration.
	////////////////////////////////////////////////////////////////
	// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
	// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
	// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
	// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1

	subs	$len,$len,#64
	umull	$ACC4,$IN23_0,${R4}[2]
	csel	$in2,$zeros,$in2,lo
	umull	$ACC3,$IN23_0,${R3}[2]
	umull	$ACC2,$IN23_0,${R2}[2]
	ldp	x8,x12,[$in2],#16	// inp[2:3] (or zero)
	umull	$ACC1,$IN23_0,${R1}[2]
	ldp	x9,x13,[$in2],#48
	umull	$ACC0,$IN23_0,${R0}[2]
#ifdef	__AARCH64EB__
	rev	x8,x8
	rev	x12,x12
	rev	x9,x9
	rev	x13,x13
#endif

	umlal	$ACC4,$IN23_1,${R3}[2]
	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
	umlal	$ACC3,$IN23_1,${R2}[2]
	and	x5,x9,#0x03ffffff
	umlal	$ACC2,$IN23_1,${R1}[2]
	ubfx	x6,x8,#26,#26
	umlal	$ACC1,$IN23_1,${R0}[2]
	ubfx	x7,x9,#26,#26
	umlal	$ACC0,$IN23_1,${S4}[2]
	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32

	umlal	$ACC4,$IN23_2,${R2}[2]
	extr	x8,x12,x8,#52
	umlal	$ACC3,$IN23_2,${R1}[2]
	extr	x9,x13,x9,#52
	umlal	$ACC2,$IN23_2,${R0}[2]
	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
	umlal	$ACC1,$IN23_2,${S4}[2]
	fmov	$IN23_0,x4
	umlal	$ACC0,$IN23_2,${S3}[2]
	and	x8,x8,#0x03ffffff

	umlal	$ACC4,$IN23_3,${R1}[2]
	and	x9,x9,#0x03ffffff
	umlal	$ACC3,$IN23_3,${R0}[2]
	ubfx	x10,x12,#14,#26
	umlal	$ACC2,$IN23_3,${S4}[2]
	ubfx	x11,x13,#14,#26
	umlal	$ACC1,$IN23_3,${S3}[2]
	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
	umlal	$ACC0,$IN23_3,${S2}[2]
	fmov	$IN23_1,x6

	add	$IN01_2,$IN01_2,$H2
	add	x12,$padbit,x12,lsr#40
	umlal	$ACC4,$IN23_4,${R0}[2]
	add	x13,$padbit,x13,lsr#40
	umlal	$ACC3,$IN23_4,${S4}[2]
	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
	umlal	$ACC2,$IN23_4,${S3}[2]
	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
	umlal	$ACC1,$IN23_4,${S2}[2]
	fmov	$IN23_2,x8
	umlal	$ACC0,$IN23_4,${S1}[2]
	fmov	$IN23_3,x10

	////////////////////////////////////////////////////////////////
	// (hash+inp[0:1])*r^4 and accumulate

	add	$IN01_0,$IN01_0,$H0
	fmov	$IN23_4,x12
	umlal	$ACC3,$IN01_2,${R1}[0]
	ldp	x8,x12,[$inp],#16	// inp[0:1]
	umlal	$ACC0,$IN01_2,${S3}[0]
	ldp	x9,x13,[$inp],#48
	umlal	$ACC4,$IN01_2,${R2}[0]
	umlal	$ACC1,$IN01_2,${S4}[0]
	umlal	$ACC2,$IN01_2,${R0}[0]
#ifdef	__AARCH64EB__
	rev	x8,x8
	rev	x12,x12
	rev	x9,x9
	rev	x13,x13
#endif

	add	$IN01_1,$IN01_1,$H1
	umlal	$ACC3,$IN01_0,${R3}[0]
	umlal	$ACC4,$IN01_0,${R4}[0]
	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
	umlal	$ACC2,$IN01_0,${R2}[0]
	and	x5,x9,#0x03ffffff
	umlal	$ACC0,$IN01_0,${R0}[0]
	ubfx	x6,x8,#26,#26
	umlal	$ACC1,$IN01_0,${R1}[0]
	ubfx	x7,x9,#26,#26

	add	$IN01_3,$IN01_3,$H3
	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
	umlal	$ACC3,$IN01_1,${R2}[0]
	extr	x8,x12,x8,#52
	umlal	$ACC4,$IN01_1,${R3}[0]
	extr	x9,x13,x9,#52
	umlal	$ACC0,$IN01_1,${S4}[0]
	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
	umlal	$ACC2,$IN01_1,${R1}[0]
	fmov	$IN01_0,x4
	umlal	$ACC1,$IN01_1,${R0}[0]
	and	x8,x8,#0x03ffffff

	add	$IN01_4,$IN01_4,$H4
	and	x9,x9,#0x03ffffff
	umlal	$ACC3,$IN01_3,${R0}[0]
	ubfx	x10,x12,#14,#26
	umlal	$ACC0,$IN01_3,${S2}[0]
	ubfx	x11,x13,#14,#26
	umlal	$ACC4,$IN01_3,${R1}[0]
	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
	umlal	$ACC1,$IN01_3,${S3}[0]
	fmov	$IN01_1,x6
	umlal	$ACC2,$IN01_3,${S4}[0]
	add	x12,$padbit,x12,lsr#40

	umlal	$ACC3,$IN01_4,${S4}[0]
	add	x13,$padbit,x13,lsr#40
	umlal	$ACC0,$IN01_4,${S1}[0]
	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
	umlal	$ACC4,$IN01_4,${R0}[0]
	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
	umlal	$ACC1,$IN01_4,${S2}[0]
	fmov	$IN01_2,x8
	umlal	$ACC2,$IN01_4,${S3}[0]
	fmov	$IN01_3,x10
	fmov	$IN01_4,x12

	/////////////////////////////////////////////////////////////////
	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
	// and P. Schwabe
	//
	// [see discussion in poly1305-armv4 module]

	ushr	$T0.2d,$ACC3,#26
	xtn	$H3,$ACC3
	ushr	$T1.2d,$ACC0,#26
	and	$ACC0,$ACC0,$MASK.2d
	add	$ACC4,$ACC4,$T0.2d	// h3 -> h4
	bic	$H3,#0xfc,lsl#24	// &=0x03ffffff
	add	$ACC1,$ACC1,$T1.2d	// h0 -> h1

	ushr	$T0.2d,$ACC4,#26
	xtn	$H4,$ACC4
	ushr	$T1.2d,$ACC1,#26
	xtn	$H1,$ACC1
	bic	$H4,#0xfc,lsl#24
	add	$ACC2,$ACC2,$T1.2d	// h1 -> h2

	add	$ACC0,$ACC0,$T0.2d
	shl	$T0.2d,$T0.2d,#2
	shrn	$T1.2s,$ACC2,#26
	xtn	$H2,$ACC2
	add	$ACC0,$ACC0,$T0.2d	// h4 -> h0
	bic	$H1,#0xfc,lsl#24
	add	$H3,$H3,$T1.2s		// h2 -> h3
	bic	$H2,#0xfc,lsl#24

	shrn	$T0.2s,$ACC0,#26
	xtn	$H0,$ACC0
	ushr	$T1.2s,$H3,#26
	bic	$H3,#0xfc,lsl#24
	bic	$H0,#0xfc,lsl#24
	add	$H1,$H1,$T0.2s		// h0 -> h1
	add	$H4,$H4,$T1.2s		// h3 -> h4

	b.hi	.Loop_neon

.Lskip_loop:
	dup	$IN23_2,${IN23_2}[0]
	add	$IN01_2,$IN01_2,$H2

	////////////////////////////////////////////////////////////////
	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1

	adds	$len,$len,#32
	b.ne	.Long_tail

	dup	$IN23_2,${IN01_2}[0]
	add	$IN23_0,$IN01_0,$H0
	add	$IN23_3,$IN01_3,$H3
	add	$IN23_1,$IN01_1,$H1
	add	$IN23_4,$IN01_4,$H4

.Long_tail:
	dup	$IN23_0,${IN23_0}[0]
	umull2	$ACC0,$IN23_2,${S3}
	umull2	$ACC3,$IN23_2,${R1}
	umull2	$ACC4,$IN23_2,${R2}
	umull2	$ACC2,$IN23_2,${R0}
	umull2	$ACC1,$IN23_2,${S4}

	dup	$IN23_1,${IN23_1}[0]
	umlal2	$ACC0,$IN23_0,${R0}
	umlal2	$ACC2,$IN23_0,${R2}
	umlal2	$ACC3,$IN23_0,${R3}
	umlal2	$ACC4,$IN23_0,${R4}
	umlal2	$ACC1,$IN23_0,${R1}

	dup	$IN23_3,${IN23_3}[0]
	umlal2	$ACC0,$IN23_1,${S4}
	umlal2	$ACC3,$IN23_1,${R2}
	umlal2	$ACC2,$IN23_1,${R1}
	umlal2	$ACC4,$IN23_1,${R3}
	umlal2	$ACC1,$IN23_1,${R0}

	dup	$IN23_4,${IN23_4}[0]
	umlal2	$ACC3,$IN23_3,${R0}
	umlal2	$ACC4,$IN23_3,${R1}
	umlal2	$ACC0,$IN23_3,${S2}
	umlal2	$ACC1,$IN23_3,${S3}
	umlal2	$ACC2,$IN23_3,${S4}

	umlal2	$ACC3,$IN23_4,${S4}
	umlal2	$ACC0,$IN23_4,${S1}
	umlal2	$ACC4,$IN23_4,${R0}
	umlal2	$ACC1,$IN23_4,${S2}
	umlal2	$ACC2,$IN23_4,${S3}

	b.eq	.Lshort_tail

	////////////////////////////////////////////////////////////////
	// (hash+inp[0:1])*r^4:r^3 and accumulate

	add	$IN01_0,$IN01_0,$H0
	umlal	$ACC3,$IN01_2,${R1}
	umlal	$ACC0,$IN01_2,${S3}
	umlal	$ACC4,$IN01_2,${R2}
	umlal	$ACC1,$IN01_2,${S4}
	umlal	$ACC2,$IN01_2,${R0}

	add	$IN01_1,$IN01_1,$H1
	umlal	$ACC3,$IN01_0,${R3}
	umlal	$ACC0,$IN01_0,${R0}
	umlal	$ACC4,$IN01_0,${R4}
	umlal	$ACC1,$IN01_0,${R1}
	umlal	$ACC2,$IN01_0,${R2}

	add	$IN01_3,$IN01_3,$H3
	umlal	$ACC3,$IN01_1,${R2}
	umlal	$ACC0,$IN01_1,${S4}
	umlal	$ACC4,$IN01_1,${R3}
	umlal	$ACC1,$IN01_1,${R0}
	umlal	$ACC2,$IN01_1,${R1}

	add	$IN01_4,$IN01_4,$H4
	umlal	$ACC3,$IN01_3,${R0}
	umlal	$ACC0,$IN01_3,${S2}
	umlal	$ACC4,$IN01_3,${R1}
	umlal	$ACC1,$IN01_3,${S3}
	umlal	$ACC2,$IN01_3,${S4}

	umlal	$ACC3,$IN01_4,${S4}
	umlal	$ACC0,$IN01_4,${S1}
	umlal	$ACC4,$IN01_4,${R0}
	umlal	$ACC1,$IN01_4,${S2}
	umlal	$ACC2,$IN01_4,${S3}
.Lshort_tail:
|
||||
////////////////////////////////////////////////////////////////
|
||||
// horizontal add
|
||||
|
||||
addp $ACC3,$ACC3,$ACC3
|
||||
ldp d8,d9,[sp,#16] // meet ABI requirements
|
||||
addp $ACC0,$ACC0,$ACC0
|
||||
ldp d10,d11,[sp,#32]
|
||||
addp $ACC4,$ACC4,$ACC4
|
||||
ldp d12,d13,[sp,#48]
|
||||
addp $ACC1,$ACC1,$ACC1
|
||||
ldp d14,d15,[sp,#64]
|
||||
addp $ACC2,$ACC2,$ACC2
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// lazy reduction, but without narrowing
|
||||
|
||||
ushr $T0.2d,$ACC3,#26
|
||||
and $ACC3,$ACC3,$MASK.2d
|
||||
ushr $T1.2d,$ACC0,#26
|
||||
and $ACC0,$ACC0,$MASK.2d
|
||||
|
||||
add $ACC4,$ACC4,$T0.2d // h3 -> h4
|
||||
add $ACC1,$ACC1,$T1.2d // h0 -> h1
|
||||
|
||||
ushr $T0.2d,$ACC4,#26
|
||||
and $ACC4,$ACC4,$MASK.2d
|
||||
ushr $T1.2d,$ACC1,#26
|
||||
and $ACC1,$ACC1,$MASK.2d
|
||||
add $ACC2,$ACC2,$T1.2d // h1 -> h2
|
||||
|
||||
add $ACC0,$ACC0,$T0.2d
|
||||
shl $T0.2d,$T0.2d,#2
|
||||
ushr $T1.2d,$ACC2,#26
|
||||
and $ACC2,$ACC2,$MASK.2d
|
||||
add $ACC0,$ACC0,$T0.2d // h4 -> h0
|
||||
add $ACC3,$ACC3,$T1.2d // h2 -> h3
|
||||
|
||||
ushr $T0.2d,$ACC0,#26
|
||||
and $ACC0,$ACC0,$MASK.2d
|
||||
ushr $T1.2d,$ACC3,#26
|
||||
and $ACC3,$ACC3,$MASK.2d
|
||||
add $ACC1,$ACC1,$T0.2d // h0 -> h1
|
||||
add $ACC4,$ACC4,$T1.2d // h3 -> h4
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// write the result, can be partially reduced
|
||||
|
||||
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
|
||||
st1 {$ACC4}[0],[$ctx]
|
||||
|
||||
.Lno_data_neon:
|
||||
ldr x29,[sp],#80
|
||||
ret
|
||||
.size poly1305_blocks_neon,.-poly1305_blocks_neon
|
||||
|
||||
.type poly1305_emit_neon,%function
|
||||
.align 5
|
||||
poly1305_emit_neon:
|
||||
ldr $is_base2_26,[$ctx,#24]
|
||||
cbz $is_base2_26,poly1305_emit
|
||||
|
||||
ldp w10,w11,[$ctx] // load hash value base 2^26
|
||||
ldp w12,w13,[$ctx,#8]
|
||||
ldr w14,[$ctx,#16]
|
||||
|
||||
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
|
||||
lsr $h1,x12,#12
|
||||
adds $h0,$h0,x12,lsl#52
|
||||
add $h1,$h1,x13,lsl#14
|
||||
adc $h1,$h1,xzr
|
||||
lsr $h2,x14,#24
|
||||
adds $h1,$h1,x14,lsl#40
|
||||
adc $h2,$h2,xzr // can be partially reduced...
|
||||
|
||||
ldp $t0,$t1,[$nonce] // load nonce
|
||||
|
||||
and $d0,$h2,#-4 // ... so reduce
|
||||
add $d0,$d0,$h2,lsr#2
|
||||
and $h2,$h2,#3
|
||||
adds $h0,$h0,$d0
|
||||
adcs $h1,$h1,xzr
|
||||
adc $h2,$h2,xzr
|
||||
|
||||
adds $d0,$h0,#5 // compare to modulus
|
||||
adcs $d1,$h1,xzr
|
||||
adc $d2,$h2,xzr
|
||||
|
||||
tst $d2,#-4 // see if it's carried/borrowed
|
||||
|
||||
csel $h0,$h0,$d0,eq
|
||||
csel $h1,$h1,$d1,eq
|
||||
|
||||
#ifdef __AARCH64EB__
|
||||
ror $t0,$t0,#32 // flip nonce words
|
||||
ror $t1,$t1,#32
|
||||
#endif
|
||||
adds $h0,$h0,$t0 // accumulate nonce
|
||||
adc $h1,$h1,$t1
|
||||
#ifdef __AARCH64EB__
|
||||
rev $h0,$h0 // flip output bytes
|
||||
rev $h1,$h1
|
||||
#endif
|
||||
stp $h0,$h1,[$mac] // write result
|
||||
|
||||
ret
|
||||
.size poly1305_emit_neon,.-poly1305_emit_neon
|
||||
#endif
|
||||
|
||||
.align 5
|
||||
.Lzeros:
|
||||
.long 0,0,0,0,0,0,0,0
|
||||
#ifndef __KERNEL__
|
||||
.LOPENSSL_armcap_P:
|
||||
#ifdef __ILP32__
|
||||
.long OPENSSL_armcap_P-.
|
||||
#else
|
||||
.quad OPENSSL_armcap_P-.
|
||||
#endif
|
||||
#endif
|
||||
.align 2
|
||||
___
|
||||
|
||||
open SELF,$0;
|
||||
while(<SELF>) {
|
||||
next if (/^#!/);
|
||||
last if (!s/^#/\/\// and !/^$/);
|
||||
print;
|
||||
}
|
||||
close SELF;
|
||||
|
||||
foreach (split("\n",$code)) {
|
||||
s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
|
||||
s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
|
||||
(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
|
||||
(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
|
||||
(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
|
||||
(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
|
||||
(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
|
||||
|
||||
s/\.[124]([sd])\[/.$1\[/;
|
||||
|
||||
print $_,"\n";
|
||||
}
|
||||
close STDOUT;
|
205
net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c
Normal file
205
net/wireguard/crypto/zinc/poly1305/poly1305-donna32.c
Normal file
@ -0,0 +1,205 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is based in part on Andrew Moon's poly1305-donna, which is in the
|
||||
* public domain.
|
||||
*/
|
||||
|
||||
struct poly1305_internal {
|
||||
u32 h[5];
|
||||
u32 r[5];
|
||||
u32 s[4];
|
||||
};
|
||||
|
||||
static void poly1305_init_generic(void *ctx, const u8 key[16])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
|
||||
st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
|
||||
st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
|
||||
st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
|
||||
st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
|
||||
|
||||
/* s = 5*r */
|
||||
st->s[0] = st->r[1] * 5;
|
||||
st->s[1] = st->r[2] * 5;
|
||||
st->s[2] = st->r[3] * 5;
|
||||
st->s[3] = st->r[4] * 5;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
st->h[3] = 0;
|
||||
st->h[4] = 0;
|
||||
}
|
||||
|
||||
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
|
||||
const u32 padbit)
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
const u32 hibit = padbit << 24;
|
||||
u32 r0, r1, r2, r3, r4;
|
||||
u32 s1, s2, s3, s4;
|
||||
u32 h0, h1, h2, h3, h4;
|
||||
u64 d0, d1, d2, d3, d4;
|
||||
u32 c;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
r3 = st->r[3];
|
||||
r4 = st->r[4];
|
||||
|
||||
s1 = st->s[0];
|
||||
s2 = st->s[1];
|
||||
s3 = st->s[2];
|
||||
s4 = st->s[3];
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
h3 = st->h[3];
|
||||
h4 = st->h[4];
|
||||
|
||||
while (len >= POLY1305_BLOCK_SIZE) {
|
||||
/* h += m[i] */
|
||||
h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
|
||||
h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
|
||||
h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
|
||||
h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
|
||||
h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
|
||||
((u64)h2 * s3) + ((u64)h3 * s2) +
|
||||
((u64)h4 * s1);
|
||||
d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
|
||||
((u64)h2 * s4) + ((u64)h3 * s3) +
|
||||
((u64)h4 * s2);
|
||||
d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
|
||||
((u64)h2 * r0) + ((u64)h3 * s4) +
|
||||
((u64)h4 * s3);
|
||||
d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
|
||||
((u64)h2 * r1) + ((u64)h3 * r0) +
|
||||
((u64)h4 * s4);
|
||||
d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
|
||||
((u64)h2 * r2) + ((u64)h3 * r1) +
|
||||
((u64)h4 * r0);
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = (u32)(d0 >> 26);
|
||||
h0 = (u32)d0 & 0x3ffffff;
|
||||
d1 += c;
|
||||
c = (u32)(d1 >> 26);
|
||||
h1 = (u32)d1 & 0x3ffffff;
|
||||
d2 += c;
|
||||
c = (u32)(d2 >> 26);
|
||||
h2 = (u32)d2 & 0x3ffffff;
|
||||
d3 += c;
|
||||
c = (u32)(d3 >> 26);
|
||||
h3 = (u32)d3 & 0x3ffffff;
|
||||
d4 += c;
|
||||
c = (u32)(d4 >> 26);
|
||||
h4 = (u32)d4 & 0x3ffffff;
|
||||
h0 += c * 5;
|
||||
c = (h0 >> 26);
|
||||
h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
input += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
st->h[3] = h3;
|
||||
st->h[4] = h4;
|
||||
}
|
||||
|
||||
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
u32 h0, h1, h2, h3, h4, c;
|
||||
u32 g0, g1, g2, g3, g4;
|
||||
u64 f;
|
||||
u32 mask;
|
||||
|
||||
/* fully carry h */
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
h3 = st->h[3];
|
||||
h4 = st->h[4];
|
||||
|
||||
c = h1 >> 26;
|
||||
h1 = h1 & 0x3ffffff;
|
||||
h2 += c;
|
||||
c = h2 >> 26;
|
||||
h2 = h2 & 0x3ffffff;
|
||||
h3 += c;
|
||||
c = h3 >> 26;
|
||||
h3 = h3 & 0x3ffffff;
|
||||
h4 += c;
|
||||
c = h4 >> 26;
|
||||
h4 = h4 & 0x3ffffff;
|
||||
h0 += c * 5;
|
||||
c = h0 >> 26;
|
||||
h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
/* compute h + -p */
|
||||
g0 = h0 + 5;
|
||||
c = g0 >> 26;
|
||||
g0 &= 0x3ffffff;
|
||||
g1 = h1 + c;
|
||||
c = g1 >> 26;
|
||||
g1 &= 0x3ffffff;
|
||||
g2 = h2 + c;
|
||||
c = g2 >> 26;
|
||||
g2 &= 0x3ffffff;
|
||||
g3 = h3 + c;
|
||||
c = g3 >> 26;
|
||||
g3 &= 0x3ffffff;
|
||||
g4 = h4 + c - (1UL << 26);
|
||||
|
||||
/* select h if h < p, or h + -p if h >= p */
|
||||
mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
|
||||
g0 &= mask;
|
||||
g1 &= mask;
|
||||
g2 &= mask;
|
||||
g3 &= mask;
|
||||
g4 &= mask;
|
||||
mask = ~mask;
|
||||
|
||||
h0 = (h0 & mask) | g0;
|
||||
h1 = (h1 & mask) | g1;
|
||||
h2 = (h2 & mask) | g2;
|
||||
h3 = (h3 & mask) | g3;
|
||||
h4 = (h4 & mask) | g4;
|
||||
|
||||
/* h = h % (2^128) */
|
||||
h0 = ((h0) | (h1 << 26)) & 0xffffffff;
|
||||
h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
|
||||
h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
|
||||
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
|
||||
|
||||
/* mac = (h + nonce) % (2^128) */
|
||||
f = (u64)h0 + nonce[0];
|
||||
h0 = (u32)f;
|
||||
f = (u64)h1 + nonce[1] + (f >> 32);
|
||||
h1 = (u32)f;
|
||||
f = (u64)h2 + nonce[2] + (f >> 32);
|
||||
h2 = (u32)f;
|
||||
f = (u64)h3 + nonce[3] + (f >> 32);
|
||||
h3 = (u32)f;
|
||||
|
||||
put_unaligned_le32(h0, &mac[0]);
|
||||
put_unaligned_le32(h1, &mac[4]);
|
||||
put_unaligned_le32(h2, &mac[8]);
|
||||
put_unaligned_le32(h3, &mac[12]);
|
||||
}
|
182
net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c
Normal file
182
net/wireguard/crypto/zinc/poly1305/poly1305-donna64.c
Normal file
@ -0,0 +1,182 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* This is based in part on Andrew Moon's poly1305-donna, which is in the
|
||||
* public domain.
|
||||
*/
|
||||
|
||||
typedef __uint128_t u128;
|
||||
|
||||
struct poly1305_internal {
|
||||
u64 r[3];
|
||||
u64 h[3];
|
||||
u64 s[2];
|
||||
};
|
||||
|
||||
static void poly1305_init_generic(void *ctx, const u8 key[16])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
u64 t0, t1;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
t0 = get_unaligned_le64(&key[0]);
|
||||
t1 = get_unaligned_le64(&key[8]);
|
||||
|
||||
st->r[0] = t0 & 0xffc0fffffffULL;
|
||||
st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
|
||||
st->r[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
|
||||
|
||||
/* s = 20*r */
|
||||
st->s[0] = st->r[1] * 20;
|
||||
st->s[1] = st->r[2] * 20;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
}
|
||||
|
||||
static void poly1305_blocks_generic(void *ctx, const u8 *input, size_t len,
|
||||
const u32 padbit)
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
const u64 hibit = ((u64)padbit) << 40;
|
||||
u64 r0, r1, r2;
|
||||
u64 s1, s2;
|
||||
u64 h0, h1, h2;
|
||||
u64 c;
|
||||
u128 d0, d1, d2, d;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
|
||||
s1 = st->s[0];
|
||||
s2 = st->s[1];
|
||||
|
||||
while (len >= POLY1305_BLOCK_SIZE) {
|
||||
u64 t0, t1;
|
||||
|
||||
/* h += m[i] */
|
||||
t0 = get_unaligned_le64(&input[0]);
|
||||
t1 = get_unaligned_le64(&input[8]);
|
||||
|
||||
h0 += t0 & 0xfffffffffffULL;
|
||||
h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
|
||||
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
d0 = (u128)h0 * r0;
|
||||
d = (u128)h1 * s2;
|
||||
d0 += d;
|
||||
d = (u128)h2 * s1;
|
||||
d0 += d;
|
||||
d1 = (u128)h0 * r1;
|
||||
d = (u128)h1 * r0;
|
||||
d1 += d;
|
||||
d = (u128)h2 * s2;
|
||||
d1 += d;
|
||||
d2 = (u128)h0 * r2;
|
||||
d = (u128)h1 * r1;
|
||||
d2 += d;
|
||||
d = (u128)h2 * r0;
|
||||
d2 += d;
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = (u64)(d0 >> 44);
|
||||
h0 = (u64)d0 & 0xfffffffffffULL;
|
||||
d1 += c;
|
||||
c = (u64)(d1 >> 44);
|
||||
h1 = (u64)d1 & 0xfffffffffffULL;
|
||||
d2 += c;
|
||||
c = (u64)(d2 >> 42);
|
||||
h2 = (u64)d2 & 0x3ffffffffffULL;
|
||||
h0 += c * 5;
|
||||
c = h0 >> 44;
|
||||
h0 = h0 & 0xfffffffffffULL;
|
||||
h1 += c;
|
||||
|
||||
input += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
}
|
||||
|
||||
static void poly1305_emit_generic(void *ctx, u8 mac[16], const u32 nonce[4])
|
||||
{
|
||||
struct poly1305_internal *st = (struct poly1305_internal *)ctx;
|
||||
u64 h0, h1, h2, c;
|
||||
u64 g0, g1, g2;
|
||||
u64 t0, t1;
|
||||
|
||||
/* fully carry h */
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
|
||||
c = h1 >> 44;
|
||||
h1 &= 0xfffffffffffULL;
|
||||
h2 += c;
|
||||
c = h2 >> 42;
|
||||
h2 &= 0x3ffffffffffULL;
|
||||
h0 += c * 5;
|
||||
c = h0 >> 44;
|
||||
h0 &= 0xfffffffffffULL;
|
||||
h1 += c;
|
||||
c = h1 >> 44;
|
||||
h1 &= 0xfffffffffffULL;
|
||||
h2 += c;
|
||||
c = h2 >> 42;
|
||||
h2 &= 0x3ffffffffffULL;
|
||||
h0 += c * 5;
|
||||
c = h0 >> 44;
|
||||
h0 &= 0xfffffffffffULL;
|
||||
h1 += c;
|
||||
|
||||
/* compute h + -p */
|
||||
g0 = h0 + 5;
|
||||
c = g0 >> 44;
|
||||
g0 &= 0xfffffffffffULL;
|
||||
g1 = h1 + c;
|
||||
c = g1 >> 44;
|
||||
g1 &= 0xfffffffffffULL;
|
||||
g2 = h2 + c - (1ULL << 42);
|
||||
|
||||
/* select h if h < p, or h + -p if h >= p */
|
||||
c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
|
||||
g0 &= c;
|
||||
g1 &= c;
|
||||
g2 &= c;
|
||||
c = ~c;
|
||||
h0 = (h0 & c) | g0;
|
||||
h1 = (h1 & c) | g1;
|
||||
h2 = (h2 & c) | g2;
|
||||
|
||||
/* h = (h + nonce) */
|
||||
t0 = ((u64)nonce[1] << 32) | nonce[0];
|
||||
t1 = ((u64)nonce[3] << 32) | nonce[2];
|
||||
|
||||
h0 += t0 & 0xfffffffffffULL;
|
||||
c = h0 >> 44;
|
||||
h0 &= 0xfffffffffffULL;
|
||||
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
|
||||
c = h1 >> 44;
|
||||
h1 &= 0xfffffffffffULL;
|
||||
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
|
||||
h2 &= 0x3ffffffffffULL;
|
||||
|
||||
/* mac = h % (2^128) */
|
||||
h0 = h0 | (h1 << 44);
|
||||
h1 = (h1 >> 20) | (h2 << 24);
|
||||
|
||||
put_unaligned_le64(h0, &mac[0]);
|
||||
put_unaligned_le64(h1, &mac[8]);
|
||||
}
|
37
net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c
Normal file
37
net/wireguard/crypto/zinc/poly1305/poly1305-mips-glue.c
Normal file
@ -0,0 +1,37 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
|
||||
asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
|
||||
|
||||
static bool *const poly1305_nobs[] __initconst = { };
|
||||
static void __init poly1305_fpu_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
poly1305_init_mips(ctx, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
poly1305_blocks_mips(ctx, inp, len, padbit);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
poly1305_emit_mips(ctx, mac, nonce);
|
||||
return true;
|
||||
}
|
407
net/wireguard/crypto/zinc/poly1305/poly1305-mips.S
Normal file
407
net/wireguard/crypto/zinc/poly1305/poly1305-mips.S
Normal file
@ -0,0 +1,407 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com> All Rights Reserved.
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
#define MSB 0
|
||||
#define LSB 3
|
||||
#else
|
||||
#define MSB 3
|
||||
#define LSB 0
|
||||
#endif
|
||||
|
||||
#define POLY1305_BLOCK_SIZE 16
|
||||
.text
|
||||
#define H0 $t0
|
||||
#define H1 $t1
|
||||
#define H2 $t2
|
||||
#define H3 $t3
|
||||
#define H4 $t4
|
||||
|
||||
#define R0 $t5
|
||||
#define R1 $t6
|
||||
#define R2 $t7
|
||||
#define R3 $t8
|
||||
|
||||
#define O0 $s0
|
||||
#define O1 $s4
|
||||
#define O2 $v1
|
||||
#define O3 $t9
|
||||
#define O4 $s5
|
||||
|
||||
#define S1 $s1
|
||||
#define S2 $s2
|
||||
#define S3 $s3
|
||||
|
||||
#define SC $at
|
||||
#define CA $v0
|
||||
|
||||
/* Input arguments */
|
||||
#define poly $a0
|
||||
#define src $a1
|
||||
#define srclen $a2
|
||||
#define hibit $a3
|
||||
|
||||
/* Location in the opaque buffer
|
||||
* R[0..3], CA, H[0..4]
|
||||
*/
|
||||
#define PTR_POLY1305_R(n) ( 0 + (n*4)) ## ($a0)
|
||||
#define PTR_POLY1305_CA (16 ) ## ($a0)
|
||||
#define PTR_POLY1305_H(n) (20 + (n*4)) ## ($a0)
|
||||
|
||||
#define POLY1305_BLOCK_SIZE 16
|
||||
#define POLY1305_STACK_SIZE 32
|
||||
|
||||
.set noat
|
||||
.align 4
|
||||
.globl poly1305_blocks_mips
|
||||
.ent poly1305_blocks_mips
|
||||
poly1305_blocks_mips:
|
||||
.frame $sp, POLY1305_STACK_SIZE, $ra
|
||||
/* srclen &= 0xFFFFFFF0 */
|
||||
ins srclen, $zero, 0, 4
|
||||
|
||||
addiu $sp, -(POLY1305_STACK_SIZE)
|
||||
|
||||
/* check srclen >= 16 bytes */
|
||||
beqz srclen, .Lpoly1305_blocks_mips_end
|
||||
|
||||
/* Calculate last round based on src address pointer.
|
||||
* last round src ptr (srclen) = src + (srclen & 0xFFFFFFF0)
|
||||
*/
|
||||
addu srclen, src
|
||||
|
||||
lw R0, PTR_POLY1305_R(0)
|
||||
lw R1, PTR_POLY1305_R(1)
|
||||
lw R2, PTR_POLY1305_R(2)
|
||||
lw R3, PTR_POLY1305_R(3)
|
||||
|
||||
/* store the used save registers. */
|
||||
sw $s0, 0($sp)
|
||||
sw $s1, 4($sp)
|
||||
sw $s2, 8($sp)
|
||||
sw $s3, 12($sp)
|
||||
sw $s4, 16($sp)
|
||||
sw $s5, 20($sp)
|
||||
|
||||
/* load Hx and Carry */
|
||||
lw CA, PTR_POLY1305_CA
|
||||
lw H0, PTR_POLY1305_H(0)
|
||||
lw H1, PTR_POLY1305_H(1)
|
||||
lw H2, PTR_POLY1305_H(2)
|
||||
lw H3, PTR_POLY1305_H(3)
|
||||
lw H4, PTR_POLY1305_H(4)
|
||||
|
||||
/* Sx = Rx + (Rx >> 2) */
|
||||
srl S1, R1, 2
|
||||
srl S2, R2, 2
|
||||
srl S3, R3, 2
|
||||
addu S1, R1
|
||||
addu S2, R2
|
||||
addu S3, R3
|
||||
|
||||
addiu SC, $zero, 1
|
||||
|
||||
.Lpoly1305_loop:
|
||||
lwl O0, 0+MSB(src)
|
||||
lwl O1, 4+MSB(src)
|
||||
lwl O2, 8+MSB(src)
|
||||
lwl O3,12+MSB(src)
|
||||
lwr O0, 0+LSB(src)
|
||||
lwr O1, 4+LSB(src)
|
||||
lwr O2, 8+LSB(src)
|
||||
lwr O3,12+LSB(src)
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
wsbh O0
|
||||
wsbh O1
|
||||
wsbh O2
|
||||
wsbh O3
|
||||
rotr O0, 16
|
||||
rotr O1, 16
|
||||
rotr O2, 16
|
||||
rotr O3, 16
|
||||
#endif
|
||||
|
||||
/* h0 = (u32)(d0 = (u64)h0 + inp[0] + c 'Carry_previous cycle'); */
|
||||
addu H0, CA
|
||||
sltu CA, H0, CA
|
||||
addu O0, H0
|
||||
sltu H0, O0, H0
|
||||
addu CA, H0
|
||||
|
||||
/* h1 = (u32)(d1 = (u64)h1 + (d0 >> 32) + inp[4]); */
|
||||
addu H1, CA
|
||||
sltu CA, H1, CA
|
||||
addu O1, H1
|
||||
sltu H1, O1, H1
|
||||
addu CA, H1
|
||||
|
||||
/* h2 = (u32)(d2 = (u64)h2 + (d1 >> 32) + inp[8]); */
|
||||
addu H2, CA
|
||||
sltu CA, H2, CA
|
||||
addu O2, H2
|
||||
sltu H2, O2, H2
|
||||
addu CA, H2
|
||||
|
||||
/* h3 = (u32)(d3 = (u64)h3 + (d2 >> 32) + inp[12]); */
|
||||
addu H3, CA
|
||||
sltu CA, H3, CA
|
||||
addu O3, H3
|
||||
sltu H3, O3, H3
|
||||
addu CA, H3
|
||||
|
||||
/* h4 += (u32)(d3 >> 32) + padbit; */
|
||||
addu H4, hibit
|
||||
addu O4, H4, CA
|
||||
|
||||
/* D0 */
|
||||
multu O0, R0
|
||||
maddu O1, S3
|
||||
maddu O2, S2
|
||||
maddu O3, S1
|
||||
mfhi CA
|
||||
mflo H0
|
||||
|
||||
/* D1 */
|
||||
multu O0, R1
|
||||
maddu O1, R0
|
||||
maddu O2, S3
|
||||
maddu O3, S2
|
||||
maddu O4, S1
|
||||
maddu CA, SC
|
||||
mfhi CA
|
||||
mflo H1
|
||||
|
||||
/* D2 */
|
||||
multu O0, R2
|
||||
maddu O1, R1
|
||||
maddu O2, R0
|
||||
maddu O3, S3
|
||||
maddu O4, S2
|
||||
maddu CA, SC
|
||||
mfhi CA
|
||||
mflo H2
|
||||
|
||||
/* D4 */
|
||||
mul H4, O4, R0
|
||||
|
||||
/* D3 */
|
||||
multu O0, R3
|
||||
maddu O1, R2
|
||||
maddu O2, R1
|
||||
maddu O3, R0
|
||||
maddu O4, S3
|
||||
maddu CA, SC
|
||||
mfhi CA
|
||||
mflo H3
|
||||
|
||||
addiu src, POLY1305_BLOCK_SIZE
|
||||
|
||||
/* h4 += (u32)(d3 >> 32); */
|
||||
addu O4, H4, CA
|
||||
/* h4 &= 3 */
|
||||
andi H4, O4, 3
|
||||
/* c = (h4 >> 2) + (h4 & ~3U); */
|
||||
srl CA, O4, 2
|
||||
ins O4, $zero, 0, 2
|
||||
|
||||
addu CA, O4
|
||||
|
||||
/* able to do a 16 byte block. */
|
||||
bne src, srclen, .Lpoly1305_loop
|
||||
|
||||
/* restore the used save registers. */
|
||||
lw $s0, 0($sp)
|
||||
lw $s1, 4($sp)
|
||||
lw $s2, 8($sp)
|
||||
lw $s3, 12($sp)
|
||||
lw $s4, 16($sp)
|
||||
lw $s5, 20($sp)
|
||||
|
||||
/* store Hx and Carry */
|
||||
sw CA, PTR_POLY1305_CA
|
||||
sw H0, PTR_POLY1305_H(0)
|
||||
sw H1, PTR_POLY1305_H(1)
|
||||
sw H2, PTR_POLY1305_H(2)
|
||||
sw H3, PTR_POLY1305_H(3)
|
||||
sw H4, PTR_POLY1305_H(4)
|
||||
|
||||
.Lpoly1305_blocks_mips_end:
|
||||
addiu $sp, POLY1305_STACK_SIZE
|
||||
|
||||
/* Jump Back */
|
||||
jr $ra
|
||||
.end poly1305_blocks_mips
|
||||
.set at
|
||||
|
||||
/* Input arguments CTX=$a0, MAC=$a1, NONCE=$a2 */
|
||||
#define MAC $a1
|
||||
#define NONCE $a2
|
||||
|
||||
#define G0 $t5
|
||||
#define G1 $t6
|
||||
#define G2 $t7
|
||||
#define G3 $t8
|
||||
#define G4 $t9
|
||||
|
||||
.set noat
|
||||
.align 4
|
||||
.globl poly1305_emit_mips
|
||||
.ent poly1305_emit_mips
|
||||
poly1305_emit_mips:
|
||||
/* load Hx and Carry */
|
||||
lw CA, PTR_POLY1305_CA
|
||||
lw H0, PTR_POLY1305_H(0)
|
||||
lw H1, PTR_POLY1305_H(1)
|
||||
lw H2, PTR_POLY1305_H(2)
|
||||
lw H3, PTR_POLY1305_H(3)
|
||||
lw H4, PTR_POLY1305_H(4)
|
||||
|
||||
/* Add left over carry */
|
||||
addu H0, CA
|
||||
sltu CA, H0, CA
|
||||
addu H1, CA
|
||||
sltu CA, H1, CA
|
||||
addu H2, CA
|
||||
sltu CA, H2, CA
|
||||
addu H3, CA
|
||||
sltu CA, H3, CA
|
||||
addu H4, CA
|
||||
|
||||
/* compare to modulus by computing h + -p */
|
||||
addiu G0, H0, 5
|
||||
sltu CA, G0, H0
|
||||
addu G1, H1, CA
|
||||
sltu CA, G1, H1
|
||||
addu G2, H2, CA
|
||||
sltu CA, G2, H2
|
||||
addu G3, H3, CA
|
||||
sltu CA, G3, H3
|
||||
addu G4, H4, CA
|
||||
|
||||
srl SC, G4, 2
|
||||
|
||||
/* if there was carry into 131st bit, h3:h0 = g3:g0 */
|
||||
movn H0, G0, SC
|
||||
movn H1, G1, SC
|
||||
movn H2, G2, SC
|
||||
movn H3, G3, SC
|
||||
|
||||
lwl G0, 0+MSB(NONCE)
|
||||
lwl G1, 4+MSB(NONCE)
|
||||
lwl G2, 8+MSB(NONCE)
|
||||
lwl G3,12+MSB(NONCE)
|
||||
lwr G0, 0+LSB(NONCE)
|
||||
lwr G1, 4+LSB(NONCE)
|
||||
lwr G2, 8+LSB(NONCE)
|
||||
lwr G3,12+LSB(NONCE)
|
||||
|
||||
/* mac = (h + nonce) % (2^128) */
|
||||
addu H0, G0
|
||||
sltu CA, H0, G0
|
||||
|
||||
/* H1 */
|
||||
addu H1, CA
|
||||
sltu CA, H1, CA
|
||||
addu H1, G1
|
||||
sltu G1, H1, G1
|
||||
addu CA, G1
|
||||
|
||||
/* H2 */
|
||||
addu H2, CA
|
||||
sltu CA, H2, CA
|
||||
addu H2, G2
|
||||
sltu G2, H2, G2
|
||||
addu CA, G2
|
||||
|
||||
/* H3 */
|
||||
addu H3, CA
|
||||
addu H3, G3
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
wsbh H0
|
||||
wsbh H1
|
||||
wsbh H2
|
||||
wsbh H3
|
||||
rotr H0, 16
|
||||
rotr H1, 16
|
||||
rotr H2, 16
|
||||
rotr H3, 16
|
||||
#endif
|
||||
|
||||
/* store MAC */
|
||||
swl H0, 0+MSB(MAC)
|
||||
swl H1, 4+MSB(MAC)
|
||||
swl H2, 8+MSB(MAC)
|
||||
swl H3,12+MSB(MAC)
|
||||
swr H0, 0+LSB(MAC)
|
||||
swr H1, 4+LSB(MAC)
|
||||
swr H2, 8+LSB(MAC)
|
||||
swr H3,12+LSB(MAC)
|
||||
|
||||
jr $ra
|
||||
.end poly1305_emit_mips
|
||||
|
||||
#define PR0 $t0
|
||||
#define PR1 $t1
|
||||
#define PR2 $t2
|
||||
#define PR3 $t3
|
||||
#define PT0 $t4
|
||||
|
||||
/* Input arguments CTX=$a0, KEY=$a1 */
|
||||
|
||||
.align 4
|
||||
.globl poly1305_init_mips
|
||||
.ent poly1305_init_mips
|
||||
poly1305_init_mips:
|
||||
lwl PR0, 0+MSB($a1)
|
||||
lwl PR1, 4+MSB($a1)
|
||||
lwl PR2, 8+MSB($a1)
|
||||
lwl PR3,12+MSB($a1)
|
||||
lwr PR0, 0+LSB($a1)
|
||||
lwr PR1, 4+LSB($a1)
|
||||
lwr PR2, 8+LSB($a1)
|
||||
lwr PR3,12+LSB($a1)
|
||||
|
||||
/* store Hx and Carry */
|
||||
sw $zero, PTR_POLY1305_CA
|
||||
sw $zero, PTR_POLY1305_H(0)
|
||||
sw $zero, PTR_POLY1305_H(1)
|
||||
sw $zero, PTR_POLY1305_H(2)
|
||||
sw $zero, PTR_POLY1305_H(3)
|
||||
sw $zero, PTR_POLY1305_H(4)
|
||||
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
wsbh PR0
|
||||
wsbh PR1
|
||||
wsbh PR2
|
||||
wsbh PR3
|
||||
rotr PR0, 16
|
||||
rotr PR1, 16
|
||||
rotr PR2, 16
|
||||
rotr PR3, 16
|
||||
#endif
|
||||
|
||||
lui PT0, 0x0FFF
|
||||
ori PT0, 0xFFFC
|
||||
|
||||
/* AND 0x0fffffff; */
|
||||
ext PR0, PR0, 0, (32-4)
|
||||
|
||||
/* AND 0x0ffffffc; */
|
||||
and PR1, PT0
|
||||
and PR2, PT0
|
||||
and PR3, PT0
|
||||
|
||||
/* store Rx */
|
||||
sw PR0, PTR_POLY1305_R(0)
|
||||
sw PR1, PTR_POLY1305_R(1)
|
||||
sw PR2, PTR_POLY1305_R(2)
|
||||
sw PR3, PTR_POLY1305_R(3)
|
||||
|
||||
/* Jump Back */
|
||||
jr $ra
|
||||
.end poly1305_init_mips
|
467
net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl
Normal file
467
net/wireguard/crypto/zinc/poly1305/poly1305-mips64.pl
Normal file
@ -0,0 +1,467 @@
|
||||
#!/usr/bin/env perl
|
||||
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
#
|
||||
# This code is taken from the OpenSSL project but the author, Andy Polyakov,
|
||||
# has relicensed it under the licenses specified in the SPDX header above.
|
||||
# The original headers, including the original license headers, are
|
||||
# included below for completeness.
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
# project. The module is, however, dual licensed under OpenSSL and
|
||||
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
||||
# details see http://www.openssl.org/~appro/cryptogams/.
|
||||
# ====================================================================
|
||||
#
|
||||
# Poly1305 hash for MIPS64.
|
||||
#
|
||||
# May 2016
|
||||
#
|
||||
# Numbers are cycles per processed byte with poly1305_blocks alone.
|
||||
#
|
||||
# IALU/gcc
|
||||
# R1x000 5.64/+120% (big-endian)
|
||||
# Octeon II 3.80/+280% (little-endian)
|
||||
|
||||
######################################################################
|
||||
# There is a number of MIPS ABI in use, O32 and N32/64 are most
|
||||
# widely used. Then there is a new contender: NUBI. It appears that if
|
||||
# one picks the latter, it's possible to arrange code in ABI neutral
|
||||
# manner. Therefore let's stick to NUBI register layout:
|
||||
#
|
||||
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
|
||||
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
|
||||
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
|
||||
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
|
||||
#
|
||||
# The return value is placed in $a0. Following coding rules facilitate
|
||||
# interoperability:
|
||||
#
|
||||
# - never ever touch $tp, "thread pointer", former $gp [o32 can be
|
||||
# excluded from the rule, because it's specified volatile];
|
||||
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
|
||||
# old code];
|
||||
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
|
||||
#
|
||||
# For reference here is register layout for N32/64 MIPS ABIs:
|
||||
#
|
||||
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
|
||||
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
|
||||
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
|
||||
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
|
||||
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
|
||||
#
|
||||
# <appro@openssl.org>
|
||||
#
|
||||
######################################################################
|
||||
|
||||
$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
|
||||
|
||||
die "MIPS64 only" unless ($flavour =~ /64|n32/i);
|
||||
|
||||
$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
|
||||
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
|
||||
|
||||
($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
|
||||
($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
|
||||
|
||||
$code.=<<___;
|
||||
#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
|
||||
defined(_MIPS_ARCH_MIPS64R6)) \\
|
||||
&& !defined(_MIPS_ARCH_MIPS64R2)
|
||||
# define _MIPS_ARCH_MIPS64R2
|
||||
#endif
|
||||
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
# define dmultu(rs,rt)
|
||||
# define mflo(rd,rs,rt) dmulu rd,rs,rt
|
||||
# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
|
||||
#else
|
||||
# define dmultu(rs,rt) dmultu rs,rt
|
||||
# define mflo(rd,rs,rt) mflo rd
|
||||
# define mfhi(rd,rs,rt) mfhi rd
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL__
|
||||
# define poly1305_init poly1305_init_mips
|
||||
# define poly1305_blocks poly1305_blocks_mips
|
||||
# define poly1305_emit poly1305_emit_mips
|
||||
#endif
|
||||
|
||||
#if defined(__MIPSEB__) && !defined(MIPSEB)
|
||||
# define MIPSEB
|
||||
#endif
|
||||
|
||||
#ifdef MIPSEB
|
||||
# define MSB 0
|
||||
# define LSB 7
|
||||
#else
|
||||
# define MSB 7
|
||||
# define LSB 0
|
||||
#endif
|
||||
|
||||
.text
|
||||
.set noat
|
||||
.set noreorder
|
||||
|
||||
.align 5
|
||||
.globl poly1305_init
|
||||
.ent poly1305_init
|
||||
poly1305_init:
|
||||
.frame $sp,0,$ra
|
||||
.set reorder
|
||||
|
||||
sd $zero,0($ctx)
|
||||
sd $zero,8($ctx)
|
||||
sd $zero,16($ctx)
|
||||
|
||||
beqz $inp,.Lno_key
|
||||
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
ld $in0,0($inp)
|
||||
ld $in1,8($inp)
|
||||
#else
|
||||
ldl $in0,0+MSB($inp)
|
||||
ldl $in1,8+MSB($inp)
|
||||
ldr $in0,0+LSB($inp)
|
||||
ldr $in1,8+LSB($inp)
|
||||
#endif
|
||||
#ifdef MIPSEB
|
||||
# if defined(_MIPS_ARCH_MIPS64R2)
|
||||
dsbh $in0,$in0 # byte swap
|
||||
dsbh $in1,$in1
|
||||
dshd $in0,$in0
|
||||
dshd $in1,$in1
|
||||
# else
|
||||
ori $tmp0,$zero,0xFF
|
||||
dsll $tmp2,$tmp0,32
|
||||
or $tmp0,$tmp2 # 0x000000FF000000FF
|
||||
|
||||
and $tmp1,$in0,$tmp0 # byte swap
|
||||
and $tmp3,$in1,$tmp0
|
||||
dsrl $tmp2,$in0,24
|
||||
dsrl $tmp4,$in1,24
|
||||
dsll $tmp1,24
|
||||
dsll $tmp3,24
|
||||
and $tmp2,$tmp0
|
||||
and $tmp4,$tmp0
|
||||
dsll $tmp0,8 # 0x0000FF000000FF00
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
and $tmp2,$in0,$tmp0
|
||||
and $tmp4,$in1,$tmp0
|
||||
dsrl $in0,8
|
||||
dsrl $in1,8
|
||||
dsll $tmp2,8
|
||||
dsll $tmp4,8
|
||||
and $in0,$tmp0
|
||||
and $in1,$tmp0
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
dsrl $tmp1,$in0,32
|
||||
dsrl $tmp3,$in1,32
|
||||
dsll $in0,32
|
||||
dsll $in1,32
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
# endif
|
||||
#endif
|
||||
li $tmp0,1
|
||||
dsll $tmp0,32
|
||||
daddiu $tmp0,-63
|
||||
dsll $tmp0,28
|
||||
daddiu $tmp0,-1 # 0ffffffc0fffffff
|
||||
|
||||
and $in0,$tmp0
|
||||
daddiu $tmp0,-3 # 0ffffffc0ffffffc
|
||||
and $in1,$tmp0
|
||||
|
||||
sd $in0,24($ctx)
|
||||
dsrl $tmp0,$in1,2
|
||||
sd $in1,32($ctx)
|
||||
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
|
||||
sd $tmp0,40($ctx)
|
||||
|
||||
.Lno_key:
|
||||
li $v0,0 # return 0
|
||||
jr $ra
|
||||
.end poly1305_init
|
||||
___
|
||||
{
|
||||
my ($h0,$h1,$h2,$r0,$r1,$s1,$d0,$d1,$d2) =
|
||||
($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.globl poly1305_blocks
|
||||
.ent poly1305_blocks
|
||||
poly1305_blocks:
|
||||
.set noreorder
|
||||
dsrl $len,4 # number of complete blocks
|
||||
bnez $len,poly1305_blocks_internal
|
||||
nop
|
||||
jr $ra
|
||||
nop
|
||||
.end poly1305_blocks
|
||||
|
||||
.align 5
|
||||
.ent poly1305_blocks_internal
|
||||
poly1305_blocks_internal:
|
||||
.frame $sp,6*8,$ra
|
||||
.mask $SAVED_REGS_MASK,-8
|
||||
.set noreorder
|
||||
dsubu $sp,6*8
|
||||
sd $s5,40($sp)
|
||||
sd $s4,32($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
|
||||
sd $s3,24($sp)
|
||||
sd $s2,16($sp)
|
||||
sd $s1,8($sp)
|
||||
sd $s0,0($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
|
||||
ld $h0,0($ctx) # load hash value
|
||||
ld $h1,8($ctx)
|
||||
ld $h2,16($ctx)
|
||||
|
||||
ld $r0,24($ctx) # load key
|
||||
ld $r1,32($ctx)
|
||||
ld $s1,40($ctx)
|
||||
|
||||
.Loop:
|
||||
#if defined(_MIPS_ARCH_MIPS64R6)
|
||||
ld $in0,0($inp) # load input
|
||||
ld $in1,8($inp)
|
||||
#else
|
||||
ldl $in0,0+MSB($inp) # load input
|
||||
ldl $in1,8+MSB($inp)
|
||||
ldr $in0,0+LSB($inp)
|
||||
ldr $in1,8+LSB($inp)
|
||||
#endif
|
||||
daddiu $len,-1
|
||||
daddiu $inp,16
|
||||
#ifdef MIPSEB
|
||||
# if defined(_MIPS_ARCH_MIPS64R2)
|
||||
dsbh $in0,$in0 # byte swap
|
||||
dsbh $in1,$in1
|
||||
dshd $in0,$in0
|
||||
dshd $in1,$in1
|
||||
# else
|
||||
ori $tmp0,$zero,0xFF
|
||||
dsll $tmp2,$tmp0,32
|
||||
or $tmp0,$tmp2 # 0x000000FF000000FF
|
||||
|
||||
and $tmp1,$in0,$tmp0 # byte swap
|
||||
and $tmp3,$in1,$tmp0
|
||||
dsrl $tmp2,$in0,24
|
||||
dsrl $tmp4,$in1,24
|
||||
dsll $tmp1,24
|
||||
dsll $tmp3,24
|
||||
and $tmp2,$tmp0
|
||||
and $tmp4,$tmp0
|
||||
dsll $tmp0,8 # 0x0000FF000000FF00
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
and $tmp2,$in0,$tmp0
|
||||
and $tmp4,$in1,$tmp0
|
||||
dsrl $in0,8
|
||||
dsrl $in1,8
|
||||
dsll $tmp2,8
|
||||
dsll $tmp4,8
|
||||
and $in0,$tmp0
|
||||
and $in1,$tmp0
|
||||
or $tmp1,$tmp2
|
||||
or $tmp3,$tmp4
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
dsrl $tmp1,$in0,32
|
||||
dsrl $tmp3,$in1,32
|
||||
dsll $in0,32
|
||||
dsll $in1,32
|
||||
or $in0,$tmp1
|
||||
or $in1,$tmp3
|
||||
# endif
|
||||
#endif
|
||||
daddu $h0,$in0 # accumulate input
|
||||
daddu $h1,$in1
|
||||
sltu $tmp0,$h0,$in0
|
||||
sltu $tmp1,$h1,$in1
|
||||
daddu $h1,$tmp0
|
||||
|
||||
dmultu ($r0,$h0) # h0*r0
|
||||
daddu $h2,$padbit
|
||||
sltu $tmp0,$h1,$tmp0
|
||||
mflo ($d0,$r0,$h0)
|
||||
mfhi ($d1,$r0,$h0)
|
||||
|
||||
dmultu ($s1,$h1) # h1*5*r1
|
||||
daddu $tmp0,$tmp1
|
||||
daddu $h2,$tmp0
|
||||
mflo ($tmp0,$s1,$h1)
|
||||
mfhi ($tmp1,$s1,$h1)
|
||||
|
||||
dmultu ($r1,$h0) # h0*r1
|
||||
daddu $d0,$tmp0
|
||||
daddu $d1,$tmp1
|
||||
mflo ($tmp2,$r1,$h0)
|
||||
mfhi ($d2,$r1,$h0)
|
||||
sltu $tmp0,$d0,$tmp0
|
||||
daddu $d1,$tmp0
|
||||
|
||||
dmultu ($r0,$h1) # h1*r0
|
||||
daddu $d1,$tmp2
|
||||
sltu $tmp2,$d1,$tmp2
|
||||
mflo ($tmp0,$r0,$h1)
|
||||
mfhi ($tmp1,$r0,$h1)
|
||||
daddu $d2,$tmp2
|
||||
|
||||
dmultu ($s1,$h2) # h2*5*r1
|
||||
daddu $d1,$tmp0
|
||||
daddu $d2,$tmp1
|
||||
mflo ($tmp2,$s1,$h2)
|
||||
|
||||
dmultu ($r0,$h2) # h2*r0
|
||||
sltu $tmp0,$d1,$tmp0
|
||||
daddu $d2,$tmp0
|
||||
mflo ($tmp3,$r0,$h2)
|
||||
|
||||
daddu $d1,$tmp2
|
||||
daddu $d2,$tmp3
|
||||
sltu $tmp2,$d1,$tmp2
|
||||
daddu $d2,$tmp2
|
||||
|
||||
li $tmp0,-4 # final reduction
|
||||
and $tmp0,$d2
|
||||
dsrl $tmp1,$d2,2
|
||||
andi $h2,$d2,3
|
||||
daddu $tmp0,$tmp1
|
||||
daddu $h0,$d0,$tmp0
|
||||
sltu $tmp0,$h0,$tmp0
|
||||
daddu $h1,$d1,$tmp0
|
||||
sltu $tmp0,$h1,$tmp0
|
||||
daddu $h2,$h2,$tmp0
|
||||
|
||||
bnez $len,.Loop
|
||||
|
||||
sd $h0,0($ctx) # store hash value
|
||||
sd $h1,8($ctx)
|
||||
sd $h2,16($ctx)
|
||||
|
||||
.set noreorder
|
||||
ld $s5,40($sp) # epilogue
|
||||
ld $s4,32($sp)
|
||||
___
|
||||
$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
|
||||
ld $s3,24($sp)
|
||||
ld $s2,16($sp)
|
||||
ld $s1,8($sp)
|
||||
ld $s0,0($sp)
|
||||
___
|
||||
$code.=<<___;
|
||||
jr $ra
|
||||
daddu $sp,6*8
|
||||
.end poly1305_blocks_internal
|
||||
___
|
||||
}
|
||||
{
|
||||
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.globl poly1305_emit
|
||||
.ent poly1305_emit
|
||||
poly1305_emit:
|
||||
.frame $sp,0,$ra
|
||||
.set reorder
|
||||
|
||||
ld $tmp0,0($ctx)
|
||||
ld $tmp1,8($ctx)
|
||||
ld $tmp2,16($ctx)
|
||||
|
||||
daddiu $in0,$tmp0,5 # compare to modulus
|
||||
sltiu $tmp3,$in0,5
|
||||
daddu $in1,$tmp1,$tmp3
|
||||
sltu $tmp3,$in1,$tmp3
|
||||
daddu $tmp2,$tmp2,$tmp3
|
||||
|
||||
dsrl $tmp2,2 # see if it carried/borrowed
|
||||
dsubu $tmp2,$zero,$tmp2
|
||||
nor $tmp3,$zero,$tmp2
|
||||
|
||||
and $in0,$tmp2
|
||||
and $tmp0,$tmp3
|
||||
and $in1,$tmp2
|
||||
and $tmp1,$tmp3
|
||||
or $in0,$tmp0
|
||||
or $in1,$tmp1
|
||||
|
||||
lwu $tmp0,0($nonce) # load nonce
|
||||
lwu $tmp1,4($nonce)
|
||||
lwu $tmp2,8($nonce)
|
||||
lwu $tmp3,12($nonce)
|
||||
dsll $tmp1,32
|
||||
dsll $tmp3,32
|
||||
or $tmp0,$tmp1
|
||||
or $tmp2,$tmp3
|
||||
|
||||
daddu $in0,$tmp0 # accumulate nonce
|
||||
daddu $in1,$tmp2
|
||||
sltu $tmp0,$in0,$tmp0
|
||||
daddu $in1,$tmp0
|
||||
|
||||
dsrl $tmp0,$in0,8 # write mac value
|
||||
dsrl $tmp1,$in0,16
|
||||
dsrl $tmp2,$in0,24
|
||||
sb $in0,0($mac)
|
||||
dsrl $tmp3,$in0,32
|
||||
sb $tmp0,1($mac)
|
||||
dsrl $tmp0,$in0,40
|
||||
sb $tmp1,2($mac)
|
||||
dsrl $tmp1,$in0,48
|
||||
sb $tmp2,3($mac)
|
||||
dsrl $tmp2,$in0,56
|
||||
sb $tmp3,4($mac)
|
||||
dsrl $tmp3,$in1,8
|
||||
sb $tmp0,5($mac)
|
||||
dsrl $tmp0,$in1,16
|
||||
sb $tmp1,6($mac)
|
||||
dsrl $tmp1,$in1,24
|
||||
sb $tmp2,7($mac)
|
||||
|
||||
sb $in1,8($mac)
|
||||
dsrl $tmp2,$in1,32
|
||||
sb $tmp3,9($mac)
|
||||
dsrl $tmp3,$in1,40
|
||||
sb $tmp0,10($mac)
|
||||
dsrl $tmp0,$in1,48
|
||||
sb $tmp1,11($mac)
|
||||
dsrl $tmp1,$in1,56
|
||||
sb $tmp2,12($mac)
|
||||
sb $tmp3,13($mac)
|
||||
sb $tmp0,14($mac)
|
||||
sb $tmp1,15($mac)
|
||||
|
||||
jr $ra
|
||||
.end poly1305_emit
|
||||
.rdata
|
||||
.align 2
|
||||
___
|
||||
}
|
||||
|
||||
open SELF,$0;
|
||||
while(<SELF>) {
|
||||
next if (/^#!/);
|
||||
last if (!s/^#/\/\// and !/^$/);
|
||||
print;
|
||||
}
|
||||
close SELF;
|
||||
|
||||
$output=pop and open STDOUT,">$output";
|
||||
print $code;
|
||||
close STDOUT;
|
||||
|
156
net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c
Normal file
156
net/wireguard/crypto/zinc/poly1305/poly1305-x86_64-glue.c
Normal file
@ -0,0 +1,156 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/intel-family.h>
|
||||
|
||||
asmlinkage void poly1305_init_x86_64(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE]);
|
||||
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
|
||||
const size_t len, const u32 padbit);
|
||||
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4]);
|
||||
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4]);
|
||||
asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
|
||||
const u32 padbit);
|
||||
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
|
||||
const size_t len, const u32 padbit);
|
||||
|
||||
static bool poly1305_use_avx __ro_after_init;
|
||||
static bool poly1305_use_avx2 __ro_after_init;
|
||||
static bool poly1305_use_avx512 __ro_after_init;
|
||||
static bool *const poly1305_nobs[] __initconst = {
|
||||
&poly1305_use_avx, &poly1305_use_avx2, &poly1305_use_avx512 };
|
||||
|
||||
static void __init poly1305_fpu_init(void)
|
||||
{
|
||||
poly1305_use_avx =
|
||||
boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
poly1305_use_avx2 =
|
||||
boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
|
||||
#ifndef COMPAT_CANNOT_USE_AVX512
|
||||
poly1305_use_avx512 =
|
||||
boot_cpu_has(X86_FEATURE_AVX) &&
|
||||
boot_cpu_has(X86_FEATURE_AVX2) &&
|
||||
boot_cpu_has(X86_FEATURE_AVX512F) &&
|
||||
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
|
||||
XFEATURE_MASK_AVX512, NULL) &&
|
||||
/* Skylake downclocks unacceptably much when using zmm. */
|
||||
boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
poly1305_init_x86_64(ctx, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
struct poly1305_arch_internal {
|
||||
union {
|
||||
struct {
|
||||
u32 h[5];
|
||||
u32 is_base2_26;
|
||||
};
|
||||
u64 hs[3];
|
||||
};
|
||||
u64 r[2];
|
||||
u64 pad;
|
||||
struct { u32 r2, r1, r4, r3; } rn[9];
|
||||
};
|
||||
|
||||
/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
|
||||
* the unfortunate situation of using AVX and then having to go back to scalar
|
||||
* -- because the user is silly and has called the update function from two
|
||||
* separate contexts -- then we need to convert back to the original base before
|
||||
* proceeding. It is possible to reason that the initial reduction below is
|
||||
* sufficient given the implementation invariants. However, for an avoidance of
|
||||
* doubt and because this is not performance critical, we do the full reduction
|
||||
* anyway.
|
||||
*/
|
||||
static void convert_to_base2_64(void *ctx)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
u32 cy;
|
||||
|
||||
if (!state->is_base2_26)
|
||||
return;
|
||||
|
||||
cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
|
||||
cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
|
||||
cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
|
||||
cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
|
||||
state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
|
||||
state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
|
||||
state->hs[2] = state->h[4] >> 24;
|
||||
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
|
||||
cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
|
||||
state->hs[2] &= 3;
|
||||
state->hs[0] += cy;
|
||||
state->hs[1] += (cy = ULT(state->hs[0], cy));
|
||||
state->hs[2] += ULT(state->hs[1], cy);
|
||||
#undef ULT
|
||||
state->is_base2_26 = 0;
|
||||
}
|
||||
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
|
||||
/* SIMD disables preemption, so relax after processing each page. */
|
||||
BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
|
||||
PAGE_SIZE % POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
|
||||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
|
||||
!simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_blocks_x86_64(ctx, inp, len, padbit);
|
||||
return true;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
const size_t bytes = min_t(size_t, len, PAGE_SIZE);
|
||||
|
||||
if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512)
|
||||
poly1305_blocks_avx512(ctx, inp, bytes, padbit);
|
||||
else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2)
|
||||
poly1305_blocks_avx2(ctx, inp, bytes, padbit);
|
||||
else
|
||||
poly1305_blocks_avx(ctx, inp, bytes, padbit);
|
||||
len -= bytes;
|
||||
if (!len)
|
||||
break;
|
||||
inp += bytes;
|
||||
simd_relax(simd_context);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
struct poly1305_arch_internal *state = ctx;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
|
||||
!state->is_base2_26 || !simd_use(simd_context)) {
|
||||
convert_to_base2_64(ctx);
|
||||
poly1305_emit_x86_64(ctx, mac, nonce);
|
||||
} else
|
||||
poly1305_emit_avx(ctx, mac, nonce);
|
||||
return true;
|
||||
}
|
4266
net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl
Normal file
4266
net/wireguard/crypto/zinc/poly1305/poly1305-x86_64.pl
Normal file
File diff suppressed because it is too large
Load Diff
165
net/wireguard/crypto/zinc/poly1305/poly1305.c
Normal file
165
net/wireguard/crypto/zinc/poly1305/poly1305.c
Normal file
@ -0,0 +1,165 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR MIT
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*
|
||||
* Implementation of the Poly1305 message authenticator.
|
||||
*
|
||||
* Information: https://cr.yp.to/mac.html
|
||||
*/
|
||||
|
||||
#include <zinc/poly1305.h>
|
||||
#include "../selftest/run.h"
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#if defined(CONFIG_ZINC_ARCH_X86_64)
|
||||
#include "poly1305-x86_64-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
|
||||
#include "poly1305-arm-glue.c"
|
||||
#elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
|
||||
#include "poly1305-mips-glue.c"
|
||||
#else
|
||||
static inline bool poly1305_init_arch(void *ctx,
|
||||
const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool poly1305_blocks_arch(void *ctx, const u8 *input,
|
||||
size_t len, const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static bool *const poly1305_nobs[] __initconst = { };
|
||||
static void __init poly1305_fpu_init(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
|
||||
#include "poly1305-donna64.c"
|
||||
#else
|
||||
#include "poly1305-donna32.c"
|
||||
#endif
|
||||
|
||||
void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
|
||||
{
|
||||
ctx->nonce[0] = get_unaligned_le32(&key[16]);
|
||||
ctx->nonce[1] = get_unaligned_le32(&key[20]);
|
||||
ctx->nonce[2] = get_unaligned_le32(&key[24]);
|
||||
ctx->nonce[3] = get_unaligned_le32(&key[28]);
|
||||
|
||||
if (!poly1305_init_arch(ctx->opaque, key))
|
||||
poly1305_init_generic(ctx->opaque, key);
|
||||
|
||||
ctx->num = 0;
|
||||
}
|
||||
|
||||
static inline void poly1305_blocks(void *ctx, const u8 *input, const size_t len,
|
||||
const u32 padbit,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!poly1305_blocks_arch(ctx, input, len, padbit, simd_context))
|
||||
poly1305_blocks_generic(ctx, input, len, padbit);
|
||||
}
|
||||
|
||||
static inline void poly1305_emit(void *ctx, u8 mac[POLY1305_KEY_SIZE],
|
||||
const u32 nonce[4],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
if (!poly1305_emit_arch(ctx, mac, nonce, simd_context))
|
||||
poly1305_emit_generic(ctx, mac, nonce);
|
||||
}
|
||||
|
||||
void poly1305_update(struct poly1305_ctx *ctx, const u8 *input, size_t len,
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
const size_t num = ctx->num;
|
||||
size_t rem;
|
||||
|
||||
if (num) {
|
||||
rem = POLY1305_BLOCK_SIZE - num;
|
||||
if (len < rem) {
|
||||
memcpy(ctx->data + num, input, len);
|
||||
ctx->num = num + len;
|
||||
return;
|
||||
}
|
||||
memcpy(ctx->data + num, input, rem);
|
||||
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 1,
|
||||
simd_context);
|
||||
input += rem;
|
||||
len -= rem;
|
||||
}
|
||||
|
||||
rem = len % POLY1305_BLOCK_SIZE;
|
||||
len -= rem;
|
||||
|
||||
if (len >= POLY1305_BLOCK_SIZE) {
|
||||
poly1305_blocks(ctx->opaque, input, len, 1, simd_context);
|
||||
input += len;
|
||||
}
|
||||
|
||||
if (rem)
|
||||
memcpy(ctx->data, input, rem);
|
||||
|
||||
ctx->num = rem;
|
||||
}
|
||||
|
||||
void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
|
||||
simd_context_t *simd_context)
|
||||
{
|
||||
size_t num = ctx->num;
|
||||
|
||||
if (num) {
|
||||
ctx->data[num++] = 1;
|
||||
while (num < POLY1305_BLOCK_SIZE)
|
||||
ctx->data[num++] = 0;
|
||||
poly1305_blocks(ctx->opaque, ctx->data, POLY1305_BLOCK_SIZE, 0,
|
||||
simd_context);
|
||||
}
|
||||
|
||||
poly1305_emit(ctx->opaque, mac, ctx->nonce, simd_context);
|
||||
|
||||
memzero_explicit(ctx, sizeof(*ctx));
|
||||
}
|
||||
|
||||
#include "../selftest/poly1305.c"
|
||||
|
||||
static bool nosimd __initdata = false;
|
||||
|
||||
#ifndef COMPAT_ZINC_IS_A_MODULE
|
||||
int __init poly1305_mod_init(void)
|
||||
#else
|
||||
static int __init mod_init(void)
|
||||
#endif
|
||||
{
|
||||
if (!nosimd)
|
||||
poly1305_fpu_init();
|
||||
if (!selftest_run("poly1305", poly1305_selftest, poly1305_nobs,
|
||||
ARRAY_SIZE(poly1305_nobs)))
|
||||
return -ENOTRECOVERABLE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef COMPAT_ZINC_IS_A_MODULE
|
||||
static void __exit mod_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
module_param(nosimd, bool, 0);
|
||||
module_init(mod_init);
|
||||
module_exit(mod_exit);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("Poly1305 one-time authenticator");
|
||||
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
|
||||
#endif
|
2090
net/wireguard/crypto/zinc/selftest/blake2s.c
Normal file
2090
net/wireguard/crypto/zinc/selftest/blake2s.c
Normal file
File diff suppressed because it is too large
Load Diff
2698
net/wireguard/crypto/zinc/selftest/chacha20.c
Normal file
2698
net/wireguard/crypto/zinc/selftest/chacha20.c
Normal file
File diff suppressed because it is too large
Load Diff
9076
net/wireguard/crypto/zinc/selftest/chacha20poly1305.c
Normal file
9076
net/wireguard/crypto/zinc/selftest/chacha20poly1305.c
Normal file
File diff suppressed because it is too large
Load Diff
1315
net/wireguard/crypto/zinc/selftest/curve25519.c
Normal file
1315
net/wireguard/crypto/zinc/selftest/curve25519.c
Normal file
File diff suppressed because it is too large
Load Diff
1107
net/wireguard/crypto/zinc/selftest/poly1305.c
Normal file
1107
net/wireguard/crypto/zinc/selftest/poly1305.c
Normal file
File diff suppressed because it is too large
Load Diff
48
net/wireguard/crypto/zinc/selftest/run.h
Normal file
48
net/wireguard/crypto/zinc/selftest/run.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 OR MIT */
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZINC_SELFTEST_RUN_H
|
||||
#define _ZINC_SELFTEST_RUN_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/bug.h>
|
||||
|
||||
static inline bool selftest_run(const char *name, bool (*selftest)(void),
|
||||
bool *const nobs[], unsigned int nobs_len)
|
||||
{
|
||||
unsigned long set = 0, subset = 0, largest_subset = 0;
|
||||
unsigned int i;
|
||||
|
||||
BUILD_BUG_ON(!__builtin_constant_p(nobs_len) ||
|
||||
nobs_len >= BITS_PER_LONG);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_ZINC_SELFTEST))
|
||||
return true;
|
||||
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
set |= ((unsigned long)*nobs[i]) << i;
|
||||
|
||||
do {
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
*nobs[i] = BIT(i) & subset;
|
||||
if (selftest())
|
||||
largest_subset = max(subset, largest_subset);
|
||||
else
|
||||
pr_err("%s self-test combination 0x%lx: FAIL\n", name,
|
||||
subset);
|
||||
subset = (subset - set) & set;
|
||||
} while (subset);
|
||||
|
||||
for (i = 0; i < nobs_len; ++i)
|
||||
*nobs[i] = BIT(i) & largest_subset;
|
||||
|
||||
if (largest_subset == set)
|
||||
pr_info("%s self-tests: pass\n", name);
|
||||
|
||||
return !WARN_ON(largest_subset != set);
|
||||
}
|
||||
|
||||
#endif
|
470
net/wireguard/device.c
Normal file
470
net/wireguard/device.c
Normal file
@ -0,0 +1,470 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "socket.h"
#include "timers.h"
#include "device.h"
#include "ratelimiter.h"
#include "peer.h"
#include "messages.h"

#include <linux/module.h>
#include <linux/rtnetlink.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/icmp.h>
#include <linux/suspend.h>
#include <net/icmp.h>
#include <net/rtnetlink.h>
#include <net/ip_tunnels.h>
#include <net/addrconf.h>

static LIST_HEAD(device_list);

static int wg_open(struct net_device *dev)
{
	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
#endif
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;
	int ret;

	if (dev_v4) {
		/* At some point we might put this check near the ip_rt_send_
		 * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
		 * to the current secpath check.
		 */
		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
	}
#ifndef COMPAT_CANNOT_USE_IN6_DEV_GET
	if (dev_v6)
#ifndef COMPAT_CANNOT_USE_DEV_CNF
		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
#else
		dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
#endif
#endif

	mutex_lock(&wg->device_update_lock);
	ret = wg_socket_init(wg, wg->incoming_port);
	if (ret < 0)
		goto out;
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_send_staged_packets(peer);
		if (peer->persistent_keepalive_interval)
			wg_packet_send_keepalive(peer);
	}
out:
	mutex_unlock(&wg->device_update_lock);
	return ret;
}

#ifdef CONFIG_PM_SLEEP
static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
			      void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	/* If the machine is constantly suspending and resuming, as part of
	 * its normal operation rather than as a somewhat rare event, then we
	 * don't actually want to clear keys.
	 */
	if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
		return 0;

	if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
		return 0;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list) {
			del_timer(&peer->timer_zero_key_material);
			wg_noise_handshake_clear(&peer->handshake);
			wg_noise_keypairs_clear(&peer->keypairs);
		}
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	rcu_barrier();
	return 0;
}

static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
#endif

static int wg_stop(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct wg_peer *peer;

	mutex_lock(&wg->device_update_lock);
	list_for_each_entry(peer, &wg->peer_list, peer_list) {
		wg_packet_purge_staged_packets(peer);
		wg_timers_stop(peer);
		wg_noise_handshake_clear(&peer->handshake);
		wg_noise_keypairs_clear(&peer->keypairs);
		wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
	}
	mutex_unlock(&wg->device_update_lock);
	skb_queue_purge(&wg->incoming_handshakes);
	wg_socket_reinit(wg, NULL, NULL);
	return 0;
}

static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	struct sk_buff_head packets;
	struct wg_peer *peer;
	struct sk_buff *next;
	sa_family_t family;
	u32 mtu;
	int ret;

	if (unlikely(!wg_check_packet_protocol(skb))) {
		ret = -EPROTONOSUPPORT;
		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
		goto err;
	}

	peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
	if (unlikely(!peer)) {
		ret = -ENOKEY;
		if (skb->protocol == htons(ETH_P_IP))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
					    dev->name, &ip_hdr(skb)->daddr);
		else if (skb->protocol == htons(ETH_P_IPV6))
			net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
					    dev->name, &ipv6_hdr(skb)->daddr);
		goto err;
	}

	family = READ_ONCE(peer->endpoint.addr.sa_family);
	if (unlikely(family != AF_INET && family != AF_INET6)) {
		ret = -EDESTADDRREQ;
		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
				    dev->name, peer->internal_id);
		goto err_peer;
	}

	mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	__skb_queue_head_init(&packets);
	if (!skb_is_gso(skb)) {
		skb_mark_not_on_list(skb);
	} else {
		struct sk_buff *segs = skb_gso_segment(skb, 0);

		if (unlikely(IS_ERR(segs))) {
			ret = PTR_ERR(segs);
			goto err_peer;
		}
		dev_kfree_skb(skb);
		skb = segs;
	}

	skb_list_walk_safe(skb, skb, next) {
		skb_mark_not_on_list(skb);

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (unlikely(!skb))
			continue;

		/* We only need to keep the original dst around for icmp,
		 * so at this point we're in a position to drop it.
		 */
		skb_dst_drop(skb);

		PACKET_CB(skb)->mtu = mtu;

		__skb_queue_tail(&packets, skb);
	}

	spin_lock_bh(&peer->staged_packet_queue.lock);
	/* If the queue is getting too big, we start removing the oldest packets
	 * until it's small again. We do this before adding the new packet, so
	 * we don't remove GSO segments that are in excess.
	 */
	while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
		dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
		++dev->stats.tx_dropped;
	}
	skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	wg_packet_send_staged_packets(peer);

	wg_peer_put(peer);
	return NETDEV_TX_OK;

err_peer:
	wg_peer_put(peer);
err:
	++dev->stats.tx_errors;
	if (skb->protocol == htons(ETH_P_IP))
		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
	else if (skb->protocol == htons(ETH_P_IPV6))
		icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
	kfree_skb(skb);
	return ret;
}

static const struct net_device_ops netdev_ops = {
	.ndo_open		= wg_open,
	.ndo_stop		= wg_stop,
	.ndo_start_xmit		= wg_xmit,
	.ndo_get_stats64	= ip_tunnel_get_stats64
};

static void wg_destruct(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);

	rtnl_lock();
	list_del(&wg->device_list);
	rtnl_unlock();
	mutex_lock(&wg->device_update_lock);
	rcu_assign_pointer(wg->creating_net, NULL);
	wg->incoming_port = 0;
	wg_socket_reinit(wg, NULL, NULL);
	/* The final references are cleared in the below calls to destroy_workqueue. */
	wg_peer_remove_all(wg);
	destroy_workqueue(wg->handshake_receive_wq);
	destroy_workqueue(wg->handshake_send_wq);
	destroy_workqueue(wg->packet_crypt_wq);
	wg_packet_queue_free(&wg->decrypt_queue, true);
	wg_packet_queue_free(&wg->encrypt_queue, true);
	rcu_barrier(); /* Wait for all the peers to be actually freed. */
	wg_ratelimiter_uninit();
	memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
	skb_queue_purge(&wg->incoming_handshakes);
	free_percpu(dev->tstats);
	free_percpu(wg->incoming_handshakes_worker);
	kvfree(wg->index_hashtable);
	kvfree(wg->peer_hashtable);
	mutex_unlock(&wg->device_update_lock);

	pr_debug("%s: Interface destroyed\n", dev->name);
	free_netdev(dev);
}

static const struct device_type device_type = { .name = KBUILD_MODNAME };

static void wg_setup(struct net_device *dev)
{
	struct wg_device *wg = netdev_priv(dev);
	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
				    NETIF_F_SG | NETIF_F_GSO |
				    NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
	const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
			     max(sizeof(struct ipv6hdr), sizeof(struct iphdr));

	dev->netdev_ops = &netdev_ops;
	dev->header_ops = &ip_tunnel_header_ops;
	dev->hard_header_len = 0;
	dev->addr_len = 0;
	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
	dev->type = ARPHRD_NONE;
	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
#ifndef COMPAT_CANNOT_USE_IFF_NO_QUEUE
	dev->priv_flags |= IFF_NO_QUEUE;
#else
	dev->tx_queue_len = 0;
#endif
	dev->features |= NETIF_F_LLTX;
	dev->features |= WG_NETDEV_FEATURES;
	dev->hw_features |= WG_NETDEV_FEATURES;
	dev->hw_enc_features |= WG_NETDEV_FEATURES;
	dev->mtu = ETH_DATA_LEN - overhead;
#ifndef COMPAT_CANNOT_USE_MAX_MTU
	dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
#endif

	SET_NETDEV_DEVTYPE(dev, &device_type);

	/* We need to keep the dst around in case of icmp replies. */
	netif_keep_dst(dev);

	memset(wg, 0, sizeof(*wg));
	wg->dev = dev;
}

static int wg_newlink(struct net *src_net, struct net_device *dev,
		      struct nlattr *tb[], struct nlattr *data[],
		      struct netlink_ext_ack *extack)
{
	struct wg_device *wg = netdev_priv(dev);
	int ret = -ENOMEM;

	rcu_assign_pointer(wg->creating_net, src_net);
	init_rwsem(&wg->static_identity.lock);
	mutex_init(&wg->socket_update_lock);
	mutex_init(&wg->device_update_lock);
	skb_queue_head_init(&wg->incoming_handshakes);
	wg_allowedips_init(&wg->peer_allowedips);
	wg_cookie_checker_init(&wg->cookie_checker, wg);
	INIT_LIST_HEAD(&wg->peer_list);
	wg->device_update_gen = 1;

	wg->peer_hashtable = wg_pubkey_hashtable_alloc();
	if (!wg->peer_hashtable)
		return ret;

	wg->index_hashtable = wg_index_hashtable_alloc();
	if (!wg->index_hashtable)
		goto err_free_peer_hashtable;

	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		goto err_free_index_hashtable;

	wg->incoming_handshakes_worker =
		wg_packet_percpu_multicore_worker_alloc(
				wg_packet_handshake_receive_worker, wg);
	if (!wg->incoming_handshakes_worker)
		goto err_free_tstats;

	wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
			WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_receive_wq)
		goto err_free_incoming_handshakes;

	wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
			WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
	if (!wg->handshake_send_wq)
		goto err_destroy_handshake_receive;

	wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
			WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
	if (!wg->packet_crypt_wq)
		goto err_destroy_handshake_send;

	ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
				   true, MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_destroy_packet_crypt;

	ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
				   true, MAX_QUEUED_PACKETS);
	if (ret < 0)
		goto err_free_encrypt_queue;

	ret = wg_ratelimiter_init();
	if (ret < 0)
		goto err_free_decrypt_queue;

	ret = register_netdevice(dev);
	if (ret < 0)
		goto err_uninit_ratelimiter;

	list_add(&wg->device_list, &device_list);

	/* We wait until the end to assign priv_destructor, so that
	 * register_netdevice doesn't call it for us if it fails.
	 */
	dev->priv_destructor = wg_destruct;

	pr_debug("%s: Interface created\n", dev->name);
	return ret;

err_uninit_ratelimiter:
	wg_ratelimiter_uninit();
err_free_decrypt_queue:
	wg_packet_queue_free(&wg->decrypt_queue, true);
err_free_encrypt_queue:
	wg_packet_queue_free(&wg->encrypt_queue, true);
err_destroy_packet_crypt:
	destroy_workqueue(wg->packet_crypt_wq);
err_destroy_handshake_send:
	destroy_workqueue(wg->handshake_send_wq);
err_destroy_handshake_receive:
	destroy_workqueue(wg->handshake_receive_wq);
err_free_incoming_handshakes:
	free_percpu(wg->incoming_handshakes_worker);
err_free_tstats:
	free_percpu(dev->tstats);
err_free_index_hashtable:
	kvfree(wg->index_hashtable);
err_free_peer_hashtable:
	kvfree(wg->peer_hashtable);
	return ret;
}

static struct rtnl_link_ops link_ops __read_mostly = {
	.kind		= KBUILD_MODNAME,
	.priv_size	= sizeof(struct wg_device),
	.setup		= wg_setup,
	.newlink	= wg_newlink,
};

static void wg_netns_pre_exit(struct net *net)
{
	struct wg_device *wg;

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		if (rcu_access_pointer(wg->creating_net) == net) {
			pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
			netif_carrier_off(wg->dev);
			mutex_lock(&wg->device_update_lock);
			rcu_assign_pointer(wg->creating_net, NULL);
			wg_socket_reinit(wg, NULL, NULL);
			mutex_unlock(&wg->device_update_lock);
		}
	}
	rtnl_unlock();
}

static struct pernet_operations pernet_ops = {
	.pre_exit = wg_netns_pre_exit
};

int __init wg_device_init(void)
{
	int ret;

#ifdef CONFIG_PM_SLEEP
	ret = register_pm_notifier(&pm_notifier);
	if (ret)
		return ret;
#endif

	ret = register_pernet_device(&pernet_ops);
	if (ret)
		goto error_pm;

	ret = rtnl_link_register(&link_ops);
	if (ret)
		goto error_pernet;

	return 0;

error_pernet:
	unregister_pernet_device(&pernet_ops);
error_pm:
#ifdef CONFIG_PM_SLEEP
	unregister_pm_notifier(&pm_notifier);
#endif
	return ret;
}

void wg_device_uninit(void)
{
	rtnl_link_unregister(&link_ops);
	unregister_pernet_device(&pernet_ops);
#ifdef CONFIG_PM_SLEEP
	unregister_pm_notifier(&pm_notifier);
#endif
	rcu_barrier();
}
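The `overhead` computed in wg_setup above works out to 80 bytes: 32 bytes of data-message framing plus auth tag (MESSAGE_MINIMUM_LENGTH), an 8-byte UDP header, and a 40-byte IPv6 header (the larger of the two IP headers). That is where the familiar default WireGuard MTU of 1420 = 1500 - 80 comes from. A small standalone check of that arithmetic, with the sizes hard-coded here rather than taken from the kernel structs:

#include <assert.h>

int main(void)
{
	const int message_data_hdr = 4 + 4 + 8; /* type + key_idx + counter */
	const int authtag = 16, udp = 8, ipv6 = 40;
	const int minimum_length = message_data_hdr + authtag; /* 32 */
	const int overhead = minimum_length + udp + ipv6;      /* 80 */

	assert(1500 - overhead == 1420); /* ETH_DATA_LEN minus overhead */
	return 0;
}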
64
net/wireguard/device.h
Normal file
@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_DEVICE_H
#define _WG_DEVICE_H

#include "noise.h"
#include "allowedips.h"
#include "peerlookup.h"
#include "cookie.h"

#include <linux/types.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/ptr_ring.h>

struct wg_device;

struct multicore_worker {
	void *ptr;
	struct work_struct work;
};

struct crypt_queue {
	struct ptr_ring ring;
	union {
		struct {
			struct multicore_worker __percpu *worker;
			int last_cpu;
		};
		struct work_struct work;
	};
};

struct wg_device {
	struct net_device *dev;
	struct crypt_queue encrypt_queue, decrypt_queue;
	struct sock __rcu *sock4, *sock6;
	struct net __rcu *creating_net;
	struct noise_static_identity static_identity;
	struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
	struct workqueue_struct *packet_crypt_wq;
	struct sk_buff_head incoming_handshakes;
	int incoming_handshake_cpu;
	struct multicore_worker __percpu *incoming_handshakes_worker;
	struct cookie_checker cookie_checker;
	struct pubkey_hashtable *peer_hashtable;
	struct index_hashtable *index_hashtable;
	struct allowedips peer_allowedips;
	struct mutex device_update_lock, socket_update_lock;
	struct list_head device_list, peer_list;
	unsigned int num_peers, device_update_gen;
	u32 fwmark;
	u16 incoming_port;
};

int wg_device_init(void);
void wg_device_uninit(void);

#endif /* _WG_DEVICE_H */
9
net/wireguard/dkms.conf
Normal file
@ -0,0 +1,9 @@
PACKAGE_NAME="wireguard"
PACKAGE_VERSION="1.0.20200908"
AUTOINSTALL=yes

BUILT_MODULE_NAME="wireguard"
DEST_MODULE_LOCATION="/kernel/net"

# requires kernel 3.10 - 5.5, inclusive:
BUILD_EXCLUSIVE_KERNEL="^((5\.[0-5]($|[.-]))|(4\.)|(3\.1[0-9]))"
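The BUILD_EXCLUSIVE_KERNEL pattern above is an extended regex over kernel version strings: `3\.1[0-9]` covers 3.10 through 3.19, `4\.` covers all of 4.x, and `5\.[0-5]($|[.-])` covers 5.0 through 5.5, where the required end-of-string or separator stops, say, "5.50" from matching. A quick userspace check of that reading with POSIX <regex.h> (the sample version strings are arbitrary):

#include <assert.h>
#include <regex.h>

int main(void)
{
	regex_t re;

	assert(regcomp(&re, "^((5\\.[0-5]($|[.-]))|(4\\.)|(3\\.1[0-9]))",
		       REG_EXTENDED | REG_NOSUB) == 0);
	assert(regexec(&re, "3.10.108", 0, NULL, 0) == 0); /* oldest supported */
	assert(regexec(&re, "5.5.19", 0, NULL, 0) == 0);   /* newest supported */
	assert(regexec(&re, "5.6.0", 0, NULL, 0) != 0);    /* wireguard is in-tree there */
	assert(regexec(&re, "3.9.11", 0, NULL, 0) != 0);   /* too old for the backport */
	regfree(&re);
	return 0;
}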
69
net/wireguard/main.c
Normal file
@ -0,0 +1,69 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "version.h"
#include "device.h"
#include "noise.h"
#include "queueing.h"
#include "ratelimiter.h"
#include "netlink.h"
#include "uapi/wireguard.h"
#include "crypto/zinc.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/genetlink.h>
#include <net/rtnetlink.h>

static int __init mod_init(void)
{
	int ret;

	if ((ret = chacha20_mod_init()) || (ret = poly1305_mod_init()) ||
	    (ret = chacha20poly1305_mod_init()) || (ret = blake2s_mod_init()) ||
	    (ret = curve25519_mod_init()))
		return ret;

#ifdef DEBUG
	if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
	    !wg_ratelimiter_selftest())
		return -ENOTRECOVERABLE;
#endif
	wg_noise_init();

	ret = wg_device_init();
	if (ret < 0)
		goto err_device;

	ret = wg_genetlink_init();
	if (ret < 0)
		goto err_netlink;

	pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
	pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");

	return 0;

err_netlink:
	wg_device_uninit();
err_device:
	return ret;
}
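The chained condition at the top of mod_init runs each crypto init in order and stops at the first non-zero return, leaving that error code in ret. A tiny standalone illustration of the same short-circuit idiom (the stub functions here are hypothetical):

#include <assert.h>

static int ok(void) { return 0; }
static int fail(void) { return -22; }
static int never_runs(void) { assert(0); return 0; }

int main(void)
{
	int ret;

	/* Evaluation stops at fail(); never_runs() is not called. */
	if ((ret = ok()) || (ret = fail()) || (ret = never_runs()))
		assert(ret == -22); /* the first failure's code is preserved */
	return 0;
}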
static void __exit mod_exit(void)
{
	wg_genetlink_uninit();
	wg_device_uninit();
}

module_init(mod_init);
module_exit(mod_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("WireGuard secure network tunnel");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
MODULE_VERSION(WIREGUARD_VERSION);
MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
MODULE_INFO(intree, "Y");
128
net/wireguard/messages.h
Normal file
@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_MESSAGES_H
#define _WG_MESSAGES_H

#include <zinc/curve25519.h>
#include <zinc/chacha20poly1305.h>
#include <zinc/blake2s.h>

#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/skbuff.h>

enum noise_lengths {
	NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
	NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
	NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
	NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
	NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
};

#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)

enum cookie_values {
	COOKIE_SECRET_MAX_AGE = 2 * 60,
	COOKIE_SECRET_LATENCY = 5,
	COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
	COOKIE_LEN = 16
};

enum counter_values {
	COUNTER_BITS_TOTAL = 8192,
	COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
	COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
};

enum limits {
	REKEY_AFTER_MESSAGES = 1ULL << 60,
	REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
	REKEY_TIMEOUT = 5,
	REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
	REKEY_AFTER_TIME = 120,
	REJECT_AFTER_TIME = 180,
	INITIATIONS_PER_SECOND = 50,
	MAX_PEERS_PER_DEVICE = 1U << 20,
	KEEPALIVE_TIMEOUT = 10,
	MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
	MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
	MAX_STAGED_PACKETS = 128,
	MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
};

enum message_type {
	MESSAGE_INVALID = 0,
	MESSAGE_HANDSHAKE_INITIATION = 1,
	MESSAGE_HANDSHAKE_RESPONSE = 2,
	MESSAGE_HANDSHAKE_COOKIE = 3,
	MESSAGE_DATA = 4
};

struct message_header {
	/* The actual layout of this that we want is:
	 * u8 type
	 * u8 reserved_zero[3]
	 *
	 * But it turns out that by encoding this as little endian,
	 * we achieve the same thing, and it makes checking faster.
	 */
	__le32 type;
};

struct message_macs {
	u8 mac1[COOKIE_LEN];
	u8 mac2[COOKIE_LEN];
};

struct message_handshake_initiation {
	struct message_header header;
	__le32 sender_index;
	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
	u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
	u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
	struct message_macs macs;
};

struct message_handshake_response {
	struct message_header header;
	__le32 sender_index;
	__le32 receiver_index;
	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
	u8 encrypted_nothing[noise_encrypted_len(0)];
	struct message_macs macs;
};

struct message_handshake_cookie {
	struct message_header header;
	__le32 receiver_index;
	u8 nonce[COOKIE_NONCE_LEN];
	u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
};

struct message_data {
	struct message_header header;
	__le32 key_idx;
	__le64 counter;
	u8 encrypted_data[];
};

#define message_data_len(plain_len) \
	(noise_encrypted_len(plain_len) + sizeof(struct message_data))

enum message_alignments {
	MESSAGE_PADDING_MULTIPLE = 16,
	MESSAGE_MINIMUM_LENGTH = message_data_len(0)
};

#define SKB_HEADER_LEN                                       \
	(max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
	 sizeof(struct udphdr) + NET_SKB_PAD)
#define DATA_PACKET_HEAD_ROOM \
	ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)

enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };

#endif /* _WG_MESSAGES_H */
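For the counter_values above, on a 64-bit kernel the sliding replay window is 8192 - 64 = 8128 packets, and REJECT_AFTER_MESSAGES sits exactly that window plus one below U64_MAX, so the counter can never wrap inside a live window. A minimal check of those relations (BITS_PER_LONG assumed to be 64 here):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t counter_bits_total = 8192;
	const uint64_t counter_redundant_bits = 64; /* BITS_PER_LONG on a 64-bit host */
	const uint64_t window = counter_bits_total - counter_redundant_bits;
	const uint64_t reject_after = UINT64_MAX - window - 1;

	assert(window == 8128);
	assert(reject_after + window + 1 == UINT64_MAX);
	return 0;
}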
658
net/wireguard/netlink.c
Normal file
@ -0,0 +1,658 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "netlink.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "queueing.h"
#include "messages.h"
#include "uapi/wireguard.h"
#include <linux/if.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include <crypto/algapi.h>

static struct genl_family genl_family;

static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
	[WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
	[WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
	[WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
	[WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
	[WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
	[WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
	[WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
};

static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
	[WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN),
	[WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN),
	[WGPEER_A_FLAGS] = { .type = NLA_U32 },
	[WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)),
	[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
	[WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)),
	[WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
	[WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
	[WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
	[WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
};

static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
	[WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
	[WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)),
	[WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
};

static struct wg_device *lookup_interface(struct nlattr **attrs,
					  struct sk_buff *skb)
{
	struct net_device *dev = NULL;

	if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
		return ERR_PTR(-EBADR);
	if (attrs[WGDEVICE_A_IFINDEX])
		dev = dev_get_by_index(sock_net(skb->sk),
				       nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
	else if (attrs[WGDEVICE_A_IFNAME])
		dev = dev_get_by_name(sock_net(skb->sk),
				      nla_data(attrs[WGDEVICE_A_IFNAME]));
	if (!dev)
		return ERR_PTR(-ENODEV);
	if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
	    strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
		dev_put(dev);
		return ERR_PTR(-EOPNOTSUPP);
	}
	return netdev_priv(dev);
}
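The `!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]` test in lookup_interface above rejects a request unless exactly one of the two selectors is present: `!` normalizes each pointer to 0 or 1, so equality means both or neither were supplied. A standalone demonstration of that truth table:

#include <assert.h>
#include <stddef.h>

int main(void)
{
	int x;
	void *ifindex = NULL, *ifname = NULL;

	assert(!ifindex == !ifname);   /* neither attribute: rejected (-EBADR) */
	ifindex = &x; ifname = &x;
	assert(!ifindex == !ifname);   /* both attributes: rejected too */
	ifname = NULL;
	assert(!ifindex != !ifname);   /* exactly one: the lookup proceeds */
	return 0;
}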
static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
			  int family)
{
	struct nlattr *allowedip_nest;

	allowedip_nest = nla_nest_start(skb, 0);
	if (!allowedip_nest)
		return -EMSGSIZE;

	if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
	    nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
	    nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
		    sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
		nla_nest_cancel(skb, allowedip_nest);
		return -EMSGSIZE;
	}

	nla_nest_end(skb, allowedip_nest);
	return 0;
}

struct dump_ctx {
	struct wg_device *wg;
	struct wg_peer *next_peer;
	u64 allowedips_seq;
	struct allowedips_node *next_allowedip;
};

#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)

static int
get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
{
	struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
	struct allowedips_node *allowedips_node = ctx->next_allowedip;
	bool fail;

	if (!peer_nest)
		return -EMSGSIZE;

	down_read(&peer->handshake.lock);
	fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
		       peer->handshake.remote_static);
	up_read(&peer->handshake.lock);
	if (fail)
		goto err;

	if (!allowedips_node) {
		const struct __kernel_timespec last_handshake = {
			.tv_sec = peer->walltime_last_handshake.tv_sec,
			.tv_nsec = peer->walltime_last_handshake.tv_nsec
		};

		down_read(&peer->handshake.lock);
		fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
			       NOISE_SYMMETRIC_KEY_LEN,
			       peer->handshake.preshared_key);
		up_read(&peer->handshake.lock);
		if (fail)
			goto err;

		if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
			    sizeof(last_handshake), &last_handshake) ||
		    nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
				peer->persistent_keepalive_interval) ||
		    nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
				      WGPEER_A_UNSPEC) ||
		    nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
				      WGPEER_A_UNSPEC) ||
		    nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
			goto err;

		read_lock_bh(&peer->endpoint_lock);
		if (peer->endpoint.addr.sa_family == AF_INET)
			fail = nla_put(skb, WGPEER_A_ENDPOINT,
				       sizeof(peer->endpoint.addr4),
				       &peer->endpoint.addr4);
		else if (peer->endpoint.addr.sa_family == AF_INET6)
			fail = nla_put(skb, WGPEER_A_ENDPOINT,
				       sizeof(peer->endpoint.addr6),
				       &peer->endpoint.addr6);
		read_unlock_bh(&peer->endpoint_lock);
		if (fail)
			goto err;
		allowedips_node =
			list_first_entry_or_null(&peer->allowedips_list,
					struct allowedips_node, peer_list);
	}
	if (!allowedips_node)
		goto no_allowedips;
	if (!ctx->allowedips_seq)
		ctx->allowedips_seq = peer->device->peer_allowedips.seq;
	else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
		goto no_allowedips;

	allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
	if (!allowedips_nest)
		goto err;

	list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
				 peer_list) {
		u8 cidr, ip[16] __aligned(__alignof(u64));
		int family;

		family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
		if (get_allowedips(skb, ip, cidr, family)) {
			nla_nest_end(skb, allowedips_nest);
			nla_nest_end(skb, peer_nest);
			ctx->next_allowedip = allowedips_node;
			return -EMSGSIZE;
		}
	}
	nla_nest_end(skb, allowedips_nest);
no_allowedips:
	nla_nest_end(skb, peer_nest);
	ctx->next_allowedip = NULL;
	ctx->allowedips_seq = 0;
	return 0;
err:
	nla_nest_cancel(skb, peer_nest);
	return -EMSGSIZE;
}

static int wg_get_device_start(struct netlink_callback *cb)
{
	struct wg_device *wg;

	wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb);
	if (IS_ERR(wg))
		return PTR_ERR(wg);
	DUMP_CTX(cb)->wg = wg;
	return 0;
}

static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct wg_peer *peer, *next_peer_cursor;
	struct dump_ctx *ctx = DUMP_CTX(cb);
	struct wg_device *wg = ctx->wg;
	struct nlattr *peers_nest;
	int ret = -EMSGSIZE;
	bool done = true;
	void *hdr;

	rtnl_lock();
	mutex_lock(&wg->device_update_lock);
	cb->seq = wg->device_update_gen;
	next_peer_cursor = ctx->next_peer;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
	if (!hdr)
		goto out;
	genl_dump_check_consistent(cb, hdr);

	if (!ctx->next_peer) {
		if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
				wg->incoming_port) ||
		    nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
		    nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
		    nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
			goto out;

		down_read(&wg->static_identity.lock);
		if (wg->static_identity.has_identity) {
			if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
				    NOISE_PUBLIC_KEY_LEN,
				    wg->static_identity.static_private) ||
			    nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
				    NOISE_PUBLIC_KEY_LEN,
				    wg->static_identity.static_public)) {
				up_read(&wg->static_identity.lock);
				goto out;
			}
		}
		up_read(&wg->static_identity.lock);
	}

	peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
	if (!peers_nest)
		goto out;
	ret = 0;
	/* If the last cursor was removed via list_del_init in peer_remove, then
	 * we just treat this the same as there being no more peers left. The
	 * reason is that seq_nr should indicate to userspace that this isn't a
	 * coherent dump anyway, so they'll try again.
	 */
	if (list_empty(&wg->peer_list) ||
	    (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
		nla_nest_cancel(skb, peers_nest);
		goto out;
	}
	lockdep_assert_held(&wg->device_update_lock);
	peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
	list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
		if (get_peer(peer, skb, ctx)) {
			done = false;
			break;
		}
		next_peer_cursor = peer;
	}
	nla_nest_end(skb, peers_nest);

out:
	if (!ret && !done && next_peer_cursor)
		wg_peer_get(next_peer_cursor);
	wg_peer_put(ctx->next_peer);
	mutex_unlock(&wg->device_update_lock);
	rtnl_unlock();

	if (ret) {
		genlmsg_cancel(skb, hdr);
		return ret;
	}
	genlmsg_end(skb, hdr);
	if (done) {
		ctx->next_peer = NULL;
		return 0;
	}
	ctx->next_peer = next_peer_cursor;
	return skb->len;

	/* At this point, we can't really deal ourselves with safely zeroing out
	 * the private key material after usage. This will need an additional API
	 * in the kernel for marking skbs as zero_on_free.
	 */
}

static int wg_get_device_done(struct netlink_callback *cb)
{
	struct dump_ctx *ctx = DUMP_CTX(cb);

	if (ctx->wg)
		dev_put(ctx->wg->dev);
	wg_peer_put(ctx->next_peer);
	return 0;
}

static int set_port(struct wg_device *wg, u16 port)
{
	struct wg_peer *peer;

	if (wg->incoming_port == port)
		return 0;
	list_for_each_entry(peer, &wg->peer_list, peer_list)
		wg_socket_clear_peer_endpoint_src(peer);
	if (!netif_running(wg->dev)) {
		wg->incoming_port = port;
		return 0;
	}
	return wg_socket_init(wg, port);
}

static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
{
	int ret = -EINVAL;
	u16 family;
	u8 cidr;

	if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
	    !attrs[WGALLOWEDIP_A_CIDR_MASK])
		return ret;
	family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
	cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);

	if (family == AF_INET && cidr <= 32 &&
	    nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
		ret = wg_allowedips_insert_v4(
			&peer->device->peer_allowedips,
			nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
			&peer->device->device_update_lock);
	else if (family == AF_INET6 && cidr <= 128 &&
		 nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
		ret = wg_allowedips_insert_v6(
			&peer->device->peer_allowedips,
			nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
			&peer->device->device_update_lock);

	return ret;
}

static int set_peer(struct wg_device *wg, struct nlattr **attrs)
{
	u8 *public_key = NULL, *preshared_key = NULL;
	struct wg_peer *peer = NULL;
	u32 flags = 0;
	int ret;

	ret = -EINVAL;
	if (attrs[WGPEER_A_PUBLIC_KEY] &&
	    nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
		public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
	else
		goto out;
	if (attrs[WGPEER_A_PRESHARED_KEY] &&
	    nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
		preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);

	if (attrs[WGPEER_A_FLAGS])
		flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
	ret = -EOPNOTSUPP;
	if (flags & ~__WGPEER_F_ALL)
		goto out;

	ret = -EPFNOSUPPORT;
	if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
		if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
			goto out;
	}

	peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
					  nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
	ret = 0;
	if (!peer) { /* Peer doesn't exist yet. Add a new one. */
		if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
			goto out;

		/* The peer is new, so there aren't allowed IPs to remove. */
		flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;

		down_read(&wg->static_identity.lock);
		if (wg->static_identity.has_identity &&
		    !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
			    wg->static_identity.static_public,
			    NOISE_PUBLIC_KEY_LEN)) {
			/* We silently ignore peers that have the same public
			 * key as the device. The reason we do it silently is
			 * that we'd like for people to be able to reuse the
			 * same set of API calls across peers.
			 */
			up_read(&wg->static_identity.lock);
			ret = 0;
			goto out;
		}
		up_read(&wg->static_identity.lock);

		peer = wg_peer_create(wg, public_key, preshared_key);
		if (IS_ERR(peer)) {
			ret = PTR_ERR(peer);
			peer = NULL;
			goto out;
		}
		/* Take additional reference, as though we've just been
		 * looked up.
		 */
		wg_peer_get(peer);
	}

	if (flags & WGPEER_F_REMOVE_ME) {
		wg_peer_remove(peer);
		goto out;
	}

	if (preshared_key) {
		down_write(&peer->handshake.lock);
		memcpy(&peer->handshake.preshared_key, preshared_key,
		       NOISE_SYMMETRIC_KEY_LEN);
		up_write(&peer->handshake.lock);
	}

	if (attrs[WGPEER_A_ENDPOINT]) {
		struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
		size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);

		if ((len == sizeof(struct sockaddr_in) &&
		     addr->sa_family == AF_INET) ||
		    (len == sizeof(struct sockaddr_in6) &&
		     addr->sa_family == AF_INET6)) {
			struct endpoint endpoint = { { { 0 } } };

			memcpy(&endpoint.addr, addr, len);
			wg_socket_set_peer_endpoint(peer, &endpoint);
		}
	}

	if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
		wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
					     &wg->device_update_lock);

	if (attrs[WGPEER_A_ALLOWEDIPS]) {
		struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
		int rem;

		nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
			ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
					       attr, allowedip_policy, NULL);
			if (ret < 0)
				goto out;
			ret = set_allowedip(peer, allowedip);
			if (ret < 0)
				goto out;
		}
	}

	if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
		const u16 persistent_keepalive_interval = nla_get_u16(
				attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
		const bool send_keepalive =
			!peer->persistent_keepalive_interval &&
			persistent_keepalive_interval &&
			netif_running(wg->dev);

		peer->persistent_keepalive_interval = persistent_keepalive_interval;
		if (send_keepalive)
			wg_packet_send_keepalive(peer);
	}

	if (netif_running(wg->dev))
		wg_packet_send_staged_packets(peer);

out:
	wg_peer_put(peer);
	if (attrs[WGPEER_A_PRESHARED_KEY])
		memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
				 nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
	return ret;
}

static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
{
	struct wg_device *wg = lookup_interface(info->attrs, skb);
	u32 flags = 0;
	int ret;

	if (IS_ERR(wg)) {
		ret = PTR_ERR(wg);
		goto out_nodev;
	}

	rtnl_lock();
	mutex_lock(&wg->device_update_lock);

	if (info->attrs[WGDEVICE_A_FLAGS])
		flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
	ret = -EOPNOTSUPP;
	if (flags & ~__WGDEVICE_F_ALL)
		goto out;

	if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) {
		struct net *net;

		rcu_read_lock();
		net = rcu_dereference(wg->creating_net);
		ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0;
		rcu_read_unlock();
		if (ret)
			goto out;
	}

	++wg->device_update_gen;

	if (info->attrs[WGDEVICE_A_FWMARK]) {
		struct wg_peer *peer;

		wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
		list_for_each_entry(peer, &wg->peer_list, peer_list)
			wg_socket_clear_peer_endpoint_src(peer);
	}

	if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
		ret = set_port(wg,
			nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
		if (ret)
			goto out;
	}

	if (flags & WGDEVICE_F_REPLACE_PEERS)
		wg_peer_remove_all(wg);

	if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
	    nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
		    NOISE_PUBLIC_KEY_LEN) {
		u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
		u8 public_key[NOISE_PUBLIC_KEY_LEN];
		struct wg_peer *peer, *temp;

		if (!crypto_memneq(wg->static_identity.static_private,
				   private_key, NOISE_PUBLIC_KEY_LEN))
			goto skip_set_private_key;

		/* We remove before setting, to prevent race, which means doing
		 * two 25519-genpub ops.
		 */
		if (curve25519_generate_public(public_key, private_key)) {
			peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
							  public_key);
			if (peer) {
				wg_peer_put(peer);
				wg_peer_remove(peer);
			}
		}

		down_write(&wg->static_identity.lock);
		wg_noise_set_static_identity_private_key(&wg->static_identity,
							 private_key);
		list_for_each_entry_safe(peer, temp, &wg->peer_list,
					 peer_list) {
			wg_noise_precompute_static_static(peer);
			wg_noise_expire_current_peer_keypairs(peer);
		}
		wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
		up_write(&wg->static_identity.lock);
	}
skip_set_private_key:

	if (info->attrs[WGDEVICE_A_PEERS]) {
		struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
		int rem;

		nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
			ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
					       peer_policy, NULL);
			if (ret < 0)
				goto out;
			ret = set_peer(wg, peer);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;

out:
	mutex_unlock(&wg->device_update_lock);
	rtnl_unlock();
	dev_put(wg->dev);
out_nodev:
	if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
		memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
				 nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
	return ret;
}

#ifndef COMPAT_CANNOT_USE_CONST_GENL_OPS
static const
#else
static
#endif
struct genl_ops genl_ops[] = {
	{
		.cmd = WG_CMD_GET_DEVICE,
#ifndef COMPAT_CANNOT_USE_NETLINK_START
		.start = wg_get_device_start,
#endif
		.dumpit = wg_get_device_dump,
		.done = wg_get_device_done,
#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
		.policy = device_policy,
#endif
		.flags = GENL_UNS_ADMIN_PERM
	}, {
		.cmd = WG_CMD_SET_DEVICE,
		.doit = wg_set_device,
#ifdef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
		.policy = device_policy,
#endif
		.flags = GENL_UNS_ADMIN_PERM
	}
};

static struct genl_family genl_family
#ifndef COMPAT_CANNOT_USE_GENL_NOPS
__ro_after_init = {
	.ops = genl_ops,
	.n_ops = ARRAY_SIZE(genl_ops),
#else
= {
#endif
	.name = WG_GENL_NAME,
	.version = WG_GENL_VERSION,
	.maxattr = WGDEVICE_A_MAX,
	.module = THIS_MODULE,
#ifndef COMPAT_CANNOT_INDIVIDUAL_NETLINK_OPS_POLICY
	.policy = device_policy,
#endif
	.netnsok = true
};

int __init wg_genetlink_init(void)
{
	return genl_register_family(&genl_family);
}

void __exit wg_genetlink_uninit(void)
{
	genl_unregister_family(&genl_family);
}
12
net/wireguard/netlink.h
Normal file
@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_NETLINK_H
#define _WG_NETLINK_H

int wg_genetlink_init(void);
void wg_genetlink_uninit(void);

#endif /* _WG_NETLINK_H */
830
net/wireguard/noise.c
Normal file
@ -0,0 +1,830 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
|
||||
*/
|
||||
|
||||
#include "noise.h"
|
||||
#include "device.h"
|
||||
#include "peer.h"
|
||||
#include "messages.h"
|
||||
#include "queueing.h"
|
||||
#include "peerlookup.h"
|
||||
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <crypto/algapi.h>
|
||||
|
||||
/* This implements Noise_IKpsk2:
|
||||
*
|
||||
* <- s
|
||||
* ******
|
||||
* -> e, es, s, ss, {t}
|
||||
* <- e, ee, se, psk, {}
|
||||
*/
|
||||
|
||||
static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
|
||||
static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
|
||||
static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
|
||||
static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
|
||||
static atomic64_t keypair_counter = ATOMIC64_INIT(0);
|
||||
|
||||
void __init wg_noise_init(void)
|
||||
{
|
||||
struct blake2s_state blake;
|
||||
|
||||
blake2s(handshake_init_chaining_key, handshake_name, NULL,
|
||||
NOISE_HASH_LEN, sizeof(handshake_name), 0);
|
||||
blake2s_init(&blake, NOISE_HASH_LEN);
|
||||
blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
|
||||
blake2s_update(&blake, identifier_name, sizeof(identifier_name));
|
||||
blake2s_final(&blake, handshake_init_hash);
|
||||
}
|
||||
|
||||
/* Must hold peer->handshake.static_identity->lock */
|
||||
void wg_noise_precompute_static_static(struct wg_peer *peer)
|
||||
{
|
||||
down_write(&peer->handshake.lock);
|
||||
if (!peer->handshake.static_identity->has_identity ||
|
||||
!curve25519(peer->handshake.precomputed_static_static,
|
||||
peer->handshake.static_identity->static_private,
|
||||
peer->handshake.remote_static))
|
||||
memset(peer->handshake.precomputed_static_static, 0,
|
||||
NOISE_PUBLIC_KEY_LEN);
|
||||
up_write(&peer->handshake.lock);
|
||||
}
|
||||
|
||||
void wg_noise_handshake_init(struct noise_handshake *handshake,
|
||||
struct noise_static_identity *static_identity,
|
||||
const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
|
||||
const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
|
||||
struct wg_peer *peer)
|
||||
{
|
||||
memset(handshake, 0, sizeof(*handshake));
|
||||
init_rwsem(&handshake->lock);
|
||||
handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
|
||||
handshake->entry.peer = peer;
|
||||
memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
|
||||
if (peer_preshared_key)
|
||||
memcpy(handshake->preshared_key, peer_preshared_key,
|
||||
NOISE_SYMMETRIC_KEY_LEN);
|
||||
handshake->static_identity = static_identity;
|
||||
handshake->state = HANDSHAKE_ZEROED;
|
||||
wg_noise_precompute_static_static(peer);
|
||||
}
|
||||
|
||||
static void handshake_zero(struct noise_handshake *handshake)
|
||||
{
|
||||
memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
|
||||
memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
|
||||
memset(&handshake->hash, 0, NOISE_HASH_LEN);
|
||||
memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
|
||||
handshake->remote_index = 0;
|
||||
handshake->state = HANDSHAKE_ZEROED;
|
||||
}
|
||||
|
||||
void wg_noise_handshake_clear(struct noise_handshake *handshake)
|
||||
{
|
||||
down_write(&handshake->lock);
|
||||
wg_index_hashtable_remove(
|
||||
handshake->entry.peer->device->index_hashtable,
|
||||
&handshake->entry);
|
||||
handshake_zero(handshake);
|
||||
up_write(&handshake->lock);
|
||||
}
|
||||
|
||||
static struct noise_keypair *keypair_create(struct wg_peer *peer)
|
||||
{
|
||||
struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
|
||||
|
||||
if (unlikely(!keypair))
|
||||
return NULL;
|
||||
spin_lock_init(&keypair->receiving_counter.lock);
|
||||
keypair->internal_id = atomic64_inc_return(&keypair_counter);
|
||||
keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
|
||||
keypair->entry.peer = peer;
|
||||
kref_init(&keypair->refcount);
|
||||
return keypair;
|
||||
}
|
||||
|
||||
static void keypair_free_rcu(struct rcu_head *rcu)
|
||||
{
|
||||
kfree_sensitive(container_of(rcu, struct noise_keypair, rcu));
|
||||
}
|
||||
|
||||
static void keypair_free_kref(struct kref *kref)
|
||||
{
|
||||
struct noise_keypair *keypair =
|
||||
container_of(kref, struct noise_keypair, refcount);
|
||||
|
||||
net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
|
||||
keypair->entry.peer->device->dev->name,
|
||||
keypair->internal_id,
|
||||
keypair->entry.peer->internal_id);
|
||||
wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
|
||||
&keypair->entry);
|
||||
call_rcu(&keypair->rcu, keypair_free_rcu);
|
||||
}
|
||||
|
||||
void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
|
||||
{
|
||||
if (unlikely(!keypair))
|
||||
return;
|
||||
if (unlikely(unreference_now))
|
||||
wg_index_hashtable_remove(
|
||||
keypair->entry.peer->device->index_hashtable,
|
||||
&keypair->entry);
|
||||
kref_put(&keypair->refcount, keypair_free_kref);
|
||||
}
|
||||
|
||||
struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
|
||||
{
|
||||
RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
|
||||
"Taking noise keypair reference without holding the RCU BH read lock");
|
||||
if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
|
||||
return NULL;
|
||||
return keypair;
|
||||
}
|
||||
|
||||
void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
|
||||
{
|
||||
struct noise_keypair *old;
|
||||
|
||||
spin_lock_bh(&keypairs->keypair_update_lock);
|
||||
|
||||
/* We zero the next_keypair before zeroing the others, so that
|
||||
* wg_noise_received_with_keypair returns early before subsequent ones
|
||||
* are zeroed.
|
||||
*/
|
||||
old = rcu_dereference_protected(keypairs->next_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
RCU_INIT_POINTER(keypairs->next_keypair, NULL);
|
||||
wg_noise_keypair_put(old, true);
|
||||
|
||||
old = rcu_dereference_protected(keypairs->previous_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
|
||||
wg_noise_keypair_put(old, true);
|
||||
|
||||
old = rcu_dereference_protected(keypairs->current_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
RCU_INIT_POINTER(keypairs->current_keypair, NULL);
|
||||
wg_noise_keypair_put(old, true);
|
||||
|
||||
spin_unlock_bh(&keypairs->keypair_update_lock);
|
||||
}
|
||||
|
||||
void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
|
||||
{
|
||||
struct noise_keypair *keypair;
|
||||
|
||||
wg_noise_handshake_clear(&peer->handshake);
|
||||
wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
|
||||
|
||||
spin_lock_bh(&peer->keypairs.keypair_update_lock);
|
||||
keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
|
||||
lockdep_is_held(&peer->keypairs.keypair_update_lock));
|
||||
if (keypair)
|
||||
keypair->sending.is_valid = false;
|
||||
keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
|
||||
lockdep_is_held(&peer->keypairs.keypair_update_lock));
|
||||
if (keypair)
|
||||
keypair->sending.is_valid = false;
|
||||
spin_unlock_bh(&peer->keypairs.keypair_update_lock);
|
||||
}
|
||||
|
||||
static void add_new_keypair(struct noise_keypairs *keypairs,
|
||||
struct noise_keypair *new_keypair)
|
||||
{
|
||||
struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
|
||||
|
||||
spin_lock_bh(&keypairs->keypair_update_lock);
|
||||
previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
next_keypair = rcu_dereference_protected(keypairs->next_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
current_keypair = rcu_dereference_protected(keypairs->current_keypair,
|
||||
lockdep_is_held(&keypairs->keypair_update_lock));
|
||||
if (new_keypair->i_am_the_initiator) {
|
||||
/* If we're the initiator, it means we've sent a handshake, and
|
||||
* received a confirmation response, which means this new
|
||||
* keypair can now be used.
|
||||
*/
|
||||
if (next_keypair) {
|
||||
/* If there already was a next keypair pending, we
|
||||
* demote it to be the previous keypair, and free the
|
||||
* existing current. Note that this means KCI can result
|
||||
* in this transition. It would perhaps be more sound to
|
||||
* always just get rid of the unused next keypair
|
||||
* instead of putting it in the previous slot, but this
|
||||
* might be a bit less robust. Something to think about
|
||||
* for the future.
|
||||
*/
|
||||
RCU_INIT_POINTER(keypairs->next_keypair, NULL);
|
||||
rcu_assign_pointer(keypairs->previous_keypair,
|
||||
next_keypair);
|
||||
wg_noise_keypair_put(current_keypair, true);
|
||||
} else /* If there wasn't an existing next keypair, we replace
|
||||
* the previous with the current one.
|
||||
*/
|
||||
rcu_assign_pointer(keypairs->previous_keypair,
|
||||
current_keypair);
|
||||
/* At this point we can get rid of the old previous keypair, and
|
||||
* set up the new keypair.
|
||||
*/
|
||||
wg_noise_keypair_put(previous_keypair, true);
|
||||
rcu_assign_pointer(keypairs->current_keypair, new_keypair);
|
||||
} else {
|
||||
/* If we're the responder, it means we can't use the new keypair
|
||||
* until we receive confirmation via the first data packet, so
|
||||
* we get rid of the existing previous one, the possibly
|
||||
* existing next one, and slide in the new next one.
|
||||
*/
|
||||
rcu_assign_pointer(keypairs->next_keypair, new_keypair);
|
||||
wg_noise_keypair_put(next_keypair, true);
|
||||
RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
|
||||
wg_noise_keypair_put(previous_keypair, true);
|
||||
}
|
||||
spin_unlock_bh(&keypairs->keypair_update_lock);
|
||||
}

bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
				    struct noise_keypair *received_keypair)
{
	struct noise_keypair *old_keypair;
	bool key_is_new;

	/* We first check without taking the spinlock. */
	key_is_new = received_keypair ==
		     rcu_access_pointer(keypairs->next_keypair);
	if (likely(!key_is_new))
		return false;

	spin_lock_bh(&keypairs->keypair_update_lock);
	/* After locking, we double check that things didn't change from
	 * beneath us.
	 */
	if (unlikely(received_keypair !=
		     rcu_dereference_protected(keypairs->next_keypair,
			lockdep_is_held(&keypairs->keypair_update_lock)))) {
		spin_unlock_bh(&keypairs->keypair_update_lock);
		return false;
	}

	/* When we've finally received the confirmation, we slide the next
	 * into the current, the current into the previous, and get rid of
	 * the old previous.
	 */
	old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
			lockdep_is_held(&keypairs->keypair_update_lock));
	rcu_assign_pointer(keypairs->previous_keypair,
		rcu_dereference_protected(keypairs->current_keypair,
			lockdep_is_held(&keypairs->keypair_update_lock)));
	wg_noise_keypair_put(old_keypair, true);
	rcu_assign_pointer(keypairs->current_keypair, received_keypair);
	RCU_INIT_POINTER(keypairs->next_keypair, NULL);

	spin_unlock_bh(&keypairs->keypair_update_lock);
	return true;
}
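
/* The function above is an instance of double-checked locking: a cheap
 * unlocked read filters out the common case, and the check is repeated
 * under the lock before committing. A generic sketch of the pattern
 * (illustrative only; the struct and names are hypothetical):
 */
#if 0
struct guarded { void *ptr; spinlock_t lock; };

static bool transition_if_still(struct guarded *g, void *expected)
{
	if (READ_ONCE(g->ptr) != expected)	/* fast path, no lock */
		return false;
	spin_lock_bh(&g->lock);
	if (g->ptr != expected) {		/* recheck under the lock */
		spin_unlock_bh(&g->lock);
		return false;
	}
	/* ... perform the state transition here ... */
	spin_unlock_bh(&g->lock);
	return true;
}
#endif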

/* Must hold static_identity->lock */
void wg_noise_set_static_identity_private_key(
	struct noise_static_identity *static_identity,
	const u8 private_key[NOISE_PUBLIC_KEY_LEN])
{
	memcpy(static_identity->static_private, private_key,
	       NOISE_PUBLIC_KEY_LEN);
	curve25519_clamp_secret(static_identity->static_private);
	static_identity->has_identity = curve25519_generate_public(
		static_identity->static_public, private_key);
}
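
/* For reference, the clamping above is the standard curve25519 secret-key
 * conditioning from RFC 7748. Spelled out bit by bit (illustrative sketch,
 * not part of this file):
 */
#if 0
static void clamp_sketch(u8 secret[32])
{
	secret[0] &= 248;	/* clear the 3 low bits: multiple of cofactor 8 */
	secret[31] &= 127;	/* clear the top bit */
	secret[31] |= 64;	/* set bit 254, fixing the scalar's magnitude */
}
#endif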

/* This is Hugo Krawczyk's HKDF:
 *  - https://eprint.iacr.org/2010/264.pdf
 *  - https://tools.ietf.org/html/rfc5869
 */
static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
		size_t first_len, size_t second_len, size_t third_len,
		size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
{
	u8 output[BLAKE2S_HASH_SIZE + 1];
	u8 secret[BLAKE2S_HASH_SIZE];

	WARN_ON(IS_ENABLED(DEBUG) &&
		(first_len > BLAKE2S_HASH_SIZE ||
		 second_len > BLAKE2S_HASH_SIZE ||
		 third_len > BLAKE2S_HASH_SIZE ||
		 ((second_len || second_dst || third_len || third_dst) &&
		  (!first_len || !first_dst)) ||
		 ((third_len || third_dst) && (!second_len || !second_dst))));

	/* Extract entropy from data into secret */
	blake2s_hmac(secret, data, chaining_key, BLAKE2S_HASH_SIZE, data_len,
		     NOISE_HASH_LEN);

	if (!first_dst || !first_len)
		goto out;

	/* Expand first key: key = secret, data = 0x1 */
	output[0] = 1;
	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE, 1,
		     BLAKE2S_HASH_SIZE);
	memcpy(first_dst, output, first_len);

	if (!second_dst || !second_len)
		goto out;

	/* Expand second key: key = secret, data = first-key || 0x2 */
	output[BLAKE2S_HASH_SIZE] = 2;
	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
		     BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
	memcpy(second_dst, output, second_len);

	if (!third_dst || !third_len)
		goto out;

	/* Expand third key: key = secret, data = second-key || 0x3 */
	output[BLAKE2S_HASH_SIZE] = 3;
	blake2s_hmac(output, output, secret, BLAKE2S_HASH_SIZE,
		     BLAKE2S_HASH_SIZE + 1, BLAKE2S_HASH_SIZE);
	memcpy(third_dst, output, third_len);

out:
	/* Clear sensitive data from stack */
	memzero_explicit(secret, BLAKE2S_HASH_SIZE);
	memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
}
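
/* In RFC 5869 terms, the function above computes (illustrative summary):
 *
 *   secret = HMAC(chaining_key, data)        -- extract
 *   T1 = HMAC(secret, 0x01)                  -- expand -> first_dst
 *   T2 = HMAC(secret, T1 || 0x02)            -- expand -> second_dst
 *   T3 = HMAC(secret, T2 || 0x03)            -- expand -> third_dst
 *
 * with BLAKE2s as the underlying hash, which is why each output is capped
 * at BLAKE2S_HASH_SIZE bytes.
 */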

static void derive_keys(struct noise_symmetric_key *first_dst,
			struct noise_symmetric_key *second_dst,
			const u8 chaining_key[NOISE_HASH_LEN])
{
	u64 birthdate = ktime_get_coarse_boottime_ns();

	kdf(first_dst->key, second_dst->key, NULL, NULL,
	    NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
	    chaining_key);
	first_dst->birthdate = second_dst->birthdate = birthdate;
	first_dst->is_valid = second_dst->is_valid = true;
}

static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
				u8 key[NOISE_SYMMETRIC_KEY_LEN],
				const u8 private[NOISE_PUBLIC_KEY_LEN],
				const u8 public[NOISE_PUBLIC_KEY_LEN])
{
	u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];

	if (unlikely(!curve25519(dh_calculation, private, public)))
		return false;
	kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
	    NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
	memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
	return true;
}

static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
					    u8 key[NOISE_SYMMETRIC_KEY_LEN],
					    const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
{
	static u8 zero_point[NOISE_PUBLIC_KEY_LEN];

	if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
		return false;
	kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
	    NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
	    chaining_key);
	return true;
}

static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
	struct blake2s_state blake;

	blake2s_init(&blake, NOISE_HASH_LEN);
	blake2s_update(&blake, hash, NOISE_HASH_LEN);
	blake2s_update(&blake, src, src_len);
	blake2s_final(&blake, hash);
}

static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
		    u8 key[NOISE_SYMMETRIC_KEY_LEN],
		    const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
{
	u8 temp_hash[NOISE_HASH_LEN];

	kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
	    NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
	mix_hash(hash, temp_hash, NOISE_HASH_LEN);
	memzero_explicit(temp_hash, NOISE_HASH_LEN);
}

static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
			   u8 hash[NOISE_HASH_LEN],
			   const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
{
	memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
	memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
	mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
}

static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
			    size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
			    u8 hash[NOISE_HASH_LEN])
{
	chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
				 NOISE_HASH_LEN,
				 0 /* Always zero for Noise_IK */, key);
	mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
}

static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
			    size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
			    u8 hash[NOISE_HASH_LEN])
{
	if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
				      hash, NOISE_HASH_LEN,
				      0 /* Always zero for Noise_IK */, key))
		return false;
	mix_hash(hash, src_ciphertext, src_len);
	return true;
}

static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
			      const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
			      u8 chaining_key[NOISE_HASH_LEN],
			      u8 hash[NOISE_HASH_LEN])
{
	if (ephemeral_dst != ephemeral_src)
		memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
	mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
	kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0,
	    NOISE_PUBLIC_KEY_LEN, chaining_key);
}

static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
{
	struct timespec64 now;

	ktime_get_real_ts64(&now);

	/* In order to prevent some sort of infoleak from precise timers, we
	 * round down the nanoseconds part to the closest rounded-down power of
	 * two to the maximum initiations per second allowed anyway by the
	 * implementation.
	 */
	now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
		rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));

	/* https://cr.yp.to/libtai/tai64.html */
	*(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec);
	*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
}
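
/* Worked example (illustrative): the 12-byte output is an 8-byte big-endian
 * second count offset by 2^62 (the constant 0x400000000000000a is 2^62 plus
 * the 10 s TAI-UTC offset) followed by a 4-byte big-endian nanosecond field.
 * Assuming the usual INITIATIONS_PER_SECOND of 50, the rounding above is
 * rounddown_pow_of_two(10^9 / 50) = 2^24, so tv_nsec is truncated to
 * multiples of 16777216 ns (~16.8 ms), leaving at most ~6 bits of sub-second
 * timing information.
 */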

bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
				     struct noise_handshake *handshake)
{
	u8 timestamp[NOISE_TIMESTAMP_LEN];
	u8 key[NOISE_SYMMETRIC_KEY_LEN];
	bool ret = false;

	/* We need to wait for crng _before_ taking any locks, since
	 * curve25519_generate_secret uses get_random_bytes_wait.
	 */
	wait_for_random_bytes();

	down_read(&handshake->static_identity->lock);
	down_write(&handshake->lock);

	if (unlikely(!handshake->static_identity->has_identity))
		goto out;

	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);

	handshake_init(handshake->chaining_key, handshake->hash,
		       handshake->remote_static);

	/* e */
	curve25519_generate_secret(handshake->ephemeral_private);
	if (!curve25519_generate_public(dst->unencrypted_ephemeral,
					handshake->ephemeral_private))
		goto out;
	message_ephemeral(dst->unencrypted_ephemeral,
			  dst->unencrypted_ephemeral, handshake->chaining_key,
			  handshake->hash);

	/* es */
	if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
		    handshake->remote_static))
		goto out;

	/* s */
	message_encrypt(dst->encrypted_static,
			handshake->static_identity->static_public,
			NOISE_PUBLIC_KEY_LEN, key, handshake->hash);

	/* ss */
	if (!mix_precomputed_dh(handshake->chaining_key, key,
				handshake->precomputed_static_static))
		goto out;

	/* {t} */
	tai64n_now(timestamp);
	message_encrypt(dst->encrypted_timestamp, timestamp,
			NOISE_TIMESTAMP_LEN, key, handshake->hash);

	dst->sender_index = wg_index_hashtable_insert(
		handshake->entry.peer->device->index_hashtable,
		&handshake->entry);

	handshake->state = HANDSHAKE_CREATED_INITIATION;
	ret = true;

out:
	up_write(&handshake->lock);
	up_read(&handshake->static_identity->lock);
	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
	return ret;
}

struct wg_peer *
wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
				      struct wg_device *wg)
{
	struct wg_peer *peer = NULL, *ret_peer = NULL;
	struct noise_handshake *handshake;
	bool replay_attack, flood_attack;
	u8 key[NOISE_SYMMETRIC_KEY_LEN];
	u8 chaining_key[NOISE_HASH_LEN];
	u8 hash[NOISE_HASH_LEN];
	u8 s[NOISE_PUBLIC_KEY_LEN];
	u8 e[NOISE_PUBLIC_KEY_LEN];
	u8 t[NOISE_TIMESTAMP_LEN];
	u64 initiation_consumption;

	down_read(&wg->static_identity.lock);
	if (unlikely(!wg->static_identity.has_identity))
		goto out;

	handshake_init(chaining_key, hash, wg->static_identity.static_public);

	/* e */
	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);

	/* es */
	if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
		goto out;

	/* s */
	if (!message_decrypt(s, src->encrypted_static,
			     sizeof(src->encrypted_static), key, hash))
		goto out;

	/* Lookup which peer we're actually talking to */
	peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s);
	if (!peer)
		goto out;
	handshake = &peer->handshake;

	/* ss */
	if (!mix_precomputed_dh(chaining_key, key,
				handshake->precomputed_static_static))
		goto out;

	/* {t} */
	if (!message_decrypt(t, src->encrypted_timestamp,
			     sizeof(src->encrypted_timestamp), key, hash))
		goto out;

	down_read(&handshake->lock);
	replay_attack = memcmp(t, handshake->latest_timestamp,
			       NOISE_TIMESTAMP_LEN) <= 0;
	flood_attack = (s64)handshake->last_initiation_consumption +
		       NSEC_PER_SEC / INITIATIONS_PER_SECOND >
		       (s64)ktime_get_coarse_boottime_ns();
	up_read(&handshake->lock);
	if (replay_attack || flood_attack)
		goto out;

	/* Success! Copy everything to peer */
	down_write(&handshake->lock);
	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
	if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0)
		memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
	handshake->remote_index = src->sender_index;
	initiation_consumption = ktime_get_coarse_boottime_ns();
	if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0)
		handshake->last_initiation_consumption = initiation_consumption;
	handshake->state = HANDSHAKE_CONSUMED_INITIATION;
	up_write(&handshake->lock);
	ret_peer = peer;

out:
	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
	memzero_explicit(hash, NOISE_HASH_LEN);
	memzero_explicit(chaining_key, NOISE_HASH_LEN);
	up_read(&wg->static_identity.lock);
	if (!ret_peer)
		wg_peer_put(peer);
	return ret_peer;
}

bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
					struct noise_handshake *handshake)
{
	u8 key[NOISE_SYMMETRIC_KEY_LEN];
	bool ret = false;

	/* We need to wait for crng _before_ taking any locks, since
	 * curve25519_generate_secret uses get_random_bytes_wait.
	 */
	wait_for_random_bytes();

	down_read(&handshake->static_identity->lock);
	down_write(&handshake->lock);

	if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
		goto out;

	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
	dst->receiver_index = handshake->remote_index;

	/* e */
	curve25519_generate_secret(handshake->ephemeral_private);
	if (!curve25519_generate_public(dst->unencrypted_ephemeral,
					handshake->ephemeral_private))
		goto out;
	message_ephemeral(dst->unencrypted_ephemeral,
			  dst->unencrypted_ephemeral, handshake->chaining_key,
			  handshake->hash);

	/* ee */
	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
		    handshake->remote_ephemeral))
		goto out;

	/* se */
	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
		    handshake->remote_static))
		goto out;

	/* psk */
	mix_psk(handshake->chaining_key, handshake->hash, key,
		handshake->preshared_key);

	/* {} */
	message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);

	dst->sender_index = wg_index_hashtable_insert(
		handshake->entry.peer->device->index_hashtable,
		&handshake->entry);

	handshake->state = HANDSHAKE_CREATED_RESPONSE;
	ret = true;

out:
	up_write(&handshake->lock);
	up_read(&handshake->static_identity->lock);
	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
	return ret;
}

struct wg_peer *
wg_noise_handshake_consume_response(struct message_handshake_response *src,
				    struct wg_device *wg)
{
	enum noise_handshake_state state = HANDSHAKE_ZEROED;
	struct wg_peer *peer = NULL, *ret_peer = NULL;
	struct noise_handshake *handshake;
	u8 key[NOISE_SYMMETRIC_KEY_LEN];
	u8 hash[NOISE_HASH_LEN];
	u8 chaining_key[NOISE_HASH_LEN];
	u8 e[NOISE_PUBLIC_KEY_LEN];
	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
	u8 static_private[NOISE_PUBLIC_KEY_LEN];
	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];

	down_read(&wg->static_identity.lock);

	if (unlikely(!wg->static_identity.has_identity))
		goto out;

	handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
		wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
		src->receiver_index, &peer);
	if (unlikely(!handshake))
		goto out;

	down_read(&handshake->lock);
	state = handshake->state;
	memcpy(hash, handshake->hash, NOISE_HASH_LEN);
	memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
	memcpy(ephemeral_private, handshake->ephemeral_private,
	       NOISE_PUBLIC_KEY_LEN);
	memcpy(preshared_key, handshake->preshared_key,
	       NOISE_SYMMETRIC_KEY_LEN);
	up_read(&handshake->lock);

	if (state != HANDSHAKE_CREATED_INITIATION)
		goto fail;

	/* e */
	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);

	/* ee */
	if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
		goto fail;

	/* se */
	if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
		goto fail;

	/* psk */
	mix_psk(chaining_key, hash, key, preshared_key);

	/* {} */
	if (!message_decrypt(NULL, src->encrypted_nothing,
			     sizeof(src->encrypted_nothing), key, hash))
		goto fail;

	/* Success! Copy everything to peer */
	down_write(&handshake->lock);
	/* It's important to check that the state is still the same, while we
	 * have an exclusive lock.
	 */
	if (handshake->state != state) {
		up_write(&handshake->lock);
		goto fail;
	}
	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
	handshake->remote_index = src->sender_index;
	handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
	up_write(&handshake->lock);
	ret_peer = peer;
	goto out;

fail:
	wg_peer_put(peer);
out:
	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
	memzero_explicit(hash, NOISE_HASH_LEN);
	memzero_explicit(chaining_key, NOISE_HASH_LEN);
	memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
	memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
	memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN);
	up_read(&wg->static_identity.lock);
	return ret_peer;
}

bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
				      struct noise_keypairs *keypairs)
{
	struct noise_keypair *new_keypair;
	bool ret = false;

	down_write(&handshake->lock);
	if (handshake->state != HANDSHAKE_CREATED_RESPONSE &&
	    handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
		goto out;

	new_keypair = keypair_create(handshake->entry.peer);
	if (!new_keypair)
		goto out;
	new_keypair->i_am_the_initiator = handshake->state ==
					  HANDSHAKE_CONSUMED_RESPONSE;
	new_keypair->remote_index = handshake->remote_index;

	if (new_keypair->i_am_the_initiator)
		derive_keys(&new_keypair->sending, &new_keypair->receiving,
			    handshake->chaining_key);
	else
		derive_keys(&new_keypair->receiving, &new_keypair->sending,
			    handshake->chaining_key);

	handshake_zero(handshake);
	rcu_read_lock_bh();
	if (likely(!READ_ONCE(container_of(handshake, struct wg_peer,
					   handshake)->is_dead))) {
		add_new_keypair(keypairs, new_keypair);
		net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
				    handshake->entry.peer->device->dev->name,
				    new_keypair->internal_id,
				    handshake->entry.peer->internal_id);
		ret = wg_index_hashtable_replace(
			handshake->entry.peer->device->index_hashtable,
			&handshake->entry, &new_keypair->entry);
	} else {
		kfree_sensitive(new_keypair);
	}
	rcu_read_unlock_bh();

out:
	up_write(&handshake->lock);
	return ret;
}

135
net/wireguard/noise.h
Normal file
@@ -0,0 +1,135 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */
#ifndef _WG_NOISE_H
#define _WG_NOISE_H

#include "messages.h"
#include "peerlookup.h"

#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/kref.h>

struct noise_replay_counter {
	u64 counter;
	spinlock_t lock;
	unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
};

struct noise_symmetric_key {
	u8 key[NOISE_SYMMETRIC_KEY_LEN];
	u64 birthdate;
	bool is_valid;
};

struct noise_keypair {
	struct index_hashtable_entry entry;
	struct noise_symmetric_key sending;
	atomic64_t sending_counter;
	struct noise_symmetric_key receiving;
	struct noise_replay_counter receiving_counter;
	__le32 remote_index;
	bool i_am_the_initiator;
	struct kref refcount;
	struct rcu_head rcu;
	u64 internal_id;
};

struct noise_keypairs {
	struct noise_keypair __rcu *current_keypair;
	struct noise_keypair __rcu *previous_keypair;
	struct noise_keypair __rcu *next_keypair;
	spinlock_t keypair_update_lock;
};

struct noise_static_identity {
	u8 static_public[NOISE_PUBLIC_KEY_LEN];
	u8 static_private[NOISE_PUBLIC_KEY_LEN];
	struct rw_semaphore lock;
	bool has_identity;
};

enum noise_handshake_state {
	HANDSHAKE_ZEROED,
	HANDSHAKE_CREATED_INITIATION,
	HANDSHAKE_CONSUMED_INITIATION,
	HANDSHAKE_CREATED_RESPONSE,
	HANDSHAKE_CONSUMED_RESPONSE
};

struct noise_handshake {
	struct index_hashtable_entry entry;

	enum noise_handshake_state state;
	u64 last_initiation_consumption;

	struct noise_static_identity *static_identity;

	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
	u8 remote_static[NOISE_PUBLIC_KEY_LEN];
	u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
	u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];

	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];

	u8 hash[NOISE_HASH_LEN];
	u8 chaining_key[NOISE_HASH_LEN];

	u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
	__le32 remote_index;

	/* Protects all members except the immutable (after noise_handshake_
	 * init): remote_static, precomputed_static_static, static_identity.
	 */
	struct rw_semaphore lock;
};

struct wg_device;

void wg_noise_init(void);
void wg_noise_handshake_init(struct noise_handshake *handshake,
			     struct noise_static_identity *static_identity,
			     const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
			     const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
			     struct wg_peer *peer);
void wg_noise_handshake_clear(struct noise_handshake *handshake);
static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
{
	atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() -
				   (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
}
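
/* Illustrative arithmetic: assuming the usual REKEY_TIMEOUT of 5 seconds,
 * the helper above backdates the timestamp by (5 + 1) * 10^9 ns, so any
 * "has at least REKEY_TIMEOUT elapsed since the last sent handshake?" check
 * passes immediately after the reset.
 */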

void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
				    struct noise_keypair *received_keypair);
void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);

void wg_noise_set_static_identity_private_key(
	struct noise_static_identity *static_identity,
	const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
void wg_noise_precompute_static_static(struct wg_peer *peer);

bool
wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
				     struct noise_handshake *handshake);
struct wg_peer *
wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
				      struct wg_device *wg);

bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
					struct noise_handshake *handshake);
struct wg_peer *
wg_noise_handshake_consume_response(struct message_handshake_response *src,
				    struct wg_device *wg);

bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
				      struct noise_keypairs *keypairs);

#endif /* _WG_NOISE_H */

237
net/wireguard/peer.c
Normal file
@@ -0,0 +1,237 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "peer.h"
#include "device.h"
#include "queueing.h"
#include "timers.h"
#include "peerlookup.h"
#include "noise.h"

#include <linux/kref.h>
#include <linux/lockdep.h>
#include <linux/rcupdate.h>
#include <linux/list.h>

static atomic64_t peer_counter = ATOMIC64_INIT(0);

struct wg_peer *wg_peer_create(struct wg_device *wg,
			       const u8 public_key[NOISE_PUBLIC_KEY_LEN],
			       const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
{
	struct wg_peer *peer;
	int ret = -ENOMEM;

	lockdep_assert_held(&wg->device_update_lock);

	if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
		return ERR_PTR(ret);

	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
	if (unlikely(!peer))
		return ERR_PTR(ret);
	peer->device = wg;

	wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
				public_key, preshared_key, peer);
	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
		goto err_1;
	if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
				 MAX_QUEUED_PACKETS))
		goto err_2;
	if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
				 MAX_QUEUED_PACKETS))
		goto err_3;

	peer->internal_id = atomic64_inc_return(&peer_counter);
	peer->serial_work_cpu = nr_cpumask_bits;
	wg_cookie_init(&peer->latest_cookie);
	wg_timers_init(peer);
	wg_cookie_checker_precompute_peer_keys(peer);
	spin_lock_init(&peer->keypairs.keypair_update_lock);
	INIT_WORK(&peer->transmit_handshake_work,
		  wg_packet_handshake_send_worker);
	rwlock_init(&peer->endpoint_lock);
	kref_init(&peer->refcount);
	skb_queue_head_init(&peer->staged_packet_queue);
	wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
	set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
	netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
		       NAPI_POLL_WEIGHT);
	napi_enable(&peer->napi);
	list_add_tail(&peer->peer_list, &wg->peer_list);
	INIT_LIST_HEAD(&peer->allowedips_list);
	wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
	++wg->num_peers;
	pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
	return peer;

err_3:
	wg_packet_queue_free(&peer->tx_queue, false);
err_2:
	dst_cache_destroy(&peer->endpoint_cache);
err_1:
	kfree(peer);
	return ERR_PTR(ret);
}

struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
			 "Taking peer reference without holding the RCU read lock");
	if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
		return NULL;
	return peer;
}

static void peer_make_dead(struct wg_peer *peer)
{
	/* Remove from configuration-time lookup structures. */
	list_del_init(&peer->peer_list);
	wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
				     &peer->device->device_update_lock);
	wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);

	/* Mark as dead, so that we don't allow jumping contexts after. */
	WRITE_ONCE(peer->is_dead, true);

	/* The caller must now synchronize_rcu() for this to take effect. */
}

static void peer_remove_after_dead(struct wg_peer *peer)
{
	WARN_ON(!peer->is_dead);

	/* No more keypairs can be created for this peer, since is_dead protects
	 * add_new_keypair, so we can now destroy existing ones.
	 */
	wg_noise_keypairs_clear(&peer->keypairs);

	/* Destroy all ongoing timers that were in-flight at the beginning of
	 * this function.
	 */
	wg_timers_stop(peer);

	/* The transition between packet encryption/decryption queues isn't
	 * guarded by is_dead, but each reference's life is strictly bounded by
	 * two generations: once for parallel crypto and once for serial
	 * ingestion, so we can simply flush twice, and be sure that we no
	 * longer have references inside these queues.
	 */

	/* a) For encrypt/decrypt. */
	flush_workqueue(peer->device->packet_crypt_wq);
	/* b.1) For send (but not receive, since that's napi). */
	flush_workqueue(peer->device->packet_crypt_wq);
	/* b.2.1) For receive (but not send, since that's wq). */
	napi_disable(&peer->napi);
	/* b.2.2) It's now safe to remove the napi struct, which must be done
	 * here from process context.
	 */
	netif_napi_del(&peer->napi);

	/* Ensure any workstructs we own (like transmit_handshake_work or
	 * clear_peer_work) no longer are in use.
	 */
	flush_workqueue(peer->device->handshake_send_wq);

	/* After the above flushes, a peer might still be active in a few
	 * different contexts: 1) from xmit(), before hitting is_dead and
	 * returning, 2) from wg_packet_consume_data(), before hitting is_dead
	 * and returning, 3) from wg_receive_handshake_packet() after a point
	 * where it has processed an incoming handshake packet, but where
	 * all calls to pass it off to timers fail because of is_dead. We won't
	 * have new references in (1) eventually, because we're removed from
	 * allowedips; we won't have new references in (2) eventually, because
	 * wg_index_hashtable_lookup will always return NULL, since we removed
	 * all existing keypairs and no more can be created; we won't have new
	 * references in (3) eventually, because we're removed from the pubkey
	 * hash table, which allows for a maximum of one handshake response,
	 * via the still-uncleared index hashtable entry, but not more than one,
	 * and in wg_cookie_message_consume, the lookup eventually gets a peer
	 * with a refcount of zero, so no new reference is taken.
	 */

	--peer->device->num_peers;
	wg_peer_put(peer);
}

/* We have a separate "remove" function to make sure that all active places
 * where a peer is currently operating will eventually come to an end and not
 * pass their reference onto another context.
 */
void wg_peer_remove(struct wg_peer *peer)
{
	if (unlikely(!peer))
		return;
	lockdep_assert_held(&peer->device->device_update_lock);

	peer_make_dead(peer);
	synchronize_rcu();
	peer_remove_after_dead(peer);
}

void wg_peer_remove_all(struct wg_device *wg)
{
	struct wg_peer *peer, *temp;
	LIST_HEAD(dead_peers);

	lockdep_assert_held(&wg->device_update_lock);

	/* Avoid having to traverse individually for each one. */
	wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);

	list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
		peer_make_dead(peer);
		list_add_tail(&peer->peer_list, &dead_peers);
	}
	synchronize_rcu();
	list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
		peer_remove_after_dead(peer);
}

static void rcu_release(struct rcu_head *rcu)
{
	struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);

	dst_cache_destroy(&peer->endpoint_cache);
	wg_packet_queue_free(&peer->rx_queue, false);
	wg_packet_queue_free(&peer->tx_queue, false);

	/* The final zeroing takes care of clearing any remaining handshake key
	 * material and other potentially sensitive information.
	 */
	kfree_sensitive(peer);
}

static void kref_release(struct kref *refcount)
{
	struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);

	pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
		 peer->device->dev->name, peer->internal_id,
		 &peer->endpoint.addr);

	/* Remove ourself from dynamic runtime lookup structures, now that the
	 * last reference is gone.
	 */
	wg_index_hashtable_remove(peer->device->index_hashtable,
				  &peer->handshake.entry);

	/* Remove any lingering packets that didn't have a chance to be
	 * transmitted.
	 */
	wg_packet_purge_staged_packets(peer);

	/* Free the memory used. */
	call_rcu(&peer->rcu, rcu_release);
}

void wg_peer_put(struct wg_peer *peer)
{
	if (unlikely(!peer))
		return;
	kref_put(&peer->refcount, kref_release);
}

83
net/wireguard/peer.h
Normal file
@@ -0,0 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_PEER_H
#define _WG_PEER_H

#include "device.h"
#include "noise.h"
#include "cookie.h"

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/spinlock.h>
#include <linux/kref.h>
#include <net/dst_cache.h>

struct wg_device;

struct endpoint {
	union {
		struct sockaddr addr;
		struct sockaddr_in addr4;
		struct sockaddr_in6 addr6;
	};
	union {
		struct {
			struct in_addr src4;
			/* Essentially the same as addr6->scope_id */
			int src_if4;
		};
		struct in6_addr src6;
	};
};

struct wg_peer {
	struct wg_device *device;
	struct crypt_queue tx_queue, rx_queue;
	struct sk_buff_head staged_packet_queue;
	int serial_work_cpu;
	struct noise_keypairs keypairs;
	struct endpoint endpoint;
	struct dst_cache endpoint_cache;
	rwlock_t endpoint_lock;
	struct noise_handshake handshake;
	atomic64_t last_sent_handshake;
	struct work_struct transmit_handshake_work, clear_peer_work;
	struct cookie latest_cookie;
	struct hlist_node pubkey_hash;
	u64 rx_bytes, tx_bytes;
	struct timer_list timer_retransmit_handshake, timer_send_keepalive;
	struct timer_list timer_new_handshake, timer_zero_key_material;
	struct timer_list timer_persistent_keepalive;
	unsigned int timer_handshake_attempts;
	u16 persistent_keepalive_interval;
	bool timer_need_another_keepalive;
	bool sent_lastminute_handshake;
	struct timespec64 walltime_last_handshake;
	struct kref refcount;
	struct rcu_head rcu;
	struct list_head peer_list;
	struct list_head allowedips_list;
	u64 internal_id;
	struct napi_struct napi;
	bool is_dead;
};

struct wg_peer *wg_peer_create(struct wg_device *wg,
			       const u8 public_key[NOISE_PUBLIC_KEY_LEN],
			       const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);

struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
{
	kref_get(&peer->refcount);
	return peer;
}
void wg_peer_put(struct wg_peer *peer);
void wg_peer_remove(struct wg_peer *peer);
void wg_peer_remove_all(struct wg_device *wg);

#endif /* _WG_PEER_H */

226
net/wireguard/peerlookup.c
Normal file
@@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "peerlookup.h"
#include "peer.h"
#include "noise.h"

static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
					const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
{
	/* siphash gives us a secure 64bit number based on a random key. Since
	 * the bits are uniformly distributed, we can then mask off to get the
	 * bits we need.
	 */
	const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);

	return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
}
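
/* Illustrative numbers: peerlookup.h declares this table with 11 bits
 * (2048 buckets), so HASH_SIZE(table->hashtable) - 1 == 0x7ff and the
 * reduction above is a single AND of the siphash output. This only works
 * because the bucket count is a power of two.
 */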

struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
{
	struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	get_random_bytes(&table->key, sizeof(table->key));
	hash_init(table->hashtable);
	mutex_init(&table->lock);
	return table;
}

void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
			     struct wg_peer *peer)
{
	mutex_lock(&table->lock);
	hlist_add_head_rcu(&peer->pubkey_hash,
			   pubkey_bucket(table, peer->handshake.remote_static));
	mutex_unlock(&table->lock);
}

void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
				struct wg_peer *peer)
{
	mutex_lock(&table->lock);
	hlist_del_init_rcu(&peer->pubkey_hash);
	mutex_unlock(&table->lock);
}

/* Returns a strong reference to a peer */
struct wg_peer *
wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
{
	struct wg_peer *iter_peer, *peer = NULL;

	rcu_read_lock_bh();
	hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
				    pubkey_hash) {
		if (!memcmp(pubkey, iter_peer->handshake.remote_static,
			    NOISE_PUBLIC_KEY_LEN)) {
			peer = iter_peer;
			break;
		}
	}
	peer = wg_peer_get_maybe_zero(peer);
	rcu_read_unlock_bh();
	return peer;
}

static struct hlist_head *index_bucket(struct index_hashtable *table,
				       const __le32 index)
{
	/* Since the indices are random and thus all bits are uniformly
	 * distributed, we can find its bucket simply by masking.
	 */
	return &table->hashtable[(__force u32)index &
				 (HASH_SIZE(table->hashtable) - 1)];
}

struct index_hashtable *wg_index_hashtable_alloc(void)
{
	struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);

	if (!table)
		return NULL;

	hash_init(table->hashtable);
	spin_lock_init(&table->lock);
	return table;
}

/* At the moment, we limit ourselves to 2^20 total peers, which generally might
 * amount to 2^20*3 items in this hashtable. The algorithm below works by
 * picking a random number and testing it. We can see that these limits mean we
 * usually succeed pretty quickly:
 *
 * >>> def calculation(tries, size):
 * ...     return (size / 2**32)**(tries - 1) * (1 - (size / 2**32))
 * ...
 * >>> calculation(1, 2**20 * 3)
 * 0.999267578125
 * >>> calculation(2, 2**20 * 3)
 * 0.0007318854331970215
 * >>> calculation(3, 2**20 * 3)
 * 5.360489012673497e-07
 * >>> calculation(4, 2**20 * 3)
 * 3.9261394135792216e-10
 *
 * At the moment, we don't do any masking, so this algorithm isn't exactly
 * constant time in either the random guessing or in the hash list lookup. We
 * could require a minimum of 3 tries, which would successfully mask the
 * guessing. This would not, however, help with the growing hash lengths, which
 * is another thing to consider moving forward.
 */

__le32 wg_index_hashtable_insert(struct index_hashtable *table,
				 struct index_hashtable_entry *entry)
{
	struct index_hashtable_entry *existing_entry;

	spin_lock_bh(&table->lock);
	hlist_del_init_rcu(&entry->index_hash);
	spin_unlock_bh(&table->lock);

	rcu_read_lock_bh();

search_unused_slot:
	/* First we try to find an unused slot, randomly, while unlocked. */
	entry->index = (__force __le32)get_random_u32();
	hlist_for_each_entry_rcu_bh(existing_entry,
				    index_bucket(table, entry->index),
				    index_hash) {
		if (existing_entry->index == entry->index)
			/* If it's already in use, we continue searching. */
			goto search_unused_slot;
	}

	/* Once we've found an unused slot, we lock it, and then double-check
	 * that nobody else stole it from us.
	 */
	spin_lock_bh(&table->lock);
	hlist_for_each_entry_rcu_bh(existing_entry,
				    index_bucket(table, entry->index),
				    index_hash) {
		if (existing_entry->index == entry->index) {
			spin_unlock_bh(&table->lock);
			/* If it was stolen, we start over. */
			goto search_unused_slot;
		}
	}
	/* Otherwise, we know we have it exclusively (since we're locked),
	 * so we insert.
	 */
	hlist_add_head_rcu(&entry->index_hash,
			   index_bucket(table, entry->index));
	spin_unlock_bh(&table->lock);

	rcu_read_unlock_bh();

	return entry->index;
}
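
/* Illustrative usage (hypothetical caller, mirroring how noise.c uses this
 * pair of functions; not part of the original file):
 */
#if 0
static void example_insert_then_lookup(struct index_hashtable *table,
				       struct index_hashtable_entry *entry,
				       struct wg_peer *peer)
{
	struct index_hashtable_entry *found;
	__le32 sender_index;

	entry->peer = peer;
	entry->type = INDEX_HASHTABLE_HANDSHAKE;
	sender_index = wg_index_hashtable_insert(table, entry);
	/* ... later, when a reply referencing sender_index arrives ... */
	found = wg_index_hashtable_lookup(table, INDEX_HASHTABLE_HANDSHAKE,
					  sender_index, &peer);
}
#endif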

bool wg_index_hashtable_replace(struct index_hashtable *table,
				struct index_hashtable_entry *old,
				struct index_hashtable_entry *new)
{
	bool ret;

	spin_lock_bh(&table->lock);
	ret = !hlist_unhashed(&old->index_hash);
	if (unlikely(!ret))
		goto out;

	new->index = old->index;
	hlist_replace_rcu(&old->index_hash, &new->index_hash);

	/* Calling init here NULLs out index_hash, and in fact after this
	 * function returns, it's theoretically possible for this to get
	 * reinserted elsewhere. That means the RCU lookup below might either
	 * terminate early or jump between buckets, in which case the packet
	 * simply gets dropped, which isn't terrible.
	 */
	INIT_HLIST_NODE(&old->index_hash);
out:
	spin_unlock_bh(&table->lock);
	return ret;
}

void wg_index_hashtable_remove(struct index_hashtable *table,
			       struct index_hashtable_entry *entry)
{
	spin_lock_bh(&table->lock);
	hlist_del_init_rcu(&entry->index_hash);
	spin_unlock_bh(&table->lock);
}

/* Returns a strong reference to an entry->peer */
struct index_hashtable_entry *
wg_index_hashtable_lookup(struct index_hashtable *table,
			  const enum index_hashtable_type type_mask,
			  const __le32 index, struct wg_peer **peer)
{
	struct index_hashtable_entry *iter_entry, *entry = NULL;

	rcu_read_lock_bh();
	hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
				    index_hash) {
		if (iter_entry->index == index) {
			if (likely(iter_entry->type & type_mask))
				entry = iter_entry;
			break;
		}
	}
	if (likely(entry)) {
		entry->peer = wg_peer_get_maybe_zero(entry->peer);
		if (likely(entry->peer))
			*peer = entry->peer;
		else
			entry = NULL;
	}
	rcu_read_unlock_bh();
	return entry;
}

64
net/wireguard/peerlookup.h
Normal file
@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_PEERLOOKUP_H
#define _WG_PEERLOOKUP_H

#include "messages.h"

#include <linux/hashtable.h>
#include <linux/mutex.h>
#include <linux/siphash.h>

struct wg_peer;

struct pubkey_hashtable {
	/* TODO: move to rhashtable */
	DECLARE_HASHTABLE(hashtable, 11);
	siphash_key_t key;
	struct mutex lock;
};

struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
			     struct wg_peer *peer);
void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
				struct wg_peer *peer);
struct wg_peer *
wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
			   const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);

struct index_hashtable {
	/* TODO: move to rhashtable */
	DECLARE_HASHTABLE(hashtable, 13);
	spinlock_t lock;
};

enum index_hashtable_type {
	INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
	INDEX_HASHTABLE_KEYPAIR = 1U << 1
};

struct index_hashtable_entry {
	struct wg_peer *peer;
	struct hlist_node index_hash;
	enum index_hashtable_type type;
	__le32 index;
};

struct index_hashtable *wg_index_hashtable_alloc(void);
__le32 wg_index_hashtable_insert(struct index_hashtable *table,
				 struct index_hashtable_entry *entry);
bool wg_index_hashtable_replace(struct index_hashtable *table,
				struct index_hashtable_entry *old,
				struct index_hashtable_entry *new);
void wg_index_hashtable_remove(struct index_hashtable *table,
			       struct index_hashtable_entry *entry);
struct index_hashtable_entry *
wg_index_hashtable_lookup(struct index_hashtable *table,
			  const enum index_hashtable_type type_mask,
			  const __le32 index, struct wg_peer **peer);

#endif /* _WG_PEERLOOKUP_H */

55
net/wireguard/queueing.c
Normal file
@@ -0,0 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"

struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
{
	int cpu;
	struct multicore_worker __percpu *worker =
		alloc_percpu(struct multicore_worker);

	if (!worker)
		return NULL;

	for_each_possible_cpu(cpu) {
		per_cpu_ptr(worker, cpu)->ptr = ptr;
		INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
	}
	return worker;
}

int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
			 bool multicore, unsigned int len)
{
	int ret;

	memset(queue, 0, sizeof(*queue));
	ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
	if (ret)
		return ret;
	if (function) {
		if (multicore) {
			queue->worker = wg_packet_percpu_multicore_worker_alloc(
				function, queue);
			if (!queue->worker) {
				ptr_ring_cleanup(&queue->ring, NULL);
				return -ENOMEM;
			}
		} else {
			INIT_WORK(&queue->work, function);
		}
	}
	return 0;
}

void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
{
	if (multicore)
		free_percpu(queue->worker);
	WARN_ON(!__ptr_ring_empty(&queue->ring));
	ptr_ring_cleanup(&queue->ring, NULL);
}

196
net/wireguard/queueing.h
Normal file
@@ -0,0 +1,196 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_QUEUEING_H
#define _WG_QUEUEING_H

#include "peer.h"
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/ip_tunnels.h>

struct wg_device;
struct wg_peer;
struct multicore_worker;
struct crypt_queue;
struct sk_buff;

/* queueing.c APIs: */
int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
			 bool multicore, unsigned int len);
void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
struct multicore_worker __percpu *
wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);

/* receive.c APIs: */
void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
void wg_packet_handshake_receive_worker(struct work_struct *work);
/* NAPI poll function: */
int wg_packet_rx_poll(struct napi_struct *napi, int budget);
/* Workqueue worker: */
void wg_packet_decrypt_worker(struct work_struct *work);

/* send.c APIs: */
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
						bool is_retry);
void wg_packet_send_handshake_response(struct wg_peer *peer);
void wg_packet_send_handshake_cookie(struct wg_device *wg,
				     struct sk_buff *initiating_skb,
				     __le32 sender_index);
void wg_packet_send_keepalive(struct wg_peer *peer);
void wg_packet_purge_staged_packets(struct wg_peer *peer);
void wg_packet_send_staged_packets(struct wg_peer *peer);
/* Workqueue workers: */
void wg_packet_handshake_send_worker(struct work_struct *work);
void wg_packet_tx_worker(struct work_struct *work);
void wg_packet_encrypt_worker(struct work_struct *work);

enum packet_state {
	PACKET_STATE_UNCRYPTED,
	PACKET_STATE_CRYPTED,
	PACKET_STATE_DEAD
};

struct packet_cb {
	u64 nonce;
	struct noise_keypair *keypair;
	atomic_t state;
	u32 mtu;
	u8 ds;
};

#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)

static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{
	__be16 real_protocol = ip_tunnel_parse_protocol(skb);

	return real_protocol && skb->protocol == real_protocol;
}

static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
{
	const int pfmemalloc = skb->pfmemalloc;
	u32 hash = skb->hash;
	u8 l4_hash = skb->l4_hash;
	u8 sw_hash = skb->sw_hash;

	skb_scrub_packet(skb, true);
	memset(&skb->headers_start, 0,
	       offsetof(struct sk_buff, headers_end) -
	       offsetof(struct sk_buff, headers_start));
	skb->pfmemalloc = pfmemalloc;
	if (encapsulating) {
		skb->hash = hash;
		skb->l4_hash = l4_hash;
		skb->sw_hash = sw_hash;
	}
	skb->queue_mapping = 0;
	skb->nohdr = 0;
	skb->peeked = 0;
	skb->mac_len = 0;
	skb->dev = NULL;
#ifdef CONFIG_NET_SCHED
	skb->tc_index = 0;
#endif
	skb_reset_redirect(skb);
	skb->hdr_len = skb_headroom(skb);
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_probe_transport_header(skb);
	skb_reset_inner_headers(skb);
}

static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
{
	unsigned int cpu = *stored_cpu, cpu_index, i;

	if (unlikely(cpu == nr_cpumask_bits ||
		     !cpumask_test_cpu(cpu, cpu_online_mask))) {
		cpu_index = id % cpumask_weight(cpu_online_mask);
		cpu = cpumask_first(cpu_online_mask);
		for (i = 0; i < cpu_index; ++i)
			cpu = cpumask_next(cpu, cpu_online_mask);
		*stored_cpu = cpu;
	}
	return cpu;
}

/* This function is racy, in the sense that next is unlocked, so it could return
 * the same CPU twice. A race-free version of this would be to instead store an
 * atomic sequence number, do an increment-and-return, and then iterate through
 * every possible CPU until we get to that index -- choose_cpu. However that's
 * a bit slower, and it doesn't seem like this potential race actually
 * introduces any performance loss, so we live with it.
 */
static inline int wg_cpumask_next_online(int *next)
{
	int cpu = *next;

	while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
		cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
	*next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
	return cpu;
}
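
/* Illustrative trace (assuming nr_cpumask_bits == 4): with CPUs {0, 2, 3}
 * online and *next starting at 0, successive calls return 0, 2, 3, 0, ... --
 * cpumask_next() skips offline CPU 1 and the modulo wraps the cursor back
 * around after the last online CPU.
 */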

static inline int wg_queue_enqueue_per_device_and_peer(
	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
{
	int cpu;

	atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
	/* We first queue this up for the peer ingestion, but the consumer
	 * will wait for the state to change to CRYPTED or DEAD before
	 * consuming it.
	 */
	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
		return -ENOSPC;
	/* Then we queue it up in the device queue, which consumes the
	 * packet as soon as it can.
	 */
	cpu = wg_cpumask_next_online(next_cpu);
	if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
		return -EPIPE;
	queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
	return 0;
}

static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
					     struct sk_buff *skb,
					     enum packet_state state)
{
	/* We take a reference, because as soon as we call atomic_set, the
	 * peer can be freed from below us.
	 */
	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));

	atomic_set_release(&PACKET_CB(skb)->state, state);
	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
					       peer->internal_id),
		      peer->device->packet_crypt_wq, &queue->work);
	wg_peer_put(peer);
}

static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
						  enum packet_state state)
{
	/* We take a reference, because as soon as we call atomic_set, the
	 * peer can be freed from below us.
	 */
	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));

	atomic_set_release(&PACKET_CB(skb)->state, state);
	napi_schedule(&peer->napi);
	wg_peer_put(peer);
}

#ifdef DEBUG
bool wg_packet_counter_selftest(void);
#endif

#endif /* _WG_QUEUEING_H */

235
net/wireguard/ratelimiter.c
Normal file
@@ -0,0 +1,235 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifdef COMPAT_CANNOT_DEPRECIATE_BH_RCU
/* We normally alias all non-_bh functions to the _bh ones in the compat layer,
 * but that's not appropriate here, where we actually do want non-_bh ones.
 */
#undef synchronize_rcu
#define synchronize_rcu old_synchronize_rcu
#undef call_rcu
#define call_rcu old_call_rcu
#undef rcu_barrier
#define rcu_barrier old_rcu_barrier
#endif

#include "ratelimiter.h"
#include <linux/siphash.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>

static struct kmem_cache *entry_cache;
static hsiphash_key_t key;
static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
static DEFINE_MUTEX(init_lock);
static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
static atomic_t total_entries = ATOMIC_INIT(0);
static unsigned int max_entries, table_size;
static void wg_ratelimiter_gc_entries(struct work_struct *);
static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
static struct hlist_head *table_v4;
#if IS_ENABLED(CONFIG_IPV6)
static struct hlist_head *table_v6;
#endif

struct ratelimiter_entry {
	u64 last_time_ns, tokens, ip;
	void *net;
	spinlock_t lock;
	struct hlist_node hash;
	struct rcu_head rcu;
};

enum {
	PACKETS_PER_SECOND = 20,
	PACKETS_BURSTABLE = 5,
	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
};
|
||||
|
||||
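/* Worked arithmetic (an editor's sketch, not in the upstream file): tokens
 * are nanoseconds of accrued allowance. PACKET_COST = NSEC_PER_SEC / 20 =
 * 50,000,000 ns and TOKEN_MAX = 5 * PACKET_COST = 250,000,000 ns, so a full
 * bucket admits a burst of exactly PACKETS_BURSTABLE = 5 packets, after
 * which one packet is admitted per 50 ms as tokens accrue.
 */
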
static void entry_free(struct rcu_head *rcu)
{
	kmem_cache_free(entry_cache,
			container_of(rcu, struct ratelimiter_entry, rcu));
	atomic_dec(&total_entries);
}

static void entry_uninit(struct ratelimiter_entry *entry)
{
	hlist_del_rcu(&entry->hash);
	call_rcu(&entry->rcu, entry_free);
}

/* Calling this function with a NULL work uninits all entries. */
static void wg_ratelimiter_gc_entries(struct work_struct *work)
{
	const u64 now = ktime_get_coarse_boottime_ns();
	struct ratelimiter_entry *entry;
	struct hlist_node *temp;
	unsigned int i;

	for (i = 0; i < table_size; ++i) {
		spin_lock(&table_lock);
		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#if IS_ENABLED(CONFIG_IPV6)
		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#endif
		spin_unlock(&table_lock);
		if (likely(work))
			cond_resched();
	}
	if (likely(work))
		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
}

bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
{
	/* We only take the bottom half of the net pointer, so that we can hash
	 * 3 words in the end. This way, siphash's len param fits into the final
	 * u32, and we don't incur an extra round.
	 */
	const u32 net_word = (unsigned long)net;
	struct ratelimiter_entry *entry;
	struct hlist_head *bucket;
	u64 ip;

	if (skb->protocol == htons(ETH_P_IP)) {
		ip = (u64 __force)ip_hdr(skb)->saddr;
		bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
				   (table_size - 1)];
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		/* Only use 64 bits, so as to ratelimit the whole /64. */
		memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
		bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
				   (table_size - 1)];
	}
#endif
	else
		return false;
	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, bucket, hash) {
		if (entry->net == net && entry->ip == ip) {
			u64 now, tokens;
			bool ret;
			/* Quasi-inspired by nft_limit.c, but this is actually a
			 * slightly different algorithm. Namely, we incorporate
			 * the burst as part of the maximum tokens, rather than
			 * as part of the rate.
			 */
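			/* Worked example (an editor's sketch, not in the
			 * upstream file): if this source was last seen 30 ms
			 * ago with 20,000,000 ns of tokens left, then below
			 * tokens = min(TOKEN_MAX, 20e6 + 30e6) = 50,000,000 ns
			 * = PACKET_COST, so the packet is allowed and the
			 * bucket drops back to zero.
			 */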
			spin_lock(&entry->lock);
			now = ktime_get_coarse_boottime_ns();
			tokens = min_t(u64, TOKEN_MAX,
				       entry->tokens + now -
					       entry->last_time_ns);
			entry->last_time_ns = now;
			ret = tokens >= PACKET_COST;
			entry->tokens = ret ? tokens - PACKET_COST : tokens;
			spin_unlock(&entry->lock);
			rcu_read_unlock();
			return ret;
		}
	}
	rcu_read_unlock();

	if (atomic_inc_return(&total_entries) > max_entries)
		goto err_oom;

	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
	if (unlikely(!entry))
		goto err_oom;

	entry->net = net;
	entry->ip = ip;
	INIT_HLIST_NODE(&entry->hash);
	spin_lock_init(&entry->lock);
	entry->last_time_ns = ktime_get_coarse_boottime_ns();
	entry->tokens = TOKEN_MAX - PACKET_COST;
	spin_lock(&table_lock);
	hlist_add_head_rcu(&entry->hash, bucket);
	spin_unlock(&table_lock);
	return true;

err_oom:
	atomic_dec(&total_entries);
	return false;
}

int wg_ratelimiter_init(void)
{
	mutex_lock(&init_lock);
	if (++init_refcnt != 1)
		goto out;

	entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
	if (!entry_cache)
		goto err;

	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
	 * but what it shares in common is that it uses a massive hashtable. So,
	 * we borrow their wisdom about good table sizes on different systems
	 * dependent on RAM. This calculation here comes from there.
	 */
	table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
		max_t(unsigned long, 16, roundup_pow_of_two(
			(totalram_pages() << PAGE_SHIFT) /
			(1U << 14) / sizeof(struct hlist_head)));
	max_entries = table_size * 8;

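	/* Worked sizing example (an editor's sketch, not in the upstream
	 * file): on a 64-bit machine with 4 GiB of RAM and 4 KiB pages,
	 * totalram_pages() is 2^20 > 2^18, so table_size = 8192 and
	 * max_entries = 65536. With 256 MiB instead, the other branch gives
	 * roundup_pow_of_two(2^28 / 2^14 / 8) = 2048 buckets and
	 * max_entries = 16384.
	 */
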
	table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
	if (unlikely(!table_v4))
		goto err_kmemcache;

#if IS_ENABLED(CONFIG_IPV6)
	table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
	if (unlikely(!table_v6)) {
		kvfree(table_v4);
		goto err_kmemcache;
	}
#endif

	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
	get_random_bytes(&key, sizeof(key));
out:
	mutex_unlock(&init_lock);
	return 0;

err_kmemcache:
	kmem_cache_destroy(entry_cache);
err:
	--init_refcnt;
	mutex_unlock(&init_lock);
	return -ENOMEM;
}

void wg_ratelimiter_uninit(void)
{
	mutex_lock(&init_lock);
	if (!init_refcnt || --init_refcnt)
		goto out;

	cancel_delayed_work_sync(&gc_work);
	wg_ratelimiter_gc_entries(NULL);
	rcu_barrier();
	kvfree(table_v4);
#if IS_ENABLED(CONFIG_IPV6)
	kvfree(table_v6);
#endif
	kmem_cache_destroy(entry_cache);
out:
	mutex_unlock(&init_lock);
}

#include "selftest/ratelimiter.c"
19
net/wireguard/ratelimiter.h
Normal file
19
net/wireguard/ratelimiter.h
Normal file
@ -0,0 +1,19 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifndef _WG_RATELIMITER_H
#define _WG_RATELIMITER_H

#include <linux/skbuff.h>

int wg_ratelimiter_init(void);
void wg_ratelimiter_uninit(void);
bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);

#ifdef DEBUG
bool wg_ratelimiter_selftest(void);
#endif

#endif /* _WG_RATELIMITER_H */
599
net/wireguard/receive.c
Normal file
599
net/wireguard/receive.c
Normal file
@ -0,0 +1,599 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "device.h"
#include "peer.h"
#include "timers.h"
#include "messages.h"
#include "cookie.h"
#include "socket.h"

#include <linux/simd.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <net/ip_tunnels.h>

/* Must be called with bh disabled. */
static void update_rx_stats(struct wg_peer *peer, size_t len)
{
	struct pcpu_sw_netstats *tstats =
		get_cpu_ptr(peer->device->dev->tstats);

	u64_stats_update_begin(&tstats->syncp);
	++tstats->rx_packets;
	tstats->rx_bytes += len;
	peer->rx_bytes += len;
	u64_stats_update_end(&tstats->syncp);
	put_cpu_ptr(tstats);
}

#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)

static size_t validate_header_len(struct sk_buff *skb)
{
	if (unlikely(skb->len < sizeof(struct message_header)))
		return 0;
	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
	    skb->len >= MESSAGE_MINIMUM_LENGTH)
		return sizeof(struct message_data);
	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
	    skb->len == sizeof(struct message_handshake_initiation))
		return sizeof(struct message_handshake_initiation);
	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
	    skb->len == sizeof(struct message_handshake_response))
		return sizeof(struct message_handshake_response);
	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
	    skb->len == sizeof(struct message_handshake_cookie))
		return sizeof(struct message_handshake_cookie);
	return 0;
}

static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
{
	size_t data_offset, data_len, header_len;
	struct udphdr *udp;

	if (unlikely(!wg_check_packet_protocol(skb) ||
		     skb_transport_header(skb) < skb->head ||
		     (skb_transport_header(skb) + sizeof(struct udphdr)) >
			     skb_tail_pointer(skb)))
		return -EINVAL; /* Bogus IP header */
	udp = udp_hdr(skb);
	data_offset = (u8 *)udp - skb->data;
	if (unlikely(data_offset > U16_MAX ||
		     data_offset + sizeof(struct udphdr) > skb->len))
		/* Packet has offset at impossible location or isn't big enough
		 * to have UDP fields.
		 */
		return -EINVAL;
	data_len = ntohs(udp->len);
	if (unlikely(data_len < sizeof(struct udphdr) ||
		     data_len > skb->len - data_offset))
		/* UDP packet is reporting too small of a size or lying about
		 * its size.
		 */
		return -EINVAL;
	data_len -= sizeof(struct udphdr);
	data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
	if (unlikely(!pskb_may_pull(skb,
				data_offset + sizeof(struct message_header)) ||
		     pskb_trim(skb, data_len + data_offset) < 0))
		return -EINVAL;
	skb_pull(skb, data_offset);
	if (unlikely(skb->len != data_len))
		/* Final len does not agree with calculated len */
		return -EINVAL;
	header_len = validate_header_len(skb);
	if (unlikely(!header_len))
		return -EINVAL;
	__skb_push(skb, data_offset);
	if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
		return -EINVAL;
	__skb_pull(skb, data_offset);
	return 0;
}

static void wg_receive_handshake_packet(struct wg_device *wg,
					struct sk_buff *skb)
{
	enum cookie_mac_state mac_state;
	struct wg_peer *peer = NULL;
	/* This is global, so that our load calculation applies to the whole
	 * system. We don't care about races with it at all.
	 */
	static u64 last_under_load;
	bool packet_needs_cookie;
	bool under_load;

	if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
		net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
					wg->dev->name, skb);
		wg_cookie_message_consume(
			(struct message_handshake_cookie *)skb->data, wg);
		return;
	}

	under_load = skb_queue_len(&wg->incoming_handshakes) >=
		     MAX_QUEUED_INCOMING_HANDSHAKES / 8;
	if (under_load) {
		last_under_load = ktime_get_coarse_boottime_ns();
	} else if (last_under_load) {
		under_load = !wg_birthdate_has_expired(last_under_load, 1);
		if (!under_load)
			last_under_load = 0;
	}
	mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
					      under_load);
	if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
	    (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
		packet_needs_cookie = false;
	} else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
		packet_needs_cookie = true;
	} else {
		net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
					wg->dev->name, skb);
		return;
	}

	switch (SKB_TYPE_LE32(skb)) {
	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
		struct message_handshake_initiation *message =
			(struct message_handshake_initiation *)skb->data;

		if (packet_needs_cookie) {
			wg_packet_send_handshake_cookie(wg, skb,
							message->sender_index);
			return;
		}
		peer = wg_noise_handshake_consume_initiation(message, wg);
		if (unlikely(!peer)) {
			net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
						wg->dev->name, skb);
			return;
		}
		wg_socket_set_peer_endpoint_from_skb(peer, skb);
		net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
				    wg->dev->name, peer->internal_id,
				    &peer->endpoint.addr);
		wg_packet_send_handshake_response(peer);
		break;
	}
	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
		struct message_handshake_response *message =
			(struct message_handshake_response *)skb->data;

		if (packet_needs_cookie) {
			wg_packet_send_handshake_cookie(wg, skb,
							message->sender_index);
			return;
		}
		peer = wg_noise_handshake_consume_response(message, wg);
		if (unlikely(!peer)) {
			net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
						wg->dev->name, skb);
			return;
		}
		wg_socket_set_peer_endpoint_from_skb(peer, skb);
		net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
				    wg->dev->name, peer->internal_id,
				    &peer->endpoint.addr);
		if (wg_noise_handshake_begin_session(&peer->handshake,
						     &peer->keypairs)) {
			wg_timers_session_derived(peer);
			wg_timers_handshake_complete(peer);
			/* Calling this function will either send any existing
			 * packets in the queue and not send a keepalive, which
			 * is the best case, or, if there's nothing in the
			 * queue, it will send a keepalive, in order to give
			 * immediate confirmation of the session.
			 */
			wg_packet_send_keepalive(peer);
		}
		break;
	}
	}

	if (unlikely(!peer)) {
		WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
		return;
	}

	local_bh_disable();
	update_rx_stats(peer, skb->len);
	local_bh_enable();

	wg_timers_any_authenticated_packet_received(peer);
	wg_timers_any_authenticated_packet_traversal(peer);
	wg_peer_put(peer);
}

void wg_packet_handshake_receive_worker(struct work_struct *work)
{
	struct wg_device *wg = container_of(work, struct multicore_worker,
					    work)->ptr;
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
		wg_receive_handshake_packet(wg, skb);
		dev_kfree_skb(skb);
		cond_resched();
	}
}

static void keep_key_fresh(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	bool send;

	if (peer->sent_lastminute_handshake)
		return;

	rcu_read_lock_bh();
	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
	send = keypair && READ_ONCE(keypair->sending.is_valid) &&
	       keypair->i_am_the_initiator &&
	       wg_birthdate_has_expired(keypair->sending.birthdate,
			REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT);
	rcu_read_unlock_bh();

	if (unlikely(send)) {
		peer->sent_lastminute_handshake = true;
		wg_packet_send_queued_handshake_initiation(peer, false);
	}
}

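/* Worked timing example (an editor's sketch, not in the upstream file,
 * assuming WireGuard's usual protocol constants): with REJECT_AFTER_TIME =
 * 180 s, KEEPALIVE_TIMEOUT = 10 s and REKEY_TIMEOUT = 5 s, the expression
 * above is 180 - 10 - 5 = 165 s, so an initiator whose sending key is older
 * than 165 s fires one last-minute handshake while the keepalive and rekey
 * exchanges can still complete before the key expires.
 */
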
static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
			   simd_context_t *simd_context)
{
	struct scatterlist sg[MAX_SKB_FRAGS + 8];
	struct sk_buff *trailer;
	unsigned int offset;
	int num_frags;

	if (unlikely(!keypair))
		return false;

	if (unlikely(!READ_ONCE(keypair->receiving.is_valid) ||
		     wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) ||
		     keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) {
		WRITE_ONCE(keypair->receiving.is_valid, false);
		return false;
	}

	PACKET_CB(skb)->nonce =
		le64_to_cpu(((struct message_data *)skb->data)->counter);

	/* We ensure that the network header is part of the packet before we
	 * call skb_cow_data, so that there's no chance that data is removed
	 * from the skb, so that later we can extract the original endpoint.
	 */
	offset = skb->data - skb_network_header(skb);
	skb_push(skb, offset);
	num_frags = skb_cow_data(skb, 0, &trailer);
	offset += sizeof(struct message_data);
	skb_pull(skb, offset);
	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
		return false;

	sg_init_table(sg, num_frags);
	if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
		return false;

	if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
						 PACKET_CB(skb)->nonce,
						 keypair->receiving.key,
						 simd_context))
		return false;

	/* Another ugly situation of pushing and pulling the header so as to
	 * keep endpoint information intact.
	 */
	skb_push(skb, offset);
	if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
		return false;
	skb_pull(skb, offset);

	return true;
}

/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter)
{
	unsigned long index, index_current, top, i;
	bool ret = false;

	spin_lock_bh(&counter->lock);

	if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 ||
		     their_counter >= REJECT_AFTER_MESSAGES))
		goto out;

	++their_counter;

	if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
		     counter->counter))
		goto out;

	index = their_counter >> ilog2(BITS_PER_LONG);

	if (likely(their_counter > counter->counter)) {
		index_current = counter->counter >> ilog2(BITS_PER_LONG);
		top = min_t(unsigned long, index - index_current,
			    COUNTER_BITS_TOTAL / BITS_PER_LONG);
		for (i = 1; i <= top; ++i)
			counter->backtrack[(i + index_current) &
				((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
		counter->counter = their_counter;
	}

	index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
	ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
				&counter->backtrack[index]);

out:
	spin_unlock_bh(&counter->lock);
	return ret;
}

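/* Worked example (an editor's sketch, not in the upstream file): counters
 * are stored shifted by one, so receiving nonce 5 on a fresh state slides
 * the window, sets counter->counter = 6, and marks bit 6 of the bitmap. A
 * later nonce 3 maps to bit 4, which is clear and within the window, so it
 * is accepted; a replayed nonce 3 finds bit 4 already set and is rejected,
 * as is anything older than counter->counter - COUNTER_WINDOW_SIZE.
 */
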
#include "selftest/counter.c"
|
||||
|
||||
static void wg_packet_consume_data_done(struct wg_peer *peer,
|
||||
struct sk_buff *skb,
|
||||
struct endpoint *endpoint)
|
||||
{
|
||||
struct net_device *dev = peer->device->dev;
|
||||
unsigned int len, len_before_trim;
|
||||
struct wg_peer *routed_peer;
|
||||
|
||||
wg_socket_set_peer_endpoint(peer, endpoint);
|
||||
|
||||
if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
|
||||
PACKET_CB(skb)->keypair))) {
|
||||
wg_timers_handshake_complete(peer);
|
||||
wg_packet_send_staged_packets(peer);
|
||||
}
|
||||
|
||||
keep_key_fresh(peer);
|
||||
|
||||
wg_timers_any_authenticated_packet_received(peer);
|
||||
wg_timers_any_authenticated_packet_traversal(peer);
|
||||
|
||||
/* A packet with length 0 is a keepalive packet */
|
||||
if (unlikely(!skb->len)) {
|
||||
update_rx_stats(peer, message_data_len(0));
|
||||
net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
|
||||
dev->name, peer->internal_id,
|
||||
&peer->endpoint.addr);
|
||||
goto packet_processed;
|
||||
}
|
||||
|
||||
wg_timers_data_received(peer);
|
||||
|
||||
if (unlikely(skb_network_header(skb) < skb->head))
|
||||
goto dishonest_packet_size;
|
||||
if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
|
||||
(ip_hdr(skb)->version == 4 ||
|
||||
(ip_hdr(skb)->version == 6 &&
|
||||
pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
|
||||
goto dishonest_packet_type;
|
||||
|
||||
skb->dev = dev;
|
||||
	/* We've already verified the Poly1305 auth tag, which means this packet
	 * was not modified in transit. We can therefore tell the networking
	 * stack that all checksums of every layer of encapsulation have
	 * already been checked "by the hardware", and that it is therefore
	 * unnecessary to check them again in software.
	 */
	skb->ip_summed = CHECKSUM_UNNECESSARY;
#ifndef COMPAT_CANNOT_USE_CSUM_LEVEL
	skb->csum_level = ~0; /* All levels */
#endif
	skb->protocol = ip_tunnel_parse_protocol(skb);
	if (skb->protocol == htons(ETH_P_IP)) {
		len = ntohs(ip_hdr(skb)->tot_len);
		if (unlikely(len < sizeof(struct iphdr)))
			goto dishonest_packet_size;
		INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		len = ntohs(ipv6_hdr(skb)->payload_len) +
		      sizeof(struct ipv6hdr);
		INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
	} else {
		goto dishonest_packet_type;
	}

	if (unlikely(len > skb->len))
		goto dishonest_packet_size;
	len_before_trim = skb->len;
	if (unlikely(pskb_trim(skb, len)))
		goto packet_processed;

	routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
					       skb);
	wg_peer_put(routed_peer); /* We don't need the extra reference. */

	if (unlikely(routed_peer != peer))
		goto dishonest_packet_peer;

	napi_gro_receive(&peer->napi, skb);
	update_rx_stats(peer, message_data_len(len_before_trim));
	return;

dishonest_packet_peer:
	net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
				dev->name, skb, peer->internal_id,
				&peer->endpoint.addr);
	++dev->stats.rx_errors;
	++dev->stats.rx_frame_errors;
	goto packet_processed;
dishonest_packet_type:
	net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
			    dev->name, peer->internal_id, &peer->endpoint.addr);
	++dev->stats.rx_errors;
	++dev->stats.rx_frame_errors;
	goto packet_processed;
dishonest_packet_size:
	net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
			    dev->name, peer->internal_id, &peer->endpoint.addr);
	++dev->stats.rx_errors;
	++dev->stats.rx_length_errors;
	goto packet_processed;
packet_processed:
	dev_kfree_skb(skb);
}

int wg_packet_rx_poll(struct napi_struct *napi, int budget)
{
	struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
	struct crypt_queue *queue = &peer->rx_queue;
	struct noise_keypair *keypair;
	struct endpoint endpoint;
	enum packet_state state;
	struct sk_buff *skb;
	int work_done = 0;
	bool free;

	if (unlikely(budget <= 0))
		return 0;

	while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
	       (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
		       PACKET_STATE_UNCRYPTED) {
		__ptr_ring_discard_one(&queue->ring);
		peer = PACKET_PEER(skb);
		keypair = PACKET_CB(skb)->keypair;
		free = true;

		if (unlikely(state != PACKET_STATE_CRYPTED))
			goto next;

		if (unlikely(!counter_validate(&keypair->receiving_counter,
					       PACKET_CB(skb)->nonce))) {
			net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
					    peer->device->dev->name,
					    PACKET_CB(skb)->nonce,
					    keypair->receiving_counter.counter);
			goto next;
		}

		if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
			goto next;

		wg_reset_packet(skb, false);
		wg_packet_consume_data_done(peer, skb, &endpoint);
		free = false;

next:
		wg_noise_keypair_put(keypair, false);
		wg_peer_put(peer);
		if (unlikely(free))
			dev_kfree_skb(skb);

		if (++work_done >= budget)
			break;
	}

	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}

void wg_packet_decrypt_worker(struct work_struct *work)
{
	struct crypt_queue *queue = container_of(work, struct multicore_worker,
						 work)->ptr;
	simd_context_t simd_context;
	struct sk_buff *skb;

	simd_get(&simd_context);
	while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
		enum packet_state state =
			likely(decrypt_packet(skb, PACKET_CB(skb)->keypair,
					      &simd_context)) ?
				PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
		wg_queue_enqueue_per_peer_napi(skb, state);
		simd_relax(&simd_context);
	}

	simd_put(&simd_context);
}

static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
{
	__le32 idx = ((struct message_data *)skb->data)->key_idx;
	struct wg_peer *peer = NULL;
	int ret;

	rcu_read_lock_bh();
	PACKET_CB(skb)->keypair =
		(struct noise_keypair *)wg_index_hashtable_lookup(
			wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
			&peer);
	if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
		goto err_keypair;

	if (unlikely(READ_ONCE(peer->is_dead)))
		goto err;

	ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
						   &peer->rx_queue, skb,
						   wg->packet_crypt_wq,
						   &wg->decrypt_queue.last_cpu);
	if (unlikely(ret == -EPIPE))
		wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
	if (likely(!ret || ret == -EPIPE)) {
		rcu_read_unlock_bh();
		return;
	}
err:
	wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
err_keypair:
	rcu_read_unlock_bh();
	wg_peer_put(peer);
	dev_kfree_skb(skb);
}

void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
{
	if (unlikely(prepare_skb_header(skb, wg) < 0))
		goto err;
	switch (SKB_TYPE_LE32(skb)) {
	case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
	case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
	case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
		int cpu;

		if (skb_queue_len(&wg->incoming_handshakes) >
			    MAX_QUEUED_INCOMING_HANDSHAKES ||
		    unlikely(!rng_is_initialized())) {
			net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
						wg->dev->name, skb);
			goto err;
		}
		skb_queue_tail(&wg->incoming_handshakes, skb);
		/* Queues up a call to packet_process_queued_handshake_
		 * packets(skb):
		 */
		cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
		queue_work_on(cpu, wg->handshake_receive_wq,
			      &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
		break;
	}
	case cpu_to_le32(MESSAGE_DATA):
		PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
		wg_packet_consume_data(wg, skb);
		break;
	default:
WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
|
||||
		goto err;
	}
	return;

err:
	dev_kfree_skb(skb);
}
683
net/wireguard/selftest/allowedips.c
Normal file
683
net/wireguard/selftest/allowedips.c
Normal file
@ -0,0 +1,683 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This contains some basic static unit tests for the allowedips data structure.
 * It also has two additional modes that are disabled and meant to be used by
 * folks directly playing with this file. If you define the macro
 * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
 * memory, it will be printed out as KERN_DEBUG in a format that can be passed
 * to graphviz (the dot command) to visualize it. If you define the macro
 * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
 * randomized tests done against a trivial implementation, which may take
 * upwards of a half-hour to complete. There's no set of users who should be
 * enabling these, and the only developers that should go anywhere near these
 * knobs are the ones who are reading this comment.
 */

#ifdef DEBUG

#include <linux/siphash.h>

static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
					      u8 cidr)
{
	swap_endian(dst, src, bits);
	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
	if (cidr)
		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
}

static __init void print_node(struct allowedips_node *node, u8 bits)
{
	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
	char *fmt_declaration = KERN_DEBUG
		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
	char *style = "dotted";
	u8 ip1[16], ip2[16];
	u32 color = 0;

	if (bits == 32) {
		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
		fmt_declaration = KERN_DEBUG
			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
	} else if (bits == 128) {
		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
		fmt_declaration = KERN_DEBUG
			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
	}
	if (node->peer) {
		hsiphash_key_t key = { { 0 } };

		memcpy(&key, &node->peer, sizeof(node->peer));
		color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
			hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
			hsiphash_1u32(0xabad1dea, &key) % 200;
		style = "bold";
	}
	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
	printk(fmt_declaration, ip1, node->cidr, style, color);
	if (node->bit[0]) {
		swap_endian_and_apply_cidr(ip2,
				rcu_dereference_raw(node->bit[0])->bits, bits,
				node->cidr);
		printk(fmt_connection, ip1, node->cidr, ip2,
		       rcu_dereference_raw(node->bit[0])->cidr);
		print_node(rcu_dereference_raw(node->bit[0]), bits);
	}
	if (node->bit[1]) {
		swap_endian_and_apply_cidr(ip2,
				rcu_dereference_raw(node->bit[1])->bits,
				bits, node->cidr);
		printk(fmt_connection, ip1, node->cidr, ip2,
		       rcu_dereference_raw(node->bit[1])->cidr);
		print_node(rcu_dereference_raw(node->bit[1]), bits);
	}
}

static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
{
	printk(KERN_DEBUG "digraph trie {\n");
	print_node(rcu_dereference_raw(top), bits);
	printk(KERN_DEBUG "}\n");
}

enum {
	NUM_PEERS = 2000,
	NUM_RAND_ROUTES = 400,
	NUM_MUTATED_ROUTES = 100,
	NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
};

struct horrible_allowedips {
	struct hlist_head head;
};

struct horrible_allowedips_node {
	struct hlist_node table;
	union nf_inet_addr ip;
	union nf_inet_addr mask;
	u8 ip_version;
	void *value;
};

static __init void horrible_allowedips_init(struct horrible_allowedips *table)
{
	INIT_HLIST_HEAD(&table->head);
}

static __init void horrible_allowedips_free(struct horrible_allowedips *table)
{
	struct horrible_allowedips_node *node;
	struct hlist_node *h;

	hlist_for_each_entry_safe(node, h, &table->head, table) {
		hlist_del(&node->table);
		kfree(node);
	}
}

static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
{
	union nf_inet_addr mask;

	memset(&mask, 0x00, 128 / 8);
	memset(&mask, 0xff, cidr / 8);
	if (cidr % 32)
		mask.all[cidr / 32] = (__force u32)htonl(
			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
	return mask;
}

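/* Worked example (an editor's sketch, not in the upstream file): for
 * cidr = 27, the memsets leave bytes 0-2 as 0xff, and since 27 % 32 != 0,
 * mask.all[0] is then overwritten with htonl(0xFFFFFFFF << 5) =
 * htonl(0xFFFFFFE0), i.e. the familiar 255.255.255.224. For cidr = 64 the
 * memset alone yields ffff:ffff:ffff:ffff:: and the conditional is skipped.
 */
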
static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
{
	return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
	       hweight32(subnet.all[2]) + hweight32(subnet.all[3]);
}

static __init inline void
horrible_mask_self(struct horrible_allowedips_node *node)
{
	if (node->ip_version == 4) {
		node->ip.ip &= node->mask.ip;
	} else if (node->ip_version == 6) {
		node->ip.ip6[0] &= node->mask.ip6[0];
		node->ip.ip6[1] &= node->mask.ip6[1];
		node->ip.ip6[2] &= node->mask.ip6[2];
		node->ip.ip6[3] &= node->mask.ip6[3];
	}
}

static __init inline bool
horrible_match_v4(const struct horrible_allowedips_node *node,
		  struct in_addr *ip)
{
	return (ip->s_addr & node->mask.ip) == node->ip.ip;
}

static __init inline bool
horrible_match_v6(const struct horrible_allowedips_node *node,
		  struct in6_addr *ip)
{
	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
		       node->ip.ip6[0] &&
	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
		       node->ip.ip6[1] &&
	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
		       node->ip.ip6[2] &&
	       (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
}

static __init void
horrible_insert_ordered(struct horrible_allowedips *table,
			struct horrible_allowedips_node *node)
{
	struct horrible_allowedips_node *other = NULL, *where = NULL;
	u8 my_cidr = horrible_mask_to_cidr(node->mask);

	hlist_for_each_entry(other, &table->head, table) {
		if (!memcmp(&other->mask, &node->mask,
			    sizeof(union nf_inet_addr)) &&
		    !memcmp(&other->ip, &node->ip,
			    sizeof(union nf_inet_addr)) &&
		    other->ip_version == node->ip_version) {
			other->value = node->value;
			kfree(node);
			return;
		}
		where = other;
		if (horrible_mask_to_cidr(other->mask) <= my_cidr)
			break;
	}
	if (!other && !where)
		hlist_add_head(&node->table, &table->head);
	else if (!other)
		hlist_add_behind(&node->table, &where->table);
	else
		hlist_add_before(&node->table, &where->table);
}

static __init int
horrible_allowedips_insert_v4(struct horrible_allowedips *table,
			      struct in_addr *ip, u8 cidr, void *value)
{
	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
							GFP_KERNEL);

	if (unlikely(!node))
		return -ENOMEM;
	node->ip.in = *ip;
	node->mask = horrible_cidr_to_mask(cidr);
	node->ip_version = 4;
	node->value = value;
	horrible_mask_self(node);
	horrible_insert_ordered(table, node);
	return 0;
}

static __init int
horrible_allowedips_insert_v6(struct horrible_allowedips *table,
			      struct in6_addr *ip, u8 cidr, void *value)
{
	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
							GFP_KERNEL);

	if (unlikely(!node))
		return -ENOMEM;
	node->ip.in6 = *ip;
	node->mask = horrible_cidr_to_mask(cidr);
	node->ip_version = 6;
	node->value = value;
	horrible_mask_self(node);
	horrible_insert_ordered(table, node);
	return 0;
}

static __init void *
horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
			      struct in_addr *ip)
{
	struct horrible_allowedips_node *node;
	void *ret = NULL;

	hlist_for_each_entry(node, &table->head, table) {
		if (node->ip_version != 4)
			continue;
		if (horrible_match_v4(node, ip)) {
			ret = node->value;
			break;
		}
	}
	return ret;
}

static __init void *
horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
			      struct in6_addr *ip)
{
	struct horrible_allowedips_node *node;
	void *ret = NULL;

	hlist_for_each_entry(node, &table->head, table) {
		if (node->ip_version != 6)
			continue;
		if (horrible_match_v6(node, ip)) {
			ret = node->value;
			break;
		}
	}
	return ret;
}

static __init bool randomized_test(void)
{
	unsigned int i, j, k, mutate_amount, cidr;
	u8 ip[16], mutate_mask[16], mutated[16];
	struct wg_peer **peers, *peer;
	struct horrible_allowedips h;
	DEFINE_MUTEX(mutex);
	struct allowedips t;
	bool ret = false;

	mutex_init(&mutex);

	wg_allowedips_init(&t);
	horrible_allowedips_init(&h);

	peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
	if (unlikely(!peers)) {
		pr_err("allowedips random self-test malloc: FAIL\n");
		goto free;
	}
	for (i = 0; i < NUM_PEERS; ++i) {
		peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
		if (unlikely(!peers[i])) {
			pr_err("allowedips random self-test malloc: FAIL\n");
			goto free;
		}
		kref_init(&peers[i]->refcount);
	}

	mutex_lock(&mutex);

	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
		prandom_bytes(ip, 4);
		cidr = prandom_u32_max(32) + 1;
		peer = peers[prandom_u32_max(NUM_PEERS)];
		if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
					    peer, &mutex) < 0) {
			pr_err("allowedips random self-test malloc: FAIL\n");
			goto free_locked;
		}
		if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
						  cidr, peer) < 0) {
			pr_err("allowedips random self-test malloc: FAIL\n");
			goto free_locked;
		}
		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
			memcpy(mutated, ip, 4);
			prandom_bytes(mutate_mask, 4);
			mutate_amount = prandom_u32_max(32);
			for (k = 0; k < mutate_amount / 8; ++k)
				mutate_mask[k] = 0xff;
			mutate_mask[k] = 0xff
				<< ((8 - (mutate_amount % 8)) % 8);
			for (; k < 4; ++k)
				mutate_mask[k] = 0;
			for (k = 0; k < 4; ++k)
				mutated[k] = (mutated[k] & mutate_mask[k]) |
					     (~mutate_mask[k] &
					      prandom_u32_max(256));
			cidr = prandom_u32_max(32) + 1;
			peer = peers[prandom_u32_max(NUM_PEERS)];
			if (wg_allowedips_insert_v4(&t,
						    (struct in_addr *)mutated,
						    cidr, peer, &mutex) < 0) {
				pr_err("allowedips random malloc: FAIL\n");
				goto free_locked;
			}
			if (horrible_allowedips_insert_v4(&h,
				(struct in_addr *)mutated, cidr, peer)) {
				pr_err("allowedips random self-test malloc: FAIL\n");
				goto free_locked;
			}
		}
	}

	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
		prandom_bytes(ip, 16);
		cidr = prandom_u32_max(128) + 1;
		peer = peers[prandom_u32_max(NUM_PEERS)];
		if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
					    peer, &mutex) < 0) {
			pr_err("allowedips random self-test malloc: FAIL\n");
			goto free_locked;
		}
		if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
						  cidr, peer) < 0) {
			pr_err("allowedips random self-test malloc: FAIL\n");
			goto free_locked;
		}
		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
			memcpy(mutated, ip, 16);
			prandom_bytes(mutate_mask, 16);
			mutate_amount = prandom_u32_max(128);
			for (k = 0; k < mutate_amount / 8; ++k)
				mutate_mask[k] = 0xff;
			mutate_mask[k] = 0xff
				<< ((8 - (mutate_amount % 8)) % 8);
			for (; k < 16; ++k)
				mutate_mask[k] = 0;
			for (k = 0; k < 16; ++k)
				mutated[k] = (mutated[k] & mutate_mask[k]) |
					     (~mutate_mask[k] &
					      prandom_u32_max(256));
			cidr = prandom_u32_max(128) + 1;
			peer = peers[prandom_u32_max(NUM_PEERS)];
			if (wg_allowedips_insert_v6(&t,
						    (struct in6_addr *)mutated,
						    cidr, peer, &mutex) < 0) {
				pr_err("allowedips random self-test malloc: FAIL\n");
				goto free_locked;
			}
			if (horrible_allowedips_insert_v6(
				    &h, (struct in6_addr *)mutated, cidr,
				    peer)) {
				pr_err("allowedips random self-test malloc: FAIL\n");
				goto free_locked;
			}
		}
	}

	mutex_unlock(&mutex);

	if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
		print_tree(t.root4, 32);
		print_tree(t.root6, 128);
	}

	for (i = 0; i < NUM_QUERIES; ++i) {
		prandom_bytes(ip, 4);
		if (lookup(t.root4, 32, ip) !=
		    horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
			pr_err("allowedips random self-test: FAIL\n");
			goto free;
		}
	}

	for (i = 0; i < NUM_QUERIES; ++i) {
		prandom_bytes(ip, 16);
		if (lookup(t.root6, 128, ip) !=
		    horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
			pr_err("allowedips random self-test: FAIL\n");
			goto free;
		}
	}
	ret = true;

free:
	mutex_lock(&mutex);
free_locked:
	wg_allowedips_free(&t, &mutex);
	mutex_unlock(&mutex);
	horrible_allowedips_free(&h);
	if (peers) {
		for (i = 0; i < NUM_PEERS; ++i)
			kfree(peers[i]);
	}
	kfree(peers);
	return ret;
}

static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
{
	static struct in_addr ip;
	u8 *split = (u8 *)&ip;

	split[0] = a;
	split[1] = b;
	split[2] = c;
	split[3] = d;
	return &ip;
}

static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
{
	static struct in6_addr ip;
	__be32 *split = (__be32 *)&ip;

	split[0] = cpu_to_be32(a);
	split[1] = cpu_to_be32(b);
	split[2] = cpu_to_be32(c);
	split[3] = cpu_to_be32(d);
	return &ip;
}

static __init struct wg_peer *init_peer(void)
{
	struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);

	if (!peer)
		return NULL;
	kref_init(&peer->refcount);
	INIT_LIST_HEAD(&peer->allowedips_list);
	return peer;
}

#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
	wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
					cidr, mem, &mutex)

#define maybe_fail() do { \
		++i; \
		if (!_s) { \
			pr_info("allowedips self-test %zu: FAIL\n", i); \
			success = false; \
		} \
	} while (0)

#define test(version, mem, ipa, ipb, ipc, ipd) do { \
		bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
				 ip##version(ipa, ipb, ipc, ipd)) == (mem); \
		maybe_fail(); \
	} while (0)

#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
		bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
				 ip##version(ipa, ipb, ipc, ipd)) != (mem); \
		maybe_fail(); \
	} while (0)

#define test_boolean(cond) do { \
		bool _s = (cond); \
		maybe_fail(); \
	} while (0)

bool __init wg_allowedips_selftest(void)
{
	bool found_a = false, found_b = false, found_c = false, found_d = false,
	     found_e = false, found_other = false;
	struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
		       *d = init_peer(), *e = init_peer(), *f = init_peer(),
		       *g = init_peer(), *h = init_peer();
	struct allowedips_node *iter_node;
	bool success = false;
	struct allowedips t;
	DEFINE_MUTEX(mutex);
	struct in6_addr ip;
	size_t i = 0, count = 0;
	__be64 part;

	mutex_init(&mutex);
	mutex_lock(&mutex);
	wg_allowedips_init(&t);

	if (!a || !b || !c || !d || !e || !f || !g || !h) {
		pr_err("allowedips self-test malloc: FAIL\n");
		goto free;
	}

	insert(4, a, 192, 168, 4, 0, 24);
	insert(4, b, 192, 168, 4, 4, 32);
	insert(4, c, 192, 168, 0, 0, 16);
	insert(4, d, 192, 95, 5, 64, 27);
	/* replaces previous entry, and maskself is required */
	insert(4, c, 192, 95, 5, 65, 27);
	insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
	insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
	insert(4, e, 0, 0, 0, 0, 0);
	insert(6, e, 0, 0, 0, 0, 0);
	/* replaces previous entry */
	insert(6, f, 0, 0, 0, 0, 0);
	insert(6, g, 0x24046800, 0, 0, 0, 32);
	/* maskself is required */
	insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
	insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
	insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
	insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
	insert(4, g, 64, 15, 112, 0, 20);
	/* maskself is required */
	insert(4, h, 64, 15, 123, 211, 25);
	insert(4, a, 10, 0, 0, 0, 25);
	insert(4, b, 10, 0, 0, 128, 25);
	insert(4, a, 10, 1, 0, 0, 30);
	insert(4, b, 10, 1, 0, 4, 30);
	insert(4, c, 10, 1, 0, 8, 29);
	insert(4, d, 10, 1, 0, 16, 29);

	if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
		print_tree(t.root4, 32);
		print_tree(t.root6, 128);
	}

	success = true;

	test(4, a, 192, 168, 4, 20);
	test(4, a, 192, 168, 4, 0);
	test(4, b, 192, 168, 4, 4);
	test(4, c, 192, 168, 200, 182);
	test(4, c, 192, 95, 5, 68);
	test(4, e, 192, 95, 5, 96);
	test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
	test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
	test(6, f, 0x26075300, 0x60006b01, 0, 0);
	test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
	test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
	test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
	test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
	test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
	test(6, h, 0x24046800, 0x40040800, 0, 0);
	test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
	test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
	test(4, g, 64, 15, 116, 26);
	test(4, g, 64, 15, 127, 3);
	test(4, g, 64, 15, 123, 1);
	test(4, h, 64, 15, 123, 128);
	test(4, h, 64, 15, 123, 129);
	test(4, a, 10, 0, 0, 52);
	test(4, b, 10, 0, 0, 220);
	test(4, a, 10, 1, 0, 2);
	test(4, b, 10, 1, 0, 6);
	test(4, c, 10, 1, 0, 10);
	test(4, d, 10, 1, 0, 20);

	insert(4, a, 1, 0, 0, 0, 32);
	insert(4, a, 64, 0, 0, 0, 32);
	insert(4, a, 128, 0, 0, 0, 32);
	insert(4, a, 192, 0, 0, 0, 32);
	insert(4, a, 255, 0, 0, 0, 32);
	wg_allowedips_remove_by_peer(&t, a, &mutex);
	test_negative(4, a, 1, 0, 0, 0);
	test_negative(4, a, 64, 0, 0, 0);
	test_negative(4, a, 128, 0, 0, 0);
	test_negative(4, a, 192, 0, 0, 0);
	test_negative(4, a, 255, 0, 0, 0);

	wg_allowedips_free(&t, &mutex);
	wg_allowedips_init(&t);
	insert(4, a, 192, 168, 0, 0, 16);
	insert(4, a, 192, 168, 0, 0, 24);
	wg_allowedips_remove_by_peer(&t, a, &mutex);
	test_negative(4, a, 192, 168, 0, 1);

	/* These will hit the WARN_ON(len >= 128) in free_node if something
	 * goes wrong.
	 */
	for (i = 0; i < 128; ++i) {
		part = cpu_to_be64(~(1LLU << (i % 64)));
		memset(&ip, 0xff, 16);
		memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
		wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
	}

	wg_allowedips_free(&t, &mutex);

	wg_allowedips_init(&t);
	insert(4, a, 192, 95, 5, 93, 27);
	insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
	insert(4, a, 10, 1, 0, 20, 29);
	insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
	insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
	list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
		u8 cidr, ip[16] __aligned(__alignof(u64));
		int family = wg_allowedips_read_node(iter_node, ip, &cidr);

		count++;

		if (cidr == 27 && family == AF_INET &&
		    !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
			found_a = true;
		else if (cidr == 128 && family == AF_INET6 &&
			 !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
				 sizeof(struct in6_addr)))
			found_b = true;
		else if (cidr == 29 && family == AF_INET &&
			 !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
			found_c = true;
		else if (cidr == 83 && family == AF_INET6 &&
			 !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
				 sizeof(struct in6_addr)))
			found_d = true;
		else if (cidr == 21 && family == AF_INET6 &&
			 !memcmp(ip, ip6(0x26075000, 0, 0, 0),
				 sizeof(struct in6_addr)))
			found_e = true;
		else
			found_other = true;
	}
	test_boolean(count == 5);
	test_boolean(found_a);
	test_boolean(found_b);
	test_boolean(found_c);
	test_boolean(found_d);
	test_boolean(found_e);
	test_boolean(!found_other);

	if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
		success = randomized_test();

	if (success)
		pr_info("allowedips self-tests: pass\n");

free:
	wg_allowedips_free(&t, &mutex);
	kfree(a);
	kfree(b);
	kfree(c);
	kfree(d);
	kfree(e);
	kfree(f);
	kfree(g);
	kfree(h);
	mutex_unlock(&mutex);

	return success;
}

#undef test_negative
#undef test
#undef remove
#undef insert
#undef init_peer

#endif
111
net/wireguard/selftest/counter.c
Normal file
111
net/wireguard/selftest/counter.c
Normal file
@ -0,0 +1,111 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifdef DEBUG
bool __init wg_packet_counter_selftest(void)
{
	struct noise_replay_counter *counter;
	unsigned int test_num = 0, i;
	bool success = true;

	counter = kmalloc(sizeof(*counter), GFP_KERNEL);
	if (unlikely(!counter)) {
		pr_err("nonce counter self-test malloc: FAIL\n");
		return false;
	}

#define T_INIT do {                                    \
		memset(counter, 0, sizeof(*counter)); \
		spin_lock_init(&counter->lock);       \
	} while (0)
#define T_LIM (COUNTER_WINDOW_SIZE + 1)
#define T(n, v) do {                                                  \
		++test_num;                                           \
		if (counter_validate(counter, n) != (v)) {            \
			pr_err("nonce counter self-test %u: FAIL\n",  \
			       test_num);                             \
			success = false;                              \
		}                                                     \
	} while (0)

	T_INIT;
	/*  1 */ T(0, true);
	/*  2 */ T(1, true);
	/*  3 */ T(1, false);
	/*  4 */ T(9, true);
	/*  5 */ T(8, true);
	/*  6 */ T(7, true);
	/*  7 */ T(7, false);
	/*  8 */ T(T_LIM, true);
	/*  9 */ T(T_LIM - 1, true);
	/* 10 */ T(T_LIM - 1, false);
	/* 11 */ T(T_LIM - 2, true);
	/* 12 */ T(2, true);
	/* 13 */ T(2, false);
	/* 14 */ T(T_LIM + 16, true);
	/* 15 */ T(3, false);
	/* 16 */ T(T_LIM + 16, false);
	/* 17 */ T(T_LIM * 4, true);
	/* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
	/* 19 */ T(10, false);
	/* 20 */ T(T_LIM * 4 - T_LIM, false);
	/* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
	/* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
	/* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
	/* 24 */ T(0, false);
	/* 25 */ T(REJECT_AFTER_MESSAGES, false);
	/* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
	/* 27 */ T(REJECT_AFTER_MESSAGES, false);
	/* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
	/* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
	/* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
	/* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
	/* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
	/* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
	/* 34 */ T(0, false);

	T_INIT;
	for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
		T(i, true);
	T(0, true);
	T(0, false);

	T_INIT;
	for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
		T(i, true);
	T(1, true);
	T(0, false);

	T_INIT;
	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
		T(i, true);

	T_INIT;
	for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
		T(i, true);
	T(0, false);

	T_INIT;
	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
		T(i, true);
	T(COUNTER_WINDOW_SIZE + 1, true);
	T(0, false);

	T_INIT;
	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
		T(i, true);
	T(0, true);
	T(COUNTER_WINDOW_SIZE + 1, true);

#undef T
#undef T_LIM
#undef T_INIT

	if (success)
		pr_info("nonce counter self-tests: pass\n");
	kfree(counter);
	return success;
}
#endif
226
net/wireguard/selftest/ratelimiter.c
Normal file
226
net/wireguard/selftest/ratelimiter.c
Normal file
@ -0,0 +1,226 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#ifdef DEBUG

#include <linux/jiffies.h>

static const struct {
	bool result;
	unsigned int msec_to_sleep_before;
} expected_results[] __initconst = {
	[0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
	[PACKETS_BURSTABLE] = { false, 0 },
	[PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
	[PACKETS_BURSTABLE + 2] = { false, 0 },
	[PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
	[PACKETS_BURSTABLE + 4] = { true, 0 },
	[PACKETS_BURSTABLE + 5] = { false, 0 }
};

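/* Reading the table (an editor's sketch, not in the upstream file): with
 * PACKETS_BURSTABLE = 5 and PACKETS_PER_SECOND = 20, entries 0-4 drain the
 * initial burst and entry 5 must be refused. The 50 ms sleep before entry 6
 * accrues exactly one PACKET_COST of tokens, so it passes and entry 7 fails
 * again; the 100 ms sleep then buys two tokens, covering entries 8 and 9
 * before entry 10 is refused once more.
 */
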
static __init unsigned int maximum_jiffies_at_index(int index)
|
||||
{
|
||||
unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
|
||||
int i;
|
||||
|
||||
for (i = 0; i <= index; ++i)
|
||||
total_msecs += expected_results[i].msec_to_sleep_before;
|
||||
return msecs_to_jiffies(total_msecs);
|
||||
}

static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
			       struct sk_buff *skb6, struct ipv6hdr *hdr6,
			       int *test)
{
	unsigned long loop_start_time;
	int i;

	wg_ratelimiter_gc_entries(NULL);
	rcu_barrier();
	loop_start_time = jiffies;

	for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
		if (expected_results[i].msec_to_sleep_before)
			msleep(expected_results[i].msec_to_sleep_before);

		if (time_is_before_jiffies(loop_start_time +
					   maximum_jiffies_at_index(i)))
			return -ETIMEDOUT;
		if (wg_ratelimiter_allow(skb4, &init_net) !=
					expected_results[i].result)
			return -EXFULL;
		++(*test);

		hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
		if (time_is_before_jiffies(loop_start_time +
					   maximum_jiffies_at_index(i)))
			return -ETIMEDOUT;
		if (!wg_ratelimiter_allow(skb4, &init_net))
			return -EXFULL;
		++(*test);

		hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);

#if IS_ENABLED(CONFIG_IPV6)
		hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
		hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
		if (time_is_before_jiffies(loop_start_time +
					   maximum_jiffies_at_index(i)))
			return -ETIMEDOUT;
		if (wg_ratelimiter_allow(skb6, &init_net) !=
					expected_results[i].result)
			return -EXFULL;
		++(*test);

		hdr6->saddr.in6_u.u6_addr32[0] =
			htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
		if (time_is_before_jiffies(loop_start_time +
					   maximum_jiffies_at_index(i)))
			return -ETIMEDOUT;
		if (!wg_ratelimiter_allow(skb6, &init_net))
			return -EXFULL;
		++(*test);

		hdr6->saddr.in6_u.u6_addr32[0] =
			htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);

		if (time_is_before_jiffies(loop_start_time +
					   maximum_jiffies_at_index(i)))
			return -ETIMEDOUT;
#endif
	}
	return 0;
}

static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
				int *test)
{
	int i;

	wg_ratelimiter_gc_entries(NULL);
	rcu_barrier();

	if (atomic_read(&total_entries))
		return -EXFULL;
	++(*test);

	for (i = 0; i <= max_entries; ++i) {
		hdr4->saddr = htonl(i);
		if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries))
			return -EXFULL;
		++(*test);
	}
	return 0;
}

bool __init wg_ratelimiter_selftest(void)
{
	enum { TRIALS_BEFORE_GIVING_UP = 5000 };
	bool success = false;
	int test = 0, trials;
	struct sk_buff *skb4, *skb6 = NULL;
	struct iphdr *hdr4;
	struct ipv6hdr *hdr6 = NULL;

	if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
		return true;

	BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);

	if (wg_ratelimiter_init())
		goto out;
	++test;
	if (wg_ratelimiter_init()) {
		wg_ratelimiter_uninit();
		goto out;
	}
	++test;
	if (wg_ratelimiter_init()) {
		wg_ratelimiter_uninit();
		wg_ratelimiter_uninit();
		goto out;
	}
	++test;

	skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
	if (unlikely(!skb4))
		goto err_nofree;
	skb4->protocol = htons(ETH_P_IP);
	hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
	hdr4->saddr = htonl(8182);
	skb_reset_network_header(skb4);
	++test;

#if IS_ENABLED(CONFIG_IPV6)
	skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
	if (unlikely(!skb6)) {
		kfree_skb(skb4);
		goto err_nofree;
	}
	skb6->protocol = htons(ETH_P_IPV6);
	hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
	hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
	hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
	skb_reset_network_header(skb6);
	++test;
#endif

	for (trials = TRIALS_BEFORE_GIVING_UP;;) {
		int test_count = 0, ret;

		ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
		if (ret == -ETIMEDOUT) {
			if (!trials--) {
				test += test_count;
				goto err;
			}
			msleep(500);
			continue;
		} else if (ret < 0) {
			test += test_count;
			goto err;
		} else {
			test += test_count;
			break;
		}
	}

	for (trials = TRIALS_BEFORE_GIVING_UP;;) {
		int test_count = 0;

		if (capacity_test(skb4, hdr4, &test_count) < 0) {
			if (!trials--) {
				test += test_count;
				goto err;
			}
			msleep(50);
			continue;
		}
		test += test_count;
		break;
	}

	success = true;

err:
	kfree_skb(skb4);
#if IS_ENABLED(CONFIG_IPV6)
	kfree_skb(skb6);
#endif
err_nofree:
	wg_ratelimiter_uninit();
	wg_ratelimiter_uninit();
	wg_ratelimiter_uninit();
	/* Uninit one extra time to check underflow detection. */
	wg_ratelimiter_uninit();
out:
	if (success)
		pr_info("ratelimiter self-tests: pass\n");
	else
		pr_err("ratelimiter self-test %d: FAIL\n", test);

	return success;
}
#endif
429
net/wireguard/send.c
Normal file
@ -0,0 +1,429 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "timers.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "messages.h"
#include "cookie.h"

#include <linux/simd.h>
#include <linux/uio.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/sock.h>

static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
{
	struct message_handshake_initiation packet;

	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT))
		return; /* This function is rate limited. */

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		wg_timers_any_authenticated_packet_traversal(peer);
		wg_timers_any_authenticated_packet_sent(peer);
		atomic64_set(&peer->last_sent_handshake,
			     ktime_get_coarse_boottime_ns());
		wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
					      HANDSHAKE_DSCP);
		wg_timers_handshake_initiated(peer);
	}
}
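
The early return above is what rate-limits initiations: at most one per REKEY_TIMEOUT (5 seconds upstream, assumed below). In miniature:

/* Hedged restatement of the birthdate check; the 5 s value is assumed. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REKEY_TIMEOUT_NS (5ULL * 1000 * 1000 * 1000)

static bool initiation_allowed(uint64_t now_ns, uint64_t last_sent_ns)
{
	return now_ns - last_sent_ns >= REKEY_TIMEOUT_NS;
}

int main(void)
{
	printf("%d\n", initiation_allowed(6000000000ULL, 0)); /* 1: 6 s later */
	printf("%d\n", initiation_allowed(4000000000ULL, 0)); /* 0: too soon */
	return 0;
}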

void wg_packet_handshake_send_worker(struct work_struct *work)
{
	struct wg_peer *peer = container_of(work, struct wg_peer,
					    transmit_handshake_work);

	wg_packet_send_handshake_initiation(peer);
	wg_peer_put(peer);
}

void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
						bool is_retry)
{
	if (!is_retry)
		peer->timer_handshake_attempts = 0;

	rcu_read_lock_bh();
	/* We check last_sent_handshake here in addition to the actual function
	 * we're queueing up, so that we don't queue things if not strictly
	 * necessary:
	 */
	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT) ||
	    unlikely(READ_ONCE(peer->is_dead)))
		goto out;

	wg_peer_get(peer);
	/* Queues up calling packet_send_queued_handshakes(peer), where we do a
	 * peer_put(peer) after:
	 */
	if (!queue_work(peer->device->handshake_send_wq,
			&peer->transmit_handshake_work))
		/* If the work was already queued, we want to drop the
		 * extra reference:
		 */
		wg_peer_put(peer);
out:
	rcu_read_unlock_bh();
}

void wg_packet_send_handshake_response(struct wg_peer *peer)
{
	struct message_handshake_response packet;

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		if (wg_noise_handshake_begin_session(&peer->handshake,
						     &peer->keypairs)) {
			wg_timers_session_derived(peer);
			wg_timers_any_authenticated_packet_traversal(peer);
			wg_timers_any_authenticated_packet_sent(peer);
			atomic64_set(&peer->last_sent_handshake,
				     ktime_get_coarse_boottime_ns());
			wg_socket_send_buffer_to_peer(peer, &packet,
						      sizeof(packet),
						      HANDSHAKE_DSCP);
		}
	}
}

void wg_packet_send_handshake_cookie(struct wg_device *wg,
				     struct sk_buff *initiating_skb,
				     __le32 sender_index)
{
	struct message_handshake_cookie packet;

	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
				wg->dev->name, initiating_skb);
	wg_cookie_message_create(&packet, initiating_skb, sender_index,
				 &wg->cookie_checker);
	wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
					      sizeof(packet));
}

static void keep_key_fresh(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	bool send;

	rcu_read_lock_bh();
	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
	send = keypair && READ_ONCE(keypair->sending.is_valid) &&
	       (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
		(keypair->i_am_the_initiator &&
		 wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
	rcu_read_unlock_bh();

	if (unlikely(send))
		wg_packet_send_queued_handshake_initiation(peer, false);
}
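
Restated, the predicate above rekeys once the sending counter passes REKEY_AFTER_MESSAGES, or on key age alone only when this side was the initiator, so both peers don't race to re-handshake at the same moment. A hedged model, with the upstream defaults (2^60 messages, 120 s) assumed:

/* Stand-alone restatement of the rekey predicate; constants assumed. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REKEY_AFTER_MESSAGES (1ULL << 60)
#define REKEY_AFTER_TIME_NS  (120ULL * 1000 * 1000 * 1000)

static bool should_rekey(uint64_t sent, bool is_initiator, uint64_t key_age_ns)
{
	return sent > REKEY_AFTER_MESSAGES ||
	       (is_initiator && key_age_ns > REKEY_AFTER_TIME_NS);
}

int main(void)
{
	/* An initiator holding a 130 s old key rekeys; a responder waits. */
	printf("%d\n", should_rekey(1000, true, 130ULL * 1000 * 1000 * 1000));
	printf("%d\n", should_rekey(1000, false, 130ULL * 1000 * 1000 * 1000));
	return 0;
}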

static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
	unsigned int padded_size, last_unit = skb->len;

	if (unlikely(!PACKET_CB(skb)->mtu))
		return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;

	/* We do this modulo business with the MTU, just in case the networking
	 * layer gives us a packet that's bigger than the MTU. In that case, we
	 * wouldn't want the final subtraction to overflow in the case of the
	 * padded_size being clamped. Fortunately, that's very rarely the case,
	 * so we optimize for that not happening.
	 */
	if (unlikely(last_unit > PACKET_CB(skb)->mtu))
		last_unit %= PACKET_CB(skb)->mtu;

	padded_size = min(PACKET_CB(skb)->mtu,
			  ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
	return padded_size - last_unit;
}
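
A quick worked example of the padding rule as a stand-alone model (MESSAGE_PADDING_MULTIPLE is 16 upstream, assumed here; the oversized-packet modulo case above is omitted): lengths round up to the next multiple of 16 but never past the MTU, and a zero-length keepalive stays empty.

/* Illustrative recomputation of the padding rule; not the kernel code. */
#include <stdio.h>

#define MESSAGE_PADDING_MULTIPLE 16
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static unsigned int padding(unsigned int len, unsigned int mtu)
{
	unsigned int padded = ALIGN_UP(len, MESSAGE_PADDING_MULTIPLE);

	if (mtu && padded > mtu)    /* never pad past the MTU */
		padded = mtu;
	return padded - len;
}

int main(void)
{
	printf("%u\n", padding(1414, 1420)); /* 6: 1424 clamped to the MTU */
	printf("%u\n", padding(100, 1420));  /* 12: 100 rounds up to 112 */
	printf("%u\n", padding(0, 1420));    /* 0: keepalives stay empty */
	return 0;
}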

static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
			   simd_context_t *simd_context)
{
	unsigned int padding_len, plaintext_len, trailer_len;
	struct scatterlist sg[MAX_SKB_FRAGS + 8];
	struct message_data *header;
	struct sk_buff *trailer;
	int num_frags;

	/* Force hash calculation before encryption so that flow analysis is
	 * consistent over the inner packet.
	 */
	skb_get_hash(skb);

	/* Calculate lengths. */
	padding_len = calculate_skb_padding(skb);
	trailer_len = padding_len + noise_encrypted_len(0);
	plaintext_len = skb->len + padding_len;

	/* Expand data section to have room for padding and auth tag. */
	num_frags = skb_cow_data(skb, trailer_len, &trailer);
	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
		return false;

	/* Set the padding to zeros, and make sure it and the auth tag are part
	 * of the skb.
	 */
	memset(skb_tail_pointer(trailer), 0, padding_len);

	/* Expand head section to have room for our header and the network
	 * stack's headers.
	 */
	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
		return false;

	/* Finalize checksum calculation for the inner packet, if required. */
	if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
		return false;

	/* Only after checksumming can we safely add on the padding at the end
	 * and the header.
	 */
	skb_set_inner_network_header(skb, 0);
	header = (struct message_data *)skb_push(skb, sizeof(*header));
	header->header.type = cpu_to_le32(MESSAGE_DATA);
	header->key_idx = keypair->remote_index;
	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
	pskb_put(skb, trailer, trailer_len);

	/* Now we can encrypt the scattergather segments. */
	sg_init_table(sg, num_frags);
	if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
			 noise_encrypted_len(plaintext_len)) <= 0)
		return false;
	return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
						   PACKET_CB(skb)->nonce,
						   keypair->sending.key,
						   simd_context);
}
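
For reference, the wire image encrypt_packet() assembles is a 16-byte header of message type, receiver index, and little-endian counter, followed by the ciphertext of the padded plaintext plus a 16-byte authentication tag. A hedged restatement as a plain struct (the name message_data_wire is ours, not the driver's):

/* Sketch of the data-message wire layout implied by the code above. */
#include <stdint.h>

struct message_data_wire {
	uint32_t type;         /* MESSAGE_DATA, little-endian on the wire */
	uint32_t key_idx;      /* keypair->remote_index: receiver's slot */
	uint64_t counter;      /* PACKET_CB(skb)->nonce, little-endian */
	uint8_t  ciphertext[]; /* plaintext + zero padding, then 16-byte tag */
};

_Static_assert(sizeof(struct message_data_wire) == 16,
	       "data header is 16 bytes before the ciphertext");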

void wg_packet_send_keepalive(struct wg_peer *peer)
{
	struct sk_buff *skb;

	if (skb_queue_empty(&peer->staged_packet_queue)) {
		skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
				GFP_ATOMIC);
		if (unlikely(!skb))
			return;
		skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
		skb->dev = peer->device->dev;
		PACKET_CB(skb)->mtu = skb->dev->mtu;
		skb_queue_tail(&peer->staged_packet_queue, skb);
		net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
				    peer->device->dev->name, peer->internal_id,
				    &peer->endpoint.addr);
	}

	wg_packet_send_staged_packets(peer);
}

static void wg_packet_create_data_done(struct sk_buff *first,
				       struct wg_peer *peer)
{
	struct sk_buff *skb, *next;
	bool is_keepalive, data_sent = false;

	wg_timers_any_authenticated_packet_traversal(peer);
	wg_timers_any_authenticated_packet_sent(peer);
	skb_list_walk_safe(first, skb, next) {
		is_keepalive = skb->len == message_data_len(0);
		if (likely(!wg_socket_send_skb_to_peer(peer, skb,
				PACKET_CB(skb)->ds) && !is_keepalive))
			data_sent = true;
	}

	if (likely(data_sent))
		wg_timers_data_sent(peer);

	keep_key_fresh(peer);
}

void wg_packet_tx_worker(struct work_struct *work)
{
	struct crypt_queue *queue = container_of(work, struct crypt_queue,
						 work);
	struct noise_keypair *keypair;
	enum packet_state state;
	struct sk_buff *first;
	struct wg_peer *peer;

	while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
	       (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
		       PACKET_STATE_UNCRYPTED) {
		__ptr_ring_discard_one(&queue->ring);
		peer = PACKET_PEER(first);
		keypair = PACKET_CB(first)->keypair;

		if (likely(state == PACKET_STATE_CRYPTED))
			wg_packet_create_data_done(first, peer);
		else
			kfree_skb_list(first);

		wg_noise_keypair_put(keypair, false);
		wg_peer_put(peer);
		if (need_resched())
			cond_resched();
	}
}

void wg_packet_encrypt_worker(struct work_struct *work)
{
	struct crypt_queue *queue = container_of(work, struct multicore_worker,
						 work)->ptr;
	struct sk_buff *first, *skb, *next;
	simd_context_t simd_context;

	simd_get(&simd_context);
	while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
		enum packet_state state = PACKET_STATE_CRYPTED;

		skb_list_walk_safe(first, skb, next) {
			if (likely(encrypt_packet(skb,
						  PACKET_CB(first)->keypair,
						  &simd_context))) {
				wg_reset_packet(skb, true);
			} else {
				state = PACKET_STATE_DEAD;
				break;
			}
		}
		wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
					  state);

		simd_relax(&simd_context);
	}
	simd_put(&simd_context);
}

static void wg_packet_create_data(struct sk_buff *first)
{
	struct wg_peer *peer = PACKET_PEER(first);
	struct wg_device *wg = peer->device;
	int ret = -EINVAL;

	rcu_read_lock_bh();
	if (unlikely(READ_ONCE(peer->is_dead)))
		goto err;

	ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
						   &peer->tx_queue, first,
						   wg->packet_crypt_wq,
						   &wg->encrypt_queue.last_cpu);
	if (unlikely(ret == -EPIPE))
		wg_queue_enqueue_per_peer(&peer->tx_queue, first,
					  PACKET_STATE_DEAD);
err:
	rcu_read_unlock_bh();
	if (likely(!ret || ret == -EPIPE))
		return;
	wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
	wg_peer_put(peer);
	kfree_skb_list(first);
}

void wg_packet_purge_staged_packets(struct wg_peer *peer)
{
	spin_lock_bh(&peer->staged_packet_queue.lock);
	peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
	__skb_queue_purge(&peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
}

void wg_packet_send_staged_packets(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	struct sk_buff_head packets;
	struct sk_buff *skb;

	/* Steal the current queue into our local one. */
	__skb_queue_head_init(&packets);
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice_init(&peer->staged_packet_queue, &packets);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
	if (unlikely(skb_queue_empty(&packets)))
		return;

	/* First we make sure we have a valid reference to a valid key. */
	rcu_read_lock_bh();
	keypair = wg_noise_keypair_get(
		rcu_dereference_bh(peer->keypairs.current_keypair));
	rcu_read_unlock_bh();
	if (unlikely(!keypair))
		goto out_nokey;
	if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
		goto out_nokey;
	if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
					      REJECT_AFTER_TIME)))
		goto out_invalid;

	/* After we know we have a somewhat valid key, we now try to assign
	 * nonces to all of the packets in the queue. If we can't assign nonces
	 * for all of them, we just consider it a failure and wait for the next
	 * handshake.
	 */
	skb_queue_walk(&packets, skb) {
		/* 0 for no outer TOS: no leak. TODO: at some later point, we
		 * might consider using flowi->tos as outer instead.
		 */
		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
		PACKET_CB(skb)->nonce =
			atomic64_inc_return(&keypair->sending_counter) - 1;
		if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
			goto out_invalid;
	}

	packets.prev->next = NULL;
	wg_peer_get(keypair->entry.peer);
	PACKET_CB(packets.next)->keypair = keypair;
	wg_packet_create_data(packets.next);
	return;

out_invalid:
	WRITE_ONCE(keypair->sending.is_valid, false);
out_nokey:
	wg_noise_keypair_put(keypair, false);

	/* We orphan the packets if we're waiting on a handshake, so that they
	 * don't block a socket's pool.
	 */
	skb_queue_walk(&packets, skb)
		skb_orphan(skb);
	/* Then we put them back on the top of the queue. We're not too
	 * concerned about accidentally getting things a little out of order if
	 * packets are being added really fast, because this queue is for before
	 * packets can even be sent and it's small anyway.
	 */
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	/* If we're exiting because there's something wrong with the key, it
	 * means we should initiate a new handshake.
	 */
	wg_packet_send_queued_handshake_initiation(peer, false);
}
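
The nonce assignment above, atomic64_inc_return(...) - 1, hands each packet a strictly increasing value starting at zero, and any value at or beyond REJECT_AFTER_MESSAGES invalidates the key. A minimal user-space model of that assignment (the REJECT_AFTER_MESSAGES expression below is an assumption about the upstream constant):

/* Stand-alone model of monotonic nonce assignment; illustration only. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REJECT_AFTER_MESSAGES (UINT64_MAX - 128 - 1) /* assumed: max - window - 1 */

static _Atomic uint64_t sending_counter;

static bool assign_nonce(uint64_t *nonce)
{
	*nonce = atomic_fetch_add(&sending_counter, 1); /* inc_return - 1 */
	return *nonce < REJECT_AFTER_MESSAGES;          /* else: must rekey */
}

int main(void)
{
	uint64_t n;

	for (int i = 0; i < 3; ++i)
		if (assign_nonce(&n))
			printf("nonce %llu\n", (unsigned long long)n); /* 0, 1, 2 */
	return 0;
}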
436
net/wireguard/socket.c
Normal file
@ -0,0 +1,436 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "device.h"
#include "peer.h"
#include "socket.h"
#include "queueing.h"
#include "messages.h"

#include <linux/ctype.h>
#include <linux/net.h>
#include <linux/if_vlan.h>
#include <linux/if_ether.h>
#include <linux/inetdevice.h>
#include <net/udp_tunnel.h>
#include <net/ipv6.h>

static int send4(struct wg_device *wg, struct sk_buff *skb,
		 struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
{
	struct flowi4 fl = {
		.saddr = endpoint->src4.s_addr,
		.daddr = endpoint->addr4.sin_addr.s_addr,
		.fl4_dport = endpoint->addr4.sin_port,
		.flowi4_mark = wg->fwmark,
		.flowi4_proto = IPPROTO_UDP
	};
	struct rtable *rt = NULL;
	struct sock *sock;
	int ret = 0;

	skb_mark_not_on_list(skb);
	skb->dev = wg->dev;
	skb->mark = wg->fwmark;

	rcu_read_lock_bh();
	sock = rcu_dereference_bh(wg->sock4);

	if (unlikely(!sock)) {
		ret = -ENONET;
		goto err;
	}

	fl.fl4_sport = inet_sk(sock)->inet_sport;

	if (cache)
		rt = dst_cache_get_ip4(cache, &fl.saddr);

	if (!rt) {
		security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
		if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
						fl.saddr, RT_SCOPE_HOST))) {
			endpoint->src4.s_addr = 0;
			*(__force __be32 *)&endpoint->src_if4 = 0;
			fl.saddr = 0;
			if (cache)
				dst_cache_reset(cache);
		}
		rt = ip_route_output_flow(sock_net(sock), &fl, sock);
		if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
			     PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
			     rt->dst.dev->ifindex != endpoint->src_if4)))) {
			endpoint->src4.s_addr = 0;
			*(__force __be32 *)&endpoint->src_if4 = 0;
			fl.saddr = 0;
			if (cache)
				dst_cache_reset(cache);
			if (!IS_ERR(rt))
				ip_rt_put(rt);
			rt = ip_route_output_flow(sock_net(sock), &fl, sock);
		}
		if (unlikely(IS_ERR(rt))) {
			ret = PTR_ERR(rt);
			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
					    wg->dev->name, &endpoint->addr, ret);
			goto err;
		}
		if (cache)
			dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
	}

	skb->ignore_df = 1;
	udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
			    ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
			    fl.fl4_dport, false, false);
	goto out;

err:
	kfree_skb(skb);
out:
	rcu_read_unlock_bh();
	return ret;
}

static int send6(struct wg_device *wg, struct sk_buff *skb,
		 struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
{
#if IS_ENABLED(CONFIG_IPV6)
	struct flowi6 fl = {
		.saddr = endpoint->src6,
		.daddr = endpoint->addr6.sin6_addr,
		.fl6_dport = endpoint->addr6.sin6_port,
		.flowi6_mark = wg->fwmark,
		.flowi6_oif = endpoint->addr6.sin6_scope_id,
		.flowi6_proto = IPPROTO_UDP
		/* TODO: addr->sin6_flowinfo */
	};
	struct dst_entry *dst = NULL;
	struct sock *sock;
	int ret = 0;

	skb_mark_not_on_list(skb);
	skb->dev = wg->dev;
	skb->mark = wg->fwmark;

	rcu_read_lock_bh();
	sock = rcu_dereference_bh(wg->sock6);

	if (unlikely(!sock)) {
		ret = -ENONET;
		goto err;
	}

	fl.fl6_sport = inet_sk(sock)->inet_sport;

	if (cache)
		dst = dst_cache_get_ip6(cache, &fl.saddr);

	if (!dst) {
		security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
		if (unlikely(!ipv6_addr_any(&fl.saddr) &&
			     !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
			endpoint->src6 = fl.saddr = in6addr_any;
			if (cache)
				dst_cache_reset(cache);
		}
		dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
						      NULL);
		if (unlikely(IS_ERR(dst))) {
			ret = PTR_ERR(dst);
			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
					    wg->dev->name, &endpoint->addr, ret);
			goto err;
		}
		if (cache)
			dst_cache_set_ip6(cache, dst, &fl.saddr);
	}

	skb->ignore_df = 1;
	udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
			     ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
			     fl.fl6_dport, false);
	goto out;

err:
	kfree_skb(skb);
out:
	rcu_read_unlock_bh();
	return ret;
#else
	return -EAFNOSUPPORT;
#endif
}

int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
{
	size_t skb_len = skb->len;
	int ret = -EAFNOSUPPORT;

	read_lock_bh(&peer->endpoint_lock);
	if (peer->endpoint.addr.sa_family == AF_INET)
		ret = send4(peer->device, skb, &peer->endpoint, ds,
			    &peer->endpoint_cache);
	else if (peer->endpoint.addr.sa_family == AF_INET6)
		ret = send6(peer->device, skb, &peer->endpoint, ds,
			    &peer->endpoint_cache);
	else
		dev_kfree_skb(skb);
	if (likely(!ret))
		peer->tx_bytes += skb_len;
	read_unlock_bh(&peer->endpoint_lock);

	return ret;
}

int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
				  size_t len, u8 ds)
{
	struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);

	if (unlikely(!skb))
		return -ENOMEM;

	skb_reserve(skb, SKB_HEADER_LEN);
	skb_set_inner_network_header(skb, 0);
	skb_put_data(skb, buffer, len);
	return wg_socket_send_skb_to_peer(peer, skb, ds);
}

int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
					  struct sk_buff *in_skb, void *buffer,
					  size_t len)
{
	int ret = 0;
	struct sk_buff *skb;
	struct endpoint endpoint;

	if (unlikely(!in_skb))
		return -EINVAL;
	ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
	if (unlikely(ret < 0))
		return ret;

	skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
	if (unlikely(!skb))
		return -ENOMEM;
	skb_reserve(skb, SKB_HEADER_LEN);
	skb_set_inner_network_header(skb, 0);
	skb_put_data(skb, buffer, len);

	if (endpoint.addr.sa_family == AF_INET)
		ret = send4(wg, skb, &endpoint, 0, NULL);
	else if (endpoint.addr.sa_family == AF_INET6)
		ret = send6(wg, skb, &endpoint, 0, NULL);
	/* No other possibilities if the endpoint is valid, which it is,
	 * as we checked above.
	 */

	return ret;
}

int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
				const struct sk_buff *skb)
{
	memset(endpoint, 0, sizeof(*endpoint));
	if (skb->protocol == htons(ETH_P_IP)) {
		endpoint->addr4.sin_family = AF_INET;
		endpoint->addr4.sin_port = udp_hdr(skb)->source;
		endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
		endpoint->src4.s_addr = ip_hdr(skb)->daddr;
		endpoint->src_if4 = skb->skb_iif;
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		endpoint->addr6.sin6_family = AF_INET6;
		endpoint->addr6.sin6_port = udp_hdr(skb)->source;
		endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
		endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
			&ipv6_hdr(skb)->saddr, skb->skb_iif);
		endpoint->src6 = ipv6_hdr(skb)->daddr;
	} else {
		return -EINVAL;
	}
	return 0;
}

static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
{
	return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
		a->addr4.sin_port == b->addr4.sin_port &&
		a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
		a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
	       (a->addr.sa_family == AF_INET6 &&
		b->addr.sa_family == AF_INET6 &&
		a->addr6.sin6_port == b->addr6.sin6_port &&
		ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
		a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
		ipv6_addr_equal(&a->src6, &b->src6)) ||
	       unlikely(!a->addr.sa_family && !b->addr.sa_family);
}

void wg_socket_set_peer_endpoint(struct wg_peer *peer,
				 const struct endpoint *endpoint)
{
	/* First we check unlocked, in order to optimize, since it's pretty rare
	 * that an endpoint will change. If we happen to be mid-write, and two
	 * CPUs wind up writing the same thing or something slightly different,
	 * it doesn't really matter much either.
	 */
	if (endpoint_eq(endpoint, &peer->endpoint))
		return;
	write_lock_bh(&peer->endpoint_lock);
	if (endpoint->addr.sa_family == AF_INET) {
		peer->endpoint.addr4 = endpoint->addr4;
		peer->endpoint.src4 = endpoint->src4;
		peer->endpoint.src_if4 = endpoint->src_if4;
	} else if (endpoint->addr.sa_family == AF_INET6) {
		peer->endpoint.addr6 = endpoint->addr6;
		peer->endpoint.src6 = endpoint->src6;
	} else {
		goto out;
	}
	dst_cache_reset(&peer->endpoint_cache);
out:
	write_unlock_bh(&peer->endpoint_lock);
}

void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
					  const struct sk_buff *skb)
{
	struct endpoint endpoint;

	if (!wg_socket_endpoint_from_skb(&endpoint, skb))
		wg_socket_set_peer_endpoint(peer, &endpoint);
}

void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
{
	write_lock_bh(&peer->endpoint_lock);
	memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
	dst_cache_reset(&peer->endpoint_cache);
	write_unlock_bh(&peer->endpoint_lock);
}

static int wg_receive(struct sock *sk, struct sk_buff *skb)
{
	struct wg_device *wg;

	if (unlikely(!sk))
		goto err;
	wg = sk->sk_user_data;
	if (unlikely(!wg))
		goto err;
	skb_mark_not_on_list(skb);
	wg_packet_receive(wg, skb);
	return 0;

err:
	kfree_skb(skb);
	return 0;
}

static void sock_free(struct sock *sock)
{
	if (unlikely(!sock))
		return;
	sk_clear_memalloc(sock);
	udp_tunnel_sock_release(sock->sk_socket);
}

static void set_sock_opts(struct socket *sock)
{
	sock->sk->sk_allocation = GFP_ATOMIC;
	sock->sk->sk_sndbuf = INT_MAX;
	sk_set_memalloc(sock->sk);
}

int wg_socket_init(struct wg_device *wg, u16 port)
{
	struct net *net;
	int ret;
	struct udp_tunnel_sock_cfg cfg = {
		.sk_user_data = wg,
		.encap_type = 1,
		.encap_rcv = wg_receive
	};
	struct socket *new4 = NULL, *new6 = NULL;
	struct udp_port_cfg port4 = {
		.family = AF_INET,
		.local_ip.s_addr = htonl(INADDR_ANY),
		.local_udp_port = htons(port),
		.use_udp_checksums = true
	};
#if IS_ENABLED(CONFIG_IPV6)
	int retries = 0;
	struct udp_port_cfg port6 = {
		.family = AF_INET6,
		.local_ip6 = IN6ADDR_ANY_INIT,
		.use_udp6_tx_checksums = true,
		.use_udp6_rx_checksums = true,
		.ipv6_v6only = true
	};
#endif

	rcu_read_lock();
	net = rcu_dereference(wg->creating_net);
	net = net ? maybe_get_net(net) : NULL;
	rcu_read_unlock();
	if (unlikely(!net))
		return -ENONET;

#if IS_ENABLED(CONFIG_IPV6)
retry:
#endif

	ret = udp_sock_create(net, &port4, &new4);
	if (ret < 0) {
		pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
		goto out;
	}
	set_sock_opts(new4);
	setup_udp_tunnel_sock(net, new4, &cfg);

#if IS_ENABLED(CONFIG_IPV6)
	if (ipv6_mod_enabled()) {
		port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
		ret = udp_sock_create(net, &port6, &new6);
		if (ret < 0) {
			udp_tunnel_sock_release(new4);
			if (ret == -EADDRINUSE && !port && retries++ < 100)
				goto retry;
			pr_err("%s: Could not create IPv6 socket\n",
			       wg->dev->name);
			goto out;
		}
		set_sock_opts(new6);
		setup_udp_tunnel_sock(net, new6, &cfg);
	}
#endif

	wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
	ret = 0;
out:
	put_net(net);
	return ret;
}
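
The retry loop above exists because with port == 0 the IPv4 socket gets a kernel-assigned ephemeral port, and the IPv6 socket must then bind that very same number, which may already be taken. A user-space sketch of the same dance with POSIX sockets (not the kernel API the driver uses):

/* Bind v4 ephemeral, read back the port, bind v6 to it, retry on failure. */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	for (int tries = 0; tries < 100; ++tries) {
		struct sockaddr_in a4 = { .sin_family = AF_INET };
		struct sockaddr_in6 a6 = { .sin6_family = AF_INET6 };
		socklen_t len = sizeof(a4);
		int s4 = socket(AF_INET, SOCK_DGRAM, 0);
		int s6 = socket(AF_INET6, SOCK_DGRAM, 0);
		int one = 1;

		/* Mirror the driver's ipv6_v6only so the sockets coexist. */
		setsockopt(s6, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
		bind(s4, (struct sockaddr *)&a4, sizeof(a4)); /* port 0 */
		getsockname(s4, (struct sockaddr *)&a4, &len);
		a6.sin6_port = a4.sin_port;          /* reuse the number */
		if (!bind(s6, (struct sockaddr *)&a6, sizeof(a6))) {
			printf("both bound to port %u\n", ntohs(a4.sin_port));
			close(s4);
			close(s6);
			return 0;
		}
		close(s4);                           /* taken: try again */
		close(s6);
	}
	return 1;
}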

void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
		      struct sock *new6)
{
	struct sock *old4, *old6;

	mutex_lock(&wg->socket_update_lock);
	old4 = rcu_dereference_protected(wg->sock4,
				lockdep_is_held(&wg->socket_update_lock));
	old6 = rcu_dereference_protected(wg->sock6,
				lockdep_is_held(&wg->socket_update_lock));
	rcu_assign_pointer(wg->sock4, new4);
	rcu_assign_pointer(wg->sock6, new6);
	if (new4)
		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
	mutex_unlock(&wg->socket_update_lock);
	synchronize_rcu();
	sock_free(old4);
	sock_free(old6);
}
Some files were not shown because too many files have changed in this diff.