rand: new-style locking and support rdrand.
Pure /dev/urandom, no buffering (previous behaviour):
  Did 2320000 RNG (16 bytes) operations in 3000082us (773312.2 ops/sec): 12.4 MB/s
  Did 209000 RNG (256 bytes) operations in 3011984us (69389.5 ops/sec): 17.8 MB/s
  Did 6851 RNG (8192 bytes) operations in 3052027us (2244.7 ops/sec): 18.4 MB/s

Pure rdrand speed:
  Did 34930500 RNG (16 bytes) operations in 3000021us (11643418.5 ops/sec): 186.3 MB/s
  Did 2444000 RNG (256 bytes) operations in 3000164us (814622.1 ops/sec): 208.5 MB/s
  Did 80000 RNG (8192 bytes) operations in 3020968us (26481.6 ops/sec): 216.9 MB/s

rdrand + ChaCha (as in this change):
  Did 19498000 RNG (16 bytes) operations in 3000086us (6499147.0 ops/sec): 104.0 MB/s
  Did 1964000 RNG (256 bytes) operations in 3000566us (654543.2 ops/sec): 167.6 MB/s
  Did 62000 RNG (8192 bytes) operations in 3034090us (20434.5 ops/sec): 167.4 MB/s

Change-Id: Ie17045650cfe75858e4498ac28dbc4dcf8338376
Reviewed-on: https://boringssl-review.googlesource.com/4328
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
@@ -434,6 +434,7 @@ void CRYPTO_STATIC_MUTEX_unlock(struct CRYPTO_STATIC_MUTEX *lock);
|
||||
* stored. */
|
||||
typedef enum {
  /* First slot; presumably the per-thread error state -- confirm against the
   * error-queue code. */
  OPENSSL_THREAD_LOCAL_ERR = 0,
  /* Slot used by RAND_bytes to hold its per-thread RNG state. */
  OPENSSL_THREAD_LOCAL_RAND,
  /* Presumably reserved for the thread-local tests -- confirm. */
  OPENSSL_THREAD_LOCAL_TEST,
  /* Number of slots above. New slots must be added before this entry. */
  NUM_OPENSSL_THREAD_LOCALS,
} thread_local_data_t;
|
||||
|
||||
@@ -1,5 +1,13 @@
|
||||
include_directories(. .. ../../include)
|
||||
|
||||
# The rdrand helper is assembly and is only added to the build when the
# target architecture is x86-64.
if (${ARCH} STREQUAL "x86_64")
  set(RAND_ARCH_SOURCES rdrand-x86_64.${ASM_EXT})
endif()
|
||||
|
||||
add_library(
|
||||
rand
|
||||
|
||||
@@ -8,4 +16,9 @@ add_library(
|
||||
rand.c
|
||||
urandom.c
|
||||
windows.c
|
||||
hwrand.c
|
||||
|
||||
${RAND_ARCH_SOURCES}
|
||||
)
|
||||
|
||||
perlasm(rdrand-x86_64.${ASM_EXT} asm/rdrand-x86_64.pl)
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env perl

# Emits the CRYPTO_rdrand routine for x86-64 by piping the assembly below
# through the perlasm translator, x86_64-xlate.pl.

# Arguments: an optional flavour followed by the output file. If the first
# argument contains a '.', it is taken to be the output file and no flavour is
# passed on.
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) {
  $output = $flavour;
  undef $flavour;
}

# Locate x86_64-xlate.pl relative to the directory that holds this script.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# From here on, everything written to STDOUT is fed to the translator.
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;

# The .byte sequence below is the encoding of `rdrand %rax`, so the routine
# returns whatever the instruction left in %rax.
# NOTE(review): rdrand reports success through the carry flag, which is not
# examined before returning; callers cannot tell a failed read from a good one.
print<<___;
.text

.globl CRYPTO_rdrand
.type CRYPTO_rdrand,\@function,1
.align 16
CRYPTO_rdrand:
.byte 0x48, 0x0f, 0xc7, 0xf0
retq
___

close STDOUT;	# flush
|
||||
@@ -0,0 +1,56 @@
|
||||
/* Copyright (c) 2015, Google Inc.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
#include <openssl/rand.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <openssl/cpu.h>
|
||||
|
||||
|
||||
#if defined(OPENSSL_X86_64)
|
||||
|
||||
int CRYPTO_have_hwrand(void) {
|
||||
return (OPENSSL_ia32cap_P[1] & (1u << 30)) != 0;
|
||||
}
|
||||
|
||||
/* CRYPTO_rdrand is defined in asm/rdrand-x86_64.pl. It takes no arguments and
 * returns 64 bits produced by the rdrand instruction.
 *
 * NOTE(review): the return value carries no status, so a failed rdrand (carry
 * flag clear) cannot be detected from C. Fixing that needs a change to the
 * assembly routine's interface. */
extern uint64_t CRYPTO_rdrand(void);

/* CRYPTO_hwrand fills |len| bytes at |buf| with output from rdrand. The buffer
 * is written eight bytes at a time; a final partial word, if any, is filled
 * from the leading bytes of one more rdrand result. |buf| need not be
 * aligned because each word is copied with memcpy. */
void CRYPTO_hwrand(uint8_t *buf, size_t len) {
  while (len >= sizeof(uint64_t)) {
    const uint64_t word = CRYPTO_rdrand();
    memcpy(buf, &word, sizeof(word));
    buf += sizeof(word);
    len -= sizeof(word);
  }

  if (len > 0) {
    /* Fewer than eight bytes remain: take only as many as are needed. */
    const uint64_t word = CRYPTO_rdrand();
    memcpy(buf, &word, len);
  }
}
|
||||
|
||||
#else
|
||||
|
||||
/* CRYPTO_have_hwrand always returns zero in this configuration: no hardware
 * RNG is supported outside of x86-64. */
int CRYPTO_have_hwrand(void) {
  return 0;
}
|
||||
|
||||
/* CRYPTO_hwrand must never be called in this configuration, because
 * |CRYPTO_have_hwrand| returns zero. It aborts the process rather than return
 * without having written anything to |buf|. */
void CRYPTO_hwrand(uint8_t *buf, size_t len) {
  (void)buf;
  (void)len;
  abort();
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,40 @@
|
||||
/* Copyright (c) 2015, Google Inc.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
#ifndef OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H
|
||||
#define OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* CRYPTO_sysrand fills |len| bytes at |buf| with entropy from the operating
|
||||
* system. */
|
||||
void CRYPTO_sysrand(uint8_t *buf, size_t len);
|
||||
|
||||
/* CRYPTO_have_hwrand returns one iff |CRYPTO_hwrand| can be called to generate
|
||||
* hardware entropy. */
|
||||
int CRYPTO_have_hwrand(void);
|
||||
|
||||
/* CRYPTO_hwrand fills |len| bytes at |buf| with entropy from the hardware.
|
||||
* This function can only be called if |CRYPTO_have_hwrand| returns one. */
|
||||
void CRYPTO_hwrand(uint8_t *buf, size_t len);
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_CRYPTO_RAND_INTERNAL_H */
|
||||
@@ -14,6 +14,134 @@
|
||||
|
||||
#include <openssl/rand.h>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <openssl/mem.h>
|
||||
|
||||
#include "internal.h"
|
||||
#include "../internal.h"
|
||||
|
||||
|
||||
/* It's assumed that the operating system always has an unfailing source of
|
||||
* entropy which is accessed via |CRYPTO_sysrand|. (If the operating system
|
||||
* entropy source fails, it's up to |CRYPTO_sysrand| to abort the process—we
|
||||
* don't try to handle it.)
|
||||
*
|
||||
* In addition, the hardware may provide a low-latency RNG. Intel's rdrand
|
||||
* instruction is the canonical example of this. When a hardware RNG is
|
||||
* available we don't need to worry about an RNG failure arising from fork()ing
|
||||
* the process or moving a VM, so we can keep thread-local RNG state and XOR
|
||||
* the hardware entropy in.
|
||||
*
|
||||
* (We assume that the OS entropy is safe from fork()ing and VM duplication.
|
||||
* This might be a bit of a leap of faith, esp on Windows, but there's nothing
|
||||
* that we can do about it.) */
|
||||
|
||||
/* rand_thread_state is the RNG state that each thread keeps for itself. It is
 * only allocated and used when a hardware RNG is available. */
struct rand_thread_state {
  /* ChaCha20 key, replaced with fresh OS entropy on every refresh. */
  uint8_t key[32];
  /* Counter advanced once per ChaCha invocation or short request since the
   * last refresh. Its bytes are also passed to ChaCha20 as the nonce. */
  uint64_t calls_used;
  /* Number of bytes handed out since the last refresh. */
  size_t bytes_used;
  /* One block of keystream from which requests shorter than a block are
   * served. */
  uint8_t partial_block[64];
  /* Number of bytes of |partial_block| that have already been consumed. */
  unsigned partial_block_used;
};
|
||||
|
||||
/* kMaxCallsPerRefresh bounds how many times the per-thread call counter may
 * advance before |RAND_bytes| fetches a new key from the operating system. It
 * is only relevant when a hardware RNG is present. */
static const unsigned kMaxCallsPerRefresh = 1u << 10;

/* kMaxBytesPerRefresh bounds how many bytes |RAND_bytes| may return before it
 * fetches a new key from the operating system. It is only relevant when a
 * hardware RNG is present. */
static const uint64_t kMaxBytesPerRefresh = UINT64_C(1) << 20;
|
||||
|
||||
/* rand_thread_state_free frees a |rand_thread_state|. This is called when a
|
||||
* thread exits. */
|
||||
static void rand_thread_state_free(void *state) {
|
||||
if (state == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
OPENSSL_cleanse(state, sizeof(struct rand_thread_state));
|
||||
OPENSSL_free(state);
|
||||
}
|
||||
|
||||
extern void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
|
||||
const uint8_t key[32], const uint8_t nonce[8],
|
||||
size_t counter);
|
||||
|
||||
int RAND_bytes(uint8_t *buf, const size_t len) {
|
||||
if (len == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!CRYPTO_have_hwrand()) {
|
||||
/* Without a hardware RNG to save us from address-space duplication, the OS
|
||||
* entropy is used directly. */
|
||||
CRYPTO_sysrand(buf, len);
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct rand_thread_state *state =
|
||||
CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_RAND);
|
||||
if (state == NULL) {
|
||||
state = OPENSSL_malloc(sizeof(struct rand_thread_state));
|
||||
if (state == NULL ||
|
||||
!CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_RAND, state,
|
||||
rand_thread_state_free)) {
|
||||
CRYPTO_sysrand(buf, len);
|
||||
return 1;
|
||||
}
|
||||
|
||||
state->calls_used = kMaxCallsPerRefresh;
|
||||
}
|
||||
|
||||
if (state->calls_used >= kMaxCallsPerRefresh ||
|
||||
state->bytes_used >= kMaxBytesPerRefresh) {
|
||||
CRYPTO_sysrand(state->key, sizeof(state->key));
|
||||
state->calls_used = 0;
|
||||
state->bytes_used = 0;
|
||||
state->partial_block_used = sizeof(state->partial_block);
|
||||
}
|
||||
|
||||
CRYPTO_hwrand(buf, len);
|
||||
|
||||
if (len >= sizeof(state->partial_block)) {
|
||||
size_t remaining = len;
|
||||
while (remaining > 0) {
|
||||
// kMaxBytesPerCall is only 2GB, while ChaCha can handle 256GB. But this
|
||||
// is sufficient and easier on 32-bit.
|
||||
static const size_t kMaxBytesPerCall = 0x80000000;
|
||||
size_t todo = remaining;
|
||||
if (todo > kMaxBytesPerCall) {
|
||||
todo = kMaxBytesPerCall;
|
||||
}
|
||||
CRYPTO_chacha_20(buf, buf, todo, state->key,
|
||||
(uint8_t *)&state->calls_used, 0);
|
||||
buf += todo;
|
||||
remaining -= todo;
|
||||
state->calls_used++;
|
||||
}
|
||||
} else {
|
||||
if (sizeof(state->partial_block) - state->partial_block_used < len) {
|
||||
CRYPTO_chacha_20(state->partial_block, state->partial_block,
|
||||
sizeof(state->partial_block), state->key,
|
||||
(uint8_t *)&state->calls_used, 0);
|
||||
state->partial_block_used = 0;
|
||||
}
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < len; i++) {
|
||||
buf[i] ^= state->partial_block[state->partial_block_used++];
|
||||
}
|
||||
state->calls_used++;
|
||||
}
|
||||
state->bytes_used += len;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RAND_pseudo_bytes(uint8_t *buf, size_t len) {
|
||||
return RAND_bytes(buf, len);
|
||||
|
||||
+27
-25
@@ -25,6 +25,9 @@
|
||||
#include <openssl/thread.h>
|
||||
#include <openssl/mem.h>
|
||||
|
||||
#include "internal.h"
|
||||
#include "../internal.h"
|
||||
|
||||
|
||||
/* This file implements a PRNG by reading from /dev/urandom, optionally with a
|
||||
* fork-safe buffer.
|
||||
@@ -72,20 +75,22 @@ struct rand_buffer {
|
||||
/* rand_bytes_per_buf is the number of actual entropy bytes in a buffer. */
|
||||
static const size_t rand_bytes_per_buf = BUF_SIZE - sizeof(struct rand_buffer);
|
||||
|
||||
static struct CRYPTO_STATIC_MUTEX global_lock = CRYPTO_STATIC_MUTEX_INIT;
|
||||
|
||||
/* list_head is the start of a global, linked-list of rand_buffer objects. It's
|
||||
* protected by CRYPTO_LOCK_RAND. */
|
||||
* protected by |global_lock|. */
|
||||
static struct rand_buffer *list_head;
|
||||
|
||||
/* urandom_fd is a file descriptor to /dev/urandom. It's protected by
|
||||
* CRYPTO_LOCK_RAND. */
|
||||
* |global_lock|. */
|
||||
static int urandom_fd = -2;
|
||||
|
||||
/* urandom_buffering controls whether buffering is enabled (1) or not (0). This
|
||||
* is protected by CRYPTO_LOCK_RAND. */
|
||||
* is protected by |global_lock|. */
|
||||
static int urandom_buffering = 0;
|
||||
|
||||
/* urandom_get_fd_locked returns a file descriptor to /dev/urandom. The caller
|
||||
* of this function must hold CRYPTO_LOCK_RAND. */
|
||||
* of this function must hold |global_lock|. */
|
||||
static int urandom_get_fd_locked(void) {
|
||||
if (urandom_fd != -2) {
|
||||
return urandom_fd;
|
||||
@@ -100,7 +105,7 @@ static int urandom_get_fd_locked(void) {
|
||||
void RAND_cleanup(void) {
|
||||
struct rand_buffer *cur;
|
||||
|
||||
CRYPTO_w_lock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_lock_write(&global_lock);
|
||||
while ((cur = list_head)) {
|
||||
list_head = cur->next;
|
||||
OPENSSL_free(cur);
|
||||
@@ -110,7 +115,7 @@ void RAND_cleanup(void) {
|
||||
}
|
||||
urandom_fd = -2;
|
||||
list_head = NULL;
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
}
|
||||
|
||||
/* read_full reads exactly |len| bytes from |fd| into |out| and returns 1. In
|
||||
@@ -133,36 +138,34 @@ static char read_full(int fd, uint8_t *out, size_t len) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* urandom_rand_pseudo_bytes puts |num| random bytes into |out|. It returns
|
||||
* one on success and zero otherwise. */
|
||||
int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
/* CRYPTO_sysrand puts |num| random bytes into |out|. */
|
||||
void CRYPTO_sysrand(uint8_t *out, size_t requested) {
|
||||
int fd;
|
||||
struct rand_buffer *buf;
|
||||
size_t todo;
|
||||
pid_t pid, ppid;
|
||||
|
||||
if (requested == 0) {
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
|
||||
CRYPTO_w_lock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_lock_write(&global_lock);
|
||||
fd = urandom_get_fd_locked();
|
||||
|
||||
if (fd < 0) {
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
abort();
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* If buffering is not enabled, or if the request is large, then the
|
||||
* result comes directly from urandom. */
|
||||
if (!urandom_buffering || requested > BUF_SIZE / 2) {
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
if (!read_full(fd, out, requested)) {
|
||||
abort();
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
|
||||
pid = getpid();
|
||||
@@ -174,8 +177,8 @@ int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
rand_bytes_per_buf - buf->used >= requested) {
|
||||
memcpy(out, &buf->rand[buf->used], requested);
|
||||
buf->used += requested;
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
return 1;
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we don't immediately have enough entropy with the correct
|
||||
@@ -184,13 +187,13 @@ int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
if (buf) {
|
||||
list_head = buf->next;
|
||||
}
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
|
||||
if (!buf) {
|
||||
buf = (struct rand_buffer *)OPENSSL_malloc(BUF_SIZE);
|
||||
if (!buf) {
|
||||
abort();
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
/* The buffer doesn't contain any random bytes yet
|
||||
* so we mark it as fully used so that it will be
|
||||
@@ -208,7 +211,7 @@ int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
/* We have forked and so cannot use these bytes as they
|
||||
* may have been used in another process. */
|
||||
OPENSSL_free(buf);
|
||||
CRYPTO_w_lock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_lock_write(&global_lock);
|
||||
}
|
||||
|
||||
while (requested > 0) {
|
||||
@@ -228,18 +231,17 @@ int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
if (!read_full(fd, buf->rand, rand_bytes_per_buf)) {
|
||||
OPENSSL_free(buf);
|
||||
abort();
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->used = 0;
|
||||
}
|
||||
|
||||
CRYPTO_w_lock(CRYPTO_LOCK_RAND);
|
||||
CRYPTO_STATIC_MUTEX_lock_write(&global_lock);
|
||||
assert(list_head != buf);
|
||||
buf->next = list_head;
|
||||
list_head = buf;
|
||||
CRYPTO_w_unlock(CRYPTO_LOCK_RAND);
|
||||
return 1;
|
||||
CRYPTO_STATIC_MUTEX_unlock(&global_lock);
|
||||
}
|
||||
|
||||
#endif /* !OPENSSL_WINDOWS */
|
||||
|
||||
@@ -32,11 +32,13 @@
|
||||
|
||||
#pragma warning(pop)
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
void RAND_cleanup(void) {
|
||||
}
|
||||
|
||||
int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
void CRYPTO_sysrand(uint8_t *out, size_t requested) {
|
||||
while (requested > 0) {
|
||||
ULONG output_bytes_this_pass = ULONG_MAX;
|
||||
if (requested < output_bytes_this_pass) {
|
||||
@@ -48,7 +50,7 @@ int RAND_bytes(uint8_t *out, size_t requested) {
|
||||
requested -= output_bytes_this_pass;
|
||||
out += output_bytes_this_pass;
|
||||
}
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
|
||||
#endif /* OPENSSL_WINDOWS */
|
||||
|
||||
@@ -25,8 +25,7 @@ extern "C" {
|
||||
/* Random number generation. */
|
||||
|
||||
|
||||
/* RAND_bytes writes |len| bytes of random data to |buf|. It returns one on
|
||||
* success and zero on otherwise. */
|
||||
/* RAND_bytes writes |len| bytes of random data to |buf| and returns one. */
|
||||
OPENSSL_EXPORT int RAND_bytes(uint8_t *buf, size_t len);
|
||||
|
||||
/* RAND_cleanup frees any resources used by the RNG. This is not safe if other
|
||||
|
||||
Reference in New Issue
Block a user