Guard the _GNU_SOURCE #define.

It is hard to control what flags consumers may try to build us with. Account for someone adding _GNU_SOURCE to the build line. Change-Id: I4c931da70a9dccc89382ce9100c228c29d28d4bf Reviewed-on: https://boringssl-review.googlesource.com/13621 Commit-Queue: David Benjamin <davidben@google.com> Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> (cherry picked from commit e025f30507)
Convert one libssl function to C++11.
2017-02-03 17:18:09 -05:00 · 2017-02-02 18:16:27 -05:00
1908 changed files with 56054 additions and 428050 deletions
@@ -4,21 +4,3 @@ ssl/test/runner/runner
 *.swo
 doc/*.html
 doc/doc.css
-
-util/bot/android_tools
-util/bot/cmake-linux64
-util/bot/cmake-linux64.tar.gz
-util/bot/cmake-mac
-util/bot/cmake-mac.tar.gz
-util/bot/cmake-win32
-util/bot/cmake-win32.zip
-util/bot/golang
-util/bot/gyp
-util/bot/libFuzzer
-util/bot/llvm-build
-util/bot/perl-win32
-util/bot/perl-win32.zip
-util/bot/sde-linux64
-util/bot/sde-linux64.tar.bz2
-util/bot/win_toolchain.json
-util/bot/yasm-win32.exe
@@ -14,10 +14,10 @@ All supported public APIs are documented in the public header files, found in
 Some headers lack documentation comments. These are functions and structures from
 OpenSSL's legacy ASN.1, X.509, and PEM implementation. If possible, avoid using
 them. These are left largely unmodified from upstream and are retained only for
-compatibility with existing OpenSSL consumers.
+compatibility with existing OpenSSL consumers.


-## Forward declarations
+# Forward declarations

 Do not write `typedef struct foo_st FOO` or try otherwise to define BoringSSL's
 types. Including `openssl/base.h` (or `openssl/ossl_typ.h` for consumers who
@@ -2,7 +2,7 @@

 ## Build Prerequisites

-  * [CMake](https://cmake.org/download/) 2.8.11 or later is required.
+  * [CMake](https://cmake.org/download/) 2.8.8 or later is required.

  * Perl 5.6.1 or later is required. On Windows,
    [Active State Perl](http://www.activestate.com/activeperl/) has been
@@ -33,7 +33,7 @@
    executable may be configured explicitly by setting `GO_EXECUTABLE`.

  * To build the x86 and x86\_64 assembly, your assembler must support AVX2
-    instructions and MOVBE. If using GNU binutils, you must have 2.22 or later.
+    instructions. If using GNU binutils, you must have 2.22 or later.

 ## Building

@@ -96,15 +96,6 @@ higher to build aarch64 binaries.

 For other options, see [android-cmake's documentation](./third_party/android-cmake/README.md).

-### Building for iOS
-
-To build for iOS, pass `-DCMAKE_OSX_SYSROOT=iphoneos` and
-`-DCMAKE_OSX_ARCHITECTURES=ARCH` to CMake, where `ARCH` is the desired
-architecture, matching values used in the `-arch` flag in Apple's toolchain.
-
-Passing multiple architectures for a multiple-architecture build is not
-supported.
-
 ## Known Limitations on Windows

  * Versions of CMake since 3.0.2 have a bug in its Ninja generator that causes
@@ -1,4 +1,4 @@
-cmake_minimum_required (VERSION 2.8.11)
+cmake_minimum_required (VERSION 2.8.10)

 # Defer enabling C and CXX languages.
 project (BoringSSL NONE)
@@ -9,8 +9,6 @@ if(WIN32)
  set(CMAKE_GENERATOR_CC cl)
 endif()

-include(sources.cmake)
-
 enable_language(C)
 enable_language(CXX)

@@ -36,18 +34,15 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  set(C_CXX_FLAGS "-Wall -Werror -Wformat=2 -Wsign-compare -Wmissing-field-initializers -Wwrite-strings -ggdb -fvisibility=hidden -fno-common")
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wnewline-eof")
-  else()
-    # GCC (at least 4.8.4) has a bug where it'll find unreachable free() calls
-    # and declare that the code is trying to free a stack pointer.
-    set(C_CXX_FLAGS "${C_CXX_FLAGS} -Wno-free-nonheap-object")
  endif()
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${C_CXX_FLAGS} -Wmissing-prototypes -Wold-style-definition -Wstrict-prototypes")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 ${C_CXX_FLAGS} -Wmissing-declarations")
+  # Clang's integrated assembler does not support debug symbols.
+  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
+  endif()
 elseif(MSVC)
  set(MSVC_DISABLED_WARNINGS_LIST
-      "C4061" # enumerator 'identifier' in switch of enum 'enumeration' is not
-              # explicitly handled by a case label
-              # Disable this because it flags even when there is a default.
      "C4100" # 'exarg' : unreferenced formal parameter
      "C4127" # conditional expression is constant
      "C4200" # nonstandard extension used : zero-sized array in
@@ -83,16 +78,12 @@ elseif(MSVC)
              # copy constructor is inaccessible or deleted
      "C4626" # assignment operator could not be generated because a base class
              # assignment operator is inaccessible or deleted
-      "C4668" # 'symbol' is not defined as a preprocessor macro, replacing with
-              # '0' for 'directives'
-              # Disable this because GTest uses it everywhere.
      "C4706" # assignment within conditional expression
      "C4710" # 'function': function not inlined
      "C4711" # function 'function' selected for inline expansion
      "C4800" # 'int' : forcing value to bool 'true' or 'false'
              # (performance warning)
      "C4820" # 'bytes' bytes padding added after construct 'member_name'
-      "C5026" # move constructor was implicitly defined as deleted
      "C5027" # move assignment operator was implicitly defined as deleted
      )
  set(MSVC_LEVEL4_WARNINGS_LIST
@@ -145,8 +136,8 @@ if(FUZZ)
    set(RUNNER_ARGS ${RUNNER_ARGS} "-fuzzer" "-shim-config" "fuzzer_mode.json")
  endif()

-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize-coverage=edge,indirect-calls,trace-pc-guard")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize-coverage=edge,indirect-calls,trace-pc-guard")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize-coverage=edge,indirect-calls,8bit-counters")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize-coverage=edge,indirect-calls,8bit-counters")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
  link_directories(.)
 endif()
@@ -160,57 +151,7 @@ if (BUILD_SHARED_LIBS)
  set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
 endif()

-if (MSAN)
-  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    message(FATAL_ERROR "Cannot enable MSAN unless using Clang")
-  endif()
-
-  if (ASAN)
-    message(FATAL_ERROR "ASAN and MSAN are mutually exclusive")
-  endif()
-
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins -fno-omit-frame-pointer")
-  set(OPENSSL_NO_ASM "1")
-endif()
-
-if (ASAN)
-  if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    message(FATAL_ERROR "Cannot enable ASAN unless using Clang")
-  endif()
-
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fsanitize-address-use-after-scope -fno-omit-frame-pointer")
-  set(OPENSSL_NO_ASM "1")
-endif()
-
-if (GCOV)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
-endif()
-
-if(FIPS)
-  add_definitions(-DBORINGSSL_FIPS)
-endif()
-
-# CMake's iOS support uses Apple's multiple-architecture toolchain. It takes an
-# architecture list from CMAKE_OSX_ARCHITECTURES, leaves CMAKE_SYSTEM_PROCESSOR
-# alone, and expects all architecture-specific logic to be conditioned within
-# the source files rather than the build. This does not work for our assembly
-# files, so we fix CMAKE_SYSTEM_PROCESSOR and only support single-architecture
-# builds.
-if (NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES)
-  list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES)
-  if (NOT ${NUM_ARCHES} EQUAL 1)
-    message(FATAL_ERROR "Universal binaries not supported.")
-  endif()
-  list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR)
-endif()
-
-if (OPENSSL_NO_ASM)
-  add_definitions(-DOPENSSL_NO_ASM)
-  set(ARCH "generic")
-elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
+if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
  set(ARCH "x86_64")
 elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
  set(ARCH "x86_64")
@@ -227,17 +168,12 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
  set(ARCH "x86")
 elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
  set(ARCH "x86")
-elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
-  set(ARCH "aarch64")
-elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
-  set(ARCH "aarch64")
 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm*")
  set(ARCH "arm")
+elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
+  set(ARCH "aarch64")
 elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
  # Just to avoid the “unknown processor” error.
-  set(ARCH "generic")
-elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
-  set(ARCH "ppc64le")
 else()
  message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
 endif()
@@ -256,42 +192,22 @@ if (${ARCH} STREQUAL "x86" AND APPLE)
  set(ARCH "x86_64")
 endif()

-# Add minimal googletest targets. The provided one has many side-effects, and
-# googletest has a very straightforward build.
-add_library(gtest third_party/googletest/src/gtest-all.cc)
-target_include_directories(gtest PRIVATE third_party/googletest)
-
-include_directories(third_party/googletest/include)
+if (OPENSSL_NO_ASM)  # Opt-out switch for the assembly implementations.
+  add_definitions(-DOPENSSL_NO_ASM)  # Directory-scoped define, so every target added after this sees it.
+  set(ARCH "generic")  # Replaces the processor-derived ARCH chosen earlier in this file.
+endif()

 # Declare a dummy target to build all unit tests. Test targets should inject
 # themselves as dependencies next to the target definition.
 add_custom_target(all_tests)

-add_custom_command(
-  OUTPUT crypto_test_data.cc
-  COMMAND ${GO_EXECUTABLE} run util/embed_test_data.go ${CRYPTO_TEST_DATA} >
-  ${CMAKE_CURRENT_BINARY_DIR}/crypto_test_data.cc
-  DEPENDS util/embed_test_data.go ${CRYPTO_TEST_DATA}
-  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-
-add_library(crypto_test_data OBJECT crypto_test_data.cc)
-
 add_subdirectory(crypto)
 add_subdirectory(ssl)
 add_subdirectory(ssl/test)
-add_subdirectory(fipstools)
 add_subdirectory(tool)
 add_subdirectory(decrepit)

 if(FUZZ)
-  if(LIBFUZZER_FROM_DEPS)
-    file(GLOB LIBFUZZER_SOURCES "util/bot/libFuzzer/*.cpp")
-    add_library(Fuzzer STATIC ${LIBFUZZER_SOURCES})
-    # libFuzzer does not pass our aggressive warnings. It also must be built
-    # without -fsanitize-coverage options or clang crashes.
-    set_target_properties(Fuzzer PROPERTIES COMPILE_FLAGS "-Wno-shadow -Wno-format-nonliteral -fsanitize-coverage=0")
-  endif()
-
  add_subdirectory(fuzz)
 endif()

@@ -23,7 +23,7 @@ Then copy `libFuzzer.a` to the top-level of your BoringSSL source directory.
 From the `build/` directory, you can then run the fuzzers. For example:

 ```
-./fuzz/cert -max_len=10000 -jobs=32 -workers=32 ../fuzz/cert_corpus/
+./fuzz/cert -max_len=3072 -jobs=32 -workers=32 ../fuzz/cert_corpus/
 ```

 The arguments to `jobs` and `workers` should be the number of cores that you wish to dedicate to fuzzing. By default, libFuzzer uses the largest test in the corpus (or 64 if empty) as the maximum test case length. The `max_len` argument overrides this.
@@ -32,12 +32,11 @@ The recommended values of `max_len` for each test are:

 | Test          | `max_len` value |
 |---------------|-----------------|
-| `cert`        | 10000           |
+| `cert`        | 3072            |
 | `client`      | 20000           |
 | `pkcs8`       | 2048            |
 | `privkey`     | 2048            |
 | `server`      | 4096            |
-| `session`     | 8192            |
 | `spki`        | 1024            |
 | `read_pem`    | 512             |
 | `ssl_ctx_api` | 256             |
@@ -80,5 +79,5 @@ If both sets of tests pass, refresh the fuzzer corpora with `refresh_ssl_corpora

 ```
 cd fuzz
-./refresh_ssl_corpora.sh /path/to/fuzzer/mode/build /path/to/non/fuzzer/mode/build
+./refresh_fuzzer_corpora.sh /path/to/fuzzer/mode/build /path/to/non/fuzzer/mode/build
 ```
@@ -227,7 +227,6 @@ parameter.
 `SSL_CTRL_OPTIONS` | `SSL_CTX_get_options` or `SSL_CTX_set_options`
 `SSL_CTRL_SESS_NUMBER` | `SSL_CTX_sess_number`
 `SSL_CTRL_SET_CURVES` | `SSL_CTX_set1_curves`
-`SSL_CTRL_SET_ECDH_AUTO` | `SSL_CTX_set_ecdh_auto`
 `SSL_CTRL_SET_MAX_CERT_LIST` | `SSL_CTX_set_max_cert_list`
 `SSL_CTRL_SET_MAX_SEND_FRAGMENT` | `SSL_CTX_set_max_send_fragment`
 `SSL_CTRL_SET_MSG_CALLBACK` | `SSL_set_msg_callback`
@@ -45,16 +45,6 @@ not
 Rather than `malloc()` and `free()`, use the wrappers `OPENSSL_malloc()`
 and `OPENSSL_free()`. Use the standard C `assert()` function freely.

-Use the following wrappers, found in `crypto/internal.h` instead of the
-corresponding C standard library functions. They behave the same but avoid
-confusing undefined behavior.
-
-* `OPENSSL_memchr`
-* `OPENSSL_memcmp`
-* `OPENSSL_memcpy`
-* `OPENSSL_memmove`
-* `OPENSSL_memset`
-
 For new constants, prefer enums when the values are sequential and typed
 constants for flags. If adding values to an existing set of `#define`s,
 continue with `#define`.
@@ -1,54 +1,36 @@
 include_directories(../include)

-if(UNIX)
+if(APPLE)
+  if (${ARCH} STREQUAL "x86")
+    set(PERLASM_FLAGS "-fPIC -DOPENSSL_IA32_SSE2")
+  endif()
+  set(PERLASM_STYLE macosx)
+  set(ASM_EXT S)
+  enable_language(ASM)
+elseif(UNIX)
  if (${ARCH} STREQUAL "aarch64")
    # The "armx" Perl scripts look for "64" in the style argument
    # in order to decide whether to generate 32- or 64-bit asm.
-    if (APPLE)
-      set(PERLASM_STYLE ios64)
-    else()
-      set(PERLASM_STYLE linux64)
-    endif()
+    set(PERLASM_STYLE linux64)
  elseif (${ARCH} STREQUAL "arm")
-    if (APPLE)
-      set(PERLASM_STYLE ios32)
-    else()
-      set(PERLASM_STYLE linux32)
-    endif()
+    set(PERLASM_STYLE linux32)
+  elseif (${ARCH} STREQUAL "x86")
+    set(PERLASM_FLAGS "-fPIC -DOPENSSL_IA32_SSE2")
+    set(PERLASM_STYLE elf)
  elseif (${ARCH} STREQUAL "ppc64le")
    set(PERLASM_STYLE ppc64le)
  else()
-    if (${ARCH} STREQUAL "x86")
-      set(PERLASM_FLAGS "-fPIC -DOPENSSL_IA32_SSE2")
-    endif()
-    if (APPLE)
-      set(PERLASM_STYLE macosx)
-    else()
-      set(PERLASM_STYLE elf)
-    endif()
+    set(PERLASM_STYLE elf)
  endif()
  set(ASM_EXT S)
  enable_language(ASM)
  set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,--noexecstack")
-
-  # Clang's integerated assembler does not support debug symbols.
-  if(NOT CMAKE_ASM_COMPILER_ID MATCHES "Clang")
-    set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -Wa,-g")
-  endif()
-
-  # CMake does not add -isysroot and -arch flags to assembly.
-  if (APPLE)
-    if (CMAKE_OSX_SYSROOT)
-      set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
-    endif()
-    foreach(arch ${CMAKE_OSX_ARCHITECTURES})
-      set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -arch ${arch}")
-    endforeach()
-  endif()
 else()
  if (CMAKE_CL_64)
+    message("Using nasm")
    set(PERLASM_STYLE nasm)
  else()
+    message("Using win32n")
    set(PERLASM_STYLE win32n)
    set(PERLASM_FLAGS "-DOPENSSL_IA32_SSE2")
  endif()
@@ -85,18 +67,25 @@ add_subdirectory(bytestring)
 add_subdirectory(pool)

 # Level 0.2 - depends on nothing but itself
+add_subdirectory(sha)
+add_subdirectory(md4)
+add_subdirectory(md5)
+add_subdirectory(modes)
+add_subdirectory(aes)
+add_subdirectory(des)
 add_subdirectory(rc4)
 add_subdirectory(conf)
 add_subdirectory(chacha)
 add_subdirectory(poly1305)
 add_subdirectory(curve25519)
+add_subdirectory(newhope)

 # Level 1, depends only on 0.*
-add_subdirectory(digest_extra)
-add_subdirectory(cipher_extra)
-add_subdirectory(rand_extra)
+add_subdirectory(digest)
+add_subdirectory(cipher)
+add_subdirectory(rand)
 add_subdirectory(bio)
-add_subdirectory(bn_extra)
+add_subdirectory(bn)
 add_subdirectory(obj)
 add_subdirectory(asn1)

@@ -104,11 +93,11 @@ add_subdirectory(asn1)
 add_subdirectory(engine)
 add_subdirectory(dh)
 add_subdirectory(dsa)
-add_subdirectory(rsa_extra)
-add_subdirectory(ec_extra)
+add_subdirectory(rsa)
+add_subdirectory(ec)
 add_subdirectory(ecdh)
-add_subdirectory(ecdsa_extra)
-add_subdirectory(hmac_extra)
+add_subdirectory(ecdsa)
+add_subdirectory(hmac)

 # Level 3
 add_subdirectory(cmac)
@@ -119,18 +108,13 @@ add_subdirectory(x509)
 add_subdirectory(x509v3)

 # Level 4
-add_subdirectory(pkcs7)
 add_subdirectory(pkcs8)

 # Test support code
 add_subdirectory(test)

-add_subdirectory(fipsmodule)
-
 add_library(
-  crypto_base
-
-  OBJECT
+  crypto

  cpu-aarch64-linux.c
  cpu-arm.c
@@ -146,72 +130,66 @@ add_library(
  thread_none.c
  thread_pthread.c
  thread_win.c
-)
+  time_support.c

-if(FIPS)
-  SET_SOURCE_FILES_PROPERTIES(fipsmodule/bcm.o PROPERTIES EXTERNAL_OBJECT true)
-  SET_SOURCE_FILES_PROPERTIES(fipsmodule/bcm.o PROPERTIES GENERATED true)
-
-  set(
-    CRYPTO_FIPS_OBJECTS
-
-    fipsmodule/bcm.o
-  )
-endif()
-
-add_library(
-  crypto
-
-  $<TARGET_OBJECTS:crypto_base>
  $<TARGET_OBJECTS:stack>
  $<TARGET_OBJECTS:lhash>
  $<TARGET_OBJECTS:err>
  $<TARGET_OBJECTS:base64>
  $<TARGET_OBJECTS:bytestring>
  $<TARGET_OBJECTS:pool>
-  $<TARGET_OBJECTS:fipsmodule>
-  $<TARGET_OBJECTS:digest_extra>
-  $<TARGET_OBJECTS:cipher_extra>
+  $<TARGET_OBJECTS:sha>
+  $<TARGET_OBJECTS:md4>
+  $<TARGET_OBJECTS:md5>
+  $<TARGET_OBJECTS:digest>
+  $<TARGET_OBJECTS:cipher>
+  $<TARGET_OBJECTS:modes>
+  $<TARGET_OBJECTS:aes>
+  $<TARGET_OBJECTS:des>
  $<TARGET_OBJECTS:rc4>
  $<TARGET_OBJECTS:conf>
  $<TARGET_OBJECTS:chacha>
  $<TARGET_OBJECTS:poly1305>
  $<TARGET_OBJECTS:curve25519>
  $<TARGET_OBJECTS:buf>
-  $<TARGET_OBJECTS:bn_extra>
+  $<TARGET_OBJECTS:bn>
  $<TARGET_OBJECTS:bio>
-  $<TARGET_OBJECTS:rand_extra>
+  $<TARGET_OBJECTS:rand>
  $<TARGET_OBJECTS:obj>
  $<TARGET_OBJECTS:asn1>
  $<TARGET_OBJECTS:engine>
  $<TARGET_OBJECTS:dh>
  $<TARGET_OBJECTS:dsa>
-  $<TARGET_OBJECTS:rsa_extra>
-  $<TARGET_OBJECTS:ec_extra>
+  $<TARGET_OBJECTS:rsa>
+  $<TARGET_OBJECTS:ec>
  $<TARGET_OBJECTS:ecdh>
-  $<TARGET_OBJECTS:ecdsa_extra>
+  $<TARGET_OBJECTS:ecdsa>
+  $<TARGET_OBJECTS:hmac>
  $<TARGET_OBJECTS:cmac>
  $<TARGET_OBJECTS:evp>
  $<TARGET_OBJECTS:hkdf>
  $<TARGET_OBJECTS:pem>
  $<TARGET_OBJECTS:x509>
  $<TARGET_OBJECTS:x509v3>
-  $<TARGET_OBJECTS:pkcs7>
  $<TARGET_OBJECTS:pkcs8_lib>
-
-  ${CRYPTO_FIPS_OBJECTS}
+  $<TARGET_OBJECTS:newhope>
 )

-if(FIPS)
-  add_dependencies(crypto bcm_o_target)
-endif()
-
-SET_TARGET_PROPERTIES(crypto PROPERTIES LINKER_LANGUAGE C)
-
 if(NOT MSVC AND NOT ANDROID)
  target_link_libraries(crypto pthread)
 endif()

+add_executable(
+  constant_time_test
+
+  constant_time_test.c
+
+  $<TARGET_OBJECTS:test_support>
+)
+
+target_link_libraries(constant_time_test crypto)
+add_dependencies(all_tests constant_time_test)
+
 add_executable(
  thread_test

@@ -223,45 +201,11 @@ add_executable(
 target_link_libraries(thread_test crypto)
 add_dependencies(all_tests thread_test)

-# TODO(davidben): Convert the remaining tests to GTest.
 add_executable(
-  crypto_test
+  refcount_test

-  asn1/asn1_test.cc
-  base64/base64_test.cc
-  bio/bio_test.cc
-  bytestring/bytestring_test.cc
-  chacha/chacha_test.cc
-  cipher_extra/aead_extra_test.cc
-  cmac/cmac_test.cc
-  compiler_test.cc
-  constant_time_test.cc
-  curve25519/ed25519_test.cc
-  curve25519/spake25519_test.cc
-  curve25519/x25519_test.cc
-  dh/dh_test.cc
-  digest_extra/digest_test.cc
-  dsa/dsa_test.cc
-  err/err_test.cc
-  evp/evp_extra_test.cc
-  evp/pbkdf_test.cc
-  fipsmodule/aes/aes_test.cc
-  fipsmodule/ec/ec_test.cc
-  fipsmodule/rand/ctrdrbg_test.cc
-  hkdf/hkdf_test.cc
-  lhash/lhash_test.cc
-  pool/pool_test.cc
-  refcount_test.cc
-  rsa_extra/rsa_test.cc
-  test/file_test_gtest.cc
-
-  $<TARGET_OBJECTS:crypto_test_data>
-  $<TARGET_OBJECTS:gtest_main>
-  $<TARGET_OBJECTS:test_support>
+  refcount_test.c
 )

-target_link_libraries(crypto_test crypto gtest)
-if (WIN32)
-  target_link_libraries(crypto_test ws2_32)
-endif()
-add_dependencies(all_tests crypto_test)
+target_link_libraries(refcount_test crypto)
+add_dependencies(all_tests refcount_test)
@@ -0,0 +1,82 @@
+include_directories(../../include)
+
+if ("${ARCH}" STREQUAL "x86_64")  # ARCH is quoted in every test so an unset/empty value compares as "" instead of breaking if().
+  set(  # Assembly sources for this architecture; names match the perlasm() outputs below.
+    AES_ARCH_SOURCES
+
+    aes-x86_64.${ASM_EXT}
+    aesni-x86_64.${ASM_EXT}
+    bsaes-x86_64.${ASM_EXT}
+    vpaes-x86_64.${ASM_EXT}
+  )
+endif()
+
+if ("${ARCH}" STREQUAL "x86")
+  set(
+    AES_ARCH_SOURCES
+
+    aes-586.${ASM_EXT}
+    vpaes-x86.${ASM_EXT}
+    aesni-x86.${ASM_EXT}
+  )
+endif()
+
+if ("${ARCH}" STREQUAL "arm")
+  set(
+    AES_ARCH_SOURCES
+
+    aes-armv4.${ASM_EXT}
+    bsaes-armv7.${ASM_EXT}
+    aesv8-armx.${ASM_EXT}
+  )
+endif()
+
+if ("${ARCH}" STREQUAL "aarch64")
+  set(
+    AES_ARCH_SOURCES
+
+    aesv8-armx.${ASM_EXT}
+  )
+endif()
+
+if ("${ARCH}" STREQUAL "ppc64le")
+  set(
+    AES_ARCH_SOURCES
+
+    aesp8-ppc.${ASM_EXT}
+  )
+endif()  # No branch matched (e.g. ARCH "generic"): AES_ARCH_SOURCES stays unset and only the C sources are built.
+
+add_library(
+  aes
+
+  OBJECT  # Object library; the crypto library target lists $<TARGET_OBJECTS:aes> among its sources.
+
+  aes.c
+  key_wrap.c
+  mode_wrappers.c
+
+  ${AES_ARCH_SOURCES}  # Empty unless one of the ARCH tests above matched.
+)
+# perlasm() is not defined in this file; presumably it registers a rule generating each output from its Perl script -- confirm.
+perlasm(aes-x86_64.${ASM_EXT} asm/aes-x86_64.pl)
+perlasm(aesni-x86_64.${ASM_EXT} asm/aesni-x86_64.pl)
+perlasm(bsaes-x86_64.${ASM_EXT} asm/bsaes-x86_64.pl)
+perlasm(vpaes-x86_64.${ASM_EXT} asm/vpaes-x86_64.pl)
+perlasm(aes-586.${ASM_EXT} asm/aes-586.pl)
+perlasm(vpaes-x86.${ASM_EXT} asm/vpaes-x86.pl)
+perlasm(aesni-x86.${ASM_EXT} asm/aesni-x86.pl)
+perlasm(aes-armv4.${ASM_EXT} asm/aes-armv4.pl)
+perlasm(bsaes-armv7.${ASM_EXT} asm/bsaes-armv7.pl)
+perlasm(aesv8-armx.${ASM_EXT} asm/aesv8-armx.pl)
+perlasm(aesp8-ppc.${ASM_EXT} asm/aesp8-ppc.pl)
+# Standalone test executable: linked against crypto and added as a dependency of all_tests.
+add_executable(
+  aes_test
+
+  aes_test.cc
+  $<TARGET_OBJECTS:test_support>
+)
+
+target_link_libraries(aes_test crypto)
+add_dependencies(all_tests aes_test)
@@ -49,11 +49,11 @@
 #include <openssl/aes.h>

 #include <assert.h>
+#include <stdlib.h>

 #include <openssl/cpu.h>

 #include "internal.h"
-#include "../modes/internal.h"


 #if defined(OPENSSL_NO_ASM) || \
@@ -1060,6 +1060,44 @@ void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {

 #else

+#if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
+/* ARM targets: the aes_hw_* routines are only declared here; their definitions are not in this block. */
+static int hwaes_capable(void) {
+  return CRYPTO_is_ARMv8_AES_capable();
+}
+
+int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
+                           AES_KEY *key);
+int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
+                           AES_KEY *key);
+void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
+
+#else  /* !(OPENSSL_ARM || OPENSSL_AARCH64) */
+/* Other targets: hwaes_capable() always returns 0 and every aes_hw_* stub calls abort(). */
+static int hwaes_capable(void) {
+  return 0;
+}
+
+static int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
+  abort();
+}
+
+static int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits, AES_KEY *key) {
+  abort();
+}
+
+static void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+  abort();
+}
+
+static void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
+  abort();
+}
+
+#endif  /* OPENSSL_ARM || OPENSSL_AARCH64 */
+
+
 /* In this case several functions are provided by asm code. However, one cannot
 * control asm symbol visibility with command line flags and such so they are
 * always hidden and wrapped by these C functions, which can be so
@@ -0,0 +1,182 @@
+/* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+#include <vector>
+
+#include <openssl/aes.h>
+#include <openssl/crypto.h>
+
+#include "../test/file_test.h"
+
+
+static bool TestRaw(FileTest *t) {  // Checks single-block AES_encrypt/AES_decrypt against the Key/Plaintext/Ciphertext attributes of |t|.
+  std::vector<uint8_t> key, plaintext, ciphertext;
+  if (!t->GetBytes(&key, "Key") ||
+      !t->GetBytes(&plaintext, "Plaintext") ||
+      !t->GetBytes(&ciphertext, "Ciphertext")) {
+    return false;
+  }
+
+  if (plaintext.size() != AES_BLOCK_SIZE ||
+      ciphertext.size() != AES_BLOCK_SIZE) {  // Both vectors must be exactly one block long.
+    t->PrintLine("Plaintext or Ciphertext not a block size.");
+    return false;
+  }
+
+  AES_KEY aes_key;
+  if (AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {  // Key length is passed in bits.
+    t->PrintLine("AES_set_encrypt_key failed.");
+    return false;
+  }
+
+  // Test encryption from |plaintext| into the separate |block| buffer.
+  uint8_t block[AES_BLOCK_SIZE];
+  AES_encrypt(plaintext.data(), block, &aes_key);
+  if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, ciphertext.data(),
+                           ciphertext.size())) {
+    t->PrintLine("AES_encrypt gave the wrong output.");
+    return false;
+  }
+
+  // Test in-place encryption: |block| is both the input and the output.
+  memcpy(block, plaintext.data(), AES_BLOCK_SIZE);
+  AES_encrypt(block, block, &aes_key);
+  if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, ciphertext.data(),
+                           ciphertext.size())) {
+    t->PrintLine("In-place AES_encrypt gave the wrong output.");
+    return false;
+  }
+
+  if (AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {  // Reuse |aes_key|, now set up for decryption.
+    t->PrintLine("AES_set_decrypt_key failed.");
+    return false;
+  }
+
+  // Test decryption from |ciphertext| into the separate |block| buffer.
+  AES_decrypt(ciphertext.data(), block, &aes_key);
+  if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, plaintext.data(),
+                           plaintext.size())) {
+    t->PrintLine("AES_decrypt gave the wrong output.");
+    return false;
+  }
+
+  // Test in-place decryption: |block| is both the input and the output.
+  memcpy(block, ciphertext.data(), AES_BLOCK_SIZE);
+  AES_decrypt(block, block, &aes_key);
+  if (!t->ExpectBytesEqual(block, AES_BLOCK_SIZE, plaintext.data(),
+                           plaintext.size())) {
+    t->PrintLine("In-place AES_decrypt gave the wrong output.");
+    return false;
+  }
+
+  return true;
+}
+
+static bool TestKeyWrap(FileTest *t) {  // Checks AES_wrap_key/AES_unwrap_key against the Key/Plaintext/Ciphertext attributes of |t|.
+  // All test vectors use the default IV, so each operation is run twice: once
+  // with a null IV (implicit) and once with |kDefaultIV| passed explicitly.
+  //
+  // TODO(davidben): Find test vectors that use a different IV.
+  static const uint8_t kDefaultIV[] = {
+      0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
+  };
+
+  std::vector<uint8_t> key, plaintext, ciphertext;
+  if (!t->GetBytes(&key, "Key") ||
+      !t->GetBytes(&plaintext, "Plaintext") ||
+      !t->GetBytes(&ciphertext, "Ciphertext")) {
+    return false;
+  }
+
+  if (plaintext.size() + 8 != ciphertext.size()) {  // The ciphertext must be exactly 8 bytes longer than the plaintext.
+    t->PrintLine("Invalid Plaintext and Ciphertext lengths.");
+    return false;
+  }
+
+  AES_KEY aes_key;
+  if (AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {  // Key length is passed in bits.
+    t->PrintLine("AES_set_encrypt_key failed.");
+    return false;
+  }
+
+  std::unique_ptr<uint8_t[]> buf(new uint8_t[ciphertext.size()]);
+  if (AES_wrap_key(&aes_key, nullptr /* iv */, buf.get(), plaintext.data(),
+                   plaintext.size()) != static_cast<int>(ciphertext.size()) ||
+      !t->ExpectBytesEqual(buf.get(), ciphertext.size(), ciphertext.data(),
+                           ciphertext.size())) {
+    t->PrintLine("AES_wrap_key with implicit IV failed.");
+    return false;
+  }
+
+  memset(buf.get(), 0, ciphertext.size());  // Zero the buffer before the explicit-IV run.
+  if (AES_wrap_key(&aes_key, kDefaultIV, buf.get(), plaintext.data(),
+                   plaintext.size()) != static_cast<int>(ciphertext.size()) ||
+      !t->ExpectBytesEqual(buf.get(), ciphertext.size(), ciphertext.data(),
+                           ciphertext.size())) {
+    t->PrintLine("AES_wrap_key with explicit IV failed.");
+    return false;
+  }
+
+  if (AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key) != 0) {  // Reuse |aes_key|, now set up for decryption.
+    t->PrintLine("AES_set_decrypt_key failed.");
+    return false;
+  }
+
+  buf.reset(new uint8_t[plaintext.size()]);  // Fresh buffer sized for the unwrapped plaintext.
+  if (AES_unwrap_key(&aes_key, nullptr /* iv */, buf.get(), ciphertext.data(),
+                     ciphertext.size()) != static_cast<int>(plaintext.size()) ||
+      !t->ExpectBytesEqual(buf.get(), plaintext.size(), plaintext.data(),
+                           plaintext.size())) {
+    t->PrintLine("AES_unwrap_key with implicit IV failed.");
+    return false;
+  }
+
+  memset(buf.get(), 0, plaintext.size());  // Zero the buffer before the explicit-IV run.
+  if (AES_unwrap_key(&aes_key, kDefaultIV, buf.get(), ciphertext.data(),
+                     ciphertext.size()) != static_cast<int>(plaintext.size()) ||
+      !t->ExpectBytesEqual(buf.get(), plaintext.size(), plaintext.data(),
+                           plaintext.size())) {
+    t->PrintLine("AES_unwrap_key with explicit IV failed.");
+    return false;
+  }
+
+  return true;
+}
+
+static bool TestAES(FileTest *t, void *arg) {  // Dispatches on t->GetParameter(); |arg| is unused.
+  if (t->GetParameter() == "Raw") {
+    return TestRaw(t);
+  }
+  if (t->GetParameter() == "KeyWrap") {
+    return TestKeyWrap(t);
+  }
+
+  t->PrintLine("Unknown mode '%s'.", t->GetParameter().c_str());  // Any other parameter fails the test.
+  return false;
+}
+
+int main(int argc, char **argv) {  // Entry point: runs TestAES over the test file named on the command line.
+  CRYPTO_library_init();
+
+  if (argc != 2) {  // Exactly one argument is required: the test file path.
+    fprintf(stderr, "%s <test file.txt>\n", argv[0]);
+    return 1;
+  }
+
+  return FileTestMain(TestAES, nullptr, argv[1]);
+}
@@ -116,7 +116,7 @@
 # words every cache-line is *guaranteed* to be accessed within ~50
 # cycles window. Why just SSE? Because it's needed on hyper-threading
 # CPU! Which is also why it's prefetched with 64 byte stride. Best
-# part is that it has no negative effect on performance:-)
+# part is that it has no negative effect on performance:-)  
 #
 # Version 4.3 implements switch between compact and non-compact block
 # functions in AES_cbc_encrypt depending on how much data was asked
@@ -188,14 +188,14 @@
 # window, which is actually *less* than RDTSC latency on Intel P4!

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open OUT,">$output";
 *STDOUT=*OUT;

-&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
 &static_label("AES_Te");
 &static_label("AES_Td");

@@ -578,7 +578,7 @@ sub enctransform()
 # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 # |          mm4          |          mm0          |
 # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
-# |     s3    |     s2    |     s1    |     s0    |
+# |     s3    |     s2    |     s1    |     s0    |    
 # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
 # |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
 # +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
@@ -798,7 +798,7 @@ sub encstep()

 	if ($i==3)  {	$tmp=$s[3]; &mov ($s[2],$__s1);		}##%ecx
 	elsif($i==2){	&movz	($tmp,&HB($s[3]));		}#%ebx[2]
-	else        {	&mov	($tmp,$s[3]);
+	else        {	&mov	($tmp,$s[3]); 
 			&shr	($tmp,24)			}
 			&xor	($out,&DWP(1,$te,$tmp,8));
 	if ($i<2)   {	&mov	(&DWP(4+4*$i,"esp"),$out);	}
@@ -1551,7 +1551,7 @@ sub sse_deccompact()
 		&pxor	("mm1","mm3");		&pxor	("mm5","mm7");	# tp4
 		&pshufw	("mm3","mm1",0xb1);	&pshufw	("mm7","mm5",0xb1);
 		&pxor	("mm0","mm1");		&pxor	("mm4","mm5");	# ^= tp4
-		&pxor	("mm0","mm3");		&pxor	("mm4","mm7");	# ^= ROTATE(tp4,16)
+		&pxor	("mm0","mm3");		&pxor	("mm4","mm7");	# ^= ROTATE(tp4,16)	

 		&pxor	("mm3","mm3");		&pxor	("mm7","mm7");
 		&pcmpgtb("mm3","mm1");		&pcmpgtb("mm7","mm5");
@@ -2021,7 +2021,7 @@ sub declast()
 {
 # stack frame layout
 #             -4(%esp)		# return address	 0(%esp)
-#              0(%esp)		# s0 backing store	 4(%esp)
+#              0(%esp)		# s0 backing store	 4(%esp)	
 #              4(%esp)		# s1 backing store	 8(%esp)
 #              8(%esp)		# s2 backing store	12(%esp)
 #             12(%esp)		# s3 backing store	16(%esp)
@@ -2731,7 +2731,7 @@ sub enckey()
 	&mov	(&DWP(80,"edi"),10);		# setup number of rounds
 	&xor	("eax","eax");
 	&jmp	(&label("exit"));
-
+		
    &set_label("12rounds");
 	&mov	("eax",&DWP(0,"esi"));		# copy first 6 dwords
 	&mov	("ebx",&DWP(4,"esi"));
@@ -39,7 +39,7 @@ else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
 if ($flavour && $flavour ne "void") {
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    open STDOUT,"| \"$^X\" $xlate $flavour $output";
@@ -34,7 +34,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
@@ -590,7 +590,6 @@ $code.=<<___;
 .type	asm_AES_encrypt,\@function,3
 .hidden	asm_AES_encrypt
 asm_AES_encrypt:
-	mov	%rsp,%rax
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -599,6 +598,7 @@ asm_AES_encrypt:
 	push	%r15

 	# allocate frame "above" key schedule
+	mov	%rsp,%r10
 	lea	-63(%rdx),%rcx	# %rdx is key argument
 	and	\$-64,%rsp
 	sub	%rsp,%rcx
@@ -608,7 +608,7 @@ asm_AES_encrypt:
 	sub	\$32,%rsp

 	mov	%rsi,16(%rsp)	# save out
-	mov	%rax,24(%rsp)	# save original stack pointer
+	mov	%r10,24(%rsp)	# save real stack pointer
 .Lenc_prologue:

 	mov	%rdx,$key
@@ -640,13 +640,13 @@ asm_AES_encrypt:
 	mov	$s2,8($out)
 	mov	$s3,12($out)

-	mov	-48(%rsi),%r15
-	mov	-40(%rsi),%r14
-	mov	-32(%rsi),%r13
-	mov	-24(%rsi),%r12
-	mov	-16(%rsi),%rbp
-	mov	-8(%rsi),%rbx
-	lea	(%rsi),%rsp
+	mov	(%rsi),%r15
+	mov	8(%rsi),%r14
+	mov	16(%rsi),%r13
+	mov	24(%rsi),%r12
+	mov	32(%rsi),%rbp
+	mov	40(%rsi),%rbx
+	lea	48(%rsi),%rsp
 .Lenc_epilogue:
 	ret
 .size	asm_AES_encrypt,.-asm_AES_encrypt
@@ -1186,7 +1186,6 @@ $code.=<<___;
 .type	asm_AES_decrypt,\@function,3
 .hidden	asm_AES_decrypt
 asm_AES_decrypt:
-	mov	%rsp,%rax
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -1195,6 +1194,7 @@ asm_AES_decrypt:
 	push	%r15

 	# allocate frame "above" key schedule
+	mov	%rsp,%r10
 	lea	-63(%rdx),%rcx	# %rdx is key argument
 	and	\$-64,%rsp
 	sub	%rsp,%rcx
@@ -1204,7 +1204,7 @@ asm_AES_decrypt:
 	sub	\$32,%rsp

 	mov	%rsi,16(%rsp)	# save out
-	mov	%rax,24(%rsp)	# save original stack pointer
+	mov	%r10,24(%rsp)	# save real stack pointer
 .Ldec_prologue:

 	mov	%rdx,$key
@@ -1238,13 +1238,13 @@ asm_AES_decrypt:
 	mov	$s2,8($out)
 	mov	$s3,12($out)

-	mov	-48(%rsi),%r15
-	mov	-40(%rsi),%r14
-	mov	-32(%rsi),%r13
-	mov	-24(%rsi),%r12
-	mov	-16(%rsi),%rbp
-	mov	-8(%rsi),%rbx
-	lea	(%rsi),%rsp
+	mov	(%rsi),%r15
+	mov	8(%rsi),%r14
+	mov	16(%rsi),%r13
+	mov	24(%rsi),%r12
+	mov	32(%rsi),%rbp
+	mov	40(%rsi),%rbx
+	lea	48(%rsi),%rsp
 .Ldec_epilogue:
 	ret
 .size	asm_AES_decrypt,.-asm_AES_decrypt
@@ -1286,7 +1286,7 @@ $code.=<<___;
 asm_AES_set_encrypt_key:
 	push	%rbx
 	push	%rbp
-	push	%r12			# redundant, but allows to share
+	push	%r12			# redundant, but allows to share 
 	push	%r13			# exception handler...
 	push	%r14
 	push	%r15
@@ -1412,7 +1412,7 @@ $code.=<<___;
 	xor	%rax,%rax
 	jmp	.Lexit

-.L14rounds:
+.L14rounds:		
 	mov	0(%rsi),%rax			# copy first 8 dwords
 	mov	8(%rsi),%rbx
 	mov	16(%rsi),%rcx
@@ -1660,12 +1660,12 @@ asm_AES_cbc_encrypt:
 	mov	%r9d,%r9d	# clear upper half of enc

 	lea	.LAES_Te(%rip),$sbox
-	lea	.LAES_Td(%rip),%r10
 	cmp	\$0,%r9
-	cmoveq	%r10,$sbox
+	jne	.Lcbc_picked_te
+	lea	.LAES_Td(%rip),$sbox
+.Lcbc_picked_te:

-	leaq	OPENSSL_ia32cap_P(%rip),%r10
-	mov	(%r10), %r10d
+	mov	OPENSSL_ia32cap_P(%rip),%r10d
 	cmp	\$$speed_limit,%rdx
 	jb	.Lcbc_slow_prologue
 	test	\$15,%rdx
@@ -2565,6 +2565,7 @@ block_se_handler:
 	jae	.Lin_block_prologue

 	mov	24(%rax),%rax		# pull saved real stack pointer
+	lea	48(%rax),%rax		# adjust...

 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
@@ -51,9 +51,7 @@
 # Westmere	3.77/1.37	1.37	1.52	1.27
 # * Bridge	5.07/0.98	0.99	1.09	0.91
 # Haswell	4.44/0.80	0.97	1.03	0.72
-# Skylake	2.68/0.65	0.65	0.66	0.64
 # Silvermont	5.77/3.56	3.67	4.03	3.46
-# Goldmont	3.84/1.39	1.39	1.63	1.31
 # Bulldozer	5.80/0.98	1.05	1.24	0.93

 $PREFIX="aesni";	# if $PREFIX is set to "AES", the script
@@ -62,14 +60,14 @@ $PREFIX="aesni";	# if $PREFIX is set to "AES", the script
 $inline=1;		# inline _aesni_[en|de]crypt

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open OUT,">$output";
 *STDOUT=*OUT;

-&asm_init($ARGV[0]);
+&asm_init($ARGV[0],$0);

 &external_label("OPENSSL_ia32cap_P");
 &static_label("key_const");
@@ -1042,7 +1040,7 @@ if ($PREFIX eq "aesni") {
 &set_label("ctr32_one_shortcut",16);
 	&movups	($inout0,&QWP(0,$rounds_));	# load ivec
 	&mov	($rounds,&DWP(240,$key));
-
+	
 &set_label("ctr32_one");
 	if ($inline)
 	{   &aesni_inline_generate1("enc");	}
@@ -65,7 +65,7 @@ $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 die "can't locate ppc-xlate.pl";

 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
@@ -36,7 +36,7 @@ $output  = shift;

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
 die "can't locate arm-xlate.pl";

 open OUT,"| \"$^X\" $xlate $flavour $output";
@@ -957,21 +957,21 @@ if ($flavour =~ /64/) {			######## 64-bit code

 	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
 	sprintf	"vtbl.8	d%d,{q%d},d%d\n\t".
-		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
+		"vtbl.8	d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;	
    }

    sub unvdup32 {
 	my $arg=shift;

 	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
-	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
+	sprintf	"vdup.32	q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;	
    }

    sub unvmov32 {
 	my $arg=shift;

 	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
-	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
+	sprintf	"vmov.32	d%d[%d],%s",2*$1+($2>>1),$2&1,$3;	
    }

    foreach(split("\n",$code)) {
@@ -1,11 +1,4 @@
-#! /usr/bin/env perl
-# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License").  You may not use
-# this file except in compliance with the License.  You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
-
+#!/usr/bin/env perl

 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -61,7 +54,7 @@ else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
 if ($flavour && $flavour ne "void") {
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    open STDOUT,"| \"$^X\" $xlate $flavour $output";
@@ -91,7 +84,7 @@ my @s=@_[12..15];

 sub InBasisChange {
 # input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb 
 my @b=@_[0..7];
 $code.=<<___;
 	veor	@b[2], @b[2], @b[1]
@@ -738,7 +731,6 @@ $code.=<<___;
 .thumb
 #else
 .code   32
-# undef __thumb2__
 #endif

 .type	_bsaes_decrypt8,%function
@@ -1365,7 +1357,7 @@ bsaes_cbc_encrypt:
 	vmov	@XMM[4],@XMM[15]		@ just in case ensure that IV
 	vmov	@XMM[5],@XMM[0]			@ and input are preserved
 	bl	AES_decrypt
-	vld1.8	{@XMM[0]}, [$fp]		@ load result
+	vld1.8	{@XMM[0]}, [$fp,:64]		@ load result
 	veor	@XMM[0], @XMM[0], @XMM[4]	@ ^= IV
 	vmov	@XMM[15], @XMM[5]		@ @XMM[5] holds input
 	vst1.8	{@XMM[0]}, [$rounds]		@ write output
@@ -41,7 +41,6 @@
 # Nehalem(**) 	7.63		6.88		+11%
 # Atom	    	17.1		16.4		+4%
 # Silvermont	-		12.9
-# Goldmont	-		8.85
 #
 # (*)	Comparison is not completely fair, because "this" is ECB,
 #	i.e. no extra processing such as counter values calculation
@@ -81,7 +80,6 @@
 # Nehalem	7.80
 # Atom		17.9
 # Silvermont	14.0
-# Goldmont	10.2
 #
 # November 2011.
 #
@@ -98,7 +96,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
@@ -124,7 +122,7 @@ my @s=@_[12..15];

 sub InBasisChange {
 # input in  lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
-# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb 
 my @b=@_[0..7];
 $code.=<<___;
 	pxor	@b[6], @b[5]
@@ -374,7 +372,7 @@ $code.=<<___;
 	pxor	@s[0], @t[3]
 	pxor	@s[1], @t[2]
 	pxor	@s[2], @t[1]
-	pxor	@s[3], @t[0]
+	pxor	@s[3], @t[0] 

 	#Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3

@@ -1327,7 +1325,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	jb	.Lecb_enc_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1340,17 +1338,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lecb_enc_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lecb_enc_epilogue:
 	ret
 .size	bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks
@@ -1529,7 +1527,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	jb	.Lecb_dec_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1542,17 +1540,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lecb_dec_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lecb_dec_epilogue:
 	ret
 .size	bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks
@@ -1819,7 +1817,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	ja	.Lcbc_dec_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -1832,17 +1830,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lcbc_dec_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lcbc_dec_epilogue:
 	ret
 .size	bsaes_cbc_encrypt,.-bsaes_cbc_encrypt
@@ -2051,7 +2049,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	ja	.Lctr_enc_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2064,17 +2062,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lctr_enc_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lctr_enc_epilogue:
 	ret
 .size	bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
@@ -2441,7 +2439,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	ja	.Lxts_enc_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2454,17 +2452,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lxts_enc_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lxts_enc_epilogue:
 	ret
 .size	bsaes_xts_encrypt,.-bsaes_xts_encrypt
@@ -2848,7 +2846,7 @@ $code.=<<___;
 	cmp	%rax, %rbp
 	ja	.Lxts_dec_bzero

-	lea	0x78(%rbp),%rax
+	lea	(%rbp),%rsp		# restore %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	0x40(%rbp), %xmm6
@@ -2861,17 +2859,17 @@ $code.=<<___ if ($win64);
 	movaps	0xb0(%rbp), %xmm13
 	movaps	0xc0(%rbp), %xmm14
 	movaps	0xd0(%rbp), %xmm15
-	lea	0xa0(%rax), %rax
-.Lxts_dec_tail:
+	lea	0xa0(%rbp), %rsp
 ___
 $code.=<<___;
-	mov	-48(%rax), %r15
-	mov	-40(%rax), %r14
-	mov	-32(%rax), %r13
-	mov	-24(%rax), %r12
-	mov	-16(%rax), %rbx
-	mov	-8(%rax), %rbp
-	lea	(%rax), %rsp		# restore %rsp
+	mov	0x48(%rsp), %r15
+	mov	0x50(%rsp), %r14
+	mov	0x58(%rsp), %r13
+	mov	0x60(%rsp), %r12
+	mov	0x68(%rsp), %rbx
+	mov	0x70(%rsp), %rax
+	lea	0x78(%rsp), %rsp
+	mov	%rax, %rbp
 .Lxts_dec_epilogue:
 	ret
 .size	bsaes_xts_decrypt,.-bsaes_xts_decrypt
@@ -2967,34 +2965,31 @@ se_handler:

 	mov	0(%r11),%r10d		# HandlerData[0]
 	lea	(%rsi,%r10),%r10	# prologue label
-	cmp	%r10,%rbx		# context->Rip<=prologue label
-	jbe	.Lin_prologue
+	cmp	%r10,%rbx		# context->Rip<prologue label
+	jb	.Lin_prologue
+
+	mov	152($context),%rax	# pull context->Rsp

 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lin_prologue

-	mov	8(%r11),%r10d		# HandlerData[2]
-	lea	(%rsi,%r10),%r10	# epilogue label
-	cmp	%r10,%rbx		# context->Rip>=tail label
-	jae	.Lin_tail
-
 	mov	160($context),%rax	# pull context->Rbp

 	lea	0x40(%rax),%rsi		# %xmm save area
 	lea	512($context),%rdi	# &context.Xmm6
 	mov	\$20,%ecx		# 10*sizeof(%xmm0)/sizeof(%rax)
 	.long	0xa548f3fc		# cld; rep movsq
-	lea	0xa0+0x78(%rax),%rax	# adjust stack pointer
+	lea	0xa0(%rax),%rax		# adjust stack pointer

-.Lin_tail:
-	mov	-48(%rax),%rbp
-	mov	-40(%rax),%rbx
-	mov	-32(%rax),%r12
-	mov	-24(%rax),%r13
-	mov	-16(%rax),%r14
-	mov	-8(%rax),%r15
+	mov	0x70(%rax),%rbp
+	mov	0x68(%rax),%rbx
+	mov	0x60(%rax),%r12
+	mov	0x58(%rax),%r13
+	mov	0x50(%rax),%r14
+	mov	0x48(%rax),%r15
+	lea	0x78(%rax),%rax		# adjust stack pointer
 	mov	%rbx,144($context)	# restore context->Rbx
 	mov	%rbp,160($context)	# restore context->Rbp
 	mov	%r12,216($context)	# restore context->R12
@@ -3075,40 +3070,28 @@ $code.=<<___ if ($ecb);
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lecb_enc_body,.Lecb_enc_epilogue	# HandlerData[]
-	.rva	.Lecb_enc_tail
-	.long	0
 .Lecb_dec_info:
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lecb_dec_body,.Lecb_dec_epilogue	# HandlerData[]
-	.rva	.Lecb_dec_tail
-	.long	0
 ___
 $code.=<<___;
 .Lcbc_dec_info:
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lcbc_dec_body,.Lcbc_dec_epilogue	# HandlerData[]
-	.rva	.Lcbc_dec_tail
-	.long	0
 .Lctr_enc_info:
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lctr_enc_body,.Lctr_enc_epilogue	# HandlerData[]
-	.rva	.Lctr_enc_tail
-	.long	0
 .Lxts_enc_info:
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lxts_enc_body,.Lxts_enc_epilogue	# HandlerData[]
-	.rva	.Lxts_enc_tail
-	.long	0
 .Lxts_dec_info:
 	.byte	9,0,0,0
 	.rva	se_handler
 	.rva	.Lxts_dec_body,.Lxts_dec_epilogue	# HandlerData[]
-	.rva	.Lxts_dec_tail
-	.long	0
 ___
 }

@@ -48,14 +48,14 @@
 #						<appro@openssl.org>

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open OUT,">$output";
 *STDOUT=*OUT;

-&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
+&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");

 $PREFIX="vpaes";

@@ -438,7 +438,7 @@ $k_dsbo=0x2c0;		# decryption sbox final output
 ##
 &set_label("schedule_192",16);
 	&movdqu	("xmm0",&QWP(8,$inp));		# load key part 2 (very unaligned)
-	&call	("_vpaes_schedule_transform");	# input transform
+	&call	("_vpaes_schedule_transform");	# input transform	
 	&movdqa	("xmm6","xmm0");		# save short part
 	&pxor	("xmm4","xmm4");		# clear 4
 	&movhlps("xmm6","xmm4");		# clobber low side with zeros
@@ -469,7 +469,7 @@ $k_dsbo=0x2c0;		# decryption sbox final output
 ##
 &set_label("schedule_256",16);
 	&movdqu	("xmm0",&QWP(16,$inp));		# load key part 2 (unaligned)
-	&call	("_vpaes_schedule_transform");	# input transform
+	&call	("_vpaes_schedule_transform");	# input transform	
 	&mov	($round,7);

 &set_label("loop_schedule_256");
@@ -480,7 +480,7 @@ $k_dsbo=0x2c0;		# decryption sbox final output
 	&call	("_vpaes_schedule_round");
 	&dec	($round);
 	&jz	(&label("schedule_mangle_last"));
-	&call	("_vpaes_schedule_mangle");
+	&call	("_vpaes_schedule_mangle");	

 	# low round. swap xmm7 and xmm6
 	&pshufd	("xmm0","xmm0",0xFF);
@@ -603,7 +603,7 @@ $k_dsbo=0x2c0;		# decryption sbox final output
 	# subbyte
 	&movdqa	("xmm4",&QWP($k_s0F,$const));
 	&movdqa	("xmm5",&QWP($k_inv,$const));	# 4 : 1/j
-	&movdqa	("xmm1","xmm4");
+	&movdqa	("xmm1","xmm4");	
 	&pandn	("xmm1","xmm0");
 	&psrld	("xmm1",4);			# 1 = i
 	&pand	("xmm0","xmm4");		# 0 = k
@@ -31,7 +31,6 @@
 # Nehalem	29.6/40.3/14.6		10.0/11.8
 # Atom		57.3/74.2/32.1		60.9/77.2(***)
 # Silvermont	52.7/64.0/19.5		48.8/60.8(***)
-# Goldmont	38.9/49.0/17.8		10.6/12.6
 #
 # (*)	"Hyper-threading" in the context refers rather to cache shared
 #	among multiple cores, than to specifically Intel HTT. As vast
@@ -55,7 +54,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
@@ -165,7 +164,7 @@ _vpaes_encrypt_core:
 	pshufb	%xmm1,	%xmm0
 	ret
 .size	_vpaes_encrypt_core,.-_vpaes_encrypt_core
-
+	
 ##
 ##  Decryption core
 ##
@@ -326,7 +325,7 @@ _vpaes_schedule_core:
 ##
 .Lschedule_128:
 	mov	\$10, %esi
-
+	
 .Loop_schedule_128:
 	call 	_vpaes_schedule_round
 	dec	%rsi
@@ -360,7 +359,7 @@ _vpaes_schedule_core:

 .Loop_schedule_192:
 	call	_vpaes_schedule_round
-	palignr	\$8,%xmm6,%xmm0
+	palignr	\$8,%xmm6,%xmm0	
 	call	_vpaes_schedule_mangle	# save key n
 	call	_vpaes_schedule_192_smear
 	call	_vpaes_schedule_mangle	# save key n+1
@@ -386,7 +385,7 @@ _vpaes_schedule_core:
 	movdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
 	call	_vpaes_schedule_transform	# input transform
 	mov	\$7, %esi
-
+	
 .Loop_schedule_256:
 	call	_vpaes_schedule_mangle	# output low result
 	movdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
@@ -395,7 +394,7 @@ _vpaes_schedule_core:
 	call	_vpaes_schedule_round
 	dec	%rsi
 	jz 	.Lschedule_mangle_last
-	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_mangle	

 	# low round. swap xmm7 and xmm6
 	pshufd	\$0xFF,	%xmm0,	%xmm0
@@ -403,10 +402,10 @@ _vpaes_schedule_core:
 	movdqa	%xmm6,	%xmm7
 	call	_vpaes_schedule_low_round
 	movdqa	%xmm5,	%xmm7
-
+	
 	jmp	.Loop_schedule_256

-
+	
 ##
 ##  .aes_schedule_mangle_last
 ##
@@ -505,9 +504,9 @@ _vpaes_schedule_round:
 	# rotate
 	pshufd	\$0xFF,	%xmm0,	%xmm0
 	palignr	\$1,	%xmm0,	%xmm0
-
+	
 	# fall through...
-
+	
 	# low round: same as high round, but no rotation and no rcon.
 _vpaes_schedule_low_round:
 	# smear xmm7
@@ -546,7 +545,7 @@ _vpaes_schedule_low_round:
 	pxor	%xmm4, 	%xmm0		# 0 = sbox output

 	# add in smeared stuff
-	pxor	%xmm7,	%xmm0
+	pxor	%xmm7,	%xmm0	
 	movdqa	%xmm0,	%xmm7
 	ret
 .size	_vpaes_schedule_round,.-_vpaes_schedule_round
@@ -0,0 +1,87 @@
+/* ====================================================================
+ * Copyright (c) 2002-2006 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ==================================================================== */
+
+#ifndef OPENSSL_HEADER_AES_INTERNAL_H
+#define OPENSSL_HEADER_AES_INTERNAL_H
+
+#include <openssl/base.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+#if defined(_MSC_VER) && \
+    (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
+#define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
+#define GETU32(p) SWAP(*((uint32_t *)(p)))
+#define PUTU32(ct, st) \
+  { *((uint32_t *)(ct)) = SWAP((st)); }
+#else
+#define GETU32(pt)                                         \
+  (((uint32_t)(pt)[0] << 24) ^ ((uint32_t)(pt)[1] << 16) ^ \
+   ((uint32_t)(pt)[2] << 8) ^ ((uint32_t)(pt)[3]))
+#define PUTU32(ct, st)          \
+  {                             \
+    (ct)[0] = (uint8_t)((st) >> 24); \
+    (ct)[1] = (uint8_t)((st) >> 16); \
+    (ct)[2] = (uint8_t)((st) >> 8);  \
+    (ct)[3] = (uint8_t)(st);         \
+  }
+#endif
+
+#define MAXKC (256 / 32)
+#define MAXKB (256 / 8)
+#define MAXNR 14
+
+
+#if defined(__cplusplus)
+} /* extern C */
+#endif
+
+#endif /* OPENSSL_HEADER_AES_INTERNAL_H */
@@ -53,8 +53,6 @@

 #include <openssl/mem.h>

-#include "../../internal.h"
-

 /* kDefaultIV is the default IV value given in RFC 3394, 2.2.3.1. */
 static const uint8_t kDefaultIV[] = {
@@ -75,15 +73,15 @@ int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
    iv = kDefaultIV;
  }

-  OPENSSL_memmove(out + 8, in, in_len);
+  memmove(out + 8, in, in_len);
  uint8_t A[AES_BLOCK_SIZE];
-  OPENSSL_memcpy(A, iv, 8);
+  memcpy(A, iv, 8);

  size_t n = in_len / 8;

  for (unsigned j = 0; j < kBound; j++) {
    for (size_t i = 1; i <= n; i++) {
-      OPENSSL_memcpy(A + 8, out + 8 * i, 8);
+      memcpy(A + 8, out + 8 * i, 8);
      AES_encrypt(A, A, key);

      uint32_t t = (uint32_t)(n * j + i);
@@ -91,11 +89,11 @@ int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
      A[6] ^= (t >> 8) & 0xff;
      A[5] ^= (t >> 16) & 0xff;
      A[4] ^= (t >> 24) & 0xff;
-      OPENSSL_memcpy(out + 8 * i, A + 8, 8);
+      memcpy(out + 8 * i, A + 8, 8);
    }
  }

-  OPENSSL_memcpy(out, A, 8);
+  memcpy(out, A, 8);
  return (int)in_len + 8;
 }

@@ -112,8 +110,8 @@ int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
  }

  uint8_t A[AES_BLOCK_SIZE];
-  OPENSSL_memcpy(A, in, 8);
-  OPENSSL_memmove(out, in + 8, in_len - 8);
+  memcpy(A, in, 8);
+  memmove(out, in + 8, in_len - 8);

  size_t n = (in_len / 8) - 1;

@@ -124,9 +122,9 @@ int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
      A[6] ^= (t >> 8) & 0xff;
      A[5] ^= (t >> 16) & 0xff;
      A[4] ^= (t >> 24) & 0xff;
-      OPENSSL_memcpy(A + 8, out + 8 * (i - 1), 8);
+      memcpy(A + 8, out + 8 * (i - 1), 8);
      AES_decrypt(A, A, key);
-      OPENSSL_memcpy(out + 8 * (i - 1), A + 8, 8);
+      memcpy(out + 8 * (i - 1), A + 8, 8);
    }
  }

@@ -35,5 +35,17 @@ add_library(
  tasn_new.c
  tasn_typ.c
  tasn_utl.c
-  time_support.c
+  x_bignum.c
+  x_long.c
 )
+
+add_executable(
+  asn1_test
+
+  asn1_test.cc
+
+  $<TARGET_OBJECTS:test_support>
+)
+
+target_link_libraries(asn1_test crypto)
+add_dependencies(all_tests asn1_test)
@@ -61,9 +61,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-
-
 int ASN1_BIT_STRING_set(ASN1_BIT_STRING *x, unsigned char *d, int len)
 {
    return M_ASN1_BIT_STRING_set(x, d, len);
@@ -118,7 +115,7 @@ int i2c_ASN1_BIT_STRING(ASN1_BIT_STRING *a, unsigned char **pp)

    *(p++) = (unsigned char)bits;
    d = a->data;
-    OPENSSL_memcpy(p, d, len);
+    memcpy(p, d, len);
    p += len;
    if (len > 0)
        p[-1] &= (0xff << bits);
@@ -165,7 +162,7 @@ ASN1_BIT_STRING *c2i_ASN1_BIT_STRING(ASN1_BIT_STRING **a,
            OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
            goto err;
        }
-        OPENSSL_memcpy(s, p, (int)len);
+        memcpy(s, p, (int)len);
        s[len - 1] &= (0xff << padding);
        p += len;
    } else
@@ -218,7 +215,7 @@ int ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n, int value)
            return 0;
        }
        if (w + 1 - a->length > 0)
-            OPENSSL_memset(c + a->length, 0, w + 1 - a->length);
+            memset(c + a->length, 0, w + 1 - a->length);
        a->data = c;
        a->length = w + 1;
    }
@@ -61,9 +61,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-
-
 /*
 * Code for ENUMERATED type: identical to INTEGER apart from a different tag.
 * for comments on encoding see a_int.c
@@ -82,7 +79,7 @@ int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v)
            OPENSSL_free(a->data);
        if ((a->data =
             (unsigned char *)OPENSSL_malloc(sizeof(long) + 1)) != NULL)
-            OPENSSL_memset((char *)a->data, 0, sizeof(long) + 1);
+            memset((char *)a->data, 0, sizeof(long) + 1);
    }
    if (a->data == NULL) {
        OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
@@ -61,6 +61,7 @@

 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/time_support.h>

 #include "asn1_locl.h"

@@ -148,7 +149,7 @@ int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d)
    if (a[o] == 'Z')
        o++;
    else if ((a[o] == '+') || (a[o] == '-')) {
-        int offsign = a[o] == '-' ? 1 : -1, offset = 0;
+        int offsign = a[o] == '-' ? -1 : 1, offset = 0;
        o++;
        if (o + 4 > l)
            goto err;
@@ -61,9 +61,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-
-
 ASN1_INTEGER *ASN1_INTEGER_dup(const ASN1_INTEGER *x)
 {
    return M_ASN1_INTEGER_dup(x);
@@ -160,7 +157,7 @@ int i2c_ASN1_INTEGER(ASN1_INTEGER *a, unsigned char **pp)
    if (a->length == 0)
        *(p++) = 0;
    else if (!neg)
-        OPENSSL_memcpy(p, a->data, (unsigned int)a->length);
+        memcpy(p, a->data, (unsigned int)a->length);
    else {
        /* Begin at the end of the encoding */
        n = a->data + a->length - 1;
@@ -257,7 +254,7 @@ ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a, const unsigned char **pp,
            p++;
            len--;
        }
-        OPENSSL_memcpy(s, p, (int)len);
+        memcpy(s, p, (int)len);
    }

    if (ret->data != NULL)
@@ -325,7 +322,7 @@ ASN1_INTEGER *d2i_ASN1_UINTEGER(ASN1_INTEGER **a, const unsigned char **pp,
            p++;
            len--;
        }
-        OPENSSL_memcpy(s, p, (int)len);
+        memcpy(s, p, (int)len);
        p += len;
    }

@@ -357,7 +354,7 @@ int ASN1_INTEGER_set(ASN1_INTEGER *a, long v)
            OPENSSL_free(a->data);
        if ((a->data =
             (unsigned char *)OPENSSL_malloc(sizeof(long) + 1)) != NULL)
-            OPENSSL_memset((char *)a->data, 0, sizeof(long) + 1);
+            memset((char *)a->data, 0, sizeof(long) + 1);
    }
    if (a->data == NULL) {
        OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
@@ -63,9 +63,6 @@
 #include <openssl/mem.h>
 #include <openssl/obj.h>

-#include "../internal.h"
-
-
 int i2d_ASN1_OBJECT(ASN1_OBJECT *a, unsigned char **pp)
 {
    unsigned char *p;
@@ -80,7 +77,7 @@ int i2d_ASN1_OBJECT(ASN1_OBJECT *a, unsigned char **pp)

    p = *pp;
    ASN1_put_object(&p, 0, a->length, V_ASN1_OBJECT, V_ASN1_UNIVERSAL);
-    OPENSSL_memcpy(p, a->data, a->length);
+    memcpy(p, a->data, a->length);
    p += a->length;

    *pp = p;
@@ -324,7 +321,7 @@ ASN1_OBJECT *c2i_ASN1_OBJECT(ASN1_OBJECT **a, const unsigned char **pp,
        }
        ret->flags |= ASN1_OBJECT_FLAG_DYNAMIC_DATA;
    }
-    OPENSSL_memcpy(data, p, length);
+    memcpy(data, p, length);
    /* reattach data to object, after which it remains const */
    ret->data = data;
    ret->length = length;
@@ -62,9 +62,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 #include <openssl/obj.h>
-#include <openssl/stack.h>
-
-DEFINE_STACK_OF(ASN1_STRING_TABLE)

 static STACK_OF(ASN1_STRING_TABLE) *stable = NULL;
 static void st_free(ASN1_STRING_TABLE *tbl);
@@ -63,6 +63,7 @@
 #include <openssl/buf.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/time_support.h>

 #include "asn1_locl.h"

@@ -114,7 +115,7 @@ int ASN1_TIME_check(ASN1_TIME *t)
 ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t,
                                                   ASN1_GENERALIZEDTIME **out)
 {
-    ASN1_GENERALIZEDTIME *ret = NULL;
+    ASN1_GENERALIZEDTIME *ret;
    char *str;
    int newlen;

@@ -123,21 +124,22 @@ ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t,

    if (!out || !*out) {
        if (!(ret = ASN1_GENERALIZEDTIME_new()))
-            goto err;
-    } else {
+            return NULL;
+        if (out)
+            *out = ret;
+    } else
        ret = *out;
-    }

    /* If already GeneralizedTime just copy across */
    if (t->type == V_ASN1_GENERALIZEDTIME) {
        if (!ASN1_STRING_set(ret, t->data, t->length))
-            goto err;
-        goto done;
+            return NULL;
+        return ret;
    }

    /* grow the string */
    if (!ASN1_STRING_set(ret, NULL, t->length + 2))
-        goto err;
+        return NULL;
    /* ASN1_STRING_set() allocated 'len + 1' bytes. */
    newlen = t->length + 2 + 1;
    str = (char *)ret->data;
@@ -149,18 +151,9 @@ ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(ASN1_TIME *t,

    BUF_strlcat(str, (char *)t->data, newlen);

- done:
-   if (out != NULL && *out == NULL)
-       *out = ret;
-   return ret;
-
- err:
-    if (out == NULL || *out != ret)
-        ASN1_GENERALIZEDTIME_free(ret);
-    return NULL;
+    return ret;
 }

-
 int ASN1_TIME_set_string(ASN1_TIME *s, const char *str)
 {
    ASN1_TIME t;
@@ -61,6 +61,7 @@

 #include <openssl/err.h>
 #include <openssl/mem.h>
+#include <openssl/time_support.h>

 #include "asn1_locl.h"

@@ -127,7 +128,7 @@ int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d)
    if (a[o] == 'Z')
        o++;
    else if ((a[o] == '+') || (a[o] == '-')) {
-        int offsign = a[o] == '-' ? 1 : -1, offset = 0;
+        int offsign = a[o] == '-' ? -1 : 1, offset = 0;
        o++;
        if (o + 4 > l)
            goto err;
@@ -269,7 +270,7 @@ time_t ASN1_UTCTIME_get(const ASN1_UTCTIME *s)
    struct tm tm;
    int offset;

-    OPENSSL_memset(&tm, '\0', sizeof tm);
+    memset(&tm, '\0', sizeof tm);

 # define g2(p) (((p)[0]-'0')*10+(p)[1]-'0')
    tm.tm_year = g2(s->data);
@@ -63,9 +63,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-
-
 /* Cross-module errors from crypto/x509/i2d_pr.c. */
 OPENSSL_DECLARE_ERROR_REASON(ASN1, UNSUPPORTED_PUBLIC_KEY_TYPE)

@@ -404,7 +401,7 @@ int ASN1_STRING_set(ASN1_STRING *str, const void *_data, int len)
    }
    str->length = len;
    if (data != NULL) {
-        OPENSSL_memcpy(str->data, data, len);
+        memcpy(str->data, data, len);
        /* an allowance for strings :-) */
        str->data[len] = '\0';
    }
@@ -455,7 +452,7 @@ int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b)

    i = (a->length - b->length);
    if (i == 0) {
-        i = OPENSSL_memcmp(a->data, b->data, a->length);
+        i = memcmp(a->data, b->data, a->length);
        if (i == 0)
            return (a->type - b->type);
        else
@@ -57,42 +57,7 @@
 *
 */

-#ifndef OPENSSL_HEADER_ASN1_ASN1_LOCL_H
-#define OPENSSL_HEADER_ASN1_ASN1_LOCL_H
-
-#include <time.h>
-
-#include <openssl/asn1.h>
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-
-/* Wrapper functions for time functions. */
-
-/* OPENSSL_gmtime wraps |gmtime_r|. See the manual page for that function. */
-struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result);
-
-/* OPENSSL_gmtime_adj updates |tm| by adding |offset_day| days and |offset_sec|
- * seconds. */
-int OPENSSL_gmtime_adj(struct tm *tm, int offset_day, long offset_sec);
-
-/* OPENSSL_gmtime_diff calculates the difference between |from| and |to| and
- * outputs the difference as a number of days and seconds in |*out_days| and
- * |*out_secs|. */
-int OPENSSL_gmtime_diff(int *out_days, int *out_secs, const struct tm *from,
-                        const struct tm *to);
-
-
 /* Internal ASN1 structures and functions: not for application use */

 int asn1_utctime_to_tm(struct tm *tm, const ASN1_UTCTIME *d);
 int asn1_generalizedtime_to_tm(struct tm *tm, const ASN1_GENERALIZEDTIME *d);
-
-
-#if defined(__cplusplus)
-}  /* extern C */
-#endif
-
-#endif  /* OPENSSL_HEADER_ASN1_ASN1_LOCL_H */
@@ -14,13 +14,10 @@

 #include <stdio.h>

-#include <gtest/gtest.h>
-
 #include <openssl/asn1.h>
+#include <openssl/crypto.h>
 #include <openssl/err.h>

-#include "../test/test_util.h"
-

 // kTag128 is an ASN.1 structure with a universal tag with number 128.
 static const uint8_t kTag128[] = {
@@ -41,22 +38,42 @@ static const uint8_t kTagOverflow[] = {
    0x1f, 0xff, 0xff, 0xff, 0xff, 0x7f, 0x01, 0x00,
 };

-TEST(ASN1Test, LargeTags) {
+static bool TestLargeTags() {
  const uint8_t *p = kTag258;
  bssl::UniquePtr<ASN1_TYPE> obj(d2i_ASN1_TYPE(NULL, &p, sizeof(kTag258)));
-  EXPECT_FALSE(obj) << "Parsed value with illegal tag" << obj->type;
+  if (obj) {
+    fprintf(stderr, "Parsed value with illegal tag (type = %d).\n", obj->type);
+    return false;
+  }
  ERR_clear_error();

  p = kTagOverflow;
  obj.reset(d2i_ASN1_TYPE(NULL, &p, sizeof(kTagOverflow)));
-  EXPECT_FALSE(obj) << "Parsed value with tag overflow" << obj->type;
+  if (obj) {
+    fprintf(stderr, "Parsed value with tag overflow (type = %d).\n", obj->type);
+    return false;
+  }
  ERR_clear_error();

  p = kTag128;
  obj.reset(d2i_ASN1_TYPE(NULL, &p, sizeof(kTag128)));
-  ASSERT_TRUE(obj);
-  EXPECT_EQ(128, obj->type);
-  const uint8_t kZero = 0;
-  EXPECT_EQ(Bytes(&kZero, 1), Bytes(obj->value.asn1_string->data,
-                                    obj->value.asn1_string->length));
+  if (!obj || obj->type != 128 || obj->value.asn1_string->length != 1 ||
+      obj->value.asn1_string->data[0] != 0) {
+    fprintf(stderr, "Failed to parse value with tag 128.\n");
+    ERR_print_errors_fp(stderr);
+    return false;
+  }
+
+  return true;
+}
+
+int main() {
+  CRYPTO_library_init();
+
+  if (!TestLargeTags()) {
+    return 1;
+  }
+
+  printf("PASS\n");
+  return 0;
 }
@@ -180,7 +180,6 @@ int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len,
    int ret = 0;
    ASN1_VALUE **pchptr, *ptmpval;
    int combine = aclass & ASN1_TFLG_COMBINE;
-    aclass &= ~ASN1_TFLG_COMBINE;
    if (!pval)
        return 0;
    if (aux && aux->asn1_cb)
@@ -668,7 +667,6 @@ static int asn1_template_noexp_d2i(ASN1_VALUE **val,
            }
            len -= p - q;
            if (!sk_ASN1_VALUE_push((STACK_OF(ASN1_VALUE) *)*val, skfield)) {
-                ASN1_item_ex_free(&skfield, ASN1_ITEM_ptr(tt->item));
                OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
                goto err;
            }
@@ -1110,7 +1108,7 @@ static int collect_data(BUF_MEM *buf, const unsigned char **p, long plen)
            OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
            return 0;
        }
-        OPENSSL_memcpy(buf->data + len, *p, plen);
+        memcpy(buf->data + len, *p, plen);
    }
    *p += plen;
    return 1;
@@ -62,9 +62,6 @@
 #include <openssl/asn1t.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-
-
 static int asn1_i2d_ex_primitive(ASN1_VALUE **pval, unsigned char **out,
                                 const ASN1_ITEM *it, int tag, int aclass);
 static int asn1_set_seq_out(STACK_OF(ASN1_VALUE) *sk, unsigned char **out,
@@ -418,7 +415,7 @@ static int der_cmp(const void *a, const void *b)
    const DER_ENC *d1 = a, *d2 = b;
    int cmplen, i;
    cmplen = (d1->length < d2->length) ? d1->length : d2->length;
-    i = OPENSSL_memcmp(d1->data, d2->data, cmplen);
+    i = memcmp(d1->data, d2->data, cmplen);
    if (i)
        return i;
    return d1->length - d2->length;
@@ -473,7 +470,7 @@ static int asn1_set_seq_out(STACK_OF(ASN1_VALUE) *sk, unsigned char **out,
    /* Output sorted DER encoding */
    p = *out;
    for (i = 0, tder = derlst; i < sk_ASN1_VALUE_num(sk); i++, tder++) {
-        OPENSSL_memcpy(p, tder->data, tder->length);
+        memcpy(p, tder->data, tder->length);
        p += tder->length;
    }
    *out = p;
@@ -663,6 +660,6 @@ int asn1_ex_i2c(ASN1_VALUE **pval, unsigned char *cout, int *putype,

    }
    if (cout && len)
-        OPENSSL_memcpy(cout, cont, len);
+        memcpy(cout, cont, len);
    return len;
 }
@@ -63,9 +63,6 @@
 #include <openssl/mem.h>
 #include <openssl/obj.h>

-#include "../internal.h"
-
-
 static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it,
                                    int combine);
 static void asn1_item_clear(ASN1_VALUE **pval, const ASN1_ITEM *it);
@@ -156,11 +153,11 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it,
            *pval = OPENSSL_malloc(it->size);
            if (!*pval)
                goto memerr;
-            OPENSSL_memset(*pval, 0, it->size);
+            memset(*pval, 0, it->size);
        }
        asn1_set_choice_selector(pval, -1, it);
        if (asn1_cb && !asn1_cb(ASN1_OP_NEW_POST, pval, it, NULL))
-            goto auxerr2;
+            goto auxerr;
        break;

    case ASN1_ITYPE_NDEF_SEQUENCE:
@@ -181,17 +178,17 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it,
            *pval = OPENSSL_malloc(it->size);
            if (!*pval)
                goto memerr;
-            OPENSSL_memset(*pval, 0, it->size);
+            memset(*pval, 0, it->size);
            asn1_refcount_set_one(pval, it);
            asn1_enc_init(pval, it);
        }
        for (i = 0, tt = it->templates; i < it->tcount; tt++, i++) {
            pseqval = asn1_get_field_ptr(pval, tt);
            if (!ASN1_template_new(pseqval, tt))
-                goto memerr2;
+                goto memerr;
        }
        if (asn1_cb && !asn1_cb(ASN1_OP_NEW_POST, pval, it, NULL))
-            goto auxerr2;
+            goto auxerr;
        break;
    }
 #ifdef CRYPTO_MDEBUG
@@ -200,20 +197,18 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it,
 #endif
    return 1;

- memerr2:
-    ASN1_item_ex_free(pval, it);
 memerr:
    OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
+    ASN1_item_ex_free(pval, it);
 #ifdef CRYPTO_MDEBUG
    if (it->sname)
        CRYPTO_pop_info();
 #endif
    return 0;

- auxerr2:
-    ASN1_item_ex_free(pval, it);
 auxerr:
    OPENSSL_PUT_ERROR(ASN1, ASN1_R_AUX_ERROR);
+    ASN1_item_ex_free(pval, it);
 #ifdef CRYPTO_MDEBUG
    if (it->sname)
        CRYPTO_pop_info();
@@ -178,7 +178,7 @@ int asn1_enc_save(ASN1_VALUE **pval, const unsigned char *in, int inlen,
    if (!enc->enc) {
      return 0;
    }
-    OPENSSL_memcpy(enc->enc, in, inlen);
+    memcpy(enc->enc, in, inlen);
  }

  enc->len = inlen;
@@ -195,7 +195,7 @@ int asn1_enc_restore(int *len, unsigned char **out, ASN1_VALUE **pval,
    return 0;
  }
  if (out) {
-    OPENSSL_memcpy(*out, enc->enc, enc->len);
+    memcpy(*out, enc->enc, enc->len);
    *out += enc->len;
  }
  if (len) {
@@ -54,66 +54,100 @@
 * copied and put under another distribution licence
 * [including the GNU Public Licence.] */

-#ifndef OPENSSL_HEADER_CIPHER_INTERNAL_H
-#define OPENSSL_HEADER_CIPHER_INTERNAL_H
+#include <openssl/asn1.h>

-#include <openssl/base.h>
+#include <openssl/asn1t.h>
+#include <openssl/bn.h>

-#include <openssl/aead.h>
-#include <openssl/aes.h>
+/*
+ * Custom primitive type for BIGNUM handling. This reads in an ASN1_INTEGER
+ * as a BIGNUM directly. Currently it ignores the sign which isn't a problem
+ * since all BIGNUMs used are non negative and anything that looks negative
+ * is normally due to an encoding error.
+ */

-#include "../../internal.h"
-#include "../modes/internal.h"
+#define BN_SENSITIVE    1

-#if defined(__cplusplus)
-extern "C" {
-#endif
+static int bn_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
+static void bn_free(ASN1_VALUE **pval, const ASN1_ITEM *it);

+static int bn_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
+                  const ASN1_ITEM *it);
+static int bn_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                  int utype, char *free_cont, const ASN1_ITEM *it);

-/* EVP_CIPH_MODE_MASK contains the bits of |flags| that represent the mode. */
-#define EVP_CIPH_MODE_MASK 0x3f
-
-/* EVP_AEAD represents a specific AEAD algorithm. */
-struct evp_aead_st {
-  uint8_t key_len;
-  uint8_t nonce_len;
-  uint8_t overhead;
-  uint8_t max_tag_len;
-
-  /* init initialises an |EVP_AEAD_CTX|. If this call returns zero then
-   * |cleanup| will not be called for that context. */
-  int (*init)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
-              size_t tag_len);
-  int (*init_with_direction)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
-                             size_t tag_len, enum evp_aead_direction_t dir);
-  void (*cleanup)(EVP_AEAD_CTX *);
-
-  int (*seal)(const EVP_AEAD_CTX *ctx, uint8_t *out, size_t *out_len,
-              size_t max_out_len, const uint8_t *nonce, size_t nonce_len,
-              const uint8_t *in, size_t in_len, const uint8_t *ad,
-              size_t ad_len);
-
-  int (*open)(const EVP_AEAD_CTX *ctx, uint8_t *out, size_t *out_len,
-              size_t max_out_len, const uint8_t *nonce, size_t nonce_len,
-              const uint8_t *in, size_t in_len, const uint8_t *ad,
-              size_t ad_len);
-
-  int (*get_iv)(const EVP_AEAD_CTX *ctx, const uint8_t **out_iv,
-                size_t *out_len);
+static const ASN1_PRIMITIVE_FUNCS bignum_pf = {
+    NULL, 0,
+    bn_new,
+    bn_free,
+    0,
+    bn_c2i,
+    bn_i2c,
+    NULL /* prim_print */ ,
 };

-/* aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
- * where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
- * set to a function that encrypts single blocks. If not NULL, |*gcm_ctx| is
- * initialised to do GHASH with the given key. It returns a function for
- * optimised CTR-mode, or NULL if CTR-mode should be built using
- * |*out_block|. */
-ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
-                         block128_f *out_block, const uint8_t *key,
-                         size_t key_bytes);
+ASN1_ITEM_start(BIGNUM)
+        ASN1_ITYPE_PRIMITIVE, V_ASN1_INTEGER, NULL, 0, &bignum_pf, 0, "BIGNUM"
+ASN1_ITEM_end(BIGNUM)

-#if defined(__cplusplus)
-} /* extern C */
-#endif
+ASN1_ITEM_start(CBIGNUM)
+        ASN1_ITYPE_PRIMITIVE, V_ASN1_INTEGER, NULL, 0, &bignum_pf, BN_SENSITIVE, "BIGNUM"
+ASN1_ITEM_end(CBIGNUM)

-#endif /* OPENSSL_HEADER_CIPHER_INTERNAL_H */
+/* bn_new stores a freshly allocated BIGNUM in |*pval|. It returns 1 on
+ * success and 0 if BN_new() returned NULL. */
+static int bn_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
+{
+    BIGNUM *fresh = BN_new();
+
+    *pval = (ASN1_VALUE *)fresh;
+    return fresh != NULL;
+}
+
+/* bn_free releases the BIGNUM held in |*pval|, if any, and resets |*pval| to
+ * NULL. Items with BN_SENSITIVE set in |it->size| are released with
+ * BN_clear_free() rather than BN_free(). */
+static void bn_free(ASN1_VALUE **pval, const ASN1_ITEM *it)
+{
+    BIGNUM *victim = (BIGNUM *)*pval;
+
+    if (victim != NULL) {
+        if (!(it->size & BN_SENSITIVE))
+            BN_free(victim);
+        else
+            BN_clear_free(victim);
+        *pval = NULL;
+    }
+}
+
+/* bn_i2c produces the INTEGER content octets for the BIGNUM in |*pval|. When
+ * |cont| is non-NULL the octets are written there. It returns the number of
+ * content octets, or -1 if |*pval| is NULL. */
+static int bn_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
+                  const ASN1_ITEM *it)
+{
+    BIGNUM *num = (BIGNUM *)*pval;
+    int lead;
+
+    if (num == NULL)
+        return -1;
+    /* A bit length that is a multiple of eight needs a leading zero octet,
+     * otherwise the top bit of the first octet would be set. */
+    lead = (BN_num_bits(num) & 0x7) == 0;
+    if (cont != NULL) {
+        if (lead)
+            *cont++ = 0;
+        BN_bn2bin(num, cont);
+    }
+    return lead + BN_num_bytes(num);
+}
+
+/* bn_c2i hands the |len| content octets at |cont| to BN_bin2bn() to fill the
+ * BIGNUM in |*pval|, allocating one with bn_new() if |*pval| is NULL. It
+ * returns 1 on success and 0 on failure. */
+static int bn_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                  int utype, char *free_cont, const ASN1_ITEM *it)
+{
+    if (*pval == NULL && !bn_new(pval, it))
+        return 0;
+
+    if (BN_bin2bn(cont, len, (BIGNUM *)*pval) != NULL)
+        return 1;
+
+    /* Conversion failed: release the BIGNUM, which also NULLs |*pval|. */
+    bn_free(pval, it);
+    return 0;
+}
@@ -0,0 +1,197 @@
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.] */
+
+#include <openssl/asn1.h>
+
+#include <string.h>
+
+#include <openssl/asn1t.h>
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/mem.h>
+
+/*
+ * Custom primitive type for long handling. This converts between an
+ * ASN1_INTEGER and a long directly.
+ */
+
+static int long_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
+static void long_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
+
+static int long_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
+                    const ASN1_ITEM *it);
+static int long_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                    int utype, char *free_cont, const ASN1_ITEM *it);
+static int long_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it,
+                      int indent, const ASN1_PCTX *pctx);
+
+/* long_pf is the primitive-function table shared by the LONG and ZLONG items
+ * defined below. long_free appears twice: once as the free hook and once as
+ * the clear hook. */
+static const ASN1_PRIMITIVE_FUNCS long_pf = {
+    NULL, 0,
+    long_new,
+    long_free,
+    long_free,                  /* Clear should set to initial value */
+    long_c2i,
+    long_i2c,
+    long_print
+};
+
+ASN1_ITEM_start(LONG)
+        ASN1_ITYPE_PRIMITIVE, V_ASN1_INTEGER, NULL, 0, &long_pf, ASN1_LONG_UNDEF, "LONG"
+ASN1_ITEM_end(LONG)
+
+ASN1_ITEM_start(ZLONG)
+        ASN1_ITYPE_PRIMITIVE, V_ASN1_INTEGER, NULL, 0, &long_pf, 0, "ZLONG"
+ASN1_ITEM_end(ZLONG)
+
+/* long_new initialises the long stored in place at |pval| to |it->size| and
+ * always returns 1. */
+static int long_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
+{
+    /* use memcpy, as long_i2c and long_c2i do: we may not be long aligned */
+    memcpy(pval, &it->size, sizeof(long));
+    return 1;
+}
+
+/* long_free resets the long stored in place at |pval| to |it->size|. Nothing
+ * is deallocated. */
+static void long_free(ASN1_VALUE **pval, const ASN1_ITEM *it)
+{
+    /* use memcpy, as long_i2c and long_c2i do: we may not be long aligned */
+    memcpy(pval, &it->size, sizeof(long));
+}
+
+/*
+ * long_i2c encodes the long stored in place at |pval| as INTEGER content
+ * octets (big-endian two's complement). When |cont| is non-NULL the octets
+ * are written there. It returns the number of content octets, or -1 when the
+ * value equals |it->size|.
+ */
+static int long_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
+                    const ASN1_ITEM *it)
+{
+    long ltmp;
+    unsigned long utmp;
+    int clen, pad, i;
+    /* this exists to bypass broken gcc optimization */
+    char *cp = (char *)pval;
+
+    /* use memcpy, because we may not be long aligned */
+    memcpy(&ltmp, cp, sizeof(long));
+
+    if (ltmp == it->size)
+        return -1;
+    /*
+     * Convert the long to positive: we subtract one if negative so we can
+     * cleanly handle the padding if only the MSB of the leading octet is
+     * set. The negation is done in unsigned arithmetic because -ltmp is
+     * signed overflow (undefined behaviour) when ltmp is LONG_MIN.
+     */
+    if (ltmp < 0)
+        utmp = 0 - (unsigned long)ltmp - 1;
+    else
+        utmp = ltmp;
+    clen = BN_num_bits_word(utmp);
+    /* If MSB of leading octet set we need to pad */
+    if (!(clen & 0x7))
+        pad = 1;
+    else
+        pad = 0;
+
+    /* Convert number of bits to number of octets */
+    clen = (clen + 7) >> 3;
+
+    if (cont) {
+        if (pad)
+            *cont++ = (ltmp < 0) ? 0xff : 0;
+        /* Fill from the least significant octet backwards; negative values
+         * are stored as the complement of (-value - 1). */
+        for (i = clen - 1; i >= 0; i--) {
+            cont[i] = (unsigned char)(utmp & 0xff);
+            if (ltmp < 0)
+                cont[i] ^= 0xff;
+            utmp >>= 8;
+        }
+    }
+    return clen + pad;
+}
+
+/*
+ * long_c2i decodes the |len| INTEGER content octets at |cont| (big-endian
+ * two's complement) into the long stored in place at |pval|. It returns 1 on
+ * success. It returns 0, after pushing ASN1_R_INTEGER_TOO_LARGE_FOR_LONG, if
+ * the encoding is longer than a long or the decoded value equals |it->size|.
+ */
+static int long_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                    int utype, char *free_cont, const ASN1_ITEM *it)
+{
+    int neg, i;
+    long ltmp;
+    unsigned long utmp = 0;
+    char *cp = (char *)pval;
+    if (len > (int)sizeof(long)) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_INTEGER_TOO_LARGE_FOR_LONG);
+        return 0;
+    }
+    /* Is it negative? */
+    if (len && (cont[0] & 0x80))
+        neg = 1;
+    else
+        neg = 0;
+    /*
+     * Accumulate the octets; for a negative encoding accumulate their
+     * complement, which keeps the top bit of |utmp| clear.
+     */
+    for (i = 0; i < len; i++) {
+        utmp <<= 8;
+        if (neg)
+            utmp |= cont[i] ^ 0xff;
+        else
+            utmp |= cont[i];
+    }
+    ltmp = (long)utmp;
+    if (neg) {
+        /*
+         * |ltmp| holds -(value + 1). Negate before subtracting one:
+         * incrementing first overflows when |ltmp| is LONG_MAX, i.e. when
+         * decoding LONG_MIN.
+         */
+        ltmp = -ltmp - 1;
+    }
+    if (ltmp == it->size) {
+        OPENSSL_PUT_ERROR(ASN1, ASN1_R_INTEGER_TOO_LARGE_FOR_LONG);
+        return 0;
+    }
+    /* use memcpy, because we may not be long aligned */
+    memcpy(cp, &ltmp, sizeof(long));
+    return 1;
+}
+
+/* long_print writes the long stored in place at |pval| to |out| as a decimal
+ * number followed by a newline, and returns BIO_printf()'s result. */
+static int long_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it,
+                      int indent, const ASN1_PCTX *pctx)
+{
+    long value;
+
+    /* use memcpy, as long_i2c and long_c2i do: we may not be long aligned */
+    memcpy(&value, pval, sizeof(long));
+    return BIO_printf(out, "%ld\n", value);
+}
@@ -7,3 +7,14 @@ add_library(

  base64.c
 )
+
+# base64_test: executable built from base64_test.cc plus the test_support
+# object files, linked against crypto and attached to the all_tests target.
+add_executable(
+  base64_test
+
+  base64_test.cc
+
+  $<TARGET_OBJECTS:test_support>
+)
+
+add_dependencies(all_tests base64_test)
+target_link_libraries(base64_test crypto)
@@ -62,41 +62,13 @@

 #include <openssl/type_check.h>

-#include "../internal.h"
-
-
-/* constant_time_lt_args_8 behaves like |constant_time_lt_8| but takes |uint8_t|
- * arguments for a slightly simpler implementation. */
-static inline uint8_t constant_time_lt_args_8(uint8_t a, uint8_t b) {
-  crypto_word_t aw = a;
-  crypto_word_t bw = b;
-  /* |crypto_word_t| is larger than |uint8_t|, so |aw| and |bw| have the same
-   * MSB. |aw| < |bw| iff MSB(|aw| - |bw|) is 1. */
-  return constant_time_msb_w(aw - bw);
-}
-
-/* constant_time_in_range_8 returns |CONSTTIME_TRUE_8| if |min| <= |a| <= |max|
- * and |CONSTTIME_FALSE_8| otherwise. */
-static inline uint8_t constant_time_in_range_8(uint8_t a, uint8_t min,
-                                               uint8_t max) {
-  a -= min;
-  return constant_time_lt_args_8(a, max - min + 1);
-}

 /* Encoding. */

-static uint8_t conv_bin2ascii(uint8_t a) {
-  /* Since PEM is sometimes used to carry private keys, we encode base64 data
-   * itself in constant-time. */
-  a &= 0x3f;
-  uint8_t ret = constant_time_select_8(constant_time_eq_8(a, 62), '+', '/');
-  ret =
-      constant_time_select_8(constant_time_lt_args_8(a, 62), a - 52 + '0', ret);
-  ret =
-      constant_time_select_8(constant_time_lt_args_8(a, 52), a - 26 + 'a', ret);
-  ret = constant_time_select_8(constant_time_lt_args_8(a, 26), a + 'A', ret);
-  return ret;
-}
+/* data_bin2ascii is the base64 alphabet, indexed by 6-bit value. The array has
+ * 65 entries: the 64 characters plus the string literal's terminating NUL. */
+static const unsigned char data_bin2ascii[65] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* conv_bin2ascii maps the low six bits of |a| to its base64 character.
+ * NOTE(review): this is a table lookup indexed by the input value, so it is
+ * presumably not constant-time with respect to the data being encoded. */
+#define conv_bin2ascii(a) (data_bin2ascii[(a) & 0x3f])

 OPENSSL_COMPILE_ASSERT(sizeof(((EVP_ENCODE_CTX *)(NULL))->data) % 3 == 0,
                       data_length_must_be_multiple_of_base64_chunk_size);
@@ -123,7 +95,7 @@ int EVP_EncodedLength(size_t *out_len, size_t len) {
 }

 void EVP_EncodeInit(EVP_ENCODE_CTX *ctx) {
-  OPENSSL_memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
+  memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
 }

 void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
@@ -138,14 +110,14 @@ void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
  assert(ctx->data_used < sizeof(ctx->data));

  if (sizeof(ctx->data) - ctx->data_used > in_len) {
-    OPENSSL_memcpy(&ctx->data[ctx->data_used], in, in_len);
+    memcpy(&ctx->data[ctx->data_used], in, in_len);
    ctx->data_used += (unsigned)in_len;
    return;
  }

  if (ctx->data_used != 0) {
    const size_t todo = sizeof(ctx->data) - ctx->data_used;
-    OPENSSL_memcpy(&ctx->data[ctx->data_used], in, todo);
+    memcpy(&ctx->data[ctx->data_used], in, todo);
    in += todo;
    in_len -= todo;

@@ -177,7 +149,7 @@ void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
  }

  if (in_len != 0) {
-    OPENSSL_memcpy(ctx->data, in, in_len);
+    memcpy(ctx->data, in, in_len);
  }

  ctx->data_used = (unsigned)in_len;
@@ -252,28 +224,32 @@ int EVP_DecodedLength(size_t *out_len, size_t len) {
 }

 void EVP_DecodeInit(EVP_ENCODE_CTX *ctx) {
-  OPENSSL_memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
+  memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
 }

-static uint8_t base64_ascii_to_bin(uint8_t a) {
-  /* Since PEM is sometimes used to carry private keys, we decode base64 data
-   * itself in constant-time. */
-  const uint8_t is_upper = constant_time_in_range_8(a, 'A', 'Z');
-  const uint8_t is_lower = constant_time_in_range_8(a, 'a', 'z');
-  const uint8_t is_digit = constant_time_in_range_8(a, '0', '9');
-  const uint8_t is_plus = constant_time_eq_8(a, '+');
-  const uint8_t is_slash = constant_time_eq_8(a, '/');
-  const uint8_t is_equals = constant_time_eq_8(a, '=');
+/* kBase64ASCIIToBinData maps characters (c < 128) to their base64 value, or
+ * else 0xff if they are invalid. Two special cases: the padding character
+ * ('=') is mapped to zero, and tab (9) and space (32) are mapped to 0xe0. */
+static const uint8_t kBase64ASCIIToBinData[128] = {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
+    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff,
+    0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
+    0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
+    0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
+};

-  uint8_t ret = 0xff; /* 0xff signals invalid. */
-  ret = constant_time_select_8(is_upper, a - 'A', ret);      /* [0,26) */
-  ret = constant_time_select_8(is_lower, a - 'a' + 26, ret); /* [26,52) */
-  ret = constant_time_select_8(is_digit, a - '0' + 52, ret); /* [52,62) */
-  ret = constant_time_select_8(is_plus, 62, ret);
-  ret = constant_time_select_8(is_slash, 63, ret);
-  /* Padding maps to zero, to be further handled by the caller. */
-  ret = constant_time_select_8(is_equals, 0, ret);
-  return ret;
+/* base64_ascii_to_bin returns the |kBase64ASCIIToBinData| entry for |a|, or
+ * 0xFF for bytes outside the table (a >= 128). */
+static uint8_t base64_ascii_to_bin(uint8_t a) {
+  const int in_table = a < 128;
+
+  return in_table ? kBase64ASCIIToBinData[a] : 0xFF;
+}

 /* base64_decode_quad decodes a single “quad” (i.e. four characters) of base64
@@ -344,7 +320,7 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
        continue;
    }

-    if (ctx->eof_seen) {
+    if (base64_ascii_to_bin(c) == 0xff || ctx->eof_seen) {
      ctx->error_encountered = 1;
      return -1;
    }
@@ -18,14 +18,11 @@
 #include <string>
 #include <vector>

-#include <gtest/gtest.h>
-
 #include <openssl/base64.h>
 #include <openssl/crypto.h>
 #include <openssl/err.h>

 #include "../internal.h"
-#include "../test/test_util.h"


 enum encoding_relation {
@@ -103,9 +100,7 @@ static const TestVector kTestVectors[] = {
     "=======\n"},
 };

-class Base64Test : public testing::TestWithParam<TestVector> {};
-
-INSTANTIATE_TEST_CASE_P(, Base64Test, testing::ValuesIn(kTestVectors));
+static const size_t kNumTests = OPENSSL_ARRAY_SIZE(kTestVectors);

 // RemoveNewlines returns a copy of |in| with all '\n' characters removed.
 static std::string RemoveNewlines(const char *in) {
@@ -121,187 +116,279 @@ static std::string RemoveNewlines(const char *in) {
  return ret;
 }

-TEST_P(Base64Test, EncodeBlock) {
-  const TestVector &t = GetParam();
-  if (t.relation != canonical) {
-    return;
-  }
+static bool TestEncodeBlock() {
+  for (unsigned i = 0; i < kNumTests; i++) {
+    const TestVector *t = &kTestVectors[i];
+    if (t->relation != canonical) {
+      continue;
+    }

-  const size_t decoded_len = strlen(t.decoded);
-  size_t max_encoded_len;
-  ASSERT_TRUE(EVP_EncodedLength(&max_encoded_len, decoded_len));
-
-  std::vector<uint8_t> out_vec(max_encoded_len);
-  uint8_t *out = out_vec.data();
-  size_t len = EVP_EncodeBlock(out, (const uint8_t *)t.decoded, decoded_len);
-
-  std::string encoded(RemoveNewlines(t.encoded));
-  EXPECT_EQ(Bytes(encoded), Bytes(out, len));
-}
-
-TEST_P(Base64Test, DecodeBase64) {
-  const TestVector &t = GetParam();
-  if (t.relation == valid) {
-    // The non-canonical encodings will generally have odd whitespace etc
-    // that |EVP_DecodeBase64| will reject.
-    return;
-  }
-
-  const std::string encoded(RemoveNewlines(t.encoded));
-  std::vector<uint8_t> out_vec(encoded.size());
-  uint8_t *out = out_vec.data();
-
-  size_t len;
-  int ok = EVP_DecodeBase64(out, &len, out_vec.size(),
-                            (const uint8_t *)encoded.data(), encoded.size());
-
-  if (t.relation == invalid) {
-    EXPECT_FALSE(ok);
-  } else if (t.relation == canonical) {
-    ASSERT_TRUE(ok);
-    EXPECT_EQ(Bytes(t.decoded), Bytes(out, len));
-  }
-}
-
-TEST_P(Base64Test, DecodeBlock) {
-  const TestVector &t = GetParam();
-  if (t.relation != canonical) {
-    return;
-  }
-
-  std::string encoded(RemoveNewlines(t.encoded));
-
-  std::vector<uint8_t> out_vec(encoded.size());
-  uint8_t *out = out_vec.data();
-
-  // Test that the padding behavior of the deprecated API is preserved.
-  int ret =
-      EVP_DecodeBlock(out, (const uint8_t *)encoded.data(), encoded.size());
-  ASSERT_GE(ret, 0);
-  // EVP_DecodeBlock should ignore padding.
-  ASSERT_EQ(0, ret % 3);
-  size_t expected_len = strlen(t.decoded);
-  if (expected_len % 3 != 0) {
-    ret -= 3 - (expected_len % 3);
-  }
-  EXPECT_EQ(Bytes(t.decoded), Bytes(out, static_cast<size_t>(ret)));
-}
-
-TEST_P(Base64Test, EncodeDecode) {
-  const TestVector &t = GetParam();
-
-  EVP_ENCODE_CTX ctx;
-  const size_t decoded_len = strlen(t.decoded);
-
-  if (t.relation == canonical) {
+    const size_t decoded_len = strlen(t->decoded);
    size_t max_encoded_len;
-    ASSERT_TRUE(EVP_EncodedLength(&max_encoded_len, decoded_len));
+    if (!EVP_EncodedLength(&max_encoded_len, decoded_len)) {
+      fprintf(stderr, "#%u: EVP_EncodedLength failed\n", i);
+      return false;
+    }

-    // EVP_EncodeUpdate will output new lines every 64 bytes of output so we
-    // need slightly more than |EVP_EncodedLength| returns. */
-    max_encoded_len += (max_encoded_len + 63) >> 6;
    std::vector<uint8_t> out_vec(max_encoded_len);
    uint8_t *out = out_vec.data();
+    size_t len = EVP_EncodeBlock(out, (const uint8_t *)t->decoded, decoded_len);

-    EVP_EncodeInit(&ctx);
-
-    int out_len;
-    EVP_EncodeUpdate(&ctx, out, &out_len,
-                     reinterpret_cast<const uint8_t *>(t.decoded),
-                     decoded_len);
-    size_t total = out_len;
-
-    EVP_EncodeFinal(&ctx, out + total, &out_len);
-    total += out_len;
-
-    EXPECT_EQ(Bytes(t.encoded), Bytes(out, total));
+    std::string encoded(RemoveNewlines(t->encoded));
+    if (len != encoded.size() ||
+        memcmp(out, encoded.data(), len) != 0) {
+      fprintf(stderr, "encode(\"%s\") = \"%.*s\", want \"%s\"\n",
+              t->decoded, (int)len, (const char*)out, encoded.c_str());
+      return false;
+    }
  }

-  std::vector<uint8_t> out_vec(strlen(t.encoded));
-  uint8_t *out = out_vec.data();
-
-  EVP_DecodeInit(&ctx);
-  int out_len;
-  size_t total = 0;
-  int ret = EVP_DecodeUpdate(&ctx, out, &out_len,
-                             reinterpret_cast<const uint8_t *>(t.encoded),
-                             strlen(t.encoded));
-  if (ret != -1) {
-    total = out_len;
-    ret = EVP_DecodeFinal(&ctx, out + total, &out_len);
-    total += out_len;
-  }
-
-  switch (t.relation) {
-    case canonical:
-    case valid:
-      ASSERT_NE(-1, ret);
-      EXPECT_EQ(Bytes(t.decoded), Bytes(out, total));
-      break;
-
-    case invalid:
-      EXPECT_EQ(-1, ret);
-      break;
-  }
+  return true;
 }

-TEST_P(Base64Test, DecodeUpdateStreaming) {
-  const TestVector &t = GetParam();
-  if (t.relation == invalid) {
-    return;
-  }
+static bool TestDecodeBase64() {
+  size_t len;

-  const size_t encoded_len = strlen(t.encoded);
+  for (unsigned i = 0; i < kNumTests; i++) {
+    const TestVector *t = &kTestVectors[i];

-  std::vector<uint8_t> out(encoded_len);
+    if (t->relation == valid) {
+      // The non-canonical encodings will generally have odd whitespace etc
+      // that |EVP_DecodeBase64| will reject.
+      continue;
+    }

-  for (size_t chunk_size = 1; chunk_size <= encoded_len; chunk_size++) {
-    SCOPED_TRACE(chunk_size);
-    size_t out_len = 0;
-    EVP_ENCODE_CTX ctx;
-    EVP_DecodeInit(&ctx);
+    const std::string encoded(RemoveNewlines(t->encoded));
+    std::vector<uint8_t> out_vec(encoded.size());
+    uint8_t *out = out_vec.data();

-    for (size_t i = 0; i < encoded_len;) {
-      size_t todo = encoded_len - i;
-      if (todo > chunk_size) {
-        todo = chunk_size;
+    int ok = EVP_DecodeBase64(out, &len, out_vec.size(),
+                              (const uint8_t *)encoded.data(), encoded.size());
+
+    if (t->relation == invalid) {
+      if (ok) {
+        fprintf(stderr, "decode(\"%s\") didn't fail but should have\n",
+                encoded.c_str());
+        return false;
+      }
+    } else if (t->relation == canonical) {
+      if (!ok) {
+        fprintf(stderr, "decode(\"%s\") failed\n", encoded.c_str());
+        return false;
      }

-      int bytes_written;
-      int ret = EVP_DecodeUpdate(
-          &ctx, out.data() + out_len, &bytes_written,
-          reinterpret_cast<const uint8_t *>(t.encoded + i), todo);
-      i += todo;
+      if (len != strlen(t->decoded) ||
+          memcmp(out, t->decoded, len) != 0) {
+        fprintf(stderr, "decode(\"%s\") = \"%.*s\", want \"%s\"\n",
+                encoded.c_str(), (int)len, (const char*)out, t->decoded);
+        return false;
+      }
+    }
+  }

-      switch (ret) {
-        case -1:
-          FAIL() << "EVP_DecodeUpdate failed";
-        case 0:
-          out_len += bytes_written;
-          if (i == encoded_len ||
-              (i + 1 == encoded_len && t.encoded[i] == '\n') ||
-              /* If there was an '-' in the input (which means “EOF”) then
-               * this loop will continue to test that |EVP_DecodeUpdate| will
-               * ignore the remainder of the input. */
-              strchr(t.encoded, '-') != nullptr) {
-            break;
-          }
+  return true;
+}

-          FAIL()
-              << "EVP_DecodeUpdate returned zero before end of encoded data.";
-        case 1:
-          out_len += bytes_written;
-          break;
-        default:
-          FAIL() << "Invalid return value " << ret;
+// TestDecodeBlock runs the deprecated |EVP_DecodeBlock| over every canonical
+// test vector (with newlines stripped) and checks the decoded bytes against
+// |t->decoded|. It returns true on success; on failure it prints a message to
+// stderr and returns false.
+static bool TestDecodeBlock() {
+  for (unsigned i = 0; i < kNumTests; i++) {
+    const TestVector *t = &kTestVectors[i];
+    if (t->relation != canonical) {
+      continue;
+    }
+
+    std::string encoded(RemoveNewlines(t->encoded));
+
+    std::vector<uint8_t> out_vec(encoded.size());
+    uint8_t *out = out_vec.data();
+
+    // Test that the padding behavior of the deprecated API is preserved.
+    int ret =
+        EVP_DecodeBlock(out, (const uint8_t *)encoded.data(), encoded.size());
+    if (ret < 0) {
+      fprintf(stderr, "EVP_DecodeBlock(\"%s\") failed\n", t->encoded);
+      return false;
+    }
+    if (ret % 3 != 0) {
+      fprintf(stderr, "EVP_DecodeBlock did not ignore padding\n");
+      return false;
+    }
+
+    // The reported length is a multiple of three, so trim the bytes that
+    // correspond to padding before comparing against the expected output.
+    const size_t expected_len = strlen(t->decoded);
+    const int padding =
+        (expected_len % 3 != 0) ? 3 - static_cast<int>(expected_len % 3) : 0;
+    // Guard the subtraction: a negative |ret| would become a negative
+    // precision for "%.*s" below and read past the end of |out|.
+    if (ret < padding) {
+      fprintf(stderr, "EVP_DecodeBlock(\"%s\") returned too few bytes: %d\n",
+              t->encoded, ret);
+      return false;
+    }
+    ret -= padding;
+
+    if (static_cast<size_t>(ret) != expected_len ||
+        memcmp(out, t->decoded, ret) != 0) {
+      fprintf(stderr, "decode(\"%s\") = \"%.*s\", want \"%s\"\n",
+              t->encoded, ret, (const char*)out, t->decoded);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static bool TestEncodeDecode() {
+  for (unsigned test_num = 0; test_num < kNumTests; test_num++) {
+    const TestVector *t = &kTestVectors[test_num];
+
+    EVP_ENCODE_CTX ctx;
+    const size_t decoded_len = strlen(t->decoded);
+
+    if (t->relation == canonical) {
+      size_t max_encoded_len;
+      if (!EVP_EncodedLength(&max_encoded_len, decoded_len)) {
+        fprintf(stderr, "#%u: EVP_EncodedLength failed\n", test_num);
+        return false;
+      }
+
+      // EVP_EncodeUpdate will output new lines every 64 bytes of output so we
+      // need slightly more than |EVP_EncodedLength| returns. */
+      max_encoded_len += (max_encoded_len + 63) >> 6;
+      std::vector<uint8_t> out_vec(max_encoded_len);
+      uint8_t *out = out_vec.data();
+
+      EVP_EncodeInit(&ctx);
+
+      int out_len;
+      EVP_EncodeUpdate(&ctx, out, &out_len,
+                       reinterpret_cast<const uint8_t *>(t->decoded),
+                       decoded_len);
+      size_t total = out_len;
+
+      EVP_EncodeFinal(&ctx, out + total, &out_len);
+      total += out_len;
+
+      if (total != strlen(t->encoded) || memcmp(out, t->encoded, total) != 0) {
+        fprintf(stderr, "#%u: EVP_EncodeUpdate produced different output: '%s' (%u)\n",
+                test_num, out, static_cast<unsigned>(total));
+        return false;
      }
    }

-    int bytes_written;
-    int ret = EVP_DecodeFinal(&ctx, out.data() + out_len, &bytes_written);
-    ASSERT_NE(ret, -1);
-    out_len += bytes_written;
+    std::vector<uint8_t> out_vec(strlen(t->encoded));
+    uint8_t *out = out_vec.data();

-    EXPECT_EQ(Bytes(t.decoded), Bytes(out.data(), out_len));
+    EVP_DecodeInit(&ctx);
+    int out_len;
+    size_t total = 0;
+    int ret = EVP_DecodeUpdate(&ctx, out, &out_len,
+                               reinterpret_cast<const uint8_t *>(t->encoded),
+                               strlen(t->encoded));
+    if (ret != -1) {
+      total = out_len;
+      ret = EVP_DecodeFinal(&ctx, out + total, &out_len);
+      total += out_len;
+    }
+
+    switch (t->relation) {
+      case canonical:
+      case valid:
+        if (ret == -1) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate failed\n", test_num);
+          return false;
+        }
+        if (total != decoded_len || memcmp(out, t->decoded, decoded_len)) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate produced incorrect output\n",
+                  test_num);
+          return false;
+        }
+        break;
+
+      case invalid:
+        if (ret != -1) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate was successful but shouldn't have been\n", test_num);
+          return false;
+        }
+        break;
+    }
  }
+
+  return true;
+}
+
+// TestDecodeUpdateStreaming feeds every test vector that is not marked
+// |invalid| through |EVP_DecodeUpdate| in chunks of every size from one byte
+// up to the whole input, finishes with |EVP_DecodeFinal|, and checks that the
+// accumulated output equals |t->decoded|. It returns true on success; on
+// failure it prints a message to stderr and returns false.
+static bool TestDecodeUpdateStreaming() {
+  for (unsigned test_num = 0; test_num < kNumTests; test_num++) {
+    const TestVector *t = &kTestVectors[test_num];
+    if (t->relation == invalid) {
+      continue;
+    }
+
+    const size_t encoded_len = strlen(t->encoded);
+    const size_t decoded_len = strlen(t->decoded);
+
+    std::vector<uint8_t> out(encoded_len);
+
+    for (size_t chunk_size = 1; chunk_size <= encoded_len; chunk_size++) {
+      size_t out_len = 0;
+      EVP_ENCODE_CTX ctx;
+      EVP_DecodeInit(&ctx);
+
+      for (size_t i = 0; i < encoded_len;) {
+        size_t todo = encoded_len - i;
+        if (todo > chunk_size) {
+          todo = chunk_size;
+        }
+
+        int bytes_written;
+        int ret = EVP_DecodeUpdate(
+            &ctx, out.data() + out_len, &bytes_written,
+            reinterpret_cast<const uint8_t *>(t->encoded + i), todo);
+        i += todo;
+
+        switch (ret) {
+          case -1:
+            fprintf(stderr, "#%u: EVP_DecodeUpdate returned error\n", test_num);
+            return false;
+          case 0:
+            out_len += bytes_written;
+            if (i == encoded_len ||
+                (i + 1 == encoded_len && t->encoded[i] == '\n') ||
+                /* If there was an '-' in the input (which means “EOF”) then
+                 * this loop will continue to test that |EVP_DecodeUpdate| will
+                 * ignore the remainder of the input. */
+                strchr(t->encoded, '-') != nullptr) {
+              break;
+            }
+
+            fprintf(stderr,
+                    "#%u: EVP_DecodeUpdate returned zero before end of "
+                    "encoded data\n",
+                    test_num);
+            return false;
+          case 1:
+            out_len += bytes_written;
+            break;
+          default:
+            // Anything other than -1, 0 or 1 is treated as a failure rather
+            // than silently counted as success.
+            fprintf(stderr, "#%u: EVP_DecodeUpdate returned invalid value %d\n",
+                    test_num, ret);
+            return false;
+        }
+      }
+
+      int bytes_written;
+      int ret = EVP_DecodeFinal(&ctx, out.data() + out_len, &bytes_written);
+      if (ret == -1) {
+        fprintf(stderr, "#%u: EVP_DecodeFinal returned error\n", test_num);
+        return false;
+      }
+      out_len += bytes_written;
+
+      if (out_len != decoded_len ||
+          memcmp(out.data(), t->decoded, out_len) != 0) {
+        fprintf(stderr, "#%u: incorrect output\n", test_num);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// main initialises the library and runs the base64 tests in order, stopping
+// at the first one that fails. It prints "PASS" and exits with 0 only when
+// every test returned true; otherwise it exits with 1.
+int main(void) {
+  CRYPTO_library_init();
+
+  const bool all_passed = TestEncodeBlock() &&
+                          TestDecodeBase64() &&
+                          TestDecodeBlock() &&
+                          TestDecodeUpdateStreaming() &&
+                          TestEncodeDecode();
+  if (!all_passed) {
+    return 1;
+  }
+
+  printf("PASS\n");
+  return 0;
 }
@@ -7,6 +7,7 @@ add_library(

  bio.c
  bio_mem.c
+  buffer.c
  connect.c
  fd.c
  file.c
@@ -16,3 +17,17 @@ add_library(
  socket.c
  socket_helper.c
 )
+
+# bio_test is built from bio_test.cc together with the object files of the
+# test_support target.
+add_executable(bio_test bio_test.cc $<TARGET_OBJECTS:test_support>)
+
+# Link against crypto; Windows builds additionally link ws2_32.
+target_link_libraries(bio_test crypto)
+if(WIN32)
+  target_link_libraries(bio_test ws2_32)
+endif()
+
+# Make the all_tests target depend on bio_test.
+add_dependencies(all_tests bio_test)
@@ -75,7 +75,7 @@ BIO *BIO_new(const BIO_METHOD *method) {
    return NULL;
  }

-  OPENSSL_memset(ret, 0, sizeof(BIO));
+  memset(ret, 0, sizeof(BIO));
  ret->method = method;
  ret->shutdown = 1;
  ret->references = 1;
@@ -488,7 +488,7 @@ static int bio_read_all(BIO *bio, uint8_t **out, size_t *out_len,
  if (*out == NULL) {
    return 0;
  }
-  OPENSSL_memcpy(*out, prefix, prefix_len);
+  memcpy(*out, prefix, prefix_len);
  size_t done = prefix_len;

  for (;;) {
@@ -595,7 +595,7 @@ int BIO_read_asn1(BIO *bio, uint8_t **out, size_t *out_len, size_t max_len) {
  if (*out == NULL) {
    return 0;
  }
-  OPENSSL_memcpy(*out, header, header_len);
+  memcpy(*out, header, header_len);
  if (BIO_read(bio, (*out) + header_len, len - header_len) !=
      (int) (len - header_len)) {
    OPENSSL_free(*out);
@@ -604,9 +604,3 @@ int BIO_read_asn1(BIO *bio, uint8_t **out, size_t *out_len, size_t max_len) {

  return 1;
 }
-
-void BIO_set_retry_special(BIO *bio) {
-  bio->flags |= BIO_FLAGS_READ | BIO_FLAGS_IO_SPECIAL;
-}
-
-int BIO_set_write_buffer_size(BIO *bio, int buffer_size) { return 0; }
@@ -63,8 +63,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-

 BIO *BIO_new_mem_buf(const void *buf, int len) {
  BIO *ret;
@@ -146,12 +144,12 @@ static int mem_read(BIO *bio, char *out, int outl) {
  }

  if (ret > 0) {
-    OPENSSL_memcpy(out, b->data, ret);
+    memcpy(out, b->data, ret);
    b->length -= ret;
    if (bio->flags & BIO_FLAGS_MEM_RDONLY) {
      b->data += ret;
    } else {
-      OPENSSL_memmove(b->data, &b->data[ret], b->length);
+      memmove(b->data, &b->data[ret], b->length);
    }
  } else if (b->length == 0) {
    ret = bio->num;
@@ -182,13 +180,17 @@ static int mem_write(BIO *bio, const char *in, int inl) {
  if (BUF_MEM_grow_clean(b, blen + inl) != ((size_t) blen) + inl) {
    goto err;
  }
-  OPENSSL_memcpy(&b->data[blen], in, inl);
+  memcpy(&b->data[blen], in, inl);
  ret = inl;

 err:
  return ret;
 }

+/* mem_puts writes the NUL-terminated string |str|, excluding the terminator,
+ * to |bp| via |mem_write| and returns that call's result. */
+static int mem_puts(BIO *bp, const char *str) {
+  const size_t str_len = strlen(str);
+  return mem_write(bp, str, str_len);
+}
+
 static int mem_gets(BIO *bio, char *buf, int size) {
  int i, j;
  char *p;
@@ -238,7 +240,7 @@ static long mem_ctrl(BIO *bio, int cmd, long num, void *ptr) {
          b->data -= b->max - b->length;
          b->length = b->max;
        } else {
-          OPENSSL_memset(b->data, 0, b->max);
+          memset(b->data, 0, b->max);
          b->length = 0;
        }
      }
@@ -291,12 +293,8 @@ static long mem_ctrl(BIO *bio, int cmd, long num, void *ptr) {
 }

 static const BIO_METHOD mem_method = {
-    BIO_TYPE_MEM,    "memory buffer",
-    mem_write,       mem_read,
-    NULL /* puts */, mem_gets,
-    mem_ctrl,        mem_new,
-    mem_free,        NULL /* callback_ctrl */,
-};
+    BIO_TYPE_MEM, "memory buffer", mem_write, mem_read, mem_puts,
+    mem_gets,     mem_ctrl,        mem_new,   mem_free, NULL, };

 const BIO_METHOD *BIO_s_mem(void) { return &mem_method; }

@@ -16,18 +16,7 @@
 #define _POSIX_C_SOURCE 201410L
 #endif

-#include <algorithm>
-#include <string>
-
-#include <gtest/gtest.h>
-
-#include <openssl/bio.h>
-#include <openssl/crypto.h>
-#include <openssl/err.h>
-#include <openssl/mem.h>
-
-#include "../internal.h"
-#include "../test/test_util.h"
+#include <openssl/base.h>

 #if !defined(OPENSSL_WINDOWS)
 #include <arpa/inet.h>
@@ -44,15 +33,27 @@ OPENSSL_MSVC_PRAGMA(warning(push, 3))
 OPENSSL_MSVC_PRAGMA(warning(pop))
 #endif

+#include <openssl/bio.h>
+#include <openssl/crypto.h>
+#include <openssl/err.h>
+#include <openssl/mem.h>
+
+#include <algorithm>
+
+#include "../internal.h"
+

 #if !defined(OPENSSL_WINDOWS)
-static int closesocket(int sock) { return close(sock); }
-static std::string LastSocketError() { return strerror(errno); }
+static int closesocket(int sock) {
+  return close(sock);
+}
+
+static void PrintSocketError(const char *func) {
+  perror(func);
+}
 #else
-static std::string LastSocketError() {
-  char buf[DECIMAL_SIZE(int) + 1];
-  BIO_snprintf(buf, sizeof(buf), "%d", WSAGetLastError());
-  return buf;
+static void PrintSocketError(const char *func) {
+  fprintf(stderr, "%s: %d\n", func, WSAGetLastError());
 }
 #endif

@@ -67,246 +68,372 @@ class ScopedSocket {
  const int sock_;
 };

-TEST(BIOTest, SocketConnect) {
+static bool TestSocketConnect() {
  static const char kTestMessage[] = "test";

-  // Set up a listening socket on localhost.
  int listening_sock = socket(AF_INET, SOCK_STREAM, 0);
-  ASSERT_NE(-1, listening_sock) << LastSocketError();
+  if (listening_sock == -1) {
+    PrintSocketError("socket");
+    return false;
+  }
  ScopedSocket listening_sock_closer(listening_sock);

  struct sockaddr_in sin;
-  OPENSSL_memset(&sin, 0, sizeof(sin));
+  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
-  ASSERT_EQ(1, inet_pton(AF_INET, "127.0.0.1", &sin.sin_addr))
-      << LastSocketError();
-  ASSERT_EQ(0, bind(listening_sock, (struct sockaddr *)&sin, sizeof(sin)))
-      << LastSocketError();
-  ASSERT_EQ(0, listen(listening_sock, 1)) << LastSocketError();
+  if (!inet_pton(AF_INET, "127.0.0.1", &sin.sin_addr)) {
+    PrintSocketError("inet_pton");
+    return false;
+  }
+  if (bind(listening_sock, (struct sockaddr *)&sin, sizeof(sin)) != 0) {
+    PrintSocketError("bind");
+    return false;
+  }
+  if (listen(listening_sock, 1)) {
+    PrintSocketError("listen");
+    return false;
+  }
  socklen_t sockaddr_len = sizeof(sin);
-  ASSERT_EQ(0,
-            getsockname(listening_sock, (struct sockaddr *)&sin, &sockaddr_len))
-      << LastSocketError();
-  // The Android NDK, contrary to POSIX, makes |socklen_t| signed.
-  ASSERT_EQ(sizeof(sin), static_cast<size_t>(sockaddr_len));
+  if (getsockname(listening_sock, (struct sockaddr *)&sin, &sockaddr_len) ||
+      sockaddr_len != sizeof(sin)) {
+    PrintSocketError("getsockname");
+    return false;
+  }

-  // Connect to it with a connect BIO.
  char hostname[80];
  BIO_snprintf(hostname, sizeof(hostname), "%s:%d", "127.0.0.1",
               ntohs(sin.sin_port));
  bssl::UniquePtr<BIO> bio(BIO_new_connect(hostname));
-  ASSERT_TRUE(bio);
+  if (!bio) {
+    fprintf(stderr, "BIO_new_connect failed.\n");
+    return false;
+  }

-  // Write a test message to the BIO.
-  ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-            BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)));
+  if (BIO_write(bio.get(), kTestMessage, sizeof(kTestMessage)) !=
+      sizeof(kTestMessage)) {
+    fprintf(stderr, "BIO_write failed.\n");
+    ERR_print_errors_fp(stderr);
+    return false;
+  }

-  // Accept the socket.
  int sock = accept(listening_sock, (struct sockaddr *) &sin, &sockaddr_len);
-  ASSERT_NE(-1, sock) << LastSocketError();
+  if (sock == -1) {
+    PrintSocketError("accept");
+    return false;
+  }
  ScopedSocket sock_closer(sock);

-  // Check the same message is read back out.
-  char buf[sizeof(kTestMessage)];
-  ASSERT_EQ(static_cast<int>(sizeof(kTestMessage)),
-            recv(sock, buf, sizeof(buf), 0))
-      << LastSocketError();
-  EXPECT_EQ(Bytes(kTestMessage, sizeof(kTestMessage)), Bytes(buf, sizeof(buf)));
+  char buf[5];
+  if (recv(sock, buf, sizeof(buf), 0) != sizeof(kTestMessage)) {
+    PrintSocketError("read");
+    return false;
+  }
+  if (memcmp(buf, kTestMessage, sizeof(kTestMessage))) {
+    return false;
+  }
+
+  return true;
 }

-TEST(BIOTest, Printf) {
+
+// BioReadZeroCopyWrapper copies up to |len| bytes from |bio| into |data|
+// using the zero-copy read APIs, looping until |len| bytes have been copied
+// or a round yields no bytes. It returns the number of bytes copied, or zero
+// if obtaining the read buffer fails.
+static size_t BioReadZeroCopyWrapper(BIO *bio, uint8_t *data, size_t len) {
+  size_t done = 0;
+
+  for (;;) {
+    uint8_t *buf;
+    size_t offset;
+    size_t avail;
+    if (!BIO_zero_copy_get_read_buf(bio, &buf, &offset, &avail)) {
+      return 0;
+    }
+
+    // Never take more than the caller still has room for.
+    const size_t chunk = std::min(avail, len - done);
+    memmove(data + done, buf + offset, chunk);
+
+    BIO_zero_copy_get_read_buf_done(bio, chunk);
+
+    done += chunk;
+    if (done == len || chunk == 0) {
+      return done;
+    }
+  }
+}
+
+// BioWriteZeroCopyWrapper copies up to |len| bytes from |data| into |bio|
+// using the zero-copy write APIs, looping until |len| bytes have been copied
+// or a round accepts no bytes. It returns the number of bytes copied, or zero
+// if obtaining the write buffer fails.
+static size_t BioWriteZeroCopyWrapper(BIO *bio, const uint8_t *data,
+                                      size_t len) {
+  size_t done = 0;
+
+  for (;;) {
+    uint8_t *buf;
+    size_t offset;
+    size_t avail;
+    if (!BIO_zero_copy_get_write_buf(bio, &buf, &offset, &avail)) {
+      return 0;
+    }
+
+    // Never hand over more than the caller still has left to send.
+    const size_t chunk = std::min(avail, len - done);
+    memmove(buf + offset, data + done, chunk);
+
+    BIO_zero_copy_get_write_buf_done(bio, chunk);
+
+    done += chunk;
+    if (done == len || chunk == 0) {
+      return done;
+    }
+  }
+}
+
+// TestZeroCopyBioPairs transfers pseudo-random bytes through a BIO pair with
+// |kBufferSize|-byte buffers, once for every combination of an initial write
+// length from |kLengths| and an interleaved read/write length from
+// |kPartialLengths|. Each round checks that the bytes read equal the bytes
+// written, in both count and content. It returns true on success; on failure
+// it returns false, printing a message to stderr for the checks below.
+static bool TestZeroCopyBioPairs() {
+  // Test read and write, especially triggering the ring buffer wrap-around.
+  uint8_t bio1_application_send_buffer[1024];
+  uint8_t bio2_application_recv_buffer[1024];
+
+  const size_t kLengths[] = {254, 255, 256, 257, 510, 511, 512, 513};
+
+  // These trigger ring buffer wrap around.
+  const size_t kPartialLengths[] = {0, 1, 2, 3, 128, 255, 256, 257, 511, 512};
+
+  static const size_t kBufferSize = 512;
+
+  // A fixed seed keeps the payload the same on every run.
+  srand(1);
+  for (size_t i = 0; i < sizeof(bio1_application_send_buffer); i++) {
+    bio1_application_send_buffer[i] = rand() & 255;
+  }
+
+  // Transfer bytes from bio1_application_send_buffer to
+  // bio2_application_recv_buffer in various ways.
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kLengths); i++) {
+    for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(kPartialLengths); j++) {
+      size_t total_write = 0;
+      size_t total_read = 0;
+
+      BIO *bio1, *bio2;
+      if (!BIO_new_bio_pair(&bio1, kBufferSize, &bio2, kBufferSize)) {
+        return false;
+      }
+      bssl::UniquePtr<BIO> bio1_scoper(bio1);
+      bssl::UniquePtr<BIO> bio2_scoper(bio2);
+
+      total_write += BioWriteZeroCopyWrapper(
+          bio1, bio1_application_send_buffer, kLengths[i]);
+
+      // This tests interleaved read/write calls. Do a read between zero copy
+      // write calls.
+      uint8_t *write_buf;
+      size_t write_buf_offset;
+      size_t available_bytes;
+      if (!BIO_zero_copy_get_write_buf(bio1, &write_buf, &write_buf_offset,
+                                       &available_bytes)) {
+        return false;
+      }
+
+      // Free kPartialLengths[j] bytes in the beginning of bio1 write buffer.
+      // This enables ring buffer wrap around for the next write.
+      //
+      // |BIO_read| returns an int; check for a negative result before adding
+      // it to the unsigned |total_read|, which would otherwise wrap and
+      // corrupt the receive-buffer offset used below.
+      int partial_read =
+          BIO_read(bio2, bio2_application_recv_buffer + total_read,
+                   kPartialLengths[j]);
+      if (partial_read < 0) {
+        fprintf(stderr, "BIO_read failed in round (%u, %u)\n", (unsigned)i,
+                (unsigned)j);
+        return false;
+      }
+      total_read += static_cast<size_t>(partial_read);
+
+      size_t interleaved_write_len = std::min(kPartialLengths[j],
+                                              available_bytes);
+
+      // Write the data for the interleaved write call. If the buffer becomes
+      // empty after a read, the write offset is normally set to 0. Check that
+      // this does not happen for interleaved read/write and that
+      // |write_buf_offset| is still valid.
+      memcpy(write_buf + write_buf_offset,
+             bio1_application_send_buffer + total_write, interleaved_write_len);
+      if (BIO_zero_copy_get_write_buf_done(bio1, interleaved_write_len)) {
+        total_write += interleaved_write_len;
+      }
+
+      // Do another write in case |write_buf_offset| was wrapped.
+      total_write += BioWriteZeroCopyWrapper(
+          bio1, bio1_application_send_buffer + total_write,
+          kPartialLengths[j] - interleaved_write_len);
+
+      // Drain the rest.
+      size_t bytes_left = BIO_pending(bio2);
+      total_read += BioReadZeroCopyWrapper(
+          bio2, bio2_application_recv_buffer + total_read, bytes_left);
+
+      if (total_read != total_write) {
+        fprintf(stderr, "Lengths not equal in round (%u, %u)\n", (unsigned)i,
+                (unsigned)j);
+        return false;
+      }
+      if (total_read > kLengths[i] + kPartialLengths[j]) {
+        fprintf(stderr, "Bad lengths in round (%u, %u)\n", (unsigned)i,
+                (unsigned)j);
+        return false;
+      }
+      if (memcmp(bio1_application_send_buffer, bio2_application_recv_buffer,
+                 total_read) != 0) {
+        fprintf(stderr, "Buffers not equal in round (%u, %u)\n", (unsigned)i,
+                (unsigned)j);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool TestPrintf() {
  // Test a short output, a very long one, and various sizes around
  // 256 (the size of the buffer) to ensure edge cases are correct.
-  static const size_t kLengths[] = {5, 250, 251, 252, 253, 254, 1023};
+  static const size_t kLengths[] = { 5, 250, 251, 252, 253, 254, 1023 };

  bssl::UniquePtr<BIO> bio(BIO_new(BIO_s_mem()));
-  ASSERT_TRUE(bio);
+  if (!bio) {
+    fprintf(stderr, "BIO_new failed\n");
+    return false;
+  }

-  for (size_t length : kLengths) {
-    SCOPED_TRACE(length);
-
-    std::string in(length, 'a');
-
-    int ret = BIO_printf(bio.get(), "test %s", in.c_str());
-    ASSERT_GE(ret, 0);
-    EXPECT_EQ(5 + length, static_cast<size_t>(ret));
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kLengths); i++) {
+    char string[1024];
+    if (kLengths[i] >= sizeof(string)) {
+      fprintf(stderr, "Bad test string length\n");
+      return false;
+    }
+    memset(string, 'a', sizeof(string));
+    string[kLengths[i]] = '\0';

+    int ret = BIO_printf(bio.get(), "test %s", string);
+    if (ret < 0 || static_cast<size_t>(ret) != 5 + kLengths[i]) {
+      fprintf(stderr, "BIO_printf failed: %d\n", ret);
+      return false;
+    }
    const uint8_t *contents;
    size_t len;
-    ASSERT_TRUE(BIO_mem_contents(bio.get(), &contents, &len));
-    EXPECT_EQ("test " + in,
-              std::string(reinterpret_cast<const char *>(contents), len));
+    if (!BIO_mem_contents(bio.get(), &contents, &len)) {
+      fprintf(stderr, "BIO_mem_contents failed\n");
+      return false;
+    }
+    if (len != 5 + kLengths[i] ||
+        strncmp((const char *)contents, "test ", 5) != 0 ||
+        strncmp((const char *)contents + 5, string, kLengths[i]) != 0) {
+      fprintf(stderr, "Contents did not match: %.*s\n", (int)len, contents);
+      return false;
+    }

-    ASSERT_TRUE(BIO_reset(bio.get()));
+    if (!BIO_reset(bio.get())) {
+      fprintf(stderr, "BIO_reset failed\n");
+      return false;
+    }
  }
+
+  return true;
 }

-static const size_t kLargeASN1PayloadLen = 8000;
-
-struct ASN1TestParam {
-  bool should_succeed;
-  std::vector<uint8_t> input;
-  // suffix_len is the number of zeros to append to |input|.
-  size_t suffix_len;
-  // expected_len, if |should_succeed| is true, is the expected length of the
-  // ASN.1 element.
-  size_t expected_len;
-  size_t max_len;
-} kASN1TestParams[] = {
-    {true, {0x30, 2, 1, 2, 0, 0}, 0, 4, 100},
-    {false /* truncated */, {0x30, 3, 1, 2}, 0, 0, 100},
-    {false /* should be short len */, {0x30, 0x81, 1, 1}, 0, 0, 100},
-    {false /* zero padded */, {0x30, 0x82, 0, 1, 1}, 0, 0, 100},
-
-    // Test a large payload.
-    {true,
-     {0x30, 0x82, kLargeASN1PayloadLen >> 8, kLargeASN1PayloadLen & 0xff},
-     kLargeASN1PayloadLen,
-     4 + kLargeASN1PayloadLen,
-     kLargeASN1PayloadLen * 2},
-    {false /* max_len too short */,
-     {0x30, 0x82, kLargeASN1PayloadLen >> 8, kLargeASN1PayloadLen & 0xff},
-     kLargeASN1PayloadLen,
-     4 + kLargeASN1PayloadLen,
-     3 + kLargeASN1PayloadLen},
-
-    // Test an indefinite-length input.
-    {true,
-     {0x30, 0x80},
-     kLargeASN1PayloadLen + 2,
-     2 + kLargeASN1PayloadLen + 2,
-     kLargeASN1PayloadLen * 2},
-    {false /* max_len too short */,
-     {0x30, 0x80},
-     kLargeASN1PayloadLen + 2,
-     2 + kLargeASN1PayloadLen + 2,
-     2 + kLargeASN1PayloadLen + 1},
-};
-
-class BIOASN1Test : public testing::TestWithParam<ASN1TestParam> {};
-
-TEST_P(BIOASN1Test, ReadASN1) {
-  const ASN1TestParam& param = GetParam();
-  std::vector<uint8_t> input = param.input;
-  input.resize(input.size() + param.suffix_len, 0);
-
-  bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(input.data(), input.size()));
-  ASSERT_TRUE(bio);
+static bool ReadASN1(bool should_succeed, const uint8_t *data, size_t data_len,
+                     size_t expected_len, size_t max_len) {
+  bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(data, data_len));

  uint8_t *out;
  size_t out_len;
-  int ok = BIO_read_asn1(bio.get(), &out, &out_len, param.max_len);
+  int ok = BIO_read_asn1(bio.get(), &out, &out_len, max_len);
  if (!ok) {
    out = nullptr;
  }
  bssl::UniquePtr<uint8_t> out_storage(out);

-  ASSERT_EQ(param.should_succeed, (ok == 1));
-  if (param.should_succeed) {
-    EXPECT_EQ(Bytes(input.data(), param.expected_len), Bytes(out, out_len));
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(, BIOASN1Test, testing::ValuesIn(kASN1TestParams));
-
-// Run through the tests twice, swapping |bio1| and |bio2|, for symmetry.
-class BIOPairTest : public testing::TestWithParam<bool> {};
-
-TEST_P(BIOPairTest, TestPair) {
-  BIO *bio1, *bio2;
-  ASSERT_TRUE(BIO_new_bio_pair(&bio1, 10, &bio2, 10));
-  bssl::UniquePtr<BIO> free_bio1(bio1), free_bio2(bio2);
-
-  if (GetParam()) {
-    std::swap(bio1, bio2);
+  if (should_succeed != (ok == 1)) {
+    return false;
  }

-  // Check initial states.
-  EXPECT_EQ(10u, BIO_ctrl_get_write_guarantee(bio1));
-  EXPECT_EQ(0u, BIO_ctrl_get_read_request(bio1));
+  if (should_succeed &&
+      (out_len != expected_len || memcmp(data, out, expected_len) != 0)) {
+    return false;
+  }

-  // Data written in one end may be read out the other.
-  uint8_t buf[20];
-  EXPECT_EQ(5, BIO_write(bio1, "12345", 5));
-  EXPECT_EQ(5u, BIO_ctrl_get_write_guarantee(bio1));
-  ASSERT_EQ(5, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("12345"), Bytes(buf, 5));
-  EXPECT_EQ(10u, BIO_ctrl_get_write_guarantee(bio1));
-
-  // Attempting to write more than 10 bytes will write partially.
-  EXPECT_EQ(10, BIO_write(bio1, "1234567890___", 13));
-  EXPECT_EQ(0u, BIO_ctrl_get_write_guarantee(bio1));
-  EXPECT_EQ(-1, BIO_write(bio1, "z", 1));
-  EXPECT_TRUE(BIO_should_write(bio1));
-  ASSERT_EQ(10, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("1234567890"), Bytes(buf, 10));
-  EXPECT_EQ(10u, BIO_ctrl_get_write_guarantee(bio1));
-
-  // Unsuccessful reads update the read request.
-  EXPECT_EQ(-1, BIO_read(bio2, buf, 5));
-  EXPECT_TRUE(BIO_should_read(bio2));
-  EXPECT_EQ(5u, BIO_ctrl_get_read_request(bio1));
-
-  // The read request is clamped to the size of the buffer.
-  EXPECT_EQ(-1, BIO_read(bio2, buf, 20));
-  EXPECT_TRUE(BIO_should_read(bio2));
-  EXPECT_EQ(10u, BIO_ctrl_get_read_request(bio1));
-
-  // Data may be written and read in chunks.
-  EXPECT_EQ(5, BIO_write(bio1, "12345", 5));
-  EXPECT_EQ(5u, BIO_ctrl_get_write_guarantee(bio1));
-  EXPECT_EQ(5, BIO_write(bio1, "67890___", 8));
-  EXPECT_EQ(0u, BIO_ctrl_get_write_guarantee(bio1));
-  ASSERT_EQ(3, BIO_read(bio2, buf, 3));
-  EXPECT_EQ(Bytes("123"), Bytes(buf, 3));
-  EXPECT_EQ(3u, BIO_ctrl_get_write_guarantee(bio1));
-  ASSERT_EQ(7, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("4567890"), Bytes(buf, 7));
-  EXPECT_EQ(10u, BIO_ctrl_get_write_guarantee(bio1));
-
-  // Successful reads reset the read request.
-  EXPECT_EQ(0u, BIO_ctrl_get_read_request(bio1));
-
-  // Test writes and reads starting in the middle of the ring buffer and
-  // wrapping to front.
-  EXPECT_EQ(8, BIO_write(bio1, "abcdefgh", 8));
-  EXPECT_EQ(2u, BIO_ctrl_get_write_guarantee(bio1));
-  ASSERT_EQ(3, BIO_read(bio2, buf, 3));
-  EXPECT_EQ(Bytes("abc"), Bytes(buf, 3));
-  EXPECT_EQ(5u, BIO_ctrl_get_write_guarantee(bio1));
-  EXPECT_EQ(5, BIO_write(bio1, "ijklm___", 8));
-  EXPECT_EQ(0u, BIO_ctrl_get_write_guarantee(bio1));
-  ASSERT_EQ(10, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("defghijklm"), Bytes(buf, 10));
-  EXPECT_EQ(10u, BIO_ctrl_get_write_guarantee(bio1));
-
-  // Data may flow from both ends in parallel.
-  EXPECT_EQ(5, BIO_write(bio1, "12345", 5));
-  EXPECT_EQ(5, BIO_write(bio2, "67890", 5));
-  ASSERT_EQ(5, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("12345"), Bytes(buf, 5));
-  ASSERT_EQ(5, BIO_read(bio1, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("67890"), Bytes(buf, 5));
-
-  // Closing the write end causes an EOF on the read half, after draining.
-  EXPECT_EQ(5, BIO_write(bio1, "12345", 5));
-  EXPECT_TRUE(BIO_shutdown_wr(bio1));
-  ASSERT_EQ(5, BIO_read(bio2, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("12345"), Bytes(buf, 5));
-  EXPECT_EQ(0, BIO_read(bio2, buf, sizeof(buf)));
-
-  // A closed write end may not be written to.
-  EXPECT_EQ(0u, BIO_ctrl_get_write_guarantee(bio1));
-  EXPECT_EQ(-1, BIO_write(bio1, "_____", 5));
-
-  uint32_t err = ERR_get_error();
-  EXPECT_EQ(ERR_LIB_BIO, ERR_GET_LIB(err));
-  EXPECT_EQ(BIO_R_BROKEN_PIPE, ERR_GET_REASON(err));
-
-  // The other end is still functional.
-  EXPECT_EQ(5, BIO_write(bio2, "12345", 5));
-  ASSERT_EQ(5, BIO_read(bio1, buf, sizeof(buf)));
-  EXPECT_EQ(Bytes("12345"), Bytes(buf, 5));
+  return true;
 }

-INSTANTIATE_TEST_CASE_P(, BIOPairTest, testing::Values(false, true));
+// TestASN1 drives ReadASN1 over a handful of small hand-written inputs and
+// over an 8000-byte payload, first behind a two-byte long-form length header
+// and then behind an indefinite-length header. It returns true iff every case
+// behaves as expected. The large-payload cases print a diagnostic to stderr
+// when they fail.
+static bool TestASN1() {
+  // A complete element followed by two extra bytes.
+  static const uint8_t kValid[] = {0x30, 2, 1, 2, 0, 0};
+  // The header promises more bytes than are present.
+  static const uint8_t kTruncated[] = {0x30, 3, 1, 2};
+  // Long-form length where the short form should have been used.
+  static const uint8_t kShouldBeShort[] = {0x30, 0x81, 1, 1};
+  // Length encoded with a leading zero byte.
+  static const uint8_t kZeroPadded[] = {0x30, 0x82, 0, 1, 1};
+
+  const struct {
+    bool should_succeed;
+    const uint8_t *data;
+    size_t data_len;
+    size_t expected_len;
+  } kSmallCases[] = {
+      {true, kValid, sizeof(kValid), 4},
+      {false, kTruncated, sizeof(kTruncated), 0},
+      {false, kShouldBeShort, sizeof(kShouldBeShort), 0},
+      {false, kZeroPadded, sizeof(kZeroPadded), 0},
+  };
+  for (const auto &small : kSmallCases) {
+    if (!ReadASN1(small.should_succeed, small.data, small.data_len,
+                  small.expected_len, 100)) {
+      return false;
+    }
+  }
+
+  static const size_t kPayloadLen = 8000;
+  static const uint8_t kDefiniteHeader[] = {0x30, 0x82, kPayloadLen >> 8,
+                                            kPayloadLen & 0xff};
+  const size_t total_len = sizeof(kDefiniteHeader) + kPayloadLen;
+
+  bssl::UniquePtr<uint8_t> buf(
+      static_cast<uint8_t *>(OPENSSL_malloc(total_len)));
+  if (!buf) {
+    return false;
+  }
+  memcpy(buf.get(), kDefiniteHeader, sizeof(kDefiniteHeader));
+  memset(buf.get() + sizeof(kDefiniteHeader), 0, kPayloadLen);
+
+  // The whole element is returned when the limit is generous.
+  if (!ReadASN1(true, buf.get(), total_len, total_len, kPayloadLen * 2)) {
+    fprintf(stderr, "Large payload test failed.\n");
+    return false;
+  }
+
+  // A limit below the payload size must make the read fail.
+  if (!ReadASN1(false, buf.get(), total_len, total_len, kPayloadLen - 1)) {
+    fprintf(stderr, "max_len test failed.\n");
+    return false;
+  }
+
+  // Overwrite only the first two header bytes with an indefinite-length
+  // header; the rest of the buffer is left as it was.
+  static const uint8_t kIndefiniteHeader[] = {0x30, 0x80};
+  memcpy(buf.get(), kIndefiniteHeader, sizeof(kIndefiniteHeader));
+
+  if (!ReadASN1(true, buf.get(), total_len, total_len, kPayloadLen * 2)) {
+    fprintf(stderr, "indefinite length test failed.\n");
+    return false;
+  }
+
+  if (!ReadASN1(false, buf.get(), total_len, total_len, kPayloadLen - 1)) {
+    fprintf(stderr, "indefinite length, max_len test failed.\n");
+    return false;
+  }
+
+  return true;
+}
+
+// Test entry point. Initializes the library (and Winsock on Windows), then
+// runs each test group in order, stopping at the first failure. Prints "PASS"
+// and returns 0 only when every group succeeds; returns 1 otherwise.
+int main(void) {
+  CRYPTO_library_init();
+
+#if defined(OPENSSL_WINDOWS)
+  // Initialize Winsock, requesting version 2.2.
+  const WORD requested_version = MAKEWORD(2, 2);
+  WSADATA winsock_info;
+  const int startup_rc = WSAStartup(requested_version, &winsock_info);
+  if (startup_rc != 0) {
+    fprintf(stderr, "WSAStartup failed: %d\n", startup_rc);
+    return 1;
+  }
+  if (winsock_info.wVersion != requested_version) {
+    fprintf(stderr, "Didn't get expected version: %x\n",
+            winsock_info.wVersion);
+    return 1;
+  }
+#endif
+
+  // && short-circuits, so later groups are skipped after a failure.
+  const bool all_passed = TestSocketConnect() &&
+                          TestPrintf() &&
+                          TestZeroCopyBioPairs() &&
+                          TestASN1();
+  if (!all_passed) {
+    return 1;
+  }
+
+  printf("PASS\n");
+  return 0;
+}
@@ -0,0 +1,496 @@
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ * 
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ * 
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from 
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ * 
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * 
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.] */
+
+#include <openssl/bio.h>
+
+#include <string.h>
+
+#include <openssl/buf.h>
+#include <openssl/err.h>
+#include <openssl/mem.h>
+
+
+#define DEFAULT_BUFFER_SIZE 4096
+
+typedef struct bio_f_buffer_ctx_struct {
+  /* State of one buffering filter BIO: an input buffer that caches data read
+   * from the next BIO and an output buffer that collects data before it is
+   * written to the next BIO. Each buffer is set up like this:
+   *
+   * <---------------------- size ----------------------->
+   * +---------------------------------------------------+
+   * | consumed | remaining          | free space        |
+   * +---------------------------------------------------+
+   * <-- off --><------- len ------->
+   *
+   * |off| is the index of the first pending byte and |len| is the number of
+   * pending bytes that follow it.
+   */
+
+  int ibuf_size;  /* allocated size of |ibuf|, in bytes */
+  int obuf_size;  /* allocated size of |obuf|, in bytes */
+
+  char *ibuf;   /* input buffer storage */
+  int ibuf_len; /* number of buffered bytes not yet handed to the reader */
+  int ibuf_off; /* index in |ibuf| of the next byte to hand out */
+
+  char *obuf;   /* output buffer storage */
+  int obuf_len; /* number of buffered bytes not yet written downstream */
+  int obuf_off; /* index in |obuf| of the next byte to write downstream */
+} BIO_F_BUFFER_CTX;
+
+/* buffer_new allocates the filter state for |bio| together with an input and
+ * an output buffer of DEFAULT_BUFFER_SIZE bytes each, then marks |bio| as
+ * initialized. It returns one on success and zero if any allocation fails, in
+ * which case everything allocated so far is released. */
+static int buffer_new(BIO *bio) {
+  BIO_F_BUFFER_CTX *state = OPENSSL_malloc(sizeof(BIO_F_BUFFER_CTX));
+  if (state == NULL) {
+    return 0;
+  }
+  memset(state, 0, sizeof(BIO_F_BUFFER_CTX));
+
+  state->ibuf = OPENSSL_malloc(DEFAULT_BUFFER_SIZE);
+  if (state->ibuf == NULL) {
+    OPENSSL_free(state);
+    return 0;
+  }
+
+  state->obuf = OPENSSL_malloc(DEFAULT_BUFFER_SIZE);
+  if (state->obuf == NULL) {
+    OPENSSL_free(state->ibuf);
+    OPENSSL_free(state);
+    return 0;
+  }
+
+  state->ibuf_size = DEFAULT_BUFFER_SIZE;
+  state->obuf_size = DEFAULT_BUFFER_SIZE;
+
+  bio->ptr = (char *)state;
+  bio->init = 1;
+  return 1;
+}
+
+/* buffer_free releases both buffers and the filter state attached to |bio|
+ * and clears |bio|'s ptr, init and flags fields. It returns one on success
+ * and zero if |bio| is NULL or has no state attached. */
+static int buffer_free(BIO *bio) {
+  BIO_F_BUFFER_CTX *state;
+
+  if (bio == NULL) {
+    return 0;
+  }
+  state = (BIO_F_BUFFER_CTX *)bio->ptr;
+  if (state == NULL) {
+    return 0;
+  }
+
+  OPENSSL_free(state->ibuf);
+  OPENSSL_free(state->obuf);
+  OPENSSL_free(state);
+
+  bio->ptr = NULL;
+  bio->init = 0;
+  bio->flags = 0;
+
+  return 1;
+}
+
+/* buffer_read copies up to |outl| bytes into |out|, serving them from the
+ * input buffer first and refilling from |bio->next_bio| as needed. It returns
+ * the number of bytes copied. If nothing was copied, it returns the next
+ * BIO's result when that is negative and zero otherwise. It also returns zero
+ * when |bio| has no state or no next BIO. */
+static int buffer_read(BIO *bio, char *out, int outl) {
+  int i, num = 0;
+  BIO_F_BUFFER_CTX *ctx;
+
+  ctx = (BIO_F_BUFFER_CTX *)bio->ptr;
+
+  if (ctx == NULL || bio->next_bio == NULL) {
+    return 0;
+  }
+
+  num = 0;
+  BIO_clear_retry_flags(bio);
+
+  for (;;) {
+    i = ctx->ibuf_len;
+    /* If there is buffered data left over, hand out as much of it as the
+     * caller asked for. */
+    if (i != 0) {
+      if (i > outl) {
+        i = outl;
+      }
+      memcpy(out, &ctx->ibuf[ctx->ibuf_off], i);
+      ctx->ibuf_off += i;
+      ctx->ibuf_len -= i;
+      num += i;
+      if (outl == i) {
+        /* The request was satisfied entirely from the buffer. */
+        return num;
+      }
+      outl -= i;
+      out += i;
+    }
+
+    /* We may have done a partial read. Try to do more. We have nothing in the
+     * buffer. If we get an error and have read some data, just return it and
+     * let them retry to get the error again. When the remaining request is
+     * larger than the input buffer, read straight into the caller's buffer
+     * instead of going through |ibuf|. */
+    if (outl > ctx->ibuf_size) {
+      for (;;) {
+        i = BIO_read(bio->next_bio, out, outl);
+        if (i <= 0) {
+          /* Propagate the next BIO's retry state to this BIO. */
+          BIO_copy_next_retry(bio);
+          if (i < 0) {
+            return (num > 0) ? num : i;
+          }
+          return num;
+        }
+        num += i;
+        if (outl == i) {
+          return num;
+        }
+        out += i;
+        outl -= i;
+      }
+    }
+    /* else */
+
+    /* The remaining request fits in the input buffer: refill the whole
+     * buffer and loop back to copy from it. */
+    i = BIO_read(bio->next_bio, ctx->ibuf, ctx->ibuf_size);
+    if (i <= 0) {
+      BIO_copy_next_retry(bio);
+      if (i < 0) {
+        return (num > 0) ? num : i;
+      }
+      return num;
+    }
+    ctx->ibuf_off = 0;
+    ctx->ibuf_len = i;
+  }
+}
+
+/* buffer_write accepts |inl| bytes from |in|. Data that fits in the free
+ * space at the tail of the output buffer is only copied there. Otherwise the
+ * buffer is flushed to |b->next_bio| and any remainder at least as large as
+ * the buffer is written to the next BIO directly. It returns the number of
+ * bytes accepted. If none were accepted, it returns the next BIO's result
+ * when that is negative and zero otherwise. It also returns zero when |b| has
+ * no state or no next BIO. */
+static int buffer_write(BIO *b, const char *in, int inl) {
+  int i, num = 0;
+  BIO_F_BUFFER_CTX *ctx;
+
+  ctx = (BIO_F_BUFFER_CTX *)b->ptr;
+  if (ctx == NULL || b->next_bio == NULL) {
+    return 0;
+  }
+
+  BIO_clear_retry_flags(b);
+
+  for (;;) {
+    /* |i| is the free space after the pending data in the output buffer. */
+    i = ctx->obuf_size - (ctx->obuf_off + ctx->obuf_len);
+    /* Everything fits: add it to the buffer and return. */
+    if (i >= inl) {
+      memcpy(&ctx->obuf[ctx->obuf_off + ctx->obuf_len], in, inl);
+      ctx->obuf_len += inl;
+      return num + inl;
+    }
+    /* else */
+    /* There is already data in the buffer, so top the buffer up first and
+     * then flush it. */
+    if (ctx->obuf_len != 0) {
+      if (i > 0) {
+        memcpy(&ctx->obuf[ctx->obuf_off + ctx->obuf_len], in, i);
+        in += i;
+        inl -= i;
+        num += i;
+        ctx->obuf_len += i;
+      }
+
+      /* The buffer is now full up to its end; write it out until empty. */
+      for (;;) {
+        i = BIO_write(b->next_bio, &ctx->obuf[ctx->obuf_off], ctx->obuf_len);
+        if (i <= 0) {
+          /* Propagate the next BIO's retry state to this BIO. */
+          BIO_copy_next_retry(b);
+
+          if (i < 0) {
+            return (num > 0) ? num : i;
+          }
+          return num;
+        }
+        ctx->obuf_off += i;
+        ctx->obuf_len -= i;
+        if (ctx->obuf_len == 0) {
+          break;
+        }
+      }
+    }
+
+    /* we only get here if the buffer has been flushed and we
+     * still have stuff to write */
+    ctx->obuf_off = 0;
+
+    /* We now have |inl| bytes to write. While that is at least a whole
+     * buffer's worth, bypass the buffer and write to the next BIO directly. */
+    while (inl >= ctx->obuf_size) {
+      i = BIO_write(b->next_bio, in, inl);
+      if (i <= 0) {
+        BIO_copy_next_retry(b);
+        if (i < 0) {
+          return (num > 0) ? num : i;
+        }
+        return num;
+      }
+      num += i;
+      in += i;
+      inl -= i;
+      if (inl == 0) {
+        return num;
+      }
+    }
+
+    /* Only a small amount is left; loop back so the fits-in-buffer branch at
+     * the top copies it into the now-empty buffer. */
+  }
+}
+
+/* buffer_ctrl implements the control operations of the buffering filter.
+ * Commands it does not handle itself are forwarded to |b->next_bio|. Most
+ * commands return zero when there is no next BIO.
+ *
+ * NOTE(review): |ctx| is dereferenced without a NULL check, and
+ * BIO_C_SET_BUFF_SIZE dereferences |ptr| without one - confirm that callers
+ * can never pass either as NULL. */
+static long buffer_ctrl(BIO *b, int cmd, long num, void *ptr) {
+  BIO_F_BUFFER_CTX *ctx;
+  long ret = 1;
+  char *p1, *p2;
+  int r, *ip;
+  int ibs, obs;
+
+  ctx = (BIO_F_BUFFER_CTX *)b->ptr;
+
+  switch (cmd) {
+    case BIO_CTRL_RESET:
+      /* Drop everything buffered in both directions, then reset the next
+       * BIO. */
+      ctx->ibuf_off = 0;
+      ctx->ibuf_len = 0;
+      ctx->obuf_off = 0;
+      ctx->obuf_len = 0;
+      if (b->next_bio == NULL) {
+        return 0;
+      }
+      ret = BIO_ctrl(b->next_bio, cmd, num, ptr);
+      break;
+
+    case BIO_CTRL_INFO:
+      /* Reports the number of bytes waiting in the output buffer. */
+      ret = ctx->obuf_len;
+      break;
+
+    case BIO_CTRL_WPENDING:
+      /* Pending output here; if there is none, ask the next BIO. */
+      ret = (long)ctx->obuf_len;
+      if (ret == 0) {
+        if (b->next_bio == NULL) {
+          return 0;
+        }
+        ret = BIO_ctrl(b->next_bio, cmd, num, ptr);
+      }
+      break;
+
+    case BIO_CTRL_PENDING:
+      /* Pending input here; if there is none, ask the next BIO. */
+      ret = (long)ctx->ibuf_len;
+      if (ret == 0) {
+        if (b->next_bio == NULL) {
+          return 0;
+        }
+        ret = BIO_ctrl(b->next_bio, cmd, num, ptr);
+      }
+      break;
+
+    case BIO_C_SET_BUFF_SIZE:
+      /* |ptr| points to an int selecting the buffer: zero resizes the input
+       * buffer to |num| bytes, any other value resizes the output buffer. */
+      ip = (int *)ptr;
+      if (*ip == 0) {
+        ibs = (int)num;
+        obs = ctx->obuf_size;
+      } else /* if (*ip == 1) */ {
+        ibs = ctx->ibuf_size;
+        obs = (int)num;
+      }
+      p1 = ctx->ibuf;
+      p2 = ctx->obuf;
+      /* A buffer is only reallocated when the requested size is above the
+       * default and differs from the current size. */
+      if (ibs > DEFAULT_BUFFER_SIZE && ibs != ctx->ibuf_size) {
+        p1 = OPENSSL_malloc(ibs);
+        if (p1 == NULL) {
+          goto malloc_error;
+        }
+      }
+      if (obs > DEFAULT_BUFFER_SIZE && obs != ctx->obuf_size) {
+        p2 = OPENSSL_malloc(obs);
+        if (p2 == NULL) {
+          /* Do not leak a freshly allocated input buffer. */
+          if (p1 != ctx->ibuf) {
+            OPENSSL_free(p1);
+          }
+          goto malloc_error;
+        }
+      }
+
+      /* Both allocations succeeded: install the new buffers. The offsets and
+       * lengths are reset even when a buffer was not replaced, so any data
+       * still buffered is discarded. */
+      if (ctx->ibuf != p1) {
+        OPENSSL_free(ctx->ibuf);
+        ctx->ibuf = p1;
+        ctx->ibuf_size = ibs;
+      }
+      ctx->ibuf_off = 0;
+      ctx->ibuf_len = 0;
+
+      if (ctx->obuf != p2) {
+        OPENSSL_free(ctx->obuf);
+        ctx->obuf = p2;
+        ctx->obuf_size = obs;
+      }
+      ctx->obuf_off = 0;
+      ctx->obuf_len = 0;
+      break;
+
+    case BIO_CTRL_FLUSH:
+      if (b->next_bio == NULL) {
+        return 0;
+      }
+
+      /* Write out the output buffer until it is empty. A non-positive result
+       * from the next BIO is returned as is, with the data still buffered. */
+      while (ctx->obuf_len > 0) {
+        BIO_clear_retry_flags(b);
+        r = BIO_write(b->next_bio, &(ctx->obuf[ctx->obuf_off]),
+                      ctx->obuf_len);
+        BIO_copy_next_retry(b);
+        if (r <= 0) {
+          return r;
+        }
+        ctx->obuf_off += r;
+        ctx->obuf_len -= r;
+      }
+
+      ctx->obuf_len = 0;
+      ctx->obuf_off = 0;
+      /* Then let the next BIO flush as well. */
+      ret = BIO_ctrl(b->next_bio, cmd, num, ptr);
+      break;
+
+    default:
+      /* Anything else is passed to the next BIO, and its retry state is
+       * copied back to this BIO. */
+      if (b->next_bio == NULL) {
+        return 0;
+      }
+      BIO_clear_retry_flags(b);
+      ret = BIO_ctrl(b->next_bio, cmd, num, ptr);
+      BIO_copy_next_retry(b);
+      break;
+  }
+  return ret;
+
+malloc_error:
+  OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
+  return 0;
+}
+
+/* buffer_callback_ctrl handles no callback command itself: every |cmd| is
+ * forwarded to the next BIO and that result is returned. It returns zero when
+ * there is no next BIO. */
+static long buffer_callback_ctrl(BIO *b, int cmd, bio_info_cb fp) {
+  BIO *next = b->next_bio;
+
+  if (next == NULL) {
+    return 0;
+  }
+  return BIO_callback_ctrl(next, cmd, fp);
+}
+
+/* buffer_gets reads one line into |buf|, which has room for |size| bytes. It
+ * copies at most |size| - 1 bytes, stops after a '\n' (which is kept in the
+ * output), and always NUL-terminates what it copied. The input buffer is
+ * refilled from |b->next_bio| whenever it runs empty.
+ *
+ * It returns the number of bytes stored, not counting the terminator. If
+ * nothing was stored, it returns the next BIO's result when that is negative
+ * and zero otherwise. It also returns zero when |b| has no state, |buf| is
+ * NULL or |size| is not positive. */
+static int buffer_gets(BIO *b, char *buf, int size) {
+  BIO_F_BUFFER_CTX *ctx;
+  int num = 0, i, flag;
+  char *p;
+
+  ctx = (BIO_F_BUFFER_CTX *)b->ptr;
+  /* Like buffer_read and buffer_write, refuse to run without filter state
+   * rather than dereferencing a NULL |ctx| below. */
+  if (ctx == NULL || buf == NULL || size <= 0) {
+    return 0;
+  }
+
+  size--; /* reserve space for a '\0' */
+  BIO_clear_retry_flags(b);
+
+  for (;;) {
+    if (ctx->ibuf_len > 0) {
+      /* Copy from the buffered data until a newline, the end of the buffered
+       * data, or the end of the caller's space, whichever comes first. */
+      p = &ctx->ibuf[ctx->ibuf_off];
+      flag = 0;
+      for (i = 0; (i < ctx->ibuf_len) && (i < size); i++) {
+        *(buf++) = p[i];
+        if (p[i] == '\n') {
+          flag = 1;
+          /* Count the newline as consumed before leaving the loop. */
+          i++;
+          break;
+        }
+      }
+      num += i;
+      size -= i;
+      ctx->ibuf_len -= i;
+      ctx->ibuf_off += i;
+      if (flag || size == 0) {
+        *buf = '\0';
+        return num;
+      }
+    } else {
+      /* The input buffer is empty: read another chunk from the next BIO. */
+      i = BIO_read(b->next_bio, ctx->ibuf, ctx->ibuf_size);
+      if (i <= 0) {
+        /* Propagate the next BIO's retry state and return what we have. */
+        BIO_copy_next_retry(b);
+        *buf = '\0';
+        if (i < 0) {
+          return (num > 0) ? num : i;
+        }
+        return num;
+      }
+      ctx->ibuf_len = i;
+      ctx->ibuf_off = 0;
+    }
+  }
+}
+
+/* buffer_puts writes the NUL-terminated string |str|, without its
+ * terminator, through buffer_write and returns that call's result. */
+static int buffer_puts(BIO *b, const char *str) {
+  const size_t len = strlen(str);
+
+  return buffer_write(b, str, len);
+}
+
+/* Method table for the buffering filter BIO, one slot per line. */
+static const BIO_METHOD methods_buffer = {
+    BIO_TYPE_BUFFER,
+    "buffer",
+    buffer_write,
+    buffer_read,
+    buffer_puts, /* puts */
+    buffer_gets, /* gets */
+    buffer_ctrl,
+    buffer_new,
+    buffer_free,
+    buffer_callback_ctrl, /* callback_ctrl */
+};
+
+/* BIO_f_buffer returns the method table of the buffering filter BIO. */
+const BIO_METHOD *BIO_f_buffer(void) {
+  return &methods_buffer;
+}
+
+/* BIO_set_read_buffer_size asks |bio| to resize its input buffer to
+ * |buffer_size| bytes and returns the result of the control call. */
+int BIO_set_read_buffer_size(BIO *bio, int buffer_size) {
+  /* Zero selects the input buffer in BIO_C_SET_BUFF_SIZE. */
+  const int which_buffer = 0;
+
+  return BIO_int_ctrl(bio, BIO_C_SET_BUFF_SIZE, buffer_size, which_buffer);
+}
+
+/* BIO_set_write_buffer_size asks |bio| to resize its output buffer to
+ * |buffer_size| bytes and returns the result of the control call. */
+int BIO_set_write_buffer_size(BIO *bio, int buffer_size) {
+  /* A non-zero value selects the output buffer in BIO_C_SET_BUFF_SIZE. */
+  const int which_buffer = 1;
+
+  return BIO_int_ctrl(bio, BIO_C_SET_BUFF_SIZE, buffer_size, which_buffer);
+}
@@ -77,7 +77,6 @@ OPENSSL_MSVC_PRAGMA(warning(pop))
 #include <openssl/mem.h>

 #include "internal.h"
-#include "../internal.h"


 enum {
@@ -299,7 +298,7 @@ static BIO_CONNECT *BIO_CONNECT_new(void) {
  if (ret == NULL) {
    return NULL;
  }
-  OPENSSL_memset(ret, 0, sizeof(BIO_CONNECT));
+  memset(ret, 0, sizeof(BIO_CONNECT));

  ret->state = BIO_CONN_S_BEFORE;
  return ret;
@@ -468,6 +467,14 @@ static long conn_ctrl(BIO *bio, int cmd, long num, void *ptr) {
      break;
    case BIO_CTRL_FLUSH:
      break;
+    case BIO_CTRL_SET_CALLBACK: {
+#if 0 /* FIXME: Should this be used?  -- Richard Levitte */
+		OPENSSL_PUT_ERROR(BIO, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+		ret = -1;
+#else
+      ret = 0;
+#endif
+    } break;
    case BIO_CTRL_GET_CALLBACK: {
      int (**fptr)(const BIO *bio, int state, int xret);
      fptr = (int (**)(const BIO *bio, int state, int xret))ptr;
@@ -477,7 +484,7 @@ static long conn_ctrl(BIO *bio, int cmd, long num, void *ptr) {
      ret = 0;
      break;
  }
-  return ret;
+  return (ret);
 }

 static long conn_callback_ctrl(BIO *bio, int cmd, bio_info_cb fp) {
@@ -487,9 +494,9 @@ static long conn_callback_ctrl(BIO *bio, int cmd, bio_info_cb fp) {
  data = (BIO_CONNECT *)bio->ptr;

  switch (cmd) {
-    case BIO_CTRL_SET_CALLBACK:
+    case BIO_CTRL_SET_CALLBACK: {
      data->info_callback = (int (*)(const struct bio_st *, int, int))fp;
-      break;
+    } break;
    default:
      ret = 0;
      break;
@@ -497,6 +504,10 @@ static long conn_callback_ctrl(BIO *bio, int cmd, bio_info_cb fp) {
  return ret;
 }

+/* conn_puts writes the NUL-terminated string |str|, without its terminator,
+ * through conn_write and returns that call's result. */
+static int conn_puts(BIO *bp, const char *str) {
+  const size_t len = strlen(str);
+
+  return conn_write(bp, str, len);
+}
+
 BIO *BIO_new_connect(const char *hostname) {
  BIO *ret;

@@ -512,8 +523,8 @@ BIO *BIO_new_connect(const char *hostname) {
 }

 static const BIO_METHOD methods_connectp = {
-    BIO_TYPE_CONNECT, "socket connect",   conn_write, conn_read,
-    NULL /* puts */,  NULL /* gets */,    conn_ctrl,  conn_new,
+    BIO_TYPE_CONNECT, "socket connect",         conn_write, conn_read,
+    conn_puts,        NULL /* connect_gets, */, conn_ctrl,  conn_new,
    conn_free,        conn_callback_ctrl,
 };

@@ -241,6 +241,10 @@ static long fd_ctrl(BIO *b, int cmd, long num, void *ptr) {
  return ret;
 }

+/* fd_puts writes the NUL-terminated string |str|, without its terminator,
+ * through fd_write and returns that call's result. */
+static int fd_puts(BIO *bp, const char *str) {
+  const size_t len = strlen(str);
+
+  return fd_write(bp, str, len);
+}
+
 static int fd_gets(BIO *bp, char *buf, int size) {
  char *ptr = buf;
  char *end = buf + size - 1;
@@ -259,9 +263,8 @@ static int fd_gets(BIO *bp, char *buf, int size) {
 }

 static const BIO_METHOD methods_fdp = {
-    BIO_TYPE_FD, "file descriptor", fd_write, fd_read, NULL /* puts */,
-    fd_gets,     fd_ctrl,           fd_new,   fd_free, NULL /* callback_ctrl */,
-};
+    BIO_TYPE_FD, "file descriptor", fd_write, fd_read, fd_puts,
+    fd_gets,     fd_ctrl,           fd_new,   fd_free, NULL, };

 const BIO_METHOD *BIO_s_fd(void) { return &methods_fdp; }

@@ -273,13 +273,13 @@ err:
  return ret;
 }

+/* file_puts writes the NUL-terminated string |str|, without its terminator,
+ * through file_write and returns that call's result. */
+static int file_puts(BIO *bp, const char *str) {
+  const size_t len = strlen(str);
+
+  return file_write(bp, str, len);
+}
+
 static const BIO_METHOD methods_filep = {
-    BIO_TYPE_FILE,   "FILE pointer",
-    file_write,      file_read,
-    NULL /* puts */, file_gets,
-    file_ctrl,       file_new,
-    file_free,       NULL /* callback_ctrl */,
-};
+    BIO_TYPE_FILE, "FILE pointer", file_write, file_read, file_puts,
+    file_gets,     file_ctrl,      file_new,   file_free, NULL, };

 const BIO_METHOD *BIO_s_file(void) { return &methods_filep; }

@@ -59,8 +59,6 @@
 #include <limits.h>
 #include <string.h>

-#include "../internal.h"
-

 /* hexdump_ctx contains the state of a hexdump. */
 struct hexdump_ctx {
@@ -156,7 +154,7 @@ static int finish(struct hexdump_ctx *ctx) {
    return 1;
  }

-  OPENSSL_memset(buf, ' ', 4);
+  memset(buf, ' ', 4);
  buf[4] = '|';

  for (; ctx->used < 16; ctx->used++) {
@@ -181,7 +179,7 @@ static int finish(struct hexdump_ctx *ctx) {

 int BIO_hexdump(BIO *bio, const uint8_t *data, size_t len, unsigned indent) {
  struct hexdump_ctx ctx;
-  OPENSSL_memset(&ctx, 0, sizeof(ctx));
+  memset(&ctx, 0, sizeof(ctx));
  ctx.bio = bio;
  ctx.indent = indent;

@@ -59,8 +59,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../internal.h"
-

 struct bio_bio_st {
  BIO *peer; /* NULL if buf == NULL.
@@ -74,6 +72,12 @@ struct bio_bio_st {
  size_t offset; /* valid iff buf != NULL; 0 if len == 0 */
  size_t size;
  uint8_t *buf; /* "size" elements (if != NULL) */
+  char buf_externally_allocated; /* true iff buf was externally allocated. */
+
+  char zero_copy_read_lock;  /* true iff a zero copy read operation
+                              * is in progress. */
+  char zero_copy_write_lock; /* true iff a zero copy write operation
+                              * is in progress. */

  size_t request; /* valid iff peer != NULL; 0 if len != 0,
                   * otherwise set by peer to number of bytes
@@ -88,7 +92,7 @@ static int bio_new(BIO *bio) {
  if (b == NULL) {
    return 0;
  }
-  OPENSSL_memset(b, 0, sizeof(struct bio_bio_st));
+  memset(b, 0, sizeof(struct bio_bio_st));

  b->size = 17 * 1024; /* enough for one TLS record (just a default) */
  bio->ptr = b;
@@ -141,12 +145,263 @@ static int bio_free(BIO *bio) {
    bio_destroy_pair(bio);
  }

-  OPENSSL_free(b->buf);
+  if (!b->buf_externally_allocated) {
+    OPENSSL_free(b->buf);
+  }
+
  OPENSSL_free(b);

  return 1;
 }

+/* bio_zero_copy_get_read_buf reports where the readable data of |peer_b|
+ * starts: |*out_read_buf| receives the base of the ring buffer and
+ * |*out_buf_offset| the current read offset. It returns the number of bytes
+ * that can be read contiguously from that offset; data that wraps around to
+ * the front of the ring buffer is not included. */
+static size_t bio_zero_copy_get_read_buf(struct bio_bio_st* peer_b,
+                                         uint8_t** out_read_buf,
+                                         size_t* out_buf_offset) {
+  /* Bytes between the read offset and the physical end of the buffer. */
+  const size_t until_end = peer_b->size - peer_b->offset;
+
+  *out_read_buf = peer_b->buf;
+  *out_buf_offset = peer_b->offset;
+
+  return (peer_b->len > until_end) ? until_end : peer_b->len;
+}
+
+/* BIO_zero_copy_get_read_buf starts a zero-copy read on |bio|, which must be
+ * one end of a connected BIO pair. On success it returns one, stores the
+ * peer's ring buffer and read offset in |*out_read_buf| and
+ * |*out_buf_offset|, and stores the number of contiguously readable bytes in
+ * |*out_available_bytes|. If that number is non-zero, the read lock is taken
+ * until BIO_zero_copy_get_read_buf_done is called. On failure it pushes an
+ * error, leaves |*out_available_bytes| at zero and returns zero. */
+int BIO_zero_copy_get_read_buf(BIO* bio, uint8_t** out_read_buf,
+                               size_t* out_buf_offset,
+                               size_t* out_available_bytes) {
+  struct bio_bio_st* self;
+  struct bio_bio_st* peer;
+  size_t readable;
+
+  *out_available_bytes = 0;
+  BIO_clear_retry_flags(bio);
+
+  if (!bio->init) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
+    return 0;
+  }
+
+  /* |bio| must have state and a peer... */
+  self = bio->ptr;
+  if (self == NULL || self->peer == NULL) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  /* ...and the peer must point back at |bio|'s state. */
+  peer = self->peer->ptr;
+  if (peer == NULL || peer->peer == NULL || peer->peer->ptr != self) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  /* Only one zero-copy read may be outstanding at a time. */
+  if (peer->zero_copy_read_lock) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  peer->request = 0;  /* Is not used by zero-copy API. */
+
+  readable = bio_zero_copy_get_read_buf(peer, out_read_buf, out_buf_offset);
+  assert(peer->buf != NULL);
+
+  /* With nothing to read there is nothing to complete, so no lock is held. */
+  if (readable != 0) {
+    peer->zero_copy_read_lock = 1;
+  }
+
+  *out_available_bytes = readable;
+  return 1;
+}
+
+/* BIO_zero_copy_get_read_buf_done finishes a zero-copy read started with
+ * BIO_zero_copy_get_read_buf. It consumes |bytes_read| bytes from the peer's
+ * ring buffer and releases the read lock. It returns one on success. It
+ * pushes an error and returns zero if |bio| is not part of a connected pair,
+ * if no zero-copy read is in progress, or if |bytes_read| exceeds the number
+ * of contiguously readable bytes. */
+int BIO_zero_copy_get_read_buf_done(BIO* bio, size_t bytes_read) {
+  struct bio_bio_st* self;
+  struct bio_bio_st* peer;
+  size_t readable;
+  size_t unused_offset;
+  uint8_t* unused_buf;
+
+  assert(BIO_get_retry_flags(bio) == 0);
+
+  if (!bio->init) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
+    return 0;
+  }
+
+  self = bio->ptr;
+  if (self == NULL || self->peer == NULL) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  peer = self->peer->ptr;
+  if (peer == NULL || peer->peer == NULL || peer->peer->ptr != self) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  /* There must be a read in progress to finish. */
+  if (!peer->zero_copy_read_lock) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  /* The caller cannot have consumed more than was offered. */
+  readable = bio_zero_copy_get_read_buf(peer, &unused_buf, &unused_offset);
+  if (bytes_read > readable) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  assert(peer->len >= bytes_read);
+  peer->len -= bytes_read;
+  assert(peer->offset + bytes_read <= peer->size);
+
+  /* Advance the read offset, wrapping to zero at the end of the buffer. An
+   * emptied buffer is also rewound to zero, unless a zero-copy write is in
+   * progress: then the offset must keep advancing so that
+   * write_offset = (offset + len) mod size does not change. */
+  if (peer->offset + bytes_read == peer->size) {
+    peer->offset = 0;
+  } else if (peer->len == 0 && !peer->zero_copy_write_lock) {
+    peer->offset = 0;
+  } else {
+    peer->offset += bytes_read;
+  }
+
+  bio->num_read += bytes_read;
+  peer->zero_copy_read_lock = 0;
+  return 1;
+}
+
+/* bio_zero_copy_get_write_buf reports where new data may be written in |b|:
+ * |*out_write_buf| receives the base of the ring buffer and |*out_buf_offset|
+ * the offset just past the pending data. It returns the number of bytes that
+ * can be written contiguously from that offset. */
+static size_t bio_zero_copy_get_write_buf(struct bio_bio_st* b,
+                                          uint8_t** out_write_buf,
+                                          size_t* out_buf_offset) {
+  size_t start;
+  size_t room;
+
+  assert(b->len <= b->size);
+
+  start = b->offset + b->len;
+  if (start < b->size) {
+    /* The pending data has not wrapped: free space runs to the end of the
+     * buffer. */
+    room = b->size - start;
+  } else {
+    /* The write position has wrapped to the front: free space runs up to the
+     * start of the pending data. */
+    start -= b->size;
+    room = b->offset - start;
+  }
+
+  *out_write_buf = b->buf;
+  *out_buf_offset = start;
+  return room;
+}
+
+/* BIO_zero_copy_get_write_buf starts a zero-copy write on |bio|, which must
+ * be one end of a connected BIO pair with a buffer. On success it returns
+ * one, stores |bio|'s ring buffer and write offset in |*out_write_buf| and
+ * |*out_buf_offset|, and stores the number of contiguously writable bytes in
+ * |*out_available_bytes|. If that number is non-zero, the write lock is taken
+ * until BIO_zero_copy_get_write_buf_done is called. On failure it pushes an
+ * error, leaves |*out_available_bytes| at zero and returns zero. */
+int BIO_zero_copy_get_write_buf(BIO* bio, uint8_t** out_write_buf,
+                                size_t* out_buf_offset,
+                                size_t* out_available_bytes) {
+  struct bio_bio_st* self;
+  struct bio_bio_st* peer;
+  size_t writable;
+
+  *out_available_bytes = 0;
+  BIO_clear_retry_flags(bio);
+
+  if (!bio->init) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
+    return 0;
+  }
+
+  /* |bio| must have state, a buffer and a peer... */
+  self = bio->ptr;
+  if (self == NULL || self->buf == NULL || self->peer == NULL) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  /* ...and the peer must point back at |bio|'s state. */
+  peer = self->peer->ptr;
+  if (peer == NULL || peer->peer == NULL || peer->peer->ptr != self) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  assert(self->buf != NULL);
+
+  /* Only one zero-copy write may be outstanding at a time. */
+  if (self->zero_copy_write_lock) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  self->request = 0;
+  if (self->closed) {
+    /* Bio is already closed. */
+    OPENSSL_PUT_ERROR(BIO, BIO_R_BROKEN_PIPE);
+    return 0;
+  }
+
+  writable = bio_zero_copy_get_write_buf(self, out_write_buf, out_buf_offset);
+
+  /* With no room there is nothing to complete, so no lock is held. */
+  if (writable != 0) {
+    self->zero_copy_write_lock = 1;
+  }
+
+  *out_available_bytes = writable;
+  return 1;
+}
+
+/* BIO_zero_copy_get_write_buf_done finishes a zero-copy write started with
+ * BIO_zero_copy_get_write_buf. It records |bytes_written| new bytes in
+ * |bio|'s ring buffer and releases the write lock. It returns one on success.
+ * It pushes an error and returns zero if |bio| is not part of a connected
+ * pair, if it has been closed, if no zero-copy write is in progress, or if
+ * |bytes_written| exceeds the contiguously writable space. */
+int BIO_zero_copy_get_write_buf_done(BIO* bio, size_t bytes_written) {
+  struct bio_bio_st* self;
+  struct bio_bio_st* peer;
+  size_t writable;
+  size_t unused_offset;
+  uint8_t* unused_buf;
+
+  if (!bio->init) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
+    return 0;
+  }
+
+  self = bio->ptr;
+  if (self == NULL || self->buf == NULL || self->peer == NULL) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  peer = self->peer->ptr;
+  if (peer == NULL || peer->peer == NULL || peer->peer->ptr != self) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
+    return 0;
+  }
+
+  self->request = 0;
+  if (self->closed) {
+    /* BIO is already closed. */
+    OPENSSL_PUT_ERROR(BIO, BIO_R_BROKEN_PIPE);
+    return 0;
+  }
+
+  /* There must be a write in progress to finish. */
+  if (!self->zero_copy_write_lock) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  /* The caller cannot have written more than the space that was offered. */
+  writable = bio_zero_copy_get_write_buf(self, &unused_buf, &unused_offset);
+  if (bytes_written > writable) {
+    OPENSSL_PUT_ERROR(BIO, BIO_R_INVALID_ARGUMENT);
+    return 0;
+  }
+
+  bio->num_write += bytes_written;
+  /* Growing |len| moves the write offset past the new data. */
+  self->len += bytes_written;
+  self->zero_copy_write_lock = 0;
+  return 1;
+}
+
 static int bio_read(BIO *bio, char *buf, int size_) {
  size_t size = size_;
  size_t rest;
@@ -167,7 +422,7 @@ static int bio_read(BIO *bio, char *buf, int size_) {

  peer_b->request = 0; /* will be set in "retry_read" situation */

-  if (buf == NULL || size == 0) {
+  if (buf == NULL || size == 0 || peer_b->zero_copy_read_lock) {
    return 0;
  }

@@ -209,10 +464,13 @@ static int bio_read(BIO *bio, char *buf, int size_) {
    }
    assert(peer_b->offset + chunk <= peer_b->size);

-    OPENSSL_memcpy(buf, peer_b->buf + peer_b->offset, chunk);
+    memcpy(buf, peer_b->buf + peer_b->offset, chunk);

    peer_b->len -= chunk;
-    if (peer_b->len) {
+    /* If zero_copy_write_lock == 1 we must advance the offset even if buffer
+     * becomes empty, to make sure write_offset = (offset + len) % size
+     * does not change. */
+    if (peer_b->len || peer_b->zero_copy_write_lock) {
      peer_b->offset += chunk;
      assert(peer_b->offset <= peer_b->size);
      if (peer_b->offset == peer_b->size) {
@@ -246,6 +504,10 @@ static int bio_write(BIO *bio, const char *buf, int num_) {
  assert(b->peer != NULL);
  assert(b->buf != NULL);

+  if (b->zero_copy_write_lock) {
+    return 0;
+  }
+
  b->request = 0;
  if (b->closed) {
    /* we already closed */
@@ -289,7 +551,7 @@ static int bio_write(BIO *bio, const char *buf, int num_) {
      chunk = b->size - write_offset;
    }

-    OPENSSL_memcpy(b->buf + write_offset, buf, chunk);
+    memcpy(b->buf + write_offset, buf, chunk);

    b->len += chunk;

@@ -302,8 +564,9 @@ static int bio_write(BIO *bio, const char *buf, int num_) {
  return num;
 }

-static int bio_make_pair(BIO *bio1, BIO *bio2, size_t writebuf1_len,
-                         size_t writebuf2_len) {
+static int bio_make_pair(BIO* bio1, BIO* bio2,
+                         size_t writebuf1_len, uint8_t* ext_writebuf1,
+                         size_t writebuf2_len, uint8_t* ext_writebuf2) {
  struct bio_bio_st *b1, *b2;

  assert(bio1 != NULL);
@@ -317,14 +580,23 @@ static int bio_make_pair(BIO *bio1, BIO *bio2, size_t writebuf1_len,
    return 0;
  }

+  assert(b1->buf_externally_allocated == 0);
+  assert(b2->buf_externally_allocated == 0);
+
  if (b1->buf == NULL) {
    if (writebuf1_len) {
      b1->size = writebuf1_len;
    }
-    b1->buf = OPENSSL_malloc(b1->size);
-    if (b1->buf == NULL) {
-      OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
-      return 0;
+    if (!ext_writebuf1) {
+      b1->buf_externally_allocated = 0;
+      b1->buf = OPENSSL_malloc(b1->size);
+      if (b1->buf == NULL) {
+        OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
+        return 0;
+      }
+    } else {
+      b1->buf = ext_writebuf1;
+      b1->buf_externally_allocated = 1;
    }
    b1->len = 0;
    b1->offset = 0;
@@ -334,10 +606,16 @@ static int bio_make_pair(BIO *bio1, BIO *bio2, size_t writebuf1_len,
    if (writebuf2_len) {
      b2->size = writebuf2_len;
    }
-    b2->buf = OPENSSL_malloc(b2->size);
-    if (b2->buf == NULL) {
-      OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
-      return 0;
+    if (!ext_writebuf2) {
+      b2->buf_externally_allocated = 0;
+      b2->buf = OPENSSL_malloc(b2->size);
+      if (b2->buf == NULL) {
+        OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
+        return 0;
+      }
+    } else {
+      b2->buf = ext_writebuf2;
+      b2->buf_externally_allocated = 1;
    }
    b2->len = 0;
    b2->offset = 0;
@@ -346,9 +624,13 @@ static int bio_make_pair(BIO *bio1, BIO *bio2, size_t writebuf1_len,
  b1->peer = bio2;
  b1->closed = 0;
  b1->request = 0;
+  b1->zero_copy_read_lock = 0;
+  b1->zero_copy_write_lock = 0;
  b2->peer = bio1;
  b2->closed = 0;
  b2->request = 0;
+  b2->zero_copy_read_lock = 0;
+  b2->zero_copy_write_lock = 0;

  bio1->init = 1;
  bio2->init = 1;
@@ -450,30 +732,62 @@ static long bio_ctrl(BIO *bio, int cmd, long num, void *ptr) {
  return ret;
 }

+/* bio_puts writes the NUL-terminated string |str|, without its terminator, to
+ * |bio| and returns whatever bio_write returns. The length is narrowed to the
+ * |int| that bio_write accepts. */
+static int bio_puts(BIO *bio, const char *str) {
+  const size_t len = strlen(str);
+  return bio_write(bio, str, len);
+}

 static const BIO_METHOD methods_biop = {
-    BIO_TYPE_BIO,    "BIO pair", bio_write, bio_read, NULL /* puts */,
-    NULL /* gets */, bio_ctrl,   bio_new,   bio_free, NULL /* callback_ctrl */
+    BIO_TYPE_BIO, "BIO pair",             bio_write, bio_read,
+    bio_puts,     NULL /* no bio_gets */, bio_ctrl,  bio_new,
+    bio_free,     NULL /* no bio_callback_ctrl */
 };

 static const BIO_METHOD *bio_s_bio(void) { return &methods_biop; }

-int BIO_new_bio_pair(BIO** bio1_p, size_t writebuf1_len,
-                     BIO** bio2_p, size_t writebuf2_len) {
-  BIO *bio1 = BIO_new(bio_s_bio());
-  BIO *bio2 = BIO_new(bio_s_bio());
-  if (bio1 == NULL || bio2 == NULL ||
-      !bio_make_pair(bio1, bio2, writebuf1_len, writebuf2_len)) {
+int BIO_new_bio_pair(BIO** bio1_p, size_t writebuf1,
+                     BIO** bio2_p, size_t writebuf2) {
+  return BIO_new_bio_pair_external_buf(bio1_p, writebuf1, NULL, bio2_p,
+                                       writebuf2, NULL);
+}
+
+int BIO_new_bio_pair_external_buf(BIO** bio1_p, size_t writebuf1_len,
+                                  uint8_t* ext_writebuf1,
+                                  BIO** bio2_p, size_t writebuf2_len,
+                                  uint8_t* ext_writebuf2) {
+  BIO *bio1 = NULL, *bio2 = NULL;
+  int ret = 0;
+
+  /* External buffers must have sizes greater than 0. */
+  if ((ext_writebuf1 && !writebuf1_len) || (ext_writebuf2 && !writebuf2_len)) {
+    goto err;
+  }
+
+  bio1 = BIO_new(bio_s_bio());
+  if (bio1 == NULL) {
+    goto err;
+  }
+  bio2 = BIO_new(bio_s_bio());
+  if (bio2 == NULL) {
+    goto err;
+  }
+
+  if (!bio_make_pair(bio1, bio2, writebuf1_len, ext_writebuf1, writebuf2_len,
+                     ext_writebuf2)) {
+    goto err;
+  }
+  ret = 1;
+
+err:
+  if (ret == 0) {
    BIO_free(bio1);
+    bio1 = NULL;
    BIO_free(bio2);
-    *bio1_p = NULL;
-    *bio2_p = NULL;
-    return 0;
+    bio2 = NULL;
  }

  *bio1_p = bio1;
  *bio2_p = bio2;
-  return 1;
+  return ret;
 }

 size_t BIO_ctrl_get_read_request(BIO *bio) {
@@ -142,6 +142,10 @@ static int sock_write(BIO *b, const char *in, int inl) {
  return ret;
 }

+/* sock_puts writes the NUL-terminated string |str|, without its terminator,
+ * to |bp| and returns whatever sock_write returns. The length is narrowed to
+ * the |int| that sock_write accepts. */
+static int sock_puts(BIO *bp, const char *str) {
+  const size_t len = strlen(str);
+  return sock_write(bp, str, len);
+}
+
 static long sock_ctrl(BIO *b, int cmd, long num, void *ptr) {
  long ret = 1;
  int *ip;
@@ -181,11 +185,8 @@ static long sock_ctrl(BIO *b, int cmd, long num, void *ptr) {
 }

 static const BIO_METHOD methods_sockp = {
-    BIO_TYPE_SOCKET, "socket",
-    sock_write,      sock_read,
-    NULL /* puts */, NULL /* gets, */,
-    sock_ctrl,       sock_new,
-    sock_free,       NULL /* callback_ctrl */,
+    BIO_TYPE_SOCKET,  "socket",  sock_write, sock_read, sock_puts,
+    NULL /* gets, */, sock_ctrl, sock_new,   sock_free, NULL,
 };

 const BIO_METHOD *BIO_s_socket(void) { return &methods_sockp; }
@@ -33,7 +33,6 @@ OPENSSL_MSVC_PRAGMA(warning(pop))
 #endif

 #include "internal.h"
-#include "../internal.h"


 int bio_ip_and_port_to_socket_and_addr(int *out_sock,
@@ -46,7 +45,7 @@ int bio_ip_and_port_to_socket_and_addr(int *out_sock,

  *out_sock = -1;

-  OPENSSL_memset(&hint, 0, sizeof(hint));
+  memset(&hint, 0, sizeof(hint));
  hint.ai_family = AF_UNSPEC;
  hint.ai_socktype = SOCK_STREAM;

@@ -63,8 +62,8 @@ int bio_ip_and_port_to_socket_and_addr(int *out_sock,
    if ((size_t) cur->ai_addrlen > sizeof(struct sockaddr_storage)) {
      continue;
    }
-    OPENSSL_memset(out_addr, 0, sizeof(struct sockaddr_storage));
-    OPENSSL_memcpy(out_addr, cur->ai_addr, cur->ai_addrlen);
+    memset(out_addr, 0, sizeof(struct sockaddr_storage));
+    memcpy(out_addr, cur->ai_addr, cur->ai_addrlen);
    *out_addr_length = cur->ai_addrlen;

    *out_sock = socket(cur->ai_family, cur->ai_socktype, cur->ai_protocol);
@@ -0,0 +1,89 @@
+# Add ../../include (relative to this directory) to the header search path.
+include_directories(../../include)
+
+# Pick the architecture-specific sources for the bn library. ARCH and ASM_EXT
+# are not set in this file and are expected to come from an enclosing
+# CMakeLists. ARCH is quoted so that an empty or unset value is compared as an
+# ordinary string instead of producing a malformed if() condition. For any
+# other ARCH value BN_ARCH_SOURCES is left unset and expands to nothing below.
+if ("${ARCH}" STREQUAL "x86_64")
+  set(
+    BN_ARCH_SOURCES
+
+    x86_64-mont.${ASM_EXT}
+    x86_64-mont5.${ASM_EXT}
+    rsaz-x86_64.${ASM_EXT}
+    rsaz-avx2.${ASM_EXT}
+
+    rsaz_exp.c
+  )
+elseif ("${ARCH}" STREQUAL "x86")
+  set(
+    BN_ARCH_SOURCES
+
+    bn-586.${ASM_EXT}
+    co-586.${ASM_EXT}
+    x86-mont.${ASM_EXT}
+  )
+elseif ("${ARCH}" STREQUAL "arm")
+  set(
+    BN_ARCH_SOURCES
+
+    armv4-mont.${ASM_EXT}
+  )
+elseif ("${ARCH}" STREQUAL "aarch64")
+  set(
+    BN_ARCH_SOURCES
+
+    armv8-mont.${ASM_EXT}
+  )
+endif()
+
+# Object library holding the portable bignum sources followed by whatever
+# architecture-specific sources BN_ARCH_SOURCES names.
+add_library(bn OBJECT
+  add.c
+  asm/x86_64-gcc.c
+  bn.c
+  bn_asn1.c
+  cmp.c
+  convert.c
+  ctx.c
+  div.c
+  exponentiation.c
+  generic.c
+  gcd.c
+  kronecker.c
+  montgomery.c
+  montgomery_inv.c
+  mul.c
+  prime.c
+  random.c
+  shift.c
+  sqrt.c
+  ${BN_ARCH_SOURCES}
+)
+
+# Register one perlasm rule per assembly script, for every architecture.
+# perlasm() is a project helper defined outside this file; presumably it
+# produces its first argument from the perl script given as its second --
+# confirm against its definition.
+foreach(bn_asm
+    x86_64-mont
+    x86_64-mont5
+    rsaz-x86_64
+    rsaz-avx2
+    bn-586
+    co-586
+    x86-mont
+    armv4-mont
+    armv8-mont)
+  perlasm(${bn_asm}.${ASM_EXT} asm/${bn_asm}.pl)
+endforeach()
+
+# Test binary: bn_test.cc plus the objects of the test_support library, linked
+# against crypto and attached to the all_tests target.
+add_executable(bn_test bn_test.cc $<TARGET_OBJECTS:test_support>)
+target_link_libraries(bn_test crypto)
+add_dependencies(all_tests bn_test)
@@ -115,7 +115,7 @@ int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
  min = b->top;
  dif = max - min;

-  if (!bn_wexpand(r, max + 1)) {
+  if (bn_wexpand(r, max + 1) == NULL) {
    return 0;
  }

@@ -191,7 +191,7 @@ int BN_add_word(BIGNUM *a, BN_ULONG w) {
  }

  if (w && i == a->top) {
-    if (!bn_wexpand(a, a->top + 1)) {
+    if (bn_wexpand(a, a->top + 1) == NULL) {
      return 0;
    }
    a->top++;
@@ -239,7 +239,7 @@ int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
  /* We are actually doing a - b :-) */

  max = (a->top > b->top) ? a->top : b->top;
-  if (!bn_wexpand(r, max)) {
+  if (bn_wexpand(r, max) == NULL) {
    return 0;
  }

@@ -273,7 +273,7 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
    return 0;
  }

-  if (!bn_wexpand(r, max)) {
+  if (bn_wexpand(r, max) == NULL) {
    return 0;
  }

@@ -314,7 +314,7 @@ int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
  }

  if (dif > 0 && rp != ap) {
-    OPENSSL_memcpy(rp, ap, sizeof(*rp) * dif);
+    memcpy(rp, ap, sizeof(*rp) * dif);
  }

  r->top = max;
@@ -16,7 +16,7 @@
 # [depending on key length, less for longer keys] on ARM920T, and
 # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
 # base and compiler generated code with in-lined umull and even umlal
-# instructions. The latter means that this code didn't really have an
+# instructions. The latter means that this code didn't really have an 
 # "advantage" of utilizing some "secret" instruction.
 #
 # The code is interoperable with Thumb ISA and is rather compact, less
@@ -45,7 +45,7 @@ else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
 if ($flavour && $flavour ne "void") {
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    open STDOUT,"| \"$^X\" $xlate $flavour $output";
@@ -38,7 +38,7 @@ $output  = shift;

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
 die "can't locate arm-xlate.pl";

 open OUT,"| \"$^X\" $xlate $flavour $output";
@@ -1,13 +1,13 @@
 #!/usr/bin/env perl

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open STDOUT,">$output";

-&asm_init($ARGV[0]);
+&asm_init($ARGV[0],$0);

 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -47,7 +47,7 @@ sub bn_mul_add_words
 		&movd("mm0",&wparam(3));	# mm0 = w
 		&pxor("mm1","mm1");		# mm1 = carry_in
 		&jmp(&label("maw_sse2_entry"));
-
+		
 	&set_label("maw_sse2_unrolled",16);
 		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]
 		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]
@@ -668,20 +668,20 @@ sub bn_sub_part_words
 	    &adc($c,0);
 	    &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
 	}
-
+	    
 	&comment("");
 	&add($b,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_neg_loop"));
-
+	    
 	&set_label("pw_neg_finish",0);
 	&mov($tmp2,&wparam(4));	# get dl
 	&mov($num,0);
 	&sub($num,$tmp2);
 	&and($num,7);
 	&jz(&label("pw_end"));
-
+	    
 	for ($i=0; $i<7; $i++)
 	{
 	    &comment("dl<0 Tail Round $i");
@@ -698,9 +698,9 @@ sub bn_sub_part_words
 	}

 	&jmp(&label("pw_end"));
-
+	
 	&set_label("pw_pos",0);
-
+	
 	&and($num,0xfffffff8);	# num / 8
 	&jz(&label("pw_pos_finish"));

@@ -715,18 +715,18 @@ sub bn_sub_part_words
 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
 	    &jnc(&label("pw_nc".$i));
 	}
-
+	    
 	&comment("");
 	&add($a,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_pos_loop"));
-
+	    
 	&set_label("pw_pos_finish",0);
 	&mov($num,&wparam(4));	# get dl
 	&and($num,7);
 	&jz(&label("pw_end"));
-
+	    
 	for ($i=0; $i<7; $i++)
 	{
 	    &comment("dl>0 Tail Round $i");
@@ -747,17 +747,17 @@ sub bn_sub_part_words
 	    &mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
 	    &set_label("pw_nc".$i,0);
 	}
-
+	    
 	&comment("");
 	&add($a,32);
 	&add($r,32);
 	&sub($num,8);
 	&jnz(&label("pw_nc_loop"));
-
+	    
 	&mov($num,&wparam(4));	# get dl
 	&and($num,7);
 	&jz(&label("pw_nc_end"));
-
+	    
 	for ($i=0; $i<7; $i++)
 	{
 	    &mov($tmp1,&DWP($i*4,$a,"",0));	# *a
@@ -1,13 +1,13 @@
 #!/usr/local/bin/perl

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open STDOUT,">$output";

-&asm_init($ARGV[0]);
+&asm_init($ARGV[0],$0);

 &bn_mul_comba("bn_mul_comba8",8);
 &bn_mul_comba("bn_mul_comba4",4);
@@ -41,7 +41,7 @@ sub mul_add_c
 	 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1;	# laod next b
 	 ###
 	&adc($c2,0);
-	 # is pos > 1, it means it is the last loop
+	 # is pos > 1, it means it is the last loop 
 	 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;		# save r[];
 	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# laod next a
 	}
@@ -70,7 +70,7 @@ sub sqr_add_c
 	 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
 	 ###
 	&adc($c2,0);
-	 # is pos > 1, it means it is the last loop
+	 # is pos > 1, it means it is the last loop 
 	 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;		# save r[];
 	&mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;		# load next b
 	}
@@ -121,7 +121,7 @@ sub bn_mul_comba
 	$c2="ebp";
 	$a="esi";
 	$b="edi";
-
+	
 	$as=0;
 	$ae=0;
 	$bs=0;
@@ -136,9 +136,9 @@ sub bn_mul_comba
 	 &push("ebx");

 	&xor($c0,$c0);
-	 &mov("eax",&DWP(0,$a,"",0));	# load the first word
+	 &mov("eax",&DWP(0,$a,"",0));	# load the first word 
 	&xor($c1,$c1);
-	 &mov("edx",&DWP(0,$b,"",0));	# load the first second
+	 &mov("edx",&DWP(0,$b,"",0));	# load the first second 

 	for ($i=0; $i<$tot; $i++)
 		{
@@ -146,7 +146,7 @@ sub bn_mul_comba
 		$bi=$bs;
 		$end=$be+1;

-		&comment("################## Calculate word $i");
+		&comment("################## Calculate word $i"); 

 		for ($j=$bs; $j<$end; $j++)
 			{
@@ -76,7 +76,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 # In upstream, this is controlled by shelling out to the compiler to check
@@ -84,8 +84,8 @@ die "can't locate x86_64-xlate.pl";
 # output, so this isn't useful anyway.
 #
 # TODO(davidben): Enable these after testing. $avx goes up to 2 and $addx to 1.
-$avx = 2;
-$addx = 1;
+$avx = 0;
+$addx = 0;

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
 *STDOUT = *OUT;
@@ -145,21 +145,13 @@ $code.=<<___;
 .type	rsaz_1024_sqr_avx2,\@function,5
 .align	64
 rsaz_1024_sqr_avx2:		# 702 cycles, 14% faster than rsaz_1024_mul_avx2
-.cfi_startproc
 	lea	(%rsp), %rax
-.cfi_def_cfa_register	%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -178,7 +170,6 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
-.cfi_def_cfa_register	%rbp
 	mov	%rdx, $np			# reassigned argument
 	sub	\$$FrameSize, %rsp
 	mov	$np, $tmp
@@ -368,7 +359,7 @@ $code.=<<___;
 	vpaddq		$TEMP1, $ACC1, $ACC1
 	vpmuludq	32*7-128($aap), $B2, $ACC2
 	 vpbroadcastq	32*5-128($tpa), $B2
-	vpaddq		32*11-448($tp1), $ACC2, $ACC2
+	vpaddq		32*11-448($tp1), $ACC2, $ACC2	

 	vmovdqu		$ACC6, 32*6-192($tp0)
 	vmovdqu		$ACC7, 32*7-192($tp0)
@@ -427,7 +418,7 @@ $code.=<<___;
 	vmovdqu		$ACC7, 32*16-448($tp1)
 	lea		8($tp1), $tp1

-	dec	$i
+	dec	$i        
 	jnz	.LOOP_SQR_1024
 ___
 $ZERO = $ACC9;
@@ -772,7 +763,7 @@ $code.=<<___;
 	vpblendd	\$3, $TEMP4, $TEMP5, $TEMP4
 	vpaddq		$TEMP3, $ACC7, $ACC7
 	vpaddq		$TEMP4, $ACC8, $ACC8
-
+     
 	vpsrlq		\$29, $ACC4, $TEMP1
 	vpand		$AND_MASK, $ACC4, $ACC4
 	vpsrlq		\$29, $ACC5, $TEMP2
@@ -811,10 +802,8 @@ $code.=<<___;

 	vzeroall
 	mov	%rbp, %rax
-.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
-.Lsqr_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -828,22 +817,14 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
-.cfi_restore	%r15
 	mov	-40(%rax),%r14
-.cfi_restore	%r14
 	mov	-32(%rax),%r13
-.cfi_restore	%r13
 	mov	-24(%rax),%r12
-.cfi_restore	%r12
 	mov	-16(%rax),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
-.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
-.cfi_def_cfa_register	%rsp
 .Lsqr_1024_epilogue:
 	ret
-.cfi_endproc
 .size	rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
 ___
 }
@@ -896,21 +877,13 @@ $code.=<<___;
 .type	rsaz_1024_mul_avx2,\@function,5
 .align	64
 rsaz_1024_mul_avx2:
-.cfi_startproc
 	lea	(%rsp), %rax
-.cfi_def_cfa_register	%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
 ___
 $code.=<<___ if ($win64);
 	vzeroupper
@@ -929,7 +902,6 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	%rax,%rbp
-.cfi_def_cfa_register	%rbp
 	vzeroall
 	mov	%rdx, $bp	# reassigned argument
 	sub	\$64,%rsp
@@ -1456,17 +1428,15 @@ $code.=<<___;
 	vpaddq		$TEMP4, $ACC8, $ACC8

 	vmovdqu		$ACC4, 128-128($rp)
-	vmovdqu		$ACC5, 160-128($rp)
+	vmovdqu		$ACC5, 160-128($rp)    
 	vmovdqu		$ACC6, 192-128($rp)
 	vmovdqu		$ACC7, 224-128($rp)
 	vmovdqu		$ACC8, 256-128($rp)
 	vzeroupper

 	mov	%rbp, %rax
-.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($win64);
-.Lmul_1024_in_tail:
 	movaps	-0xd8(%rax),%xmm6
 	movaps	-0xc8(%rax),%xmm7
 	movaps	-0xb8(%rax),%xmm8
@@ -1480,22 +1450,14 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	mov	-48(%rax),%r15
-.cfi_restore	%r15
 	mov	-40(%rax),%r14
-.cfi_restore	%r14
 	mov	-32(%rax),%r13
-.cfi_restore	%r13
 	mov	-24(%rax),%r12
-.cfi_restore	%r12
 	mov	-16(%rax),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rax),%rbx
-.cfi_restore	%rbx
 	lea	(%rax),%rsp		# restore %rsp
-.cfi_def_cfa_register	%rsp
 .Lmul_1024_epilogue:
 	ret
-.cfi_endproc
 .size	rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
 ___
 }
@@ -1614,10 +1576,8 @@ rsaz_1024_scatter5_avx2:
 .type	rsaz_1024_gather5_avx2,\@abi-omnipotent
 .align	32
 rsaz_1024_gather5_avx2:
-.cfi_startproc
 	vzeroupper
 	mov	%rsp,%r11
-.cfi_def_cfa_register	%r11
 ___
 $code.=<<___ if ($win64);
 	lea	-0x88(%rsp),%rax
@@ -1755,13 +1715,11 @@ $code.=<<___ if ($win64);
 	movaps	-0x38(%r11),%xmm13
 	movaps	-0x28(%r11),%xmm14
 	movaps	-0x18(%r11),%xmm15
+.LSEH_end_rsaz_1024_gather5:
 ___
 $code.=<<___;
 	lea	(%r11),%rsp
-.cfi_def_cfa_register	%rsp
 	ret
-.cfi_endproc
-.LSEH_end_rsaz_1024_gather5:
 .size	rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
 ___
 }
@@ -1772,8 +1730,7 @@ $code.=<<___;
 .type	rsaz_avx2_eligible,\@abi-omnipotent
 .align	32
 rsaz_avx2_eligible:
-	leaq	OPENSSL_ia32cap_P(%rip),%rax
-	mov	8(%rax),%eax
+	mov	OPENSSL_ia32cap_P+8(%rip),%eax
 ___
 $code.=<<___	if ($addx);
 	mov	\$`1<<8|1<<19`,%ecx
@@ -1835,17 +1792,14 @@ rsaz_se_handler:
 	cmp	%r10,%rbx		# context->Rip<prologue label
 	jb	.Lcommon_seh_tail

+	mov	152($context),%rax	# pull context->Rsp
+
 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lcommon_seh_tail

-	mov	160($context),%rbp	# pull context->Rbp
-
-	mov	8(%r11),%r10d		# HandlerData[2]
-	lea	(%rsi,%r10),%r10	# "in tail" label
-	cmp	%r10,%rbx		# context->Rip>="in tail" label
-	cmovc	%rbp,%rax
+	mov	160($context),%rax	# pull context->Rbp

 	mov	-48(%rax),%r15
 	mov	-40(%rax),%r14
@@ -1923,13 +1877,11 @@ rsaz_se_handler:
 .LSEH_info_rsaz_1024_sqr_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue,.Lsqr_1024_in_tail
-	.long	0
+	.rva	.Lsqr_1024_body,.Lsqr_1024_epilogue
 .LSEH_info_rsaz_1024_mul_avx2:
 	.byte	9,0,0,0
 	.rva	rsaz_se_handler
-	.rva	.Lmul_1024_body,.Lmul_1024_epilogue,.Lmul_1024_in_tail
-	.long	0
+	.rva	.Lmul_1024_body,.Lmul_1024_epilogue
 .LSEH_info_rsaz_1024_gather5:
 	.byte	0x01,0x36,0x17,0x0b
 	.byte	0x36,0xf8,0x09,0x00	# vmovaps 0x90(rsp),xmm15
@@ -27,13 +27,13 @@
 # gives ~40% on rsa512 sign benchmark...

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-push(@INC,"${dir}","${dir}../../../perlasm");
+push(@INC,"${dir}","${dir}../../perlasm");
 require "x86asm.pl";

 $output = pop;
 open STDOUT,">$output";
-
-&asm_init($ARGV[0]);
+ 
+&asm_init($ARGV[0],$0);

 $sse2=0;
 for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
@@ -66,57 +66,33 @@ $frame=32;				# size of above frame rounded up to 16n

 	&lea	("esi",&wparam(0));	# put aside pointer to argument block
 	&lea	("edx",&wparam(1));	# load ap
+	&mov	("ebp","esp");		# saved stack pointer!
 	&add	("edi",2);		# extra two words on top of tp
 	&neg	("edi");
-	&lea	("ebp",&DWP(-$frame,"esp","edi",4));	# future alloca($frame+4*(num+2))
+	&lea	("esp",&DWP(-$frame,"esp","edi",4));	# alloca($frame+4*(num+2))
 	&neg	("edi");

 	# minimize cache contention by arraning 2K window between stack
 	# pointer and ap argument [np is also position sensitive vector,
 	# but it's assumed to be near ap, as it's allocated at ~same
 	# time].
-	&mov	("eax","ebp");
+	&mov	("eax","esp");
 	&sub	("eax","edx");
 	&and	("eax",2047);
-	&sub	("ebp","eax");		# this aligns sp and ap modulo 2048
+	&sub	("esp","eax");		# this aligns sp and ap modulo 2048

-	&xor	("edx","ebp");
+	&xor	("edx","esp");
 	&and	("edx",2048);
 	&xor	("edx",2048);
-	&sub	("ebp","edx");		# this splits them apart modulo 4096
+	&sub	("esp","edx");		# this splits them apart modulo 4096

-	&and	("ebp",-64);		# align to cache line
-
-	# An OS-agnostic version of __chkstk.
-	#
-	# Some OSes (Windows) insist on stack being "wired" to
-	# physical memory in strictly sequential manner, i.e. if stack
-	# allocation spans two pages, then reference to farmost one can
-	# be punishable by SEGV. But page walking can do good even on
-	# other OSes, because it guarantees that villain thread hits
-	# the guard page before it can make damage to innocent one...
-	&mov	("eax","esp");
-	&sub	("eax","ebp");
-	&and	("eax",-4096);
-	&mov	("edx","esp");		# saved stack pointer!
-	&lea	("esp",&DWP(0,"ebp","eax"));
-	&mov	("eax",&DWP(0,"esp"));
-	&cmp	("esp","ebp");
-	&ja	(&label("page_walk"));
-	&jmp	(&label("page_walk_done"));
-
-&set_label("page_walk",16);
-	&lea	("esp",&DWP(-4096,"esp"));
-	&mov	("eax",&DWP(0,"esp"));
-	&cmp	("esp","ebp");
-	&ja	(&label("page_walk"));
-&set_label("page_walk_done");
+	&and	("esp",-64);		# align to cache line

 	################################# load argument block...
 	&mov	("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
 	&mov	("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
 	&mov	("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
-	&mov	("ebp",&DWP(3*4,"esi"));# const BN_ULONG *np
+	&mov	("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
 	&mov	("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
 	#&mov	("edi",&DWP(5*4,"esi"));# int num

@@ -124,11 +100,11 @@ $frame=32;				# size of above frame rounded up to 16n
 	&mov	($_rp,"eax");		# ... save a copy of argument block
 	&mov	($_ap,"ebx");
 	&mov	($_bp,"ecx");
-	&mov	($_np,"ebp");
+	&mov	($_np,"edx");
 	&mov	($_n0,"esi");
 	&lea	($num,&DWP(-3,"edi"));	# num=num-1 to assist modulo-scheduling
 	#&mov	($_num,$num);		# redundant as $num is not reused
-	&mov	($_sp,"edx");		# saved stack pointer!
+	&mov	($_sp,"ebp");		# saved stack pointer!

 if($sse2) {
 $acc0="mm0";	# mmx register bank layout
@@ -294,7 +270,7 @@ if (0) {
 	&xor	("eax","eax");	# signal "not fast enough [yet]"
 	&jmp	(&label("just_leave"));
 	# While the below code provides competitive performance for
-	# all key lengths on modern Intel cores, it's still more
+	# all key lengths on modern Intel cores, it's still more
 	# than 10% slower for 4096-bit key elsewhere:-( "Competitive"
 	# means compared to the original integer-only assembler.
 	# 512-bit RSA sign is better by ~40%, but that's about all
@@ -597,16 +573,15 @@ $sbit=$num;
 	&jge	(&label("sub"));

 	&sbb	("eax",0);			# handle upmost overflow bit
-	&and	($tp,"eax");
-	&not	("eax");
-	&mov	($np,$rp);
-	&and	($np,"eax");
-	&or	($tp,$np);			# tp=carry?tp:rp

 &set_label("copy",16);				# copy or in-place refresh
-	&mov	("eax",&DWP(0,$tp,$num,4));
-	&mov	(&DWP(0,$rp,$num,4),"eax");	# rp[i]=tp[i]
-	&mov	(&DWP($frame,"esp",$num,4),$j);	# zap temporary vector
+	# Copy loop body: for each word, pick tp[i] or rp[i] with the mask left
+	# in eax by the preceding sbb, store it to rp[i], and overwrite tp[i]
+	# with $j. Every statement ends in ';' so each call is its own
+	# statement; without it Perl joins two adjacent &mov calls into one
+	# bitwise-AND expression.
+	&mov	("edx",&DWP(0,$tp,$num,4));
+	&mov	($np,&DWP(0,$rp,$num,4));
+	&xor	("edx",$np);			# conditional select:
+	&and	("edx","eax");			# ((tp[i]^rp[i]) & eax) ^ rp[i]
+	&xor	("edx",$np);
+	&mov	(&DWP(0,$tp,$num,4),$j);	# zap temporary vector
+	&mov	(&DWP(0,$rp,$num,4),"edx");	# rp[i]=selected word
 	&dec	($num);
 	&jge	(&label("copy"));

@@ -200,8 +200,7 @@ BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
      "	subq	%0,%0		\n" /* clear carry */
      "	jmp	1f		\n"
      ".p2align 4			\n"
-      "1:"
-      "	movq	(%4,%2,8),%0	\n"
+      "1:	movq	(%4,%2,8),%0	\n"
      "	adcq	(%5,%2,8),%0	\n"
      "	movq	%0,(%3,%2,8)	\n"
      "	lea	1(%2),%2	\n"
@@ -227,8 +226,7 @@ BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
      "	subq	%0,%0		\n" /* clear borrow */
      "	jmp	1f		\n"
      ".p2align 4			\n"
-      "1:"
-      "	movq	(%4,%2,8),%0	\n"
+      "1:	movq	(%4,%2,8),%0	\n"
      "	sbbq	(%5,%2,8),%0	\n"
      "	movq	%0,(%3,%2,8)	\n"
      "	lea	1(%2),%2	\n"
@@ -531,12 +529,4 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  r[7] = c2;
 }

-#undef mul_add
-#undef mul
-#undef sqr
-#undef mul_add_c
-#undef sqr_add_c
-#undef mul_add_c2
-#undef sqr_add_c2
-
 #endif  /* !NO_ASM && X86_64 && __GNUC__ */
@@ -47,7 +47,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
@@ -84,18 +84,13 @@ $code=<<___;
 .type	bn_mul_mont,\@function,6
 .align	16
 bn_mul_mont:
-.cfi_startproc
-	mov	${num}d,${num}d
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 	test	\$3,${num}d
 	jnz	.Lmul_enter
 	cmp	\$8,${num}d
 	jb	.Lmul_enter
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%r11
-	mov	8(%r11),%r11d
+	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
 ___
 $code.=<<___;
 	cmp	$ap,$bp
@@ -107,50 +102,20 @@ $code.=<<___;
 .align	16
 .Lmul_enter:
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15

-	neg	$num
+	mov	${num}d,${num}d
+	lea	2($num),%r10
 	mov	%rsp,%r11
-	lea	-16(%rsp,$num,8),%r10	# future alloca(8*(num+2))
-	neg	$num			# restore $num
-	and	\$-1024,%r10		# minimize TLB usage
+	neg	%r10
+	lea	(%rsp,%r10,8),%rsp	# tp=alloca(8*(num+2))
+	and	\$-1024,%rsp		# minimize TLB usage

-	# An OS-agnostic version of __chkstk.
-	#
-	# Some OSes (Windows) insist on stack being "wired" to
-	# physical memory in strictly sequential manner, i.e. if stack
-	# allocation spans two pages, then reference to farmost one can
-	# be punishable by SEGV. But page walking can do good even on
-	# other OSes, because it guarantees that villain thread hits
-	# the guard page before it can make damage to innocent one...
-	sub	%r10,%r11
-	and	\$-4096,%r11
-	lea	(%r10,%r11),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul_page_walk
-	jmp	.Lmul_page_walk_done
-
-.align	16
-.Lmul_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul_page_walk
-.Lmul_page_walk_done:
-
-	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
-.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
+	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul_body:
 	mov	$bp,%r12		# reassign $bp
 ___
@@ -296,51 +261,40 @@ $code.=<<___;
 	mov	$num,$j			# j=num
 	jmp	.Lsub
 .align	16
-.Lsub:
-	sbb	($np,$i,8),%rax
+.Lsub:	sbb	($np,$i,8),%rax
 	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[i]
 	mov	8($ap,$i,8),%rax	# tp[i+1]
 	lea	1($i),$i		# i++
-	dec	$j			# doesnn't affect CF!
+	dec	$j			# doesn't affect CF!
 	jnz	.Lsub

 	sbb	\$0,%rax		# handle upmost overflow bit
 	xor	$i,$i
-	and	%rax,$ap
-	not	%rax
-	mov	$rp,$np
-	and	%rax,$np
 	mov	$num,$j			# j=num
-	or	$np,$ap			# ap=borrow?tp:rp
 .align	16
 .Lcopy:					# copy or in-place refresh
-	mov	($ap,$i,8),%rax
+	mov	(%rsp,$i,8),$ap
+	mov	($rp,$i,8),$np
+	xor	$np,$ap			# conditional select:
+	and	%rax,$ap		# ((ap ^ np) & %rax) ^ np
+	xor	$np,$ap			# ap = borrow?tp:rp
 	mov	$i,(%rsp,$i,8)		# zap temporary vector
-	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]
+	mov	$ap,($rp,$i,8)		# rp[i]=tp[i]
 	lea	1($i),$i
 	sub	\$1,$j
 	jnz	.Lcopy

 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
-	mov	-48(%rsi),%r15
-.cfi_restore	%r15
-	mov	-40(%rsi),%r14
-.cfi_restore	%r14
-	mov	-32(%rsi),%r13
-.cfi_restore	%r13
-	mov	-24(%rsi),%r12
-.cfi_restore	%r12
-	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
-	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
-	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
+	mov	(%rsi),%r15
+	mov	8(%rsi),%r14
+	mov	16(%rsi),%r13
+	mov	24(%rsi),%r12
+	mov	32(%rsi),%rbp
+	mov	40(%rsi),%rbx
+	lea	48(%rsi),%rsp
 .Lmul_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mul_mont,.-bn_mul_mont
 ___
 {{{
@@ -350,10 +304,6 @@ $code.=<<___;
 .type	bn_mul4x_mont,\@function,6
 .align	16
 bn_mul4x_mont:
-.cfi_startproc
-	mov	${num}d,${num}d
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -363,41 +313,20 @@ $code.=<<___ if ($addx);
 ___
 $code.=<<___;
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15

-	neg	$num
+	mov	${num}d,${num}d
+	lea	4($num),%r10
 	mov	%rsp,%r11
-	lea	-32(%rsp,$num,8),%r10	# future alloca(8*(num+4))
-	neg	$num			# restore
-	and	\$-1024,%r10		# minimize TLB usage
+	neg	%r10
+	lea	(%rsp,%r10,8),%rsp	# tp=alloca(8*(num+4))
+	and	\$-1024,%rsp		# minimize TLB usage

-	sub	%r10,%r11
-	and	\$-4096,%r11
-	lea	(%r10,%r11),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul4x_page_walk
-	jmp	.Lmul4x_page_walk_done
-
-.Lmul4x_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul4x_page_walk
-.Lmul4x_page_walk_done:
-
-	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
-.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
+	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=%rsp
 .Lmul4x_body:
 	mov	$rp,16(%rsp,$num,8)	# tp[num+2]=$rp
 	mov	%rdx,%r12		# reassign $bp
@@ -704,11 +633,9 @@ ___
 my @ri=("%rax","%rdx",$m0,$m1);
 $code.=<<___;
 	mov	16(%rsp,$num,8),$rp	# restore $rp
-	lea	-4($num),$j
 	mov	0(%rsp),@ri[0]		# tp[0]
-	pxor	%xmm0,%xmm0
 	mov	8(%rsp),@ri[1]		# tp[1]
-	shr	\$2,$j			# j=num/4-1
+	shr	\$2,$num		# num/=4
 	lea	(%rsp),$ap		# borrow ap for tp
 	xor	$i,$i			# i=0 and clear CF!

@@ -716,6 +643,7 @@ $code.=<<___;
 	mov	16($ap),@ri[2]		# tp[2]
 	mov	24($ap),@ri[3]		# tp[3]
 	sbb	8($np),@ri[1]
+	lea	-1($num),$j		# j=num/4-1
 	jmp	.Lsub4x
 .align	16
 .Lsub4x:
@@ -743,58 +671,50 @@ $code.=<<___;
 	mov	@ri[2],16($rp,$i,8)	# rp[i]=tp[i]-np[i]

 	sbb	\$0,@ri[0]		# handle upmost overflow bit
+	mov	@ri[0],%xmm0
+	punpcklqdq %xmm0,%xmm0		# extend mask to 128 bits
 	mov	@ri[3],24($rp,$i,8)	# rp[i]=tp[i]-np[i]
 	xor	$i,$i			# i=0
-	and	@ri[0],$ap
-	not	@ri[0]
-	mov	$rp,$np
-	and	@ri[0],$np
-	lea	-4($num),$j
-	or	$np,$ap			# ap=borrow?tp:rp
-	shr	\$2,$j			# j=num/4-1

-	movdqu	($ap),%xmm1
-	movdqa	%xmm0,(%rsp)
-	movdqu	%xmm1,($rp)
+	mov	$num,$j
+	pxor	%xmm5,%xmm5
 	jmp	.Lcopy4x
 .align	16
-.Lcopy4x:					# copy or in-place refresh
-	movdqu	16($ap,$i),%xmm2
-	movdqu	32($ap,$i),%xmm1
-	movdqa	%xmm0,16(%rsp,$i)
-	movdqu	%xmm2,16($rp,$i)
-	movdqa	%xmm0,32(%rsp,$i)
-	movdqu	%xmm1,32($rp,$i)
+.Lcopy4x:				# copy or in-place refresh
+	movdqu	(%rsp,$i),%xmm2
+	movdqu  16(%rsp,$i),%xmm4
+	movdqu	($rp,$i),%xmm1
+	movdqu	16($rp,$i),%xmm3
+	pxor	%xmm1,%xmm2		# conditional select
+	pxor	%xmm3,%xmm4
+	pand	%xmm0,%xmm2
+	pand	%xmm0,%xmm4
+	pxor	%xmm1,%xmm2
+	pxor	%xmm3,%xmm4
+	movdqu	%xmm2,($rp,$i)
+	movdqu  %xmm4,16($rp,$i)
+	movdqa	%xmm5,(%rsp,$i)		# zap temporary vectors
+	movdqa	%xmm5,16(%rsp,$i)
+
 	lea	32($i),$i
 	dec	$j
 	jnz	.Lcopy4x

-	movdqu	16($ap,$i),%xmm2
-	movdqa	%xmm0,16(%rsp,$i)
-	movdqu	%xmm2,16($rp,$i)
+	shl	\$2,$num
 ___
 }
 $code.=<<___;
 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
-.cfi_def_cfa	%rsi, 8
 	mov	\$1,%rax
-	mov	-48(%rsi),%r15
-.cfi_restore	%r15
-	mov	-40(%rsi),%r14
-.cfi_restore	%r14
-	mov	-32(%rsi),%r13
-.cfi_restore	%r13
-	mov	-24(%rsi),%r12
-.cfi_restore	%r12
-	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
-	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
-	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
+	mov	(%rsi),%r15
+	mov	8(%rsi),%r14
+	mov	16(%rsi),%r13
+	mov	24(%rsi),%r12
+	mov	32(%rsi),%rbp
+	mov	40(%rsi),%rbx
+	lea	48(%rsi),%rsp
 .Lmul4x_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mul4x_mont,.-bn_mul4x_mont
 ___
 }}}
@@ -822,23 +742,14 @@ $code.=<<___;
 .type	bn_sqr8x_mont,\@function,6
 .align	32
 bn_sqr8x_mont:
-.cfi_startproc
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lsqr8x_enter:
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lsqr8x_prologue:

 	mov	${num}d,%r10d
 	shl	\$3,${num}d		# convert $num to bytes
@@ -851,49 +762,30 @@ bn_sqr8x_mont:
 	# do its job.
 	#
 	lea	-64(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	mov	($n0),$n0		# *n0
 	sub	$aptr,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lsqr8x_sp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-64(%rbp,$num,2),%rbp	# future alloca(frame+2*$num)
+	sub	%r11,%rsp		# align with $aptr
+	lea	-64(%rsp,$num,2),%rsp	# alloca(frame+2*$num)
 	jmp	.Lsqr8x_sp_done

 .align	32
 .Lsqr8x_sp_alt:
 	lea	4096-64(,$num,2),%r10	# 4096-frame-2*$num
-	lea	-64(%rbp,$num,2),%rbp	# future alloca(frame+2*$num)
+	lea	-64(%rsp,$num,2),%rsp	# alloca(frame+2*$num)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
+	sub	%r11,%rsp
 .Lsqr8x_sp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lsqr8x_page_walk
-	jmp	.Lsqr8x_page_walk_done
-
-.align	16
-.Lsqr8x_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lsqr8x_page_walk
-.Lsqr8x_page_walk_done:
-
+	and	\$-64,%rsp
 	mov	$num,%r10
 	neg	$num

 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lsqr8x_body:

 	movq	$nptr, %xmm2		# save pointer to modulus
@@ -902,8 +794,7 @@ bn_sqr8x_mont:
 	movq	%r10, %xmm3		# -$num
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%rax
-	mov	8(%rax),%eax
+	mov	OPENSSL_ia32cap_P+8(%rip),%eax
 	and	\$0x80100,%eax
 	cmp	\$0x80100,%eax
 	jne	.Lsqr8x_nox
@@ -964,7 +855,6 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	jmp	.Lsqr8x_cond_copy

 .align	32
@@ -994,22 +884,14 @@ $code.=<<___;

 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lsqr8x_epilogue:
 	ret
-.cfi_endproc
 .size	bn_sqr8x_mont,.-bn_sqr8x_mont
 ___
 }}}
@@ -1021,48 +903,23 @@ $code.=<<___;
 .type	bn_mulx4x_mont,\@function,6
 .align	32
 bn_mulx4x_mont:
-.cfi_startproc
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lmulx4x_prologue:

 	shl	\$3,${num}d		# convert $num to bytes
+	.byte	0x67
 	xor	%r10,%r10
 	sub	$num,%r10		# -$num
 	mov	($n0),$n0		# *n0
-	lea	-72(%rsp,%r10),%rbp	# future alloca(frame+$num+8)
-	and	\$-128,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmulx4x_page_walk
-	jmp	.Lmulx4x_page_walk_done
-
-.align	16
-.Lmulx4x_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmulx4x_page_walk
-.Lmulx4x_page_walk_done:
-
+	lea	-72(%rsp,%r10),%rsp	# alloca(frame+$num+8)
 	lea	($bp,$num),%r10
+	and	\$-128,%rsp
 	##############################################################
 	# Stack layout
 	# +0	num
@@ -1082,7 +939,6 @@ bn_mulx4x_mont:
 	mov	$n0, 24(%rsp)		# save *n0
 	mov	$rp, 32(%rsp)		# save $rp
 	mov	%rax,40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 	mov	$num,48(%rsp)		# inner counter
 	jmp	.Lmulx4x_body

@@ -1332,7 +1188,6 @@ $code.=<<___;
 	pxor	%xmm0,%xmm0
 	pshufd	\$0,%xmm1,%xmm1
 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	jmp	.Lmulx4x_cond_copy

 .align	32
@@ -1362,22 +1217,14 @@ $code.=<<___;

 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mulx4x_mont,.-bn_mulx4x_mont
 ___
 }}}
@@ -1430,8 +1277,22 @@ mul_handler:

 	mov	192($context),%r10	# pull $num
 	mov	8(%rax,%r10,8),%rax	# pull saved stack pointer
+	lea	48(%rax),%rax

-	jmp	.Lcommon_pop_regs
+	mov	-8(%rax),%rbx
+	mov	-16(%rax),%rbp
+	mov	-24(%rax),%r12
+	mov	-32(%rax),%r13
+	mov	-40(%rax),%r14
+	mov	-48(%rax),%r15
+	mov	%rbx,144($context)	# restore context->Rbx
+	mov	%rbp,160($context)	# restore context->Rbp
+	mov	%r12,216($context)	# restore context->R12
+	mov	%r13,224($context)	# restore context->R13
+	mov	%r14,232($context)	# restore context->R14
+	mov	%r15,240($context)	# restore context->R15
+
+	jmp	.Lcommon_seh_tail
 .size	mul_handler,.-mul_handler

 .type	sqr_handler,\@abi-omnipotent
@@ -1456,24 +1317,18 @@ sqr_handler:

 	mov	0(%r11),%r10d		# HandlerData[0]
 	lea	(%rsi,%r10),%r10	# end of prologue label
-	cmp	%r10,%rbx		# context->Rip<.Lsqr_prologue
-	jb	.Lcommon_seh_tail
-
-	mov	4(%r11),%r10d		# HandlerData[1]
-	lea	(%rsi,%r10),%r10	# body label
 	cmp	%r10,%rbx		# context->Rip<.Lsqr_body
-	jb	.Lcommon_pop_regs
+	jb	.Lcommon_seh_tail

 	mov	152($context),%rax	# pull context->Rsp

-	mov	8(%r11),%r10d		# HandlerData[2]
+	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=.Lsqr_epilogue
 	jae	.Lcommon_seh_tail

 	mov	40(%rax),%rax		# pull saved stack pointer

-.Lcommon_pop_regs:
 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
 	mov	-24(%rax),%r12
@@ -1560,15 +1415,13 @@ $code.=<<___;
 .LSEH_info_bn_sqr8x_mont:
 	.byte	9,0,0,0
 	.rva	sqr_handler
-	.rva	.Lsqr8x_prologue,.Lsqr8x_body,.Lsqr8x_epilogue		# HandlerData[]
-.align	8
+	.rva	.Lsqr8x_body,.Lsqr8x_epilogue	# HandlerData[]
 ___
 $code.=<<___ if ($addx);
 .LSEH_info_bn_mulx4x_mont:
 	.byte	9,0,0,0
 	.rva	sqr_handler
-	.rva	.Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue	# HandlerData[]
-.align	8
+	.rva	.Lmulx4x_body,.Lmulx4x_epilogue	# HandlerData[]
 ___
 }

@@ -32,7 +32,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
 die "can't locate x86_64-xlate.pl";

 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
@@ -73,70 +73,35 @@ $code=<<___;
 .type	bn_mul_mont_gather5,\@function,6
 .align	64
 bn_mul_mont_gather5:
-.cfi_startproc
-	mov	${num}d,${num}d
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 	test	\$7,${num}d
 	jnz	.Lmul_enter
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%r11
-	mov	8(%r11),%r11d
+	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
 ___
 $code.=<<___;
 	jmp	.Lmul4x_enter

 .align	16
 .Lmul_enter:
+	mov	${num}d,${num}d
+	mov	%rsp,%rax
 	movd	`($win64?56:8)`(%rsp),%xmm5	# load 7th argument
-	push	%rbx
-.cfi_push	%rbx
-	push	%rbp
-.cfi_push	%rbp
-	push	%r12
-.cfi_push	%r12
-	push	%r13
-.cfi_push	%r13
-	push	%r14
-.cfi_push	%r14
-	push	%r15
-.cfi_push	%r15
-
-	neg	$num
-	mov	%rsp,%r11
-	lea	-280(%rsp,$num,8),%r10	# future alloca(8*(num+2)+256+8)
-	neg	$num			# restore $num
-	and	\$-1024,%r10		# minimize TLB usage
-
-	# An OS-agnostic version of __chkstk.
-	#
-	# Some OSes (Windows) insist on stack being "wired" to
-	# physical memory in strictly sequential manner, i.e. if stack
-	# allocation spans two pages, then reference to farmost one can
-	# be punishable by SEGV. But page walking can do good even on
-	# other OSes, because it guarantees that villain thread hits
-	# the guard page before it can make damage to innocent one...
-	sub	%r10,%r11
-	and	\$-4096,%r11
-	lea	(%r10,%r11),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul_page_walk
-	jmp	.Lmul_page_walk_done
-
-.Lmul_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r11
-	cmp	%r10,%rsp
-	ja	.Lmul_page_walk
-.Lmul_page_walk_done:
-
 	lea	.Linc(%rip),%r10
-	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
-.cfi_cfa_expression	%rsp+8,$num,8,mul,plus,deref,+8
-.Lmul_body:
+	push	%rbx
+	push	%rbp
+	push	%r12
+	push	%r13
+	push	%r14
+	push	%r15

+	lea	2($num),%r11
+	neg	%r11
+	lea	-264(%rsp,%r11,8),%rsp	# tp=alloca(8*(num+2)+256+8)
+	and	\$-1024,%rsp		# minimize TLB usage
+
+	mov	%rax,8(%rsp,$num,8)	# tp[num+1]=%rsp
+.Lmul_body:
 	lea	128($bp),%r12		# reassign $bp (+size optimization)
 ___
 		$bp="%r12";
@@ -396,8 +361,7 @@ $code.=<<___;
 	mov	$num,$j			# j=num
 	jmp	.Lsub
 .align	16
-.Lsub:
-	sbb	($np,$i,8),%rax
+.Lsub:	sbb	($np,$i,8),%rax
 	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]-np[i]
 	mov	8($ap,$i,8),%rax	# tp[i+1]
 	lea	1($i),$i		# i++
@@ -406,42 +370,32 @@ $code.=<<___;

 	sbb	\$0,%rax		# handle upmost overflow bit
 	xor	$i,$i
-	and	%rax,$ap
-	not	%rax
-	mov	$rp,$np
-	and	%rax,$np
 	mov	$num,$j			# j=num
-	or	$np,$ap			# ap=borrow?tp:rp
 .align	16
 .Lcopy:					# copy or in-place refresh
-	mov	($ap,$i,8),%rax
+	mov	(%rsp,$i,8),$ap
+	mov	($rp,$i,8),$np
+	xor	$np,$ap			# conditional select:
+	and	%rax,$ap		# ((ap ^ np) & %rax) ^ np
+	xor	$np,$ap			# ap = borrow?tp:rp
 	mov	$i,(%rsp,$i,8)		# zap temporary vector
-	mov	%rax,($rp,$i,8)		# rp[i]=tp[i]
+	mov	$ap,($rp,$i,8)		# rp[i]=borrow?tp[i]:rp[i]
 	lea	1($i),$i
 	sub	\$1,$j
 	jnz	.Lcopy

 	mov	8(%rsp,$num,8),%rsi	# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax

 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lmul_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
 ___
 {{{
@@ -451,10 +405,6 @@ $code.=<<___;
 .type	bn_mul4x_mont_gather5,\@function,6
 .align	32
 bn_mul4x_mont_gather5:
-.cfi_startproc
-	.byte	0x67
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lmul4x_enter:
 ___
 $code.=<<___ if ($addx);
@@ -463,19 +413,14 @@ $code.=<<___ if ($addx);
 	je	.Lmulx4x_enter
 ___
 $code.=<<___;
+	.byte	0x67
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lmul4x_prologue:

 	.byte	0x67
 	shl	\$3,${num}d		# convert $num to bytes
@@ -492,70 +437,43 @@ $code.=<<___;
 	# calculated from 7th argument, the index.]
 	#
 	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	sub	$rp,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lmul4xsp_alt
-	sub	%r11,%rbp		# align with $rp
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*num*8+256)
+	sub	%r11,%rsp		# align with $rp
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*num*8+256)
 	jmp	.Lmul4xsp_done

 .align	32
 .Lmul4xsp_alt:
 	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*num*8+256)
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*num*8+256)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
+	sub	%r11,%rsp
 .Lmul4xsp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmul4x_page_walk
-	jmp	.Lmul4x_page_walk_done
-
-.Lmul4x_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmul4x_page_walk
-.Lmul4x_page_walk_done:
-
+	and	\$-64,%rsp
 	neg	$num

 	mov	%rax,40(%rsp)
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmul4x_body:

 	call	mul4x_internal

 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax

 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lmul4x_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

 .type	mul4x_internal,\@abi-omnipotent
@@ -1067,7 +985,7 @@ my $bptr="%rdx";	# const void *table,
 my $nptr="%rcx";	# const BN_ULONG *nptr,
 my $n0  ="%r8";		# const BN_ULONG *n0);
 my $num ="%r9";		# int num, has to be divisible by 8
-			# int pwr
+			# int pwr 

 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
 my @A0=("%r10","%r11");
@@ -1079,31 +997,21 @@ $code.=<<___;
 .type	bn_power5,\@function,6
 .align	32
 bn_power5:
-.cfi_startproc
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%r11
-	mov	8(%r11),%r11d
+	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
 	and	\$0x80108,%r11d
 	cmp	\$0x80108,%r11d		# check for AD*X+BMI2+BMI1
 	je	.Lpowerx5_enter
 ___
 $code.=<<___;
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lpower5_prologue:

 	shl	\$3,${num}d		# convert $num to bytes
 	lea	($num,$num,2),%r10d	# 3*$num
@@ -1118,42 +1026,25 @@ $code.=<<___;
 	# calculated from 7th argument, the index.]
 	#
 	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	sub	$rptr,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lpwr_sp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*num*8+256)
+	sub	%r11,%rsp		# align with $rptr
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*num*8+256)
 	jmp	.Lpwr_sp_done

 .align	32
 .Lpwr_sp_alt:
 	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*num*8+256)
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*num*8+256)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
+	sub	%r11,%rsp
 .Lpwr_sp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lpwr_page_walk
-	jmp	.Lpwr_page_walk_done
-
-.Lpwr_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lpwr_page_walk
-.Lpwr_page_walk_done:
-
-	mov	$num,%r10
+	and	\$-64,%rsp
+	mov	$num,%r10	
 	neg	$num

 	##############################################################
@@ -1167,7 +1058,6 @@ $code.=<<___;
 	#
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpower5_body:
 	movq	$rptr,%xmm1		# save $rptr, used in sqr8x
 	movq	$nptr,%xmm2		# save $nptr
@@ -1194,25 +1084,16 @@ $code.=<<___;
 	call	mul4x_internal

 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lpower5_epilogue:
 	ret
-.cfi_endproc
 .size	bn_power5,.-bn_power5

 .globl	bn_sqr8x_internal
@@ -1971,7 +1852,6 @@ __bn_sqr8x_reduction:

 .align	32
 .L8x_tail_done:
-	xor	%rax,%rax
 	add	(%rdx),%r8		# can this overflow?
 	adc	\$0,%r9
 	adc	\$0,%r10
@@ -1979,8 +1859,10 @@ __bn_sqr8x_reduction:
 	adc	\$0,%r12
 	adc	\$0,%r13
 	adc	\$0,%r14
-	adc	\$0,%r15
-	adc	\$0,%rax
+	adc	\$0,%r15		# can't overflow, because we
+					# started with "overhung" part
+					# of multiplication
+	xor	%rax,%rax

 	neg	$carry
 .L8x_no_tail:
@@ -2072,7 +1954,7 @@ __bn_post4x_internal:
 	jnz	.Lsqr4x_sub

 	mov	$num,%r10		# prepare for back-to-back call
-	neg	$num			# restore $num
+	neg	$num			# restore $num	
 	ret
 .size	__bn_post4x_internal,.-__bn_post4x_internal
 ___
@@ -2092,23 +1974,14 @@ bn_from_montgomery:
 .type	bn_from_mont8x,\@function,6
 .align	32
 bn_from_mont8x:
-.cfi_startproc
 	.byte	0x67
 	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lfrom_prologue:

 	shl	\$3,${num}d		# convert $num to bytes
 	lea	($num,$num,2),%r10	# 3*$num in bytes
@@ -2123,42 +1996,25 @@ bn_from_mont8x:
 	# last operation, we use the opportunity to cleanse it.
 	#
 	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	sub	$rptr,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lfrom_sp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
+	sub	%r11,%rsp		# align with $rptr
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	jmp	.Lfrom_sp_done

 .align	32
 .Lfrom_sp_alt:
 	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
+	sub	%r11,%rsp
 .Lfrom_sp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lfrom_page_walk
-	jmp	.Lfrom_page_walk_done
-
-.Lfrom_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lfrom_page_walk
-.Lfrom_page_walk_done:
-
-	mov	$num,%r10
+	and	\$-64,%rsp
+	mov	$num,%r10	
 	neg	$num

 	##############################################################
@@ -2172,7 +2028,6 @@ bn_from_mont8x:
 	#
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lfrom_body:
 	mov	$num,%r11
 	lea	48(%rsp),%rax
@@ -2205,8 +2060,7 @@ bn_from_mont8x:
 	movq	%r10, %xmm3		# -num
 ___
 $code.=<<___ if ($addx);
-	leaq	OPENSSL_ia32cap_P(%rip),%r11
-	mov	8(%r11),%r11d
+	mov	OPENSSL_ia32cap_P+8(%rip),%r11d
 	and	\$0x80108,%r11d
 	cmp	\$0x80108,%r11d		# check for AD*X+BMI2+BMI1
 	jne	.Lfrom_mont_nox
@@ -2217,6 +2071,7 @@ $code.=<<___ if ($addx);

 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
+	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero

 .align	32
@@ -2228,12 +2083,11 @@ $code.=<<___;

 	pxor	%xmm0,%xmm0
 	lea	48(%rsp),%rax
+	mov	40(%rsp),%rsi		# restore %rsp
 	jmp	.Lfrom_mont_zero

 .align	32
 .Lfrom_mont_zero:
-	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	movdqa	%xmm0,16*0(%rax)
 	movdqa	%xmm0,16*1(%rax)
 	movdqa	%xmm0,16*2(%rax)
@@ -2244,22 +2098,14 @@ $code.=<<___;

 	mov	\$1,%rax
 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lfrom_epilogue:
 	ret
-.cfi_endproc
 .size	bn_from_mont8x,.-bn_from_mont8x
 ___
 }
@@ -2272,23 +2118,14 @@ $code.=<<___;
 .type	bn_mulx4x_mont_gather5,\@function,6
 .align	32
 bn_mulx4x_mont_gather5:
-.cfi_startproc
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lmulx4x_enter:
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lmulx4x_prologue:

 	shl	\$3,${num}d		# convert $num to bytes
 	lea	($num,$num,2),%r10	# 3*$num in bytes
@@ -2305,40 +2142,23 @@ bn_mulx4x_mont_gather5:
 	# calculated from 7th argument, the index.]
 	#
 	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	sub	$rp,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lmulx4xsp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
+	sub	%r11,%rsp		# align with $rp
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	jmp	.Lmulx4xsp_done

 .Lmulx4xsp_alt:
 	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
-.Lmulx4xsp_done:
-	and	\$-64,%rbp		# ensure alignment
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmulx4x_page_walk
-	jmp	.Lmulx4x_page_walk_done
-
-.Lmulx4x_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lmulx4x_page_walk
-.Lmulx4x_page_walk_done:
-
+	sub	%r11,%rsp
+.Lmulx4xsp_done:	
+	and	\$-64,%rsp		# ensure alignment
 	##############################################################
 	# Stack layout
 	# +0	-num
@@ -2353,31 +2173,21 @@ bn_mulx4x_mont_gather5:
 	#
 	mov	$n0, 32(%rsp)		# save *n0
 	mov	%rax,40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lmulx4x_body:
 	call	mulx4x_internal

 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax

 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lmulx4x_epilogue:
 	ret
-.cfi_endproc
 .size	bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

 .type	mulx4x_internal,\@abi-omnipotent
@@ -2755,23 +2565,14 @@ $code.=<<___;
 .type	bn_powerx5,\@function,6
 .align	32
 bn_powerx5:
-.cfi_startproc
-	mov	%rsp,%rax
-.cfi_def_cfa_register	%rax
 .Lpowerx5_enter:
+	mov	%rsp,%rax
 	push	%rbx
-.cfi_push	%rbx
 	push	%rbp
-.cfi_push	%rbp
 	push	%r12
-.cfi_push	%r12
 	push	%r13
-.cfi_push	%r13
 	push	%r14
-.cfi_push	%r14
 	push	%r15
-.cfi_push	%r15
-.Lpowerx5_prologue:

 	shl	\$3,${num}d		# convert $num to bytes
 	lea	($num,$num,2),%r10	# 3*$num in bytes
@@ -2786,42 +2587,25 @@ bn_powerx5:
 	# calculated from 7th argument, the index.]
 	#
 	lea	-320(%rsp,$num,2),%r11
-	mov	%rsp,%rbp
 	sub	$rptr,%r11
 	and	\$4095,%r11
 	cmp	%r11,%r10
 	jb	.Lpwrx_sp_alt
-	sub	%r11,%rbp		# align with $aptr
-	lea	-320(%rbp,$num,2),%rbp	# future alloca(frame+2*$num*8+256)
+	sub	%r11,%rsp		# align with $rptr
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	jmp	.Lpwrx_sp_done

 .align	32
 .Lpwrx_sp_alt:
 	lea	4096-320(,$num,2),%r10
-	lea	-320(%rbp,$num,2),%rbp	# alloca(frame+2*$num*8+256)
+	lea	-320(%rsp,$num,2),%rsp	# alloca(frame+2*$num*8+256)
 	sub	%r10,%r11
 	mov	\$0,%r10
 	cmovc	%r10,%r11
-	sub	%r11,%rbp
+	sub	%r11,%rsp
 .Lpwrx_sp_done:
-	and	\$-64,%rbp
-	mov	%rsp,%r11
-	sub	%rbp,%r11
-	and	\$-4096,%r11
-	lea	(%rbp,%r11),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lpwrx_page_walk
-	jmp	.Lpwrx_page_walk_done
-
-.Lpwrx_page_walk:
-	lea	-4096(%rsp),%rsp
-	mov	(%rsp),%r10
-	cmp	%rbp,%rsp
-	ja	.Lpwrx_page_walk
-.Lpwrx_page_walk_done:
-
-	mov	$num,%r10
+	and	\$-64,%rsp
+	mov	$num,%r10	
 	neg	$num

 	##############################################################
@@ -2842,7 +2626,6 @@ bn_powerx5:
 	movq	$bptr,%xmm4
 	mov	$n0,  32(%rsp)
 	mov	%rax, 40(%rsp)		# save original %rsp
-.cfi_cfa_expression	%rsp+40,deref,+8
 .Lpowerx5_body:

 	call	__bn_sqrx8x_internal
@@ -2865,26 +2648,17 @@ bn_powerx5:
 	call	mulx4x_internal

 	mov	40(%rsp),%rsi		# restore %rsp
-.cfi_def_cfa	%rsi,8
 	mov	\$1,%rax

 	mov	-48(%rsi),%r15
-.cfi_restore	%r15
 	mov	-40(%rsi),%r14
-.cfi_restore	%r14
 	mov	-32(%rsi),%r13
-.cfi_restore	%r13
 	mov	-24(%rsi),%r12
-.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
 	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
 .Lpowerx5_epilogue:
 	ret
-.cfi_endproc
 .size	bn_powerx5,.-bn_powerx5

 .globl	bn_sqrx8x_internal
@@ -3474,7 +3248,6 @@ __bn_sqrx8x_reduction:

 .align	32
 .Lsqrx8x_tail_done:
-	xor	%rax,%rax
 	add	24+8(%rsp),%r8		# can this overflow?
 	adc	\$0,%r9
 	adc	\$0,%r10
@@ -3482,8 +3255,10 @@ __bn_sqrx8x_reduction:
 	adc	\$0,%r12
 	adc	\$0,%r13
 	adc	\$0,%r14
-	adc	\$0,%r15
-	adc	\$0,%rax
+	adc	\$0,%r15		# can't overflow, because we
+					# started with "overhung" part
+					# of multiplication
+	mov	$carry,%rax		# xor	%rax,%rax

 	sub	16+8(%rsp),$carry	# mov 16(%rsp),%cf
 .Lsqrx8x_no_tail:			# %cf is 0 if jumped here
@@ -3498,7 +3273,7 @@ __bn_sqrx8x_reduction:
 	adc	8*5($tptr),%r13
 	adc	8*6($tptr),%r14
 	adc	8*7($tptr),%r15
-	adc	\$0,%rax		# top-most carry
+	adc	%rax,%rax		# top-most carry

 	mov	32+8(%rsp),%rbx		# n0
 	mov	8*8($tptr,%rcx),%rdx	# modulo-scheduled "%r8"
@@ -3740,14 +3515,9 @@ mul_handler:
 	cmp	%r10,%rbx		# context->Rip<end of prologue label
 	jb	.Lcommon_seh_tail

-	mov	4(%r11),%r10d		# HandlerData[1]
-	lea	(%rsi,%r10),%r10	# beginning of body label
-	cmp	%r10,%rbx		# context->Rip<body label
-	jb	.Lcommon_pop_regs
-
 	mov	152($context),%rax	# pull context->Rsp

-	mov	8(%r11),%r10d		# HandlerData[2]
+	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lcommon_seh_tail
@@ -3759,11 +3529,11 @@ mul_handler:
 	mov	192($context),%r10	# pull $num
 	mov	8(%rax,%r10,8),%rax	# pull saved stack pointer

-	jmp	.Lcommon_pop_regs
+	jmp	.Lbody_proceed

 .Lbody_40:
 	mov	40(%rax),%rax		# pull saved stack pointer
-.Lcommon_pop_regs:
+.Lbody_proceed:
 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
 	mov	-24(%rax),%r12
@@ -3854,34 +3624,34 @@ $code.=<<___;
 .LSEH_info_bn_mul_mont_gather5:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lmul_body,.Lmul_body,.Lmul_epilogue		# HandlerData[]
+	.rva	.Lmul_body,.Lmul_epilogue		# HandlerData[]
 .align	8
 .LSEH_info_bn_mul4x_mont_gather5:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue		# HandlerData[]
+	.rva	.Lmul4x_body,.Lmul4x_epilogue		# HandlerData[]
 .align	8
 .LSEH_info_bn_power5:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue	# HandlerData[]
+	.rva	.Lpower5_body,.Lpower5_epilogue		# HandlerData[]
 .align	8
 .LSEH_info_bn_from_mont8x:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue		# HandlerData[]
+	.rva	.Lfrom_body,.Lfrom_epilogue		# HandlerData[]
 ___
 $code.=<<___ if ($addx);
 .align	8
 .LSEH_info_bn_mulx4x_mont_gather5:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue	# HandlerData[]
+	.rva	.Lmulx4x_body,.Lmulx4x_epilogue		# HandlerData[]
 .align	8
 .LSEH_info_bn_powerx5:
 	.byte	9,0,0,0
 	.rva	mul_handler
-	.rva	.Lpowerx5_prologue,.Lpowerx5_body,.Lpowerx5_epilogue	# HandlerData[]
+	.rva	.Lpowerx5_body,.Lpowerx5_epilogue	# HandlerData[]
 ___
 $code.=<<___;
 .align	8
@@ -63,7 +63,6 @@
 #include <openssl/mem.h>

 #include "internal.h"
-#include "../delocate.h"


 BIGNUM *BN_new(void) {
@@ -74,14 +73,14 @@ BIGNUM *BN_new(void) {
    return NULL;
  }

-  OPENSSL_memset(bn, 0, sizeof(BIGNUM));
+  memset(bn, 0, sizeof(BIGNUM));
  bn->flags = BN_FLG_MALLOCED;

  return bn;
 }

 void BN_init(BIGNUM *bn) {
-  OPENSSL_memset(bn, 0, sizeof(BIGNUM));
+  memset(bn, 0, sizeof(BIGNUM));
 }

 void BN_free(BIGNUM *bn) {
@@ -146,11 +145,11 @@ BIGNUM *BN_copy(BIGNUM *dest, const BIGNUM *src) {
    return dest;
  }

-  if (!bn_wexpand(dest, src->top)) {
+  if (bn_wexpand(dest, src->top) == NULL) {
    return NULL;
  }

-  OPENSSL_memcpy(dest->d, src->d, sizeof(src->d[0]) * src->top);
+  memcpy(dest->d, src->d, sizeof(src->d[0]) * src->top);

  dest->top = src->top;
  dest->neg = src->neg;
@@ -159,20 +158,24 @@ BIGNUM *BN_copy(BIGNUM *dest, const BIGNUM *src) {

 void BN_clear(BIGNUM *bn) {
  if (bn->d != NULL) {
-    OPENSSL_memset(bn->d, 0, bn->dmax * sizeof(bn->d[0]));
+    memset(bn->d, 0, bn->dmax * sizeof(bn->d[0]));
  }

  bn->top = 0;
  bn->neg = 0;
 }

-DEFINE_METHOD_FUNCTION(BIGNUM, BN_value_one) {
+const BIGNUM *BN_value_one(void) {
  static const BN_ULONG kOneLimbs[1] = { 1 };
-  out->d = (BN_ULONG*) kOneLimbs;
-  out->top = 1;
-  out->dmax = 1;
-  out->neg = 0;
-  out->flags = BN_FLG_STATIC_DATA;
+  static const BIGNUM kOne = STATIC_BIGNUM(kOneLimbs);
+
+  return &kOne;
+}
+
+void BN_with_flags(BIGNUM *out, const BIGNUM *in, int flags) {
+  memcpy(out, in, sizeof(BIGNUM)); /* shallow copy: |out| shares |in|'s fields */
+  out->flags &= ~BN_FLG_MALLOCED; /* clear the MALLOCED bit copied from |in| */
+  out->flags |= BN_FLG_STATIC_DATA | flags; /* set STATIC_DATA plus |flags| */
+}

 /* BN_num_bits_word returns the minimum number of bits needed to represent the
@@ -253,7 +256,7 @@ int BN_set_word(BIGNUM *bn, BN_ULONG value) {
    return 1;
  }

-  if (!bn_wexpand(bn, 1)) {
+  if (bn_wexpand(bn, 1) == NULL) {
    return 0;
  }

@@ -271,7 +274,7 @@ int BN_set_u64(BIGNUM *bn, uint64_t value) {
    return BN_set_word(bn, (BN_ULONG)value);
  }

-  if (!bn_wexpand(bn, 2)) {
+  if (bn_wexpand(bn, 2) == NULL) {
    return 0;
  }

@@ -286,10 +289,10 @@ int BN_set_u64(BIGNUM *bn, uint64_t value) {
 }

 int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
-  if (!bn_wexpand(bn, num)) {
+  if (bn_wexpand(bn, num) == NULL) {
    return 0;
  }
-  OPENSSL_memmove(bn->d, words, num * sizeof(BN_ULONG));
+  memmove(bn->d, words, num * sizeof(BN_ULONG));
  /* |bn_wexpand| verified that |num| isn't too large. */
  bn->top = (int)num;
  bn_correct_top(bn);
@@ -309,42 +312,42 @@ void BN_set_negative(BIGNUM *bn, int sign) {
  }
 }

-int bn_wexpand(BIGNUM *bn, size_t words) {
+BIGNUM *bn_wexpand(BIGNUM *bn, size_t words) {
  BN_ULONG *a;

  if (words <= (size_t)bn->dmax) {
-    return 1;
+    return bn;
  }

  if (words > (INT_MAX / (4 * BN_BITS2))) {
    OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
-    return 0;
+    return NULL;
  }

  if (bn->flags & BN_FLG_STATIC_DATA) {
    OPENSSL_PUT_ERROR(BN, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
-    return 0;
+    return NULL;
  }

  a = OPENSSL_malloc(sizeof(BN_ULONG) * words);
  if (a == NULL) {
    OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
-    return 0;
+    return NULL;
  }

-  OPENSSL_memcpy(a, bn->d, sizeof(BN_ULONG) * bn->top);
+  memcpy(a, bn->d, sizeof(BN_ULONG) * bn->top);

  OPENSSL_free(bn->d);
  bn->d = a;
  bn->dmax = (int)words;

-  return 1;
+  return bn;
 }

-int bn_expand(BIGNUM *bn, size_t bits) {
+BIGNUM *bn_expand(BIGNUM *bn, size_t bits) {
  if (bits + BN_BITS2 - 1 < bits) {
    OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
-    return 0;
+    return NULL;
  }
  return bn_wexpand(bn, (bits+BN_BITS2-1)/BN_BITS2);
 }
@@ -366,3 +369,11 @@ void bn_correct_top(BIGNUM *bn) {
    bn->neg = 0;
  }
 }
+
+int BN_get_flags(const BIGNUM *bn, int flags) {
+  return bn->flags & flags;
+}
+
+void BN_set_flags(BIGNUM *bn, int flags) {
+  bn->flags |= flags;
+}
@@ -91,9 +91,9 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../../internal.h"
-#include "../../test/file_test.h"
-#include "../../test/test_util.h"
+#include "../internal.h"
+#include "../test/file_test.h"
+#include "../test/test_util.h"


 static int HexToBIGNUM(bssl::UniquePtr<BIGNUM> *out, const char *in) {
@@ -515,54 +515,6 @@ static bool TestModMul(FileTest *t, BN_CTX *ctx) {
  return true;
 }

-static bool TestModSquare(FileTest *t, BN_CTX *ctx) {
-  bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
-  bssl::UniquePtr<BIGNUM> m = GetBIGNUM(t, "M");
-  bssl::UniquePtr<BIGNUM> mod_square = GetBIGNUM(t, "ModSquare");
-  if (!a || !m || !mod_square) {
-    return false;
-  }
-
-  bssl::UniquePtr<BIGNUM> a_copy(BN_new());
-  bssl::UniquePtr<BIGNUM> ret(BN_new());
-  if (!ret || !a_copy ||
-      !BN_mod_mul(ret.get(), a.get(), a.get(), m.get(), ctx) ||
-      !ExpectBIGNUMsEqual(t, "A * A (mod M)", mod_square.get(), ret.get()) ||
-      // Repeat the operation with |a_copy|.
-      !BN_copy(a_copy.get(), a.get()) ||
-      !BN_mod_mul(ret.get(), a.get(), a_copy.get(), m.get(), ctx) ||
-      !ExpectBIGNUMsEqual(t, "A * A_copy (mod M)", mod_square.get(),
-                          ret.get())) {
-    return false;
-  }
-
-  if (BN_is_odd(m.get())) {
-    // Reduce |a| and test the Montgomery version.
-    bssl::UniquePtr<BN_MONT_CTX> mont(BN_MONT_CTX_new());
-    bssl::UniquePtr<BIGNUM> a_tmp(BN_new());
-    if (!mont || !a_tmp ||
-        !BN_MONT_CTX_set(mont.get(), m.get(), ctx) ||
-        !BN_nnmod(a_tmp.get(), a.get(), m.get(), ctx) ||
-        !BN_to_montgomery(a_tmp.get(), a_tmp.get(), mont.get(), ctx) ||
-        !BN_mod_mul_montgomery(ret.get(), a_tmp.get(), a_tmp.get(), mont.get(),
-                               ctx) ||
-        !BN_from_montgomery(ret.get(), ret.get(), mont.get(), ctx) ||
-        !ExpectBIGNUMsEqual(t, "A * A (mod M) (Montgomery)",
-                            mod_square.get(), ret.get()) ||
-        // Repeat the operation with |a_copy|.
-        !BN_copy(a_copy.get(), a_tmp.get()) ||
-        !BN_mod_mul_montgomery(ret.get(), a_tmp.get(), a_copy.get(), mont.get(),
-                               ctx) ||
-        !BN_from_montgomery(ret.get(), ret.get(), mont.get(), ctx) ||
-        !ExpectBIGNUMsEqual(t, "A * A_copy (mod M) (Montgomery)",
-                            mod_square.get(), ret.get())) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
 static bool TestModExp(FileTest *t, BN_CTX *ctx) {
  bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
  bssl::UniquePtr<BIGNUM> e = GetBIGNUM(t, "E");
@@ -616,25 +568,21 @@ static bool TestModSqrt(FileTest *t, BN_CTX *ctx) {
  bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
  bssl::UniquePtr<BIGNUM> p = GetBIGNUM(t, "P");
  bssl::UniquePtr<BIGNUM> mod_sqrt = GetBIGNUM(t, "ModSqrt");
-  bssl::UniquePtr<BIGNUM> mod_sqrt2(BN_new());
-  if (!a || !p || !mod_sqrt || !mod_sqrt2 ||
-      // There are two possible answers.
-      !BN_sub(mod_sqrt2.get(), p.get(), mod_sqrt.get())) {
+  if (!a || !p || !mod_sqrt) {
    return false;
  }

-  // -0 is 0, not P.
-  if (BN_is_zero(mod_sqrt.get())) {
-    BN_zero(mod_sqrt2.get());
-  }
-
  bssl::UniquePtr<BIGNUM> ret(BN_new());
+  bssl::UniquePtr<BIGNUM> ret2(BN_new());
  if (!ret ||
-      !BN_mod_sqrt(ret.get(), a.get(), p.get(), ctx)) {
+      !ret2 ||
+      !BN_mod_sqrt(ret.get(), a.get(), p.get(), ctx) ||
+      // There are two possible answers.
+      !BN_sub(ret2.get(), p.get(), ret.get())) {
    return false;
  }

-  if (BN_cmp(ret.get(), mod_sqrt2.get()) != 0 &&
+  if (BN_cmp(ret2.get(), mod_sqrt.get()) != 0 &&
      !ExpectBIGNUMsEqual(t, "sqrt(A) (mod P)", mod_sqrt.get(), ret.get())) {
    return false;
  }
@@ -642,29 +590,6 @@ static bool TestModSqrt(FileTest *t, BN_CTX *ctx) {
  return true;
 }

-static bool TestNotModSquare(FileTest *t, BN_CTX *ctx) {
-  bssl::UniquePtr<BIGNUM> not_mod_square = GetBIGNUM(t, "NotModSquare");
-  bssl::UniquePtr<BIGNUM> p = GetBIGNUM(t, "P");
-  bssl::UniquePtr<BIGNUM> ret(BN_new());
-  if (!not_mod_square || !p || !ret) {
-    return false;
-  }
-
-  if (BN_mod_sqrt(ret.get(), not_mod_square.get(), p.get(), ctx)) {
-    t->PrintLine("BN_mod_sqrt unexpectedly succeeded.");
-    return false;
-  }
-
-  uint32_t err = ERR_peek_error();
-  if (ERR_GET_LIB(err) == ERR_LIB_BN &&
-      ERR_GET_REASON(err) == BN_R_NOT_A_SQUARE) {
-    ERR_clear_error();
-    return true;
-  }
-
-  return false;
-}
-
 static bool TestModInv(FileTest *t, BN_CTX *ctx) {
  bssl::UniquePtr<BIGNUM> a = GetBIGNUM(t, "A");
  bssl::UniquePtr<BIGNUM> m = GetBIGNUM(t, "M");
@@ -680,6 +605,15 @@ static bool TestModInv(FileTest *t, BN_CTX *ctx) {
    return false;
  }

+  BN_set_flags(a.get(), BN_FLG_CONSTTIME);
+
+  if (!ret ||
+      !BN_mod_inverse(ret.get(), a.get(), m.get(), ctx) ||
+      !ExpectBIGNUMsEqual(t, "inv(A) (mod M) (constant-time)", mod_inv.get(),
+                          ret.get())) {
+    return false;
+  }
+
  return true;
 }

@@ -697,11 +631,9 @@ static const Test kTests[] = {
    {"Product", TestProduct},
    {"Quotient", TestQuotient},
    {"ModMul", TestModMul},
-    {"ModSquare", TestModSquare},
    {"ModExp", TestModExp},
    {"Exp", TestExp},
    {"ModSqrt", TestModSqrt},
-    {"NotModSquare", TestNotModSquare},
    {"ModInv", TestModInv},
 };

@@ -720,7 +652,7 @@ static bool RunTest(FileTest *t, void *arg) {
 static bool TestBN2BinPadded(BN_CTX *ctx) {
  uint8_t zeros[256], out[256], reference[128];

-  OPENSSL_memset(zeros, 0, sizeof(zeros));
+  memset(zeros, 0, sizeof(zeros));

  // Test edge case at 0.
  bssl::UniquePtr<BIGNUM> n(BN_new());
@@ -729,13 +661,13 @@ static bool TestBN2BinPadded(BN_CTX *ctx) {
            "BN_bn2bin_padded failed to encode 0 in an empty buffer.\n");
    return false;
  }
-  OPENSSL_memset(out, -1, sizeof(out));
+  memset(out, -1, sizeof(out));
  if (!BN_bn2bin_padded(out, sizeof(out), n.get())) {
    fprintf(stderr,
            "BN_bn2bin_padded failed to encode 0 in a non-empty buffer.\n");
    return false;
  }
-  if (OPENSSL_memcmp(zeros, out, sizeof(out))) {
+  if (memcmp(zeros, out, sizeof(out))) {
    fprintf(stderr, "BN_bn2bin_padded did not zero buffer.\n");
    return false;
  }
@@ -764,21 +696,20 @@ static bool TestBN2BinPadded(BN_CTX *ctx) {
    }
    // Exactly right size should encode.
    if (!BN_bn2bin_padded(out, bytes, n.get()) ||
-        OPENSSL_memcmp(out, reference, bytes) != 0) {
+        memcmp(out, reference, bytes) != 0) {
      fprintf(stderr, "BN_bn2bin_padded gave a bad result.\n");
      return false;
    }
    // Pad up one byte extra.
    if (!BN_bn2bin_padded(out, bytes + 1, n.get()) ||
-        OPENSSL_memcmp(out + 1, reference, bytes) ||
-        OPENSSL_memcmp(out, zeros, 1)) {
+        memcmp(out + 1, reference, bytes) || memcmp(out, zeros, 1)) {
      fprintf(stderr, "BN_bn2bin_padded gave a bad result.\n");
      return false;
    }
    // Pad up to 256.
    if (!BN_bn2bin_padded(out, sizeof(out), n.get()) ||
-        OPENSSL_memcmp(out + sizeof(out) - bytes, reference, bytes) ||
-        OPENSSL_memcmp(out, zeros, sizeof(out) - bytes)) {
+        memcmp(out + sizeof(out) - bytes, reference, bytes) ||
+        memcmp(out, zeros, sizeof(out) - bytes)) {
      fprintf(stderr, "BN_bn2bin_padded gave a bad result.\n");
      return false;
    }
@@ -787,82 +718,6 @@ static bool TestBN2BinPadded(BN_CTX *ctx) {
  return true;
 }

-static bool TestLittleEndian() {
-  bssl::UniquePtr<BIGNUM> x(BN_new());
-  bssl::UniquePtr<BIGNUM> y(BN_new());
-  if (!x || !y) {
-    fprintf(stderr, "BN_new failed to malloc.\n");
-    return false;
-  }
-
-  // Test edge case at 0. Fill |out| with garbage to ensure |BN_bn2le_padded|
-  // wrote the result.
-  uint8_t out[256], zeros[256];
-  OPENSSL_memset(out, -1, sizeof(out));
-  OPENSSL_memset(zeros, 0, sizeof(zeros));
-  if (!BN_bn2le_padded(out, sizeof(out), x.get()) ||
-      OPENSSL_memcmp(zeros, out, sizeof(out))) {
-    fprintf(stderr, "BN_bn2le_padded failed to encode 0.\n");
-    return false;
-  }
-
-  if (!BN_le2bn(out, sizeof(out), y.get()) ||
-      BN_cmp(x.get(), y.get()) != 0) {
-    fprintf(stderr, "BN_le2bn failed to decode 0 correctly.\n");
-    return false;
-  }
-
-  // Test random numbers at various byte lengths.
-  for (size_t bytes = 128 - 7; bytes <= 128; bytes++) {
-    if (!BN_rand(x.get(), bytes * 8, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ANY)) {
-      ERR_print_errors_fp(stderr);
-      return false;
-    }
-
-    // Fill |out| with garbage to ensure |BN_bn2le_padded| wrote the result.
-    OPENSSL_memset(out, -1, sizeof(out));
-    if (!BN_bn2le_padded(out, sizeof(out), x.get())) {
-      fprintf(stderr, "BN_bn2le_padded failed to encode random value.\n");
-      return false;
-    }
-
-    // Compute the expected value by reversing the big-endian output.
-    uint8_t expected[sizeof(out)];
-    if (!BN_bn2bin_padded(expected, sizeof(expected), x.get())) {
-      return false;
-    }
-    for (size_t i = 0; i < sizeof(expected) / 2; i++) {
-      uint8_t tmp = expected[i];
-      expected[i] = expected[sizeof(expected) - 1 - i];
-      expected[sizeof(expected) - 1 - i] = tmp;
-    }
-
-    if (OPENSSL_memcmp(expected, out, sizeof(out))) {
-      fprintf(stderr, "BN_bn2le_padded failed to encode value correctly.\n");
-      hexdump(stderr, "Expected: ", expected, sizeof(expected));
-      hexdump(stderr, "Got:      ", out, sizeof(out));
-      return false;
-    }
-
-    // Make sure the decoding produces the same BIGNUM.
-    if (!BN_le2bn(out, bytes, y.get()) ||
-        BN_cmp(x.get(), y.get()) != 0) {
-      bssl::UniquePtr<char> x_hex(BN_bn2hex(x.get())),
-          y_hex(BN_bn2hex(y.get()));
-      if (!x_hex || !y_hex) {
-        return false;
-      }
-      fprintf(stderr, "BN_le2bn failed to decode value correctly.\n");
-      fprintf(stderr, "Expected: %s\n", x_hex.get());
-      hexdump(stderr, "Encoding: ", out, bytes);
-      fprintf(stderr, "Got:      %s\n", y_hex.get());
-      return false;
-    }
-  }
-
-  return true;
-}
-
 static int DecimalToBIGNUM(bssl::UniquePtr<BIGNUM> *out, const char *in) {
  BIGNUM *raw = NULL;
  int ret = BN_dec2bn(&raw, in);
@@ -1039,7 +894,7 @@ static bool TestMPI() {
    }

    if (mpi_len != test.mpi_len ||
-        OPENSSL_memcmp(test.mpi, scratch, mpi_len) != 0) {
+        memcmp(test.mpi, scratch, mpi_len) != 0) {
      fprintf(stderr, "MPI test #%u failed:\n", (unsigned)i);
      hexdump(stderr, "Expected: ", test.mpi, test.mpi_len);
      hexdump(stderr, "Got:      ", scratch, mpi_len);
@@ -1105,49 +960,6 @@ static bool TestRand() {
  return true;
 }

-static bool TestRandRange() {
-  bssl::UniquePtr<BIGNUM> bn(BN_new()), six(BN_new());
-  if (!bn || !six ||
-      !BN_set_word(six.get(), 6)) {
-    return false;
-  }
-
-  // Generate 1,000 random numbers and ensure they all stay in range. This check
-  // may flakily pass when it should have failed but will not flakily fail.
-  bool seen[6] = {false, false, false, false, false};
-  for (unsigned i = 0; i < 1000; i++) {
-    if (!BN_rand_range_ex(bn.get(), 1, six.get())) {
-      return false;
-    }
-
-    BN_ULONG word = BN_get_word(bn.get());
-    if (BN_is_negative(bn.get()) ||
-        word < 1 ||
-        word >= 6) {
-      fprintf(stderr,
-              "BN_rand_range_ex generated invalid value: " BN_DEC_FMT1 "\n",
-              word);
-      return false;
-    }
-
-    seen[word] = true;
-  }
-
-  // Test that all numbers were accounted for. Note this test is probabilistic
-  // and may flakily fail when it should have passed. As an upper-bound on the
-  // failure probability, we'll never see any one number with probability
-  // (4/5)^1000, so the probability of failure is at most 5*(4/5)^1000. This is
-  // around 1 in 2^320.
-  for (unsigned i = 1; i < 6; i++) {
-    if (!seen[i]) {
-      fprintf(stderr, "BN_rand_range failed to generate %u.\n", i);
-      return false;
-    }
-  }
-
-  return true;
-}
-
 struct ASN1Test {
  const char *value_ascii;
  const char *der;
@@ -1222,8 +1034,7 @@ static bool TestASN1() {
    }
    bssl::UniquePtr<uint8_t> delete_der(der);
    if (der_len != test.der_len ||
-        OPENSSL_memcmp(der, reinterpret_cast<const uint8_t *>(test.der),
-                       der_len) != 0) {
+        memcmp(der, reinterpret_cast<const uint8_t*>(test.der), der_len) != 0) {
      fprintf(stderr, "Bad serialization.\n");
      return false;
    }
@@ -1334,35 +1145,42 @@ static bool TestNegativeZero(BN_CTX *ctx) {
    return false;
  }

-  bssl::UniquePtr<BIGNUM> numerator(BN_new()), denominator(BN_new());
-  if (!numerator || !denominator) {
-    return false;
-  }
+  for (int consttime = 0; consttime < 2; consttime++) {
+    bssl::UniquePtr<BIGNUM> numerator(BN_new()), denominator(BN_new());
+    if (!numerator || !denominator) {
+      return false;
+    }

-  // Test that BN_div never gives negative zero in the quotient.
-  if (!BN_set_word(numerator.get(), 1) ||
-      !BN_set_word(denominator.get(), 2)) {
-    return false;
-  }
-  BN_set_negative(numerator.get(), 1);
-  if (!BN_div(a.get(), b.get(), numerator.get(), denominator.get(), ctx)) {
-    return false;
-  }
-  if (!BN_is_zero(a.get()) || BN_is_negative(a.get())) {
-    fprintf(stderr, "Incorrect quotient.\n");
-    return false;
-  }
+    if (consttime) {
+      BN_set_flags(numerator.get(), BN_FLG_CONSTTIME);
+      BN_set_flags(denominator.get(), BN_FLG_CONSTTIME);
+    }

-  // Test that BN_div never gives negative zero in the remainder.
-  if (!BN_set_word(denominator.get(), 1)) {
-    return false;
-  }
-  if (!BN_div(a.get(), b.get(), numerator.get(), denominator.get(), ctx)) {
-    return false;
-  }
-  if (!BN_is_zero(b.get()) || BN_is_negative(b.get())) {
-    fprintf(stderr, "Incorrect remainder.\n");
-    return false;
+    // Test that BN_div never gives negative zero in the quotient.
+    if (!BN_set_word(numerator.get(), 1) ||
+        !BN_set_word(denominator.get(), 2)) {
+      return false;
+    }
+    BN_set_negative(numerator.get(), 1);
+    if (!BN_div(a.get(), b.get(), numerator.get(), denominator.get(), ctx)) {
+      return false;
+    }
+    if (!BN_is_zero(a.get()) || BN_is_negative(a.get())) {
+      fprintf(stderr, "Incorrect quotient (consttime = %d).\n", consttime);
+      return false;
+    }
+
+    // Test that BN_div never gives negative zero in the remainder.
+    if (!BN_set_word(denominator.get(), 1)) {
+      return false;
+    }
+    if (!BN_div(a.get(), b.get(), numerator.get(), denominator.get(), ctx)) {
+      return false;
+    }
+    if (!BN_is_zero(b.get()) || BN_is_negative(b.get())) {
+      fprintf(stderr, "Incorrect remainder (consttime = %d).\n", consttime);
+      return false;
+    }
  }

  // Test that BN_set_negative will not produce a negative zero.
@@ -1385,37 +1203,6 @@ static bool TestNegativeZero(BN_CTX *ctx) {
    return false;
  }

-  // Test that |BN_rshift| and |BN_rshift1| will not produce a negative zero.
-  if (!BN_set_word(a.get(), 1)) {
-    return false;
-  }
-
-  BN_set_negative(a.get(), 1);
-  if (!BN_rshift(b.get(), a.get(), 1) ||
-      !BN_rshift1(c.get(), a.get())) {
-    return false;
-  }
-
-  if (!BN_is_zero(b.get()) || BN_is_negative(b.get())) {
-    fprintf(stderr, "BN_rshift(-1, 1) produced the wrong result.\n");
-    return false;
-  }
-
-  if (!BN_is_zero(c.get()) || BN_is_negative(c.get())) {
-    fprintf(stderr, "BN_rshift1(-1) produced the wrong result.\n");
-    return false;
-  }
-
-  // Test that |BN_div_word| will not produce a negative zero.
-  if (BN_div_word(a.get(), 2) == (BN_ULONG)-1) {
-    return false;
-  }
-
-  if (!BN_is_zero(a.get()) || BN_is_negative(a.get())) {
-    fprintf(stderr, "BN_div_word(-1, 2) produced the wrong result.\n");
-    return false;
-  }
-
  return true;
 }

@@ -1654,7 +1441,7 @@ static bool TestBN2Dec() {
  return true;
 }

-static bool TestBNSetGetU64() {
+static bool TestBNSetU64() {
  static const struct {
    const char *hex;
    uint64_t value;
@@ -1676,388 +1463,6 @@ static bool TestBNSetGetU64() {
      ERR_print_errors_fp(stderr);
      return false;
    }
-
-    uint64_t tmp;
-    if (!BN_get_u64(bn.get(), &tmp) || tmp != test.value) {
-      fprintf(stderr, "BN_get_u64 test failed for 0x%s.\n", test.hex);
-      return false;
-    }
-
-    BN_set_negative(bn.get(), 1);
-    if (!BN_get_u64(bn.get(), &tmp) || tmp != test.value) {
-      fprintf(stderr, "BN_get_u64 test failed for -0x%s.\n", test.hex);
-      return false;
-    }
-  }
-
-  // Test that BN_get_u64 fails on large numbers.
-  bssl::UniquePtr<BIGNUM> bn(BN_new());
-  if (!BN_lshift(bn.get(), BN_value_one(), 64)) {
-    return false;
-  }
-
-  uint64_t tmp;
-  if (BN_get_u64(bn.get(), &tmp)) {
-    fprintf(stderr, "BN_get_u64 of 2^64 unexpectedly succeeded.\n");
-    return false;
-  }
-
-  BN_set_negative(bn.get(), 1);
-  if (BN_get_u64(bn.get(), &tmp)) {
-    fprintf(stderr, "BN_get_u64 of -2^64 unexpectedly succeeded.\n");
-    return false;
-  }
-
-  return true;
-}
-
-static bool TestBNPow2(BN_CTX *ctx) {
-  bssl::UniquePtr<BIGNUM>
-      power_of_two(BN_new()),
-      random(BN_new()),
-      expected(BN_new()),
-      actual(BN_new());
-
-  if (!power_of_two.get() ||
-      !random.get() ||
-      !expected.get() ||
-      !actual.get()) {
-    return false;
-  }
-
-  // Choose an exponent.
-  for (size_t e = 3; e < 512; e += 11) {
-    // Choose a bit length for our randoms.
-    for (int len = 3; len < 512; len += 23) {
-      // Set power_of_two = 2^e.
-      if (!BN_lshift(power_of_two.get(), BN_value_one(), (int) e)) {
-        fprintf(stderr, "Failed to shiftl.\n");
-        return false;
-      }
-
-      // Test BN_is_pow2 on power_of_two.
-      if (!BN_is_pow2(power_of_two.get())) {
-        fprintf(stderr, "BN_is_pow2 returned false for a power of two.\n");
-        hexdump(stderr, "Arg: ", power_of_two->d,
-                power_of_two->top * sizeof(BN_ULONG));
-        return false;
-      }
-
-      // Pick a large random value, ensuring it isn't a power of two.
-      if (!BN_rand(random.get(), len, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ANY)) {
-        fprintf(stderr, "Failed to generate random in TestBNPow2.\n");
-        return false;
-      }
-
-      // Test BN_is_pow2 on |r|.
-      if (BN_is_pow2(random.get())) {
-        fprintf(stderr, "BN_is_pow2 returned true for a non-power of two.\n");
-        hexdump(stderr, "Arg: ", random->d, random->top * sizeof(BN_ULONG));
-        return false;
-      }
-
-      // Test BN_mod_pow2 on |r|.
-      if (!BN_mod(expected.get(), random.get(), power_of_two.get(), ctx) ||
-          !BN_mod_pow2(actual.get(), random.get(), e) ||
-          BN_cmp(actual.get(), expected.get())) {
-        fprintf(stderr, "BN_mod_pow2 returned the wrong value:\n");
-        hexdump(stderr, "Expected: ", expected->d,
-                expected->top * sizeof(BN_ULONG));
-        hexdump(stderr, "Got:      ", actual->d,
-                actual->top * sizeof(BN_ULONG));
-        return false;
-      }
-
-      // Test BN_nnmod_pow2 on |r|.
-      if (!BN_nnmod(expected.get(), random.get(), power_of_two.get(), ctx) ||
-          !BN_nnmod_pow2(actual.get(), random.get(), e) ||
-          BN_cmp(actual.get(), expected.get())) {
-        fprintf(stderr, "BN_nnmod_pow2 failed on positive input:\n");
-        hexdump(stderr, "Expected: ", expected->d,
-                expected->top * sizeof(BN_ULONG));
-        hexdump(stderr, "Got:      ", actual->d,
-                actual->top * sizeof(BN_ULONG));
-        return false;
-      }
-
-      // Test BN_nnmod_pow2 on -|r|.
-      BN_set_negative(random.get(), 1);
-      if (!BN_nnmod(expected.get(), random.get(), power_of_two.get(), ctx) ||
-          !BN_nnmod_pow2(actual.get(), random.get(), e) ||
-          BN_cmp(actual.get(), expected.get())) {
-        fprintf(stderr, "BN_nnmod_pow2 failed on negative input:\n");
-        hexdump(stderr, "Expected: ", expected->d,
-                expected->top * sizeof(BN_ULONG));
-        hexdump(stderr, "Got:      ", actual->d,
-                actual->top * sizeof(BN_ULONG));
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-static const int kPrimes[] = {
-    2,     3,     5,     7,     11,    13,    17,    19,    23,    29,    31,
-    37,    41,    43,    47,    53,    59,    61,    67,    71,    73,    79,
-    83,    89,    97,    101,   103,   107,   109,   113,   127,   131,   137,
-    139,   149,   151,   157,   163,   167,   173,   179,   181,   191,   193,
-    197,   199,   211,   223,   227,   229,   233,   239,   241,   251,   257,
-    263,   269,   271,   277,   281,   283,   293,   307,   311,   313,   317,
-    331,   337,   347,   349,   353,   359,   367,   373,   379,   383,   389,
-    397,   401,   409,   419,   421,   431,   433,   439,   443,   449,   457,
-    461,   463,   467,   479,   487,   491,   499,   503,   509,   521,   523,
-    541,   547,   557,   563,   569,   571,   577,   587,   593,   599,   601,
-    607,   613,   617,   619,   631,   641,   643,   647,   653,   659,   661,
-    673,   677,   683,   691,   701,   709,   719,   727,   733,   739,   743,
-    751,   757,   761,   769,   773,   787,   797,   809,   811,   821,   823,
-    827,   829,   839,   853,   857,   859,   863,   877,   881,   883,   887,
-    907,   911,   919,   929,   937,   941,   947,   953,   967,   971,   977,
-    983,   991,   997,   1009,  1013,  1019,  1021,  1031,  1033,  1039,  1049,
-    1051,  1061,  1063,  1069,  1087,  1091,  1093,  1097,  1103,  1109,  1117,
-    1123,  1129,  1151,  1153,  1163,  1171,  1181,  1187,  1193,  1201,  1213,
-    1217,  1223,  1229,  1231,  1237,  1249,  1259,  1277,  1279,  1283,  1289,
-    1291,  1297,  1301,  1303,  1307,  1319,  1321,  1327,  1361,  1367,  1373,
-    1381,  1399,  1409,  1423,  1427,  1429,  1433,  1439,  1447,  1451,  1453,
-    1459,  1471,  1481,  1483,  1487,  1489,  1493,  1499,  1511,  1523,  1531,
-    1543,  1549,  1553,  1559,  1567,  1571,  1579,  1583,  1597,  1601,  1607,
-    1609,  1613,  1619,  1621,  1627,  1637,  1657,  1663,  1667,  1669,  1693,
-    1697,  1699,  1709,  1721,  1723,  1733,  1741,  1747,  1753,  1759,  1777,
-    1783,  1787,  1789,  1801,  1811,  1823,  1831,  1847,  1861,  1867,  1871,
-    1873,  1877,  1879,  1889,  1901,  1907,  1913,  1931,  1933,  1949,  1951,
-    1973,  1979,  1987,  1993,  1997,  1999,  2003,  2011,  2017,  2027,  2029,
-    2039,  2053,  2063,  2069,  2081,  2083,  2087,  2089,  2099,  2111,  2113,
-    2129,  2131,  2137,  2141,  2143,  2153,  2161,  2179,  2203,  2207,  2213,
-    2221,  2237,  2239,  2243,  2251,  2267,  2269,  2273,  2281,  2287,  2293,
-    2297,  2309,  2311,  2333,  2339,  2341,  2347,  2351,  2357,  2371,  2377,
-    2381,  2383,  2389,  2393,  2399,  2411,  2417,  2423,  2437,  2441,  2447,
-    2459,  2467,  2473,  2477,  2503,  2521,  2531,  2539,  2543,  2549,  2551,
-    2557,  2579,  2591,  2593,  2609,  2617,  2621,  2633,  2647,  2657,  2659,
-    2663,  2671,  2677,  2683,  2687,  2689,  2693,  2699,  2707,  2711,  2713,
-    2719,  2729,  2731,  2741,  2749,  2753,  2767,  2777,  2789,  2791,  2797,
-    2801,  2803,  2819,  2833,  2837,  2843,  2851,  2857,  2861,  2879,  2887,
-    2897,  2903,  2909,  2917,  2927,  2939,  2953,  2957,  2963,  2969,  2971,
-    2999,  3001,  3011,  3019,  3023,  3037,  3041,  3049,  3061,  3067,  3079,
-    3083,  3089,  3109,  3119,  3121,  3137,  3163,  3167,  3169,  3181,  3187,
-    3191,  3203,  3209,  3217,  3221,  3229,  3251,  3253,  3257,  3259,  3271,
-    3299,  3301,  3307,  3313,  3319,  3323,  3329,  3331,  3343,  3347,  3359,
-    3361,  3371,  3373,  3389,  3391,  3407,  3413,  3433,  3449,  3457,  3461,
-    3463,  3467,  3469,  3491,  3499,  3511,  3517,  3527,  3529,  3533,  3539,
-    3541,  3547,  3557,  3559,  3571,  3581,  3583,  3593,  3607,  3613,  3617,
-    3623,  3631,  3637,  3643,  3659,  3671,  3673,  3677,  3691,  3697,  3701,
-    3709,  3719,  3727,  3733,  3739,  3761,  3767,  3769,  3779,  3793,  3797,
-    3803,  3821,  3823,  3833,  3847,  3851,  3853,  3863,  3877,  3881,  3889,
-    3907,  3911,  3917,  3919,  3923,  3929,  3931,  3943,  3947,  3967,  3989,
-    4001,  4003,  4007,  4013,  4019,  4021,  4027,  4049,  4051,  4057,  4073,
-    4079,  4091,  4093,  4099,  4111,  4127,  4129,  4133,  4139,  4153,  4157,
-    4159,  4177,  4201,  4211,  4217,  4219,  4229,  4231,  4241,  4243,  4253,
-    4259,  4261,  4271,  4273,  4283,  4289,  4297,  4327,  4337,  4339,  4349,
-    4357,  4363,  4373,  4391,  4397,  4409,  4421,  4423,  4441,  4447,  4451,
-    4457,  4463,  4481,  4483,  4493,  4507,  4513,  4517,  4519,  4523,  4547,
-    4549,  4561,  4567,  4583,  4591,  4597,  4603,  4621,  4637,  4639,  4643,
-    4649,  4651,  4657,  4663,  4673,  4679,  4691,  4703,  4721,  4723,  4729,
-    4733,  4751,  4759,  4783,  4787,  4789,  4793,  4799,  4801,  4813,  4817,
-    4831,  4861,  4871,  4877,  4889,  4903,  4909,  4919,  4931,  4933,  4937,
-    4943,  4951,  4957,  4967,  4969,  4973,  4987,  4993,  4999,  5003,  5009,
-    5011,  5021,  5023,  5039,  5051,  5059,  5077,  5081,  5087,  5099,  5101,
-    5107,  5113,  5119,  5147,  5153,  5167,  5171,  5179,  5189,  5197,  5209,
-    5227,  5231,  5233,  5237,  5261,  5273,  5279,  5281,  5297,  5303,  5309,
-    5323,  5333,  5347,  5351,  5381,  5387,  5393,  5399,  5407,  5413,  5417,
-    5419,  5431,  5437,  5441,  5443,  5449,  5471,  5477,  5479,  5483,  5501,
-    5503,  5507,  5519,  5521,  5527,  5531,  5557,  5563,  5569,  5573,  5581,
-    5591,  5623,  5639,  5641,  5647,  5651,  5653,  5657,  5659,  5669,  5683,
-    5689,  5693,  5701,  5711,  5717,  5737,  5741,  5743,  5749,  5779,  5783,
-    5791,  5801,  5807,  5813,  5821,  5827,  5839,  5843,  5849,  5851,  5857,
-    5861,  5867,  5869,  5879,  5881,  5897,  5903,  5923,  5927,  5939,  5953,
-    5981,  5987,  6007,  6011,  6029,  6037,  6043,  6047,  6053,  6067,  6073,
-    6079,  6089,  6091,  6101,  6113,  6121,  6131,  6133,  6143,  6151,  6163,
-    6173,  6197,  6199,  6203,  6211,  6217,  6221,  6229,  6247,  6257,  6263,
-    6269,  6271,  6277,  6287,  6299,  6301,  6311,  6317,  6323,  6329,  6337,
-    6343,  6353,  6359,  6361,  6367,  6373,  6379,  6389,  6397,  6421,  6427,
-    6449,  6451,  6469,  6473,  6481,  6491,  6521,  6529,  6547,  6551,  6553,
-    6563,  6569,  6571,  6577,  6581,  6599,  6607,  6619,  6637,  6653,  6659,
-    6661,  6673,  6679,  6689,  6691,  6701,  6703,  6709,  6719,  6733,  6737,
-    6761,  6763,  6779,  6781,  6791,  6793,  6803,  6823,  6827,  6829,  6833,
-    6841,  6857,  6863,  6869,  6871,  6883,  6899,  6907,  6911,  6917,  6947,
-    6949,  6959,  6961,  6967,  6971,  6977,  6983,  6991,  6997,  7001,  7013,
-    7019,  7027,  7039,  7043,  7057,  7069,  7079,  7103,  7109,  7121,  7127,
-    7129,  7151,  7159,  7177,  7187,  7193,  7207,  7211,  7213,  7219,  7229,
-    7237,  7243,  7247,  7253,  7283,  7297,  7307,  7309,  7321,  7331,  7333,
-    7349,  7351,  7369,  7393,  7411,  7417,  7433,  7451,  7457,  7459,  7477,
-    7481,  7487,  7489,  7499,  7507,  7517,  7523,  7529,  7537,  7541,  7547,
-    7549,  7559,  7561,  7573,  7577,  7583,  7589,  7591,  7603,  7607,  7621,
-    7639,  7643,  7649,  7669,  7673,  7681,  7687,  7691,  7699,  7703,  7717,
-    7723,  7727,  7741,  7753,  7757,  7759,  7789,  7793,  7817,  7823,  7829,
-    7841,  7853,  7867,  7873,  7877,  7879,  7883,  7901,  7907,  7919,  7927,
-    7933,  7937,  7949,  7951,  7963,  7993,  8009,  8011,  8017,  8039,  8053,
-    8059,  8069,  8081,  8087,  8089,  8093,  8101,  8111,  8117,  8123,  8147,
-    8161,  8167,  8171,  8179,  8191,  8209,  8219,  8221,  8231,  8233,  8237,
-    8243,  8263,  8269,  8273,  8287,  8291,  8293,  8297,  8311,  8317,  8329,
-    8353,  8363,  8369,  8377,  8387,  8389,  8419,  8423,  8429,  8431,  8443,
-    8447,  8461,  8467,  8501,  8513,  8521,  8527,  8537,  8539,  8543,  8563,
-    8573,  8581,  8597,  8599,  8609,  8623,  8627,  8629,  8641,  8647,  8663,
-    8669,  8677,  8681,  8689,  8693,  8699,  8707,  8713,  8719,  8731,  8737,
-    8741,  8747,  8753,  8761,  8779,  8783,  8803,  8807,  8819,  8821,  8831,
-    8837,  8839,  8849,  8861,  8863,  8867,  8887,  8893,  8923,  8929,  8933,
-    8941,  8951,  8963,  8969,  8971,  8999,  9001,  9007,  9011,  9013,  9029,
-    9041,  9043,  9049,  9059,  9067,  9091,  9103,  9109,  9127,  9133,  9137,
-    9151,  9157,  9161,  9173,  9181,  9187,  9199,  9203,  9209,  9221,  9227,
-    9239,  9241,  9257,  9277,  9281,  9283,  9293,  9311,  9319,  9323,  9337,
-    9341,  9343,  9349,  9371,  9377,  9391,  9397,  9403,  9413,  9419,  9421,
-    9431,  9433,  9437,  9439,  9461,  9463,  9467,  9473,  9479,  9491,  9497,
-    9511,  9521,  9533,  9539,  9547,  9551,  9587,  9601,  9613,  9619,  9623,
-    9629,  9631,  9643,  9649,  9661,  9677,  9679,  9689,  9697,  9719,  9721,
-    9733,  9739,  9743,  9749,  9767,  9769,  9781,  9787,  9791,  9803,  9811,
-    9817,  9829,  9833,  9839,  9851,  9857,  9859,  9871,  9883,  9887,  9901,
-    9907,  9923,  9929,  9931,  9941,  9949,  9967,  9973,  10007, 10009, 10037,
-    10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, 10103, 10111, 10133,
-    10139, 10141, 10151, 10159, 10163, 10169, 10177, 10181, 10193, 10211, 10223,
-    10243, 10247, 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303, 10313,
-    10321, 10331, 10333, 10337, 10343, 10357, 10369, 10391, 10399, 10427, 10429,
-    10433, 10453, 10457, 10459, 10463, 10477, 10487, 10499, 10501, 10513, 10529,
-    10531, 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627, 10631, 10639,
-    10651, 10657, 10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733,
-    10739, 10753, 10771, 10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
-    10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937, 10939, 10949, 10957,
-    10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059, 11069, 11071,
-    11083, 11087, 11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, 11171,
-    11173, 11177, 11197, 11213, 11239, 11243, 11251, 11257, 11261, 11273, 11279,
-    11287, 11299, 11311, 11317, 11321, 11329, 11351, 11353, 11369, 11383, 11393,
-    11399, 11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, 11489, 11491,
-    11497, 11503, 11519, 11527, 11549, 11551, 11579, 11587, 11593, 11597, 11617,
-    11621, 11633, 11657, 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
-    11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831,
-    11833, 11839, 11863, 11867, 11887, 11897, 11903, 11909, 11923, 11927, 11933,
-    11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011, 12037,
-    12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109, 12113, 12119,
-    12143, 12149, 12157, 12161, 12163, 12197, 12203, 12211, 12227, 12239, 12241,
-    12251, 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323, 12329, 12343,
-    12347, 12373, 12377, 12379, 12391, 12401, 12409, 12413, 12421, 12433, 12437,
-    12451, 12457, 12473, 12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
-    12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589, 12601, 12611, 12613,
-    12619, 12637, 12641, 12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713,
-    12721, 12739, 12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823,
-    12829, 12841, 12853, 12889, 12893, 12899, 12907, 12911, 12917, 12919, 12923,
-    12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007, 13009,
-    13033, 13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, 13121, 13127,
-    13147, 13151, 13159, 13163, 13171, 13177, 13183, 13187, 13217, 13219, 13229,
-    13241, 13249, 13259, 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
-    13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, 13441, 13451, 13457,
-    13463, 13469, 13477, 13487, 13499, 13513, 13523, 13537, 13553, 13567, 13577,
-    13591, 13597, 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681, 13687,
-    13691, 13693, 13697, 13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759,
-    13763, 13781, 13789, 13799, 13807, 13829, 13831, 13841, 13859, 13873, 13877,
-    13879, 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933, 13963, 13967,
-    13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081, 14083,
-    14087, 14107, 14143, 14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
-    14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323, 14327, 14341, 14347,
-    14369, 14387, 14389, 14401, 14407, 14411, 14419, 14423, 14431, 14437, 14447,
-    14449, 14461, 14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, 14551,
-    14557, 14561, 14563, 14591, 14593, 14621, 14627, 14629, 14633, 14639, 14653,
-    14657, 14669, 14683, 14699, 14713, 14717, 14723, 14731, 14737, 14741, 14747,
-    14753, 14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831,
-    14843, 14851, 14867, 14869, 14879, 14887, 14891, 14897, 14923, 14929, 14939,
-    14947, 14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
-    15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149, 15161,
-    15173, 15187, 15193, 15199, 15217, 15227, 15233, 15241, 15259, 15263, 15269,
-    15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331, 15349,
-    15359, 15361, 15373, 15377, 15383, 15391, 15401, 15413, 15427, 15439, 15443,
-    15451, 15461, 15467, 15473, 15493, 15497, 15511, 15527, 15541, 15551, 15559,
-    15569, 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643, 15647, 15649,
-    15661, 15667, 15671, 15679, 15683, 15727, 15731, 15733, 15737, 15739, 15749,
-    15761, 15767, 15773, 15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
-    15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919, 15923, 15937, 15959,
-    15971, 15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069,
-    16073, 16087, 16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, 16187,
-    16189, 16193, 16217, 16223, 16229, 16231, 16249, 16253, 16267, 16273, 16301,
-    16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381, 16411, 16417, 16421,
-    16427, 16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, 16519, 16529,
-    16547, 16553, 16561, 16567, 16573, 16603, 16607, 16619, 16631, 16633, 16649,
-    16651, 16657, 16661, 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
-    16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, 16871, 16879, 16883,
-    16889, 16901, 16903, 16921, 16927, 16931, 16937, 16943, 16963, 16979, 16981,
-    16987, 16993, 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053, 17077,
-    17093, 17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191,
-    17203, 17207, 17209, 17231, 17239, 17257, 17291, 17293, 17299, 17317, 17321,
-    17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389, 17393, 17401,
-    17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477, 17483, 17489, 17491,
-    17497, 17509, 17519, 17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
-    17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683, 17707, 17713, 17729,
-    17737, 17747, 17749, 17761, 17783, 17789, 17791, 17807, 17827, 17837, 17839,
-    17851, 17863, 17881, 17891, 17903, 17909, 17911, 17921, 17923, 17929, 17939,
-    17957, 17959, 17971, 17977, 17981, 17987, 17989, 18013, 18041, 18043, 18047,
-    18049, 18059, 18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133,
-    18143, 18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233,
-    18251, 18253, 18257, 18269, 18287, 18289, 18301, 18307, 18311, 18313, 18329,
-    18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427, 18433, 18439,
-    18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517, 18521, 18523, 18539,
-    18541, 18553, 18583, 18587, 18593, 18617, 18637, 18661, 18671, 18679, 18691,
-    18701, 18713, 18719, 18731, 18743, 18749, 18757, 18773, 18787, 18793, 18797,
-    18803, 18839, 18859, 18869, 18899, 18911, 18913, 18917, 18919, 18947, 18959,
-    18973, 18979, 19001, 19009, 19013, 19031, 19037, 19051, 19069, 19073, 19079,
-    19081, 19087, 19121, 19139, 19141, 19157, 19163, 19181, 19183, 19207, 19211,
-    19213, 19219, 19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309,
-    19319, 19333, 19373, 19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423,
-    19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469, 19471, 19477, 19483,
-    19489, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577, 19583,
-    19597, 19603, 19609, 19661, 19681, 19687, 19697, 19699, 19709, 19717, 19727,
-    19739, 19751, 19753, 19759, 19763, 19777, 19793, 19801, 19813, 19819, 19841,
-    19843, 19853, 19861, 19867, 19889, 19891, 19913, 19919, 19927, 19937, 19949,
-    19961, 19963, 19973, 19979, 19991, 19993, 19997,
-};
-
-static bool TestPrimeChecking(BN_CTX *ctx) {
-  bssl::UniquePtr<BIGNUM> p(BN_new());
-  int is_probably_prime_1 = 0, is_probably_prime_2 = 0;
-
-  const int max_prime = kPrimes[OPENSSL_ARRAY_SIZE(kPrimes)-1];
-  size_t next_prime_index = 0;
-
-  for (int i = 0; i <= max_prime; i++) {
-    bool is_prime = false;
-
-    if (i == kPrimes[next_prime_index]) {
-      is_prime = true;
-      next_prime_index++;
-    }
-
-    if (!BN_set_word(p.get(), i) ||
-        !BN_primality_test(&is_probably_prime_1, p.get(), BN_prime_checks, ctx,
-                           false /* do_trial_division */,
-                           nullptr /* callback */) ||
-        is_probably_prime_1 != (is_prime ? 1 : 0) ||
-        !BN_primality_test(&is_probably_prime_2, p.get(), BN_prime_checks, ctx,
-                           true /* do_trial_division */,
-                           nullptr /* callback */) ||
-        is_probably_prime_2 != (is_prime ? 1 : 0)) {
-      fprintf(stderr,
-              "TestPrimeChecking failed for %d (is_prime: %d vs %d without "
-              "trial division vs %d with it)\n",
-              i, static_cast<int>(is_prime), is_probably_prime_1,
-              is_probably_prime_2);
-      return false;
-    }
-  }
-
-  // Negative numbers are not prime.
-  if (!BN_set_word(p.get(), 7)) {
-    return false;
-  }
-  BN_set_negative(p.get(), 1);
-  if (!BN_primality_test(&is_probably_prime_1, p.get(), BN_prime_checks, ctx,
-                         false /* do_trial_division */,
-                         nullptr /* callback */) ||
-      is_probably_prime_1 != 0 ||
-      !BN_primality_test(&is_probably_prime_2, p.get(), BN_prime_checks, ctx,
-                         true /* do_trial_division */,
-                         nullptr /* callback */) ||
-      is_probably_prime_2 != 0) {
-    fprintf(stderr,
-            "TestPrimeChecking failed for -7 (is_prime: 0 vs %d without "
-            "trial division vs %d with it)\n",
-            is_probably_prime_1, is_probably_prime_2);
-    return false;
  }

  return true;
@@ -2080,10 +1485,8 @@ int main(int argc, char *argv[]) {
      !TestDec2BN(ctx.get()) ||
      !TestHex2BN(ctx.get()) ||
      !TestASC2BN(ctx.get()) ||
-      !TestLittleEndian() ||
      !TestMPI() ||
      !TestRand() ||
-      !TestRandRange() ||
      !TestASN1() ||
      !TestNegativeZero(ctx.get()) ||
      !TestBadModulus(ctx.get()) ||
@@ -2091,10 +1494,7 @@ int main(int argc, char *argv[]) {
      !TestSmallPrime(ctx.get()) ||
      !TestCmpWord() ||
      !TestBN2Dec() ||
-      !TestBNSetGetU64() ||
-      !TestBNPow2(ctx.get()) ||
-      !TestPrimeChecking(ctx.get())) {
-    ERR_print_errors_fp(stderr);
+      !TestBNSetU64()) {
    return 1;
  }

@@ -9888,16 +9888,6 @@ B = 7878787878787878787878787878787878787878787878787878787878787878787878787878
 M = d78af684e71db0c39cff4e64fb9db567132cb9c50cc98009feb820b26f2ded9b91b9b5e2b83ae0ae4eb4e0523ca726bfbe969b89fd754f674ce99118c3f2d1c5d81fdc7c54e02b60262b241d53c040e99e45826eca37a804668e690e1afc1ca42c9a15d84d4954425f0b7642fc0bd9d7b24e2618d2dcc9b729d944badacfddaf


-# ModSquare tests.
-#
-# These test vectors satisfy A * A = ModSquare (mod M) and 0 <= ModSquare < M.
-
-# Regression test for CVE-2017-3732.
-ModSquare = fffffffdfffffd01000009000002f6fffdf403000312000402f3fff5f602fe080a0005fdfafffa00010001ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00000002000002fefffff7fffffd07000109fdfffef3fffdfd06000405ff00fdfbfffe00010001
-A = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00000000000000ffffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffff00000000
-M = ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff00000000000000ffffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffff
-
-
 # ModExp tests.
 #
 # These test vectors satisfy A ^ E = ModExp (mod M) and 0 <= ModExp < M.
@@ -10397,89 +10387,6 @@ E = d7e6df5d755284929b986cd9b61c9c2c8843f24c711fbdbae1a468edcae15940094372557072
 M = e4e784aa1fa88625a43ba0185a153a929663920be7fe674a4d33c943d3b898cff051482e7050a070cede53be5e89f31515772c7aea637576f99f82708f89d9e244f6ad3a24a02cbe5c0ff7bcf2dad5491f53db7c3f2698a7c41b44f086652f17bb05fe4c5c0a92433c34086b49d7e1825b28bab6c5a9bd0bc95b53d659afa0d7


-# RSAZ 512-bit.
-#
-# These are regression tests for code which historically reached the RSAZ-512
-# code. That has since been removed, but the test vectors remain. Note that the
-# lengths of the inputs, especially the *bit* length of |M|, matter a lot.
-
-# Control: No relationship between A and M except that A < M and they're the same number of limbs.
-ModExp = 7f34c1cd63377bc3abf2bb5b2d1bf5f06454e1e8040fe19a72245ce9731cbee1bf9e84532300776c8021ed4f3a8de508d85b4cf320bd82065a013754857b50c4
-A = 8e4e67da6ff890643d0599387955996ef6f0c2045eb9944576ddb965ca64cdb6247727ce128ef178d4a84e5a56d2e67eb0fe389ecbf691f9244ae80f4c11b364
-E =  be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-# Same as above except A is negative.
-ModExp = 71fa6a4c8ae75368eda8cc6282c26afa69e2af12a97fb9444f16b7dd6c99e0a5d6034cab4248cae4357346b211039f4a2bc4c5a20a297372094162417af703cd
-A = -8e4e67da6ff890643d0599387955996ef6f0c2045eb9944576ddb965ca64cdb6247727ce128ef178d4a84e5a56d2e67eb0fe389ecbf691f9244ae80f4c11b364
-E =   be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M =  f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-# A == M - 1 == -1 (mod M) and the exponent is odd so A ^ E (mod M) == A.
-ModExp = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725490
-A = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725490
-E =  be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-# Same inputs as above except A is negative. Note that A mod M with a "correct top" isn't the right length for RSAZ.
-ModExp = 1
-A = -f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725490
-E =   be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M =  f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-# A == M, so A == 0 (mod M) so A ^ E (mod M) == 0. Note that A mod M with a "correct top" isn't the right length for RSAZ.
-ModExp = 0
-A = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-E =  be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M = f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-# A is negative, and A (mod M) is the right length for RSAZ.
-ModExp = 8d76eb0f8c7bc3160cc8bb0e0c3590fbed26c5932f5f525b48045c0bd46dda287ba5483f97c851fb7c12c2e858ee7a4a4d1af745cbfb3eb311fa54bea12cde25
-A = -80000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
-E =   be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1
-M =  f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491
-
-
-# RSAZ 1024-bit.
-# Note that the lengths of the inputs, especially the *bit* length of |M|, matter a lot.
-
-# Control: No relationship between A and M except that A < M and they're the same number of limbs.
-ModExp = 8984f8c16044f9c0ad7bd72347af90f58e6e003acda92b76e3c7c4a56ea8e918409d8e9b34884d4c89d0b17cb40fe898f2627c084a0f1698e46beccbf6f48eecc281e11ea9e5135adba460ddae157f2c655b5f589ce29b254d43a960a71cede8a08dbb86be4dac22458da232fb1ec2470856827302ed772c9ddafa408c931aa7
-A = 21158da5fe20356825e72b3f5384ec57720d22f727b27ce2f945c8ee311db781add73bf8fae96b775c909bd22fca75c44c2b0584284a5bb1c07f8eefcd6b0a44047a02b185df34f897f11d4fb9a86c9eb841b4cb8d0383441fdc5af3ef385b5e8380f605d73ed41bb42eb2c2a5704d6034b3ad058dafffce83dbbfb6295daaf8
-E = ecdebd112b3b5788669449dcddbd479a203ee9ab72a9bb9c406b97623513bf0ab9a22f1f23634d269e16bfd6d3b64202b71fc355057411967b6ac70f8d9cef0a4e06819a9a18cc06bbe438243fa9759303d98be8a65dc1cb13595ee9b99f138554425d50f6fbc025d8ffa3eaea828d6f3b82a3584146bafde34da257995f0575
-M = ff3a3e023db3bba929ca4ededbace13d0d1264387b5ef62734e177eaf47a78af56b58aacc8ac5d46f5b066bafb95d93d4442bb948653613eec76837b4ffb7991cb080b6c8b403fb09bc817d026e283ee47ab2fc9af274b12f626eda2fe02004a8e27b9ed7d3b614e8955c7e7c2c0700edd079455237c4475fbd41857e206e4b7
-
-# Same as above except A is negative.
-ModExp = 75b54540dd6ec1e87c4e77bb93fd50477ea463fdadb5cab05119b34585d18f971617fc1194240ffa6bdfb53e4785f0a451e03f8c3c444aa6080a96af5906eaa508862a4de15b2c55c023b6f278cd04c1e24fd0711244afeda8e3444256e51261ed99fe66beedb52c43c825b4c7a1adc7d4b111e2208ecd495df91e175573ca10
-A = -21158da5fe20356825e72b3f5384ec57720d22f727b27ce2f945c8ee311db781add73bf8fae96b775c909bd22fca75c44c2b0584284a5bb1c07f8eefcd6b0a44047a02b185df34f897f11d4fb9a86c9eb841b4cb8d0383441fdc5af3ef385b5e8380f605d73ed41bb42eb2c2a5704d6034b3ad058dafffce83dbbfb6295daaf8
-E = ecdebd112b3b5788669449dcddbd479a203ee9ab72a9bb9c406b97623513bf0ab9a22f1f23634d269e16bfd6d3b64202b71fc355057411967b6ac70f8d9cef0a4e06819a9a18cc06bbe438243fa9759303d98be8a65dc1cb13595ee9b99f138554425d50f6fbc025d8ffa3eaea828d6f3b82a3584146bafde34da257995f0575
-M = ff3a3e023db3bba929ca4ededbace13d0d1264387b5ef62734e177eaf47a78af56b58aacc8ac5d46f5b066bafb95d93d4442bb948653613eec76837b4ffb7991cb080b6c8b403fb09bc817d026e283ee47ab2fc9af274b12f626eda2fe02004a8e27b9ed7d3b614e8955c7e7c2c0700edd079455237c4475fbd41857e206e4b7
-
-# A == M - 1 == -1 (mod M) and the exponent is odd so A ^ E (mod M) == A.
-ModExp = b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d964
-A =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d964
-E = 61803d4973ae68cfb2ba6770dbed70d36760fa42c01a16d1482eacf0d01adf7a917bc86ece58a73b920295c1291b90f49167ef856ecad149330e1fd49ec71392fb62d47270b53e6d4f3c8f044b80a5736753364896932abc6d872c4c5e135d1edb200597a93ceb262ff6c99079177cd10808b9ed20c8cd7352d80ac7f6963103
-M =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
-
-# Same inputs as above except A is negative. Note that A mod M with a "correct top" isn't the right length for RSAZ.
-ModExp = 1
-A =  -b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d964
-E = 61803d4973ae68cfb2ba6770dbed70d36760fa42c01a16d1482eacf0d01adf7a917bc86ece58a73b920295c1291b90f49167ef856ecad149330e1fd49ec71392fb62d47270b53e6d4f3c8f044b80a5736753364896932abc6d872c4c5e135d1edb200597a93ceb262ff6c99079177cd10808b9ed20c8cd7352d80ac7f6963103
-M =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
-
-# A == M, so A == 0 (mod M) so A ^ E (mod M) == 0. Note that A mod M with a "correct top" isn't the right length for RSAZ.
-ModExp = 0
-A =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
-E = 61803d4973ae68cfb2ba6770dbed70d36760fa42c01a16d1482eacf0d01adf7a917bc86ece58a73b920295c1291b90f49167ef856ecad149330e1fd49ec71392fb62d47270b53e6d4f3c8f044b80a5736753364896932abc6d872c4c5e135d1edb200597a93ceb262ff6c99079177cd10808b9ed20c8cd7352d80ac7f6963103
-M =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
-
-# A is negative, and A (mod M) is the right length for RSAZ.
-ModExp = 9cf810b9e89d5cbc4b79ae64e123ea06d92965e2bab077df97a1b906dc2e1ddcf96a9c4ed14e2cd96309b829ea9cc2a74a7d4b43c5f34d792a7c583201427754b8f78b783608070a84b61f18913e3ced7f7f530972de7764667c54e29d756eea38a93cd1703c676a4587231b0ebfeadddf908e2877a7a84b5bfc370ecf0d158d
-A =  -8000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
-E = 61803d4973ae68cfb2ba6770dbed70d36760fa42c01a16d1482eacf0d01adf7a917bc86ece58a73b920295c1291b90f49167ef856ecad149330e1fd49ec71392fb62d47270b53e6d4f3c8f044b80a5736753364896932abc6d872c4c5e135d1edb200597a93ceb262ff6c99079177cd10808b9ed20c8cd7352d80ac7f6963103
-M =  b5d257b2c50b050d42f0852eff5cfa2571157c500cd0bd9aa0b2ccdd89c531c9609d520eb81d928fb52b06da25dc713561aa0bd365ee56db9e62ac6787a85936990f44438363560f7af9e0c16f378e5b83f658252390d849401817624da97ec613a1b855fd901847352f434a777e4e32af0cb4033c7547fb6437d067fcd3d965
-
-
 # Exp tests.
 #
 # These test vectors satisfy A ^ E = Exp.
@@ -10830,28 +10737,6 @@ ModSqrt = a1d52989f12f204d3d2167d9b1e6c8a6174c0c786a979a5952383b7b8bd186
 A = 2eee37cf06228a387788188e650bc6d8a2ff402931443f69156a29155eca07dcb45f3aac238d92943c0c25c896098716baa433f25bd696a142f5a69d5d937e81
 P = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951f

-
-# NotModSquare tests.
-#
-# These test vectors are such that NotModSquare is not a square modulo P.
-
-NotModSquare = 03
-P = 07
-
-NotModSquare = 05
-P = 07
-
-NotModSquare = 06
-P = 07
-
-NotModSquare = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951e
-P = 9df9d6cc20b8540411af4e5357ef2b0353cb1f2ab5ffc3e246b41c32f71e951f
-
-
-# ModInv tests.
-#
-# These test vectors satisfy ModInv * A = 1 (mod M) and 0 <= ModInv < M.
-
 ModInv = 00
 A = 00
 M = 01
@@ -212,20 +212,6 @@ int BN_is_odd(const BIGNUM *bn) {
  return bn->top > 0 && (bn->d[0] & 1) == 1;
 }

-int BN_is_pow2(const BIGNUM *bn) {
-  if (bn->top == 0 || bn->neg) {
-    return 0;
-  }
-
-  for (int i = 0; i < bn->top - 1; i++) {
-    if (bn->d[i] != 0) {
-      return 0;
-    }
-  }
-
-  return 0 == (bn->d[bn->top-1] & (bn->d[bn->top-1] - 1));
-}
-
 int BN_equal_consttime(const BIGNUM *a, const BIGNUM *b) {
  if (a->top != b->top) {
    return 0;
@@ -60,14 +60,138 @@
 #include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
+#include <string.h>

 #include <openssl/bio.h>
 #include <openssl/bytestring.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../fipsmodule/bn/internal.h"
+#include "internal.h"

+BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
+  size_t num_words;
+  unsigned m;
+  BN_ULONG word = 0;
+  BIGNUM *bn = NULL;
+  /* Parses |len| big-endian bytes into |ret|, or into a new BIGNUM if NULL. */
+  if (ret == NULL) {
+    ret = bn = BN_new();  /* |bn| records that the result was allocated here */
+  }
+
+  if (ret == NULL) {
+    return NULL;
+  }
+
+  if (len == 0) {
+    ret->top = 0;  /* NOTE(review): |ret->neg| is not reset on this path */
+    return ret;
+  }
+
+  num_words = ((len - 1) / BN_BYTES) + 1;  /* |len| in words, rounded up */
+  m = (len - 1) % BN_BYTES;  /* one less than the byte count of the top word */
+  if (bn_wexpand(ret, num_words) == NULL) {
+    if (bn) {
+      BN_free(bn);  /* only frees a BIGNUM allocated above */
+    }
+    return NULL;
+  }
+
+  /* |bn_wexpand| must check bounds on |num_words| to write it into
+   * |ret->dmax|. */
+  assert(num_words <= INT_MAX);
+  ret->top = (int)num_words;
+  ret->neg = 0;
+
+  while (len--) {
+    word = (word << 8) | *(in++);  /* |in| is consumed first byte first */
+    if (m-- == 0) {
+      ret->d[--num_words] = word;  /* words are stored from the top down */
+      word = 0;
+      m = BN_BYTES - 1;
+    }
+  }
+
+  /* |in| may start with zero bytes (e.g. a pad to keep the top bit clear),
+   * leaving zero words at the top; |bn_correct_top| is called to fix |top|. */
+  bn_correct_top(ret);
+  return ret;
+}
+
+size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) {
+  size_t n, i;
+  BN_ULONG l;
+  /* Writes BN_num_bytes(in) bytes to |out|; no length check is made here. */
+  n = i = BN_num_bytes(in);  /* |n| is also the return value */
+  while (i--) {  /* |i| counts down, so higher-order bytes come first */
+    l = in->d[i / BN_BYTES];
+    *(out++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
+  }
+  return n;
+}
+
+/* constant_time_select_ulong returns |x| if |v| is 1 and |y| if |v| is 0. Its
+ * behavior is undefined if |v| takes any other value. */
+static BN_ULONG constant_time_select_ulong(int v, BN_ULONG x, BN_ULONG y) {
+  BN_ULONG mask = v;
+  mask--;  /* 1 becomes 0 (keeps |x|); 0 becomes all ones (keeps |y|) */
+
+  return (~mask & x) | (mask & y);
+}
+
+/* constant_time_le_size_t returns 1 if |x| <= |y| and 0 otherwise: the result
+ * is the top bit of |x - y - 1|. |x| and |y| must not have their MSBs set. */
+static int constant_time_le_size_t(size_t x, size_t y) {
+  return ((x - y - 1) >> (sizeof(size_t) * 8 - 1)) & 1;
+}
+
+/* read_word_padded returns the |i|'th word of |in| if |i| is below |in->top|
+ * and 0 otherwise. It does so without branching on the size of |in|, but the
+ * memory access pattern still depends on it: if |i| is at or beyond
+ * |in->dmax|, the word at index |in->dmax - 1| is read instead. |in| must not
+ * be zero. */
+static BN_ULONG read_word_padded(const BIGNUM *in, size_t i) {
+  /* Read |in->d[i]| if |i| < |in->dmax|; else read |in->d[in->dmax - 1]|. */
+  BN_ULONG l = in->d[constant_time_select_ulong(
+      constant_time_le_size_t(in->dmax, i), in->dmax - 1, i)];
+
+  /* Return zero if |i| is at or beyond |in->top|. */
+  return constant_time_select_ulong(constant_time_le_size_t(in->top, i), 0, l);
+}
+
+int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) {
+  /* Special case for |in| = 0. Just branch as the probability is negligible. */
+  if (BN_is_zero(in)) {
+    memset(out, 0, len);
+    return 1;
+  }
+
+  /* Check if the integer is too big. This case can exit early in non-constant
+   * time. */
+  if ((size_t)in->top > (len + (BN_BYTES - 1)) / BN_BYTES) {
+    return 0;
+  }
+  if ((len % BN_BYTES) != 0) {  /* |len| ends partway through a word */
+    BN_ULONG l = read_word_padded(in, len / BN_BYTES);
+    if (l >> (8 * (len % BN_BYTES)) != 0) {  /* needs more than |len| bytes */
+      return 0;
+    }
+  }
+
+  /* Write the bytes out one by one. Serialization is done without branching on
+   * the bits of |in| or on |in->top|, but if the routine would otherwise read
+   * out of bounds, the memory access pattern can't be fixed. However, for an
+   * RSA key of size a multiple of the word size, the probability of BN_BYTES
+   * leading zero octets is low.
+   *
+   * See Falko Strenzke, "Manger's Attack revisited", ICICS 2010. */
+  size_t i = len;  /* |i| counts down so the top byte is written first */
+  while (i--) {
+    BN_ULONG l = read_word_padded(in, i / BN_BYTES);
+    *(out++) = (uint8_t)(l >> (8 * (i % BN_BYTES))) & 0xff;
+  }
+  return 1;
+}

 int BN_bn2cbb_padded(CBB *out, size_t len, const BIGNUM *in) {
  uint8_t *ptr;
@@ -117,7 +241,7 @@ static int decode_hex(BIGNUM *bn, const char *in, int in_len) {
    return 0;
  }
  /* |in_len| is the number of hex digits. */
-  if (!bn_expand(bn, in_len * 4)) {
+  if (bn_expand(bn, in_len * 4) == NULL) {
    return 0;
  }

@@ -380,6 +504,16 @@ int BN_print_fp(FILE *fp, const BIGNUM *a) {
  return ret;
 }

+BN_ULONG BN_get_word(const BIGNUM *bn) {
+  switch (bn->top) {  /* only |top| and |d[0]| are read; |neg| is ignored */
+    case 0:  /* no words in use */
+      return 0;
+    case 1:  /* exactly one word in use: return it unchanged */
+      return bn->d[0];
+    default:  /* any other |top|: return BN_MASK2, not a truncated value */
+      return BN_MASK2;
+  }
+}

 size_t BN_bn2mpi(const BIGNUM *in, uint8_t *out) {
  const size_t bits = BN_num_bits(in);
@@ -398,7 +532,7 @@ size_t BN_bn2mpi(const BIGNUM *in, uint8_t *out) {
    /* If we cannot represent the number then we emit zero as the interface
     * doesn't allow an error to be signalled. */
    if (out) {
-      OPENSSL_memset(out, 0, 4);
+      memset(out, 0, 4);
    }
    return 4;
  }
@@ -59,8 +59,6 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>

-#include "../../internal.h"
-

 /* How many bignums are in each "pool item"; */
 #define BN_CTX_POOL_SIZE 16
@@ -220,7 +218,7 @@ static int BN_STACK_push(BN_STACK *st, unsigned int idx) {
      return 0;
    }
    if (st->depth) {
-      OPENSSL_memcpy(newitems, st->indexes, st->depth * sizeof(unsigned int));
+      memcpy(newitems, st->indexes, st->depth * sizeof(unsigned int));
    }
    OPENSSL_free(st->indexes);
    st->indexes = newitems;
@@ -242,8 +240,13 @@ static void BN_POOL_init(BN_POOL *p) {

 static void BN_POOL_finish(BN_POOL *p) {
  while (p->head) {
-    for (size_t i = 0; i < BN_CTX_POOL_SIZE; i++) {
-      BN_clear_free(&p->head->vals[i]);
+    unsigned int loop = 0;
+    BIGNUM *bn = p->head->vals;
+    while (loop++ < BN_CTX_POOL_SIZE) {
+      if (bn->d) {
+        BN_clear_free(bn);
+      }
+      bn++;
    }

    p->current = p->head->next;
@@ -254,14 +257,17 @@ static void BN_POOL_finish(BN_POOL *p) {

 static BIGNUM *BN_POOL_get(BN_POOL *p) {
  if (p->used == p->size) {
+    BIGNUM *bn;
+    unsigned int loop = 0;
    BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
    if (!item) {
      return NULL;
    }

    /* Initialise the structure */
-    for (size_t i = 0; i < BN_CTX_POOL_SIZE; i++) {
-      BN_init(&item->vals[i]);
+    bn = item->vals;
+    while (loop++ < BN_CTX_POOL_SIZE) {
+      BN_init(bn++);
    }

    item->prev = p->tail;
@@ -58,7 +58,6 @@

 #include <assert.h>
 #include <limits.h>
-
 #include <openssl/err.h>

 #include "internal.h"
@@ -183,12 +182,7 @@ static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
 * Thus:
 *     dv->neg == num->neg ^ divisor->neg  (unless the result is zero)
 *     rm->neg == num->neg                 (unless the remainder is zero)
- * If 'dv' or 'rm' is NULL, the respective value is not returned.
- *
- * This was specifically designed to contain fewer branches that may leak
- * sensitive information; see "New Branch Prediction Vulnerabilities in OpenSSL
- * and Necessary Software Countermeasures" by Onur Acıçmez, Shay Gueron, and
- * Jean-Pierre Seifert. */
+ * If 'dv' or 'rm' is NULL, the respective value is not returned. */
 int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
           BN_CTX *ctx) {
  int norm_shift, i, loop;
@@ -196,6 +190,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
  BN_ULONG *resp, *wnump;
  BN_ULONG d0, d1;
  int num_n, div_n;
+  int no_branch = 0;

  /* Invalid zero-padding would have particularly bad consequences
   * so don't just rely on bn_check_top() here */
@@ -205,11 +200,28 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
    return 0;
  }

+  if ((num->flags & BN_FLG_CONSTTIME) != 0 ||
+      (divisor->flags & BN_FLG_CONSTTIME) != 0) {
+    no_branch = 1;
+  }
+
  if (BN_is_zero(divisor)) {
    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
    return 0;
  }

+  if (!no_branch && BN_ucmp(num, divisor) < 0) {
+    if (rm != NULL) {
+      if (BN_copy(rm, num) == NULL) {
+        return 0;
+      }
+    }
+    if (dv != NULL) {
+      BN_zero(dv);
+    }
+    return 1;
+  }
+
  BN_CTX_start(ctx);
  tmp = BN_CTX_get(ctx);
  snum = BN_CTX_get(ctx);
@@ -235,23 +247,26 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
  }
  snum->neg = 0;

-  /* Since we don't want to have special-case logic for the case where snum is
-   * larger than sdiv, we pad snum with enough zeroes without changing its
-   * value. */
-  if (snum->top <= sdiv->top + 1) {
-    if (!bn_wexpand(snum, sdiv->top + 2)) {
-      goto err;
+  if (no_branch) {
+    /* Since we don't know whether snum is larger than sdiv,
+     * we pad snum with enough zeroes without changing its
+     * value.
+     */
+    if (snum->top <= sdiv->top + 1) {
+      if (bn_wexpand(snum, sdiv->top + 2) == NULL) {
+        goto err;
+      }
+      for (i = snum->top; i < sdiv->top + 2; i++) {
+        snum->d[i] = 0;
+      }
+      snum->top = sdiv->top + 2;
+    } else {
+      if (bn_wexpand(snum, snum->top + 1) == NULL) {
+        goto err;
+      }
+      snum->d[snum->top] = 0;
+      snum->top++;
    }
-    for (i = snum->top; i < sdiv->top + 2; i++) {
-      snum->d[i] = 0;
-    }
-    snum->top = sdiv->top + 2;
-  } else {
-    if (!bn_wexpand(snum, snum->top + 1)) {
-      goto err;
-    }
-    snum->d[snum->top] = 0;
-    snum->top++;
  }

  div_n = sdiv->top;
@@ -279,7 +294,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
  if (!bn_wexpand(res, (loop + 1))) {
    goto err;
  }
-  res->top = loop - 1;
+  res->top = loop - no_branch;
  resp = &(res->d[loop - 1]);

  /* space for temp */
@@ -287,6 +302,15 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
    goto err;
  }

+  if (!no_branch) {
+    if (BN_ucmp(&wnum, sdiv) >= 0) {
+      bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
+      *resp = 1;
+    } else {
+      res->top--;
+    }
+  }
+
  /* if res->top == 0 then clear the neg value otherwise decrease
   * the resp pointer */
  if (res->top == 0) {
@@ -377,7 +401,9 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
      rm->neg = neg;
    }
  }
-  bn_correct_top(res);
+  if (no_branch) {
+    bn_correct_top(res);
+  }
  BN_CTX_end(ctx);
  return 1;

@@ -602,10 +628,6 @@ BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) {
    a->top--;
  }

-  if (a->top == 0) {
-    a->neg = 0;
-  }
-
  ret >>= j;
  return ret;
 }
@@ -647,82 +669,3 @@ BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) {
  }
  return (BN_ULONG)ret;
 }
-
-int BN_mod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
-  if (e == 0 || a->top == 0) {
-    BN_zero(r);
-    return 1;
-  }
-
-  size_t num_words = 1 + ((e - 1) / BN_BITS2);
-
-  /* If |a| definitely has less than |e| bits, just BN_copy. */
-  if ((size_t) a->top < num_words) {
-    return BN_copy(r, a) != NULL;
-  }
-
-  /* Otherwise, first make sure we have enough space in |r|.
-   * Note that this will fail if num_words > INT_MAX. */
-  if (!bn_wexpand(r, num_words)) {
-    return 0;
-  }
-
-  /* Copy the content of |a| into |r|. */
-  OPENSSL_memcpy(r->d, a->d, num_words * sizeof(BN_ULONG));
-
-  /* If |e| isn't word-aligned, we have to mask off some of our bits. */
-  size_t top_word_exponent = e % (sizeof(BN_ULONG) * 8);
-  if (top_word_exponent != 0) {
-    r->d[num_words - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
-  }
-
-  /* Fill in the remaining fields of |r|. */
-  r->neg = a->neg;
-  r->top = (int) num_words;
-  bn_correct_top(r);
-  return 1;
-}
-
-int BN_nnmod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
-  if (!BN_mod_pow2(r, a, e)) {
-    return 0;
-  }
-
-  /* If the returned value was non-negative, we're done. */
-  if (BN_is_zero(r) || !r->neg) {
-    return 1;
-  }
-
-  size_t num_words = 1 + (e - 1) / BN_BITS2;
-
-  /* Expand |r| to the size of our modulus. */
-  if (!bn_wexpand(r, num_words)) {
-    return 0;
-  }
-
-  /* Clear the upper words of |r|. */
-  OPENSSL_memset(&r->d[r->top], 0, (num_words - r->top) * BN_BYTES);
-
-  /* Set parameters of |r|. */
-  r->neg = 0;
-  r->top = (int) num_words;
-
-  /* Now, invert every word. The idea here is that we want to compute 2^e-|x|,
-   * which is actually equivalent to the twos-complement representation of |x|
-   * in |e| bits, which is -x = ~x + 1. */
-  for (int i = 0; i < r->top; i++) {
-    r->d[i] = ~r->d[i];
-  }
-
-  /* If our exponent doesn't span the top word, we have to mask the rest. */
-  size_t top_word_exponent = e % BN_BITS2;
-  if (top_word_exponent != 0) {
-    r->d[r->top - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
-  }
-
-  /* Keep the correct_top invariant for BN_add. */
-  bn_correct_top(r);
-
-  /* Finally, add one, for the reason described above. */
-  return BN_add(r, r, BN_value_one());
-}
@@ -140,6 +140,12 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  int i, bits, ret = 0;
  BIGNUM *v, *rr;

+  if ((p->flags & BN_FLG_CONSTTIME) != 0) {
+    /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
+    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+
  BN_CTX_start(ctx);
  if (r == a || r == p) {
    rr = BN_CTX_get(ctx);
@@ -431,6 +437,12 @@ static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  BIGNUM *val[TABLE_SIZE];
  BN_RECP_CTX recp;

+  if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
+    /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
+    OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+
  bits = BN_num_bits(p);

  if (bits == 0) {
@@ -581,6 +593,10 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  BIGNUM *val[TABLE_SIZE];
  BN_MONT_CTX *new_mont = NULL;

+  if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
+    return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, mont);
+  }
+
  if (!BN_is_odd(m)) {
    OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    return 0;
@@ -651,7 +667,7 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,

  j = m->top; /* borrow j */
  if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
-    if (!bn_wexpand(r, j)) {
+    if (bn_wexpand(r, j) == NULL) {
      goto err;
    }
    /* 2^(top*BN_BITS2) - m */
@@ -760,7 +776,7 @@ static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  const int width = 1 << window;
  volatile BN_ULONG *table = (volatile BN_ULONG *)buf;

-  if (!bn_wexpand(b, top)) {
+  if (bn_wexpand(b, top) == NULL) {
    return 0;
  }

@@ -860,7 +876,6 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  int powerbufLen = 0;
  unsigned char *powerbuf = NULL;
  BIGNUM tmp, am;
-  BIGNUM *new_a = NULL;

  if (!BN_is_odd(m)) {
    OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
@@ -888,22 +903,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    mont = new_mont;
  }

-  if (a->neg || BN_ucmp(a, m) >= 0) {
-    new_a = BN_new();
-    if (new_a == NULL ||
-        !BN_nnmod(new_a, a, m, ctx)) {
-      goto err;
-    }
-    a = new_a;
-  }
-
 #ifdef RSAZ_ENABLED
  /* If the size of the operands allow it, perform the optimized
   * RSAZ exponentiation. For further information see
   * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
      rsaz_avx2_eligible()) {
-    if (!bn_wexpand(rr, 16)) {
+    if (NULL == bn_wexpand(rr, 16)) {
      goto err;
    }
    RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
@@ -912,6 +918,16 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    bn_correct_top(rr);
    ret = 1;
    goto err;
+  } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
+    if (NULL == bn_wexpand(rr, 8)) {
+      goto err;
+    }
+    RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
+    rr->top = 8;
+    rr->neg = 0;
+    bn_correct_top(rr);
+    ret = 1;
+    goto err;
  }
 #endif

@@ -945,7 +961,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  }

  powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
-  OPENSSL_memset(powerbuf, 0, powerbufLen);
+  memset(powerbuf, 0, powerbufLen);

 #ifdef alloca
  if (powerbufLen < 3072) {
@@ -975,9 +991,12 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  }

  /* prepare a^1 in Montgomery domain */
-  assert(!a->neg);
-  assert(BN_ucmp(a, m) < 0);
-  if (!BN_to_montgomery(&am, a, mont, ctx)) {
+  if (a->neg || BN_ucmp(a, m) >= 0) {
+    if (!BN_nnmod(&am, a, m, ctx) ||
+        !BN_to_montgomery(&am, &am, mont, ctx)) {
+      goto err;
+    }
+  } else if (!BN_to_montgomery(&am, a, mont, ctx)) {
    goto err;
  }

@@ -1171,7 +1190,6 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,

 err:
  BN_MONT_CTX_free(new_mont);
-  BN_clear_free(new_a);
  if (powerbuf != NULL) {
    OPENSSL_cleanse(powerbuf, powerbufLen);
    OPENSSL_free(powerbufFree);
@@ -399,6 +399,10 @@ err:

 BIGNUM *BN_mod_inverse(BIGNUM *out, const BIGNUM *a, const BIGNUM *n,
                       BN_CTX *ctx) {
+  int no_inverse;
+
+  BIGNUM *a_reduced = NULL;
+
  BIGNUM *new_out = NULL;
  if (out == NULL) {
    new_out = BN_new();
@@ -410,20 +414,25 @@ BIGNUM *BN_mod_inverse(BIGNUM *out, const BIGNUM *a, const BIGNUM *n,
  }

  int ok = 0;
-  BIGNUM *a_reduced = NULL;
+
+  int no_branch =
+      (a->flags & BN_FLG_CONSTTIME) != 0 || (n->flags & BN_FLG_CONSTTIME) != 0;
+
  if (a->neg || BN_ucmp(a, n) >= 0) {
    a_reduced = BN_dup(a);
    if (a_reduced == NULL) {
      goto err;
    }
+    if (no_branch) {
+      BN_set_flags(a_reduced, BN_FLG_CONSTTIME);
+    }
    if (!BN_nnmod(a_reduced, a_reduced, n, ctx)) {
      goto err;
    }
    a = a_reduced;
  }

-  int no_inverse;
-  if (!BN_is_odd(n)) {
+  if (no_branch || !BN_is_odd(n)) {
    if (!bn_mod_inverse_general(out, &no_inverse, a, n, ctx)) {
      goto err;
    }
@@ -472,13 +481,15 @@ err:

 /* bn_mod_inverse_general is the general inversion algorithm that works for
 * both even and odd |n|. It was specifically designed to contain fewer
- * branches that may leak sensitive information; see "New Branch Prediction
+ * branches that may leak sensitive information. See "New Branch Prediction
 * Vulnerabilities in OpenSSL and Necessary Software Countermeasures" by
 * Onur Acıçmez, Shay Gueron, and Jean-Pierre Seifert. */
 static int bn_mod_inverse_general(BIGNUM *out, int *out_no_inverse,
                                  const BIGNUM *a, const BIGNUM *n,
                                  BN_CTX *ctx) {
  BIGNUM *A, *B, *X, *Y, *M, *D, *T;
+  BIGNUM local_A;
+  BIGNUM *pA;
  int ret = 0;
  int sign;

@@ -521,8 +532,14 @@ static int bn_mod_inverse_general(BIGNUM *out, int *out_no_inverse,
     *      sign*Y*a  ==  A   (mod |n|)
     */

+    /* Set BN_FLG_CONSTTIME on |pA| (derived from |A| via BN_with_flags) so
+     * that BN_div sees the flag on its numerator and takes its no_branch path.
+     */
+    pA = &local_A;
+    BN_with_flags(pA, A, BN_FLG_CONSTTIME);
+
    /* (D, M) := (A/B, A%B) ... */
-    if (!BN_div(D, M, A, B, ctx)) {
+    if (!BN_div(D, M, pA, B, ctx)) {
      goto err;
    }

@@ -609,27 +626,3 @@ err:
  BN_CTX_end(ctx);
  return ret;
 }
-
-int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
-                         BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
-  BN_CTX_start(ctx);
-  BIGNUM *p_minus_2 = BN_CTX_get(ctx);
-  int ok = p_minus_2 != NULL &&
-           BN_copy(p_minus_2, p) &&
-           BN_sub_word(p_minus_2, 2) &&
-           BN_mod_exp_mont(out, a, p_minus_2, p, ctx, mont_p);
-  BN_CTX_end(ctx);
-  return ok;
-}
-
-int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
-                                BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
-  BN_CTX_start(ctx);
-  BIGNUM *p_minus_2 = BN_CTX_get(ctx);
-  int ok = p_minus_2 != NULL &&
-           BN_copy(p_minus_2, p) &&
-           BN_sub_word(p_minus_2, 2) &&
-           BN_mod_exp_mont_consttime(out, a, p_minus_2, p, ctx, mont_p);
-  BN_CTX_end(ctx);
-  return ok;
-}
@@ -704,12 +704,4 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  r[7] = c2;
 }

-#undef mul_add
-#undef mul
-#undef sqr
-#undef mul_add_c
-#undef mul_add_c2
-#undef sqr_add_c
-#undef sqr_add_c2
-
 #endif
@@ -132,12 +132,16 @@ OPENSSL_MSVC_PRAGMA(warning(pop))
 #pragma intrinsic(__umulh, _umul128)
 #endif

-#include "../../internal.h"
+#include "../internal.h"

 #if defined(__cplusplus)
 extern "C" {
 #endif

+/* bn_expand acts the same as |bn_wexpand|, but takes a number of bits rather
+ * than a number of words. */
+BIGNUM *bn_expand(BIGNUM *bn, size_t bits);
+
 #if defined(OPENSSL_64_BIT)

 #if !defined(_MSC_VER)
@@ -195,19 +199,6 @@ extern "C" {
 #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
 #endif

-/* bn_correct_top decrements |bn->top| until |bn->d[top-1]| is non-zero or
- * until |top| is zero. If |bn| is zero, |bn->neg| is set to zero. */
-void bn_correct_top(BIGNUM *bn);
-
-/* bn_wexpand ensures that |bn| has at least |words| works of space without
- * altering its value. It returns one on success or zero on allocation
- * failure. */
-int bn_wexpand(BIGNUM *bn, size_t words);
-
-/* bn_expand acts the same as |bn_wexpand|, but takes a number of bits rather
- * than a number of words. */
-int bn_expand(BIGNUM *bn, size_t bits);
-
 /* bn_set_words sets |bn| to the value encoded in the |num| words in |words|,
 * least significant word first. */
 int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
@@ -237,7 +228,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num);

 uint64_t bn_mont_n0(const BIGNUM *n);
-int bn_mod_exp_base_2_vartime(BIGNUM *r, unsigned p, const BIGNUM *n);

 #if defined(OPENSSL_X86_64) && defined(_MSC_VER)
 #define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
@@ -247,22 +237,6 @@ int bn_mod_exp_base_2_vartime(BIGNUM *r, unsigned p, const BIGNUM *n);
 #error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
 #endif

-/* bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
- * computed with Fermat's Little Theorem. It returns one on success and zero on
- * error. If |mont_p| is NULL, one will be computed temporarily. */
-int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
-                         BN_CTX *ctx, const BN_MONT_CTX *mont_p);
-
-/* bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
- * |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
- * protecting the exponent. */
-int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
-                                BN_CTX *ctx, const BN_MONT_CTX *mont_p);
-
-/* bn_jacobi returns the Jacobi symbol of |a| and |b| (which is -1, 0 or 1), or
- * -2 on error. */
-int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
-

 #if defined(__cplusplus)
 }  /* extern C */
@@ -52,15 +52,17 @@

 #include <openssl/bn.h>

-#include <openssl/err.h>
-
 #include "internal.h"


 /* least significant word */
 #define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])

-int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
+/* Returns -2 for errors because both -1 and 0 are valid results. */
+int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
+  int i;
+  int ret = -2;
+  BIGNUM *A, *B, *tmp;
  /* In 'tab', only odd-indexed entries are relevant:
   * For any odd BIGNUM n,
   *     tab[BN_lsw(n) & 7]
@@ -68,22 +70,9 @@ int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
   * Note that the sign of n does not matter. */
  static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};

-  /* The Jacobi symbol is only defined for odd modulus. */
-  if (!BN_is_odd(b)) {
-    OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
-    return -2;
-  }
-
-  /* Require b be positive. */
-  if (BN_is_negative(b)) {
-    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
-    return -2;
-  }
-
-  int ret = -2;
  BN_CTX_start(ctx);
-  BIGNUM *A = BN_CTX_get(ctx);
-  BIGNUM *B = BN_CTX_get(ctx);
+  A = BN_CTX_get(ctx);
+  B = BN_CTX_get(ctx);
  if (B == NULL) {
    goto end;
  }
@@ -93,11 +82,52 @@ int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
    goto end;
  }

-  /* Adapted from logic to compute the Kronecker symbol, originally implemented
-   * according to Henri Cohen, "A Course in Computational Algebraic Number
-   * Theory" (algorithm 1.4.10). */
+  /* Kronecker symbol, implemented according to Henri Cohen,
+   * "A Course in Computational Algebraic Number Theory"
+   * (algorithm 1.4.10). */

-  ret = 1;
+  /* Cohen's step 1: */
+
+  if (BN_is_zero(B)) {
+    ret = BN_abs_is_word(A, 1);
+    goto end;
+  }
+
+  /* Cohen's step 2: */
+
+  if (!BN_is_odd(A) && !BN_is_odd(B)) {
+    ret = 0;
+    goto end;
+  }
+
+  /* now B is non-zero */
+  i = 0;
+  while (!BN_is_bit_set(B, i)) {
+    i++;
+  }
+  if (!BN_rshift(B, B, i)) {
+    goto end;
+  }
+  if (i & 1) {
+    /* i is odd */
+    /* (thus B was even, thus A must be odd!)  */
+
+    /* set 'ret' to $(-1)^{(A^2-1)/8}$ */
+    ret = tab[BN_lsw(A) & 7];
+  } else {
+    /* i is even */
+    ret = 1;
+  }
+
+  if (B->neg) {
+    B->neg = 0;
+    if (A->neg) {
+      ret = -ret;
+    }
+  }
+
+  /* now B is positive and odd, so what remains to be done is to compute the
+   * Jacobi symbol (A/B) and multiply it by 'ret' */

  while (1) {
    /* Cohen's step 3: */
@@ -109,12 +139,11 @@ int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
    }

    /* now A is non-zero */
-    int i = 0;
+    i = 0;
    while (!BN_is_bit_set(A, i)) {
      i++;
    }
    if (!BN_rshift(A, A, i)) {
-      ret = -2;
      goto end;
    }
    if (i & 1) {
@@ -134,7 +163,7 @@ int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
      ret = -2;
      goto end;
    }
-    BIGNUM *tmp = A;
+    tmp = A;
    A = B;
    B = tmp;
    tmp->neg = 0;
@@ -108,7 +108,6 @@

 #include <openssl/bn.h>

-#include <assert.h>
 #include <string.h>

 #include <openssl/err.h>
@@ -116,7 +115,7 @@
 #include <openssl/thread.h>

 #include "internal.h"
-#include "../../internal.h"
+#include "../internal.h"


 #if !defined(OPENSSL_NO_ASM) &&                         \
@@ -125,11 +124,6 @@
 #define OPENSSL_BN_ASM_MONT
 #endif

-static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
-                                          const BIGNUM *b,
-                                          const BN_MONT_CTX *mont, BN_CTX *ctx);
-
-
 BN_MONT_CTX *BN_MONT_CTX_new(void) {
  BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX));

@@ -137,7 +131,7 @@ BN_MONT_CTX *BN_MONT_CTX_new(void) {
    return NULL;
  }

-  OPENSSL_memset(ret, 0, sizeof(BN_MONT_CTX));
+  memset(ret, 0, sizeof(BN_MONT_CTX));
  BN_init(&ret->RR);
  BN_init(&ret->N);

@@ -192,6 +186,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
    OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
    return 0;
  }
+  if (BN_get_flags(mod, BN_FLG_CONSTTIME)) {
+    BN_set_flags(&mont->N, BN_FLG_CONSTTIME);
+  }

  /* Find n0 such that n0 * N == -1 (mod r).
   *
@@ -210,13 +207,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
  /* Save RR = R**2 (mod N). R is the smallest power of 2**BN_BITS such that R
   * > mod. Even though the assembly on some 32-bit platforms works with 64-bit
   * values, using |BN_BITS2| here, rather than |BN_MONT_CTX_N0_LIMBS *
-   * BN_BITS2|, is correct because R**2 will still be a multiple of the latter
-   * as |BN_MONT_CTX_N0_LIMBS| is either one or two.
-   *
-   * XXX: This is not constant time with respect to |mont->N|, but it should
-   * be. */
+   * BN_BITS2|, is correct because R**2 will still be a multiple of the
+   * latter as |BN_MONT_CTX_N0_LIMBS| is either one or two. */
  unsigned lgBigR = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
-  if (!bn_mod_exp_base_2_vartime(&mont->RR, lgBigR * 2, &mont->N)) {
+  BN_zero(&mont->RR);
+  if (!BN_set_bit(&mont->RR, lgBigR * 2) ||
+      !BN_mod(&mont->RR, &mont->RR, &mont->N, ctx)) {
    return 0;
  }

@@ -273,7 +269,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
  }

  max = (2 * nl); /* carry is stored separately */
-  if (!bn_wexpand(r, max)) {
+  if (bn_wexpand(r, max) == NULL) {
    return 0;
  }

@@ -283,7 +279,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,

  /* clear the top words of T */
  if (max > r->top) {
-    OPENSSL_memset(&rp[r->top], 0, (max - r->top) * sizeof(BN_ULONG));
+    memset(&rp[r->top], 0, (max - r->top) * sizeof(BN_ULONG));
  }

  r->top = max;
@@ -297,7 +293,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
    rp[nl] = v;
  }

-  if (!bn_wexpand(ret, nl)) {
+  if (bn_wexpand(ret, nl) == NULL) {
    return 0;
  }
  ret->top = nl;
@@ -366,43 +362,27 @@ err:

 int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          const BN_MONT_CTX *mont, BN_CTX *ctx) {
-#if !defined(OPENSSL_BN_ASM_MONT)
-  return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
-#else
-  int num = mont->N.top;
-
-  /* |bn_mul_mont| requires at least 128 bits of limbs, at least for x86. */
-  if (num < (128 / BN_BITS2) ||
-      a->top != num ||
-      b->top != num) {
-    return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
-  }
-
-  if (!bn_wexpand(r, num)) {
-    return 0;
-  }
-  if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
-    /* The check above ensures this won't happen. */
-    assert(0);
-    OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
-    return 0;
-  }
-  r->neg = a->neg ^ b->neg;
-  r->top = num;
-  bn_correct_top(r);
-
-  return 1;
-#endif
-}
-
-static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
-                                          const BIGNUM *b,
-                                          const BN_MONT_CTX *mont,
-                                          BN_CTX *ctx) {
+  BIGNUM *tmp;
  int ret = 0;

+#if defined(OPENSSL_BN_ASM_MONT)
+  int num = mont->N.top;
+
+  if (num > 1 && a->top == num && b->top == num) {
+    if (bn_wexpand(r, num) == NULL) {
+      return 0;
+    }
+    if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
+      r->neg = a->neg ^ b->neg;
+      r->top = num;
+      bn_correct_top(r);
+      return 1;
+    }
+  }
+#endif
+
  BN_CTX_start(ctx);
-  BIGNUM *tmp = BN_CTX_get(ctx);
+  tmp = BN_CTX_get(ctx);
  if (tmp == NULL) {
    goto err;
  }
@@ -17,13 +17,13 @@
 #include <assert.h>

 #include "internal.h"
-#include "../../internal.h"
+#include "../internal.h"


 static uint64_t bn_neg_inv_mod_r_u64(uint64_t n);

 OPENSSL_COMPILE_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
-                       BN_MONT_CTX_N0_LIMBS_VALUE_INVALID_2);
+                       BN_MONT_CTX_N0_LIMBS_VALUE_INVALID);
 OPENSSL_COMPILE_ASSERT(sizeof(uint64_t) ==
                       BN_MONT_CTX_N0_LIMBS * sizeof(BN_ULONG),
                       BN_MONT_CTX_N0_LIMBS_DOES_NOT_MATCH_UINT64_T);
@@ -158,50 +158,3 @@ static uint64_t bn_neg_inv_mod_r_u64(uint64_t n) {

  return v;
 }
-
-/* bn_mod_exp_base_2_vartime calculates r = 2**p (mod n). |p| must be larger
- * than log_2(n); i.e. 2**p must be larger than |n|. |n| must be positive and
- * odd. */
-int bn_mod_exp_base_2_vartime(BIGNUM *r, unsigned p, const BIGNUM *n) {
-  assert(!BN_is_zero(n));
-  assert(!BN_is_negative(n));
-  assert(BN_is_odd(n));
-
-  BN_zero(r);
-
-  unsigned n_bits = BN_num_bits(n);
-  assert(n_bits != 0);
-  if (n_bits == 1) {
-    return 1;
-  }
-
-  /* Set |r| to the smallest power of two larger than |n|. */
-  assert(p > n_bits);
-  if (!BN_set_bit(r, n_bits)) {
-    return 0;
-  }
-
-  /* Unconditionally reduce |r|. */
-  assert(BN_cmp(r, n) > 0);
-  if (!BN_usub(r, r, n)) {
-    return 0;
-  }
-  assert(BN_cmp(r, n) < 0);
-
-  for (unsigned i = n_bits; i < p; ++i) {
-    /* This is like |BN_mod_lshift1_quick| except using |BN_usub|.
-     *
-     * TODO: Replace this with the use of a constant-time variant of
-     * |BN_mod_lshift1_quick|. */
-    if (!BN_lshift1(r, r)) {
-      return 0;
-    }
-    if (BN_cmp(r, n) >= 0) {
-      if (!BN_usub(r, r, n)) {
-        return 0;
-      }
-    }
-  }
-
-  return 1;
-}
@@ -312,8 +312,7 @@ static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
  if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
    bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
    if ((dna + dnb) < 0) {
-      OPENSSL_memset(&r[2 * n2 + dna + dnb], 0,
-                     sizeof(BN_ULONG) * -(dna + dnb));
+      memset(&r[2 * n2 + dna + dnb], 0, sizeof(BN_ULONG) * -(dna + dnb));
    }
    return;
  }
@@ -359,7 +358,7 @@ static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    if (!zero) {
      bn_mul_comba4(&(t[n2]), t, &(t[n]));
    } else {
-      OPENSSL_memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
+      memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
    }

    bn_mul_comba4(r, a, b);
@@ -369,7 +368,7 @@ static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    if (!zero) {
      bn_mul_comba8(&(t[n2]), t, &(t[n]));
    } else {
-      OPENSSL_memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
+      memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
    }

    bn_mul_comba8(r, a, b);
@@ -379,7 +378,7 @@ static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    if (!zero) {
      bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
    } else {
-      OPENSSL_memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
+      memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
    }
    bn_mul_recursive(r, a, b, n, 0, 0, p);
    bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
@@ -474,7 +473,7 @@ static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
    bn_mul_comba8(&(t[n2]), t, &(t[n]));
    bn_mul_comba8(r, a, b);
    bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
-    OPENSSL_memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
+    memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
  } else {
    p = &(t[n2 * 2]);
    bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
@@ -490,15 +489,14 @@ static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,

    if (j == 0) {
      bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
-      OPENSSL_memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
+      memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
    } else if (j > 0) {
      /* eg, n == 16, i == 8 and tn == 11 */
      bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
-      OPENSSL_memset(&(r[n2 + tna + tnb]), 0,
-                     sizeof(BN_ULONG) * (n2 - tna - tnb));
+      memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
    } else {
      /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
-      OPENSSL_memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
+      memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
      if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
          tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
        bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
@@ -591,7 +589,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
  i = al - bl;
  if (i == 0) {
    if (al == 8) {
-      if (!bn_wexpand(rr, 16)) {
+      if (bn_wexpand(rr, 16) == NULL) {
        goto err;
      }
      rr->top = 16;
@@ -619,19 +617,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
        goto err;
      }
      if (al > j || bl > j) {
-        if (!bn_wexpand(t, k * 4)) {
+        if (bn_wexpand(t, k * 4) == NULL) {
          goto err;
        }
-        if (!bn_wexpand(rr, k * 4)) {
+        if (bn_wexpand(rr, k * 4) == NULL) {
          goto err;
        }
        bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
      } else {
        /* al <= j || bl <= j */
-        if (!bn_wexpand(t, k * 2)) {
+        if (bn_wexpand(t, k * 2) == NULL) {
          goto err;
        }
-        if (!bn_wexpand(rr, k * 2)) {
+        if (bn_wexpand(rr, k * 2) == NULL) {
          goto err;
        }
        bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
@@ -641,7 +639,7 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
    }
  }

-  if (!bn_wexpand(rr, top)) {
+  if (bn_wexpand(rr, top) == NULL) {
    goto err;
  }
  rr->top = top;
@@ -737,7 +735,7 @@ static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t
  if (!zero) {
    bn_sqr_recursive(&(t[n2]), t, n, p);
  } else {
-    OPENSSL_memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
+    memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
  }
  bn_sqr_recursive(r, a, n, p);
  bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
@@ -790,7 +788,7 @@ int BN_mul_word(BIGNUM *bn, BN_ULONG w) {

  ll = bn_mul_words(bn->d, bn->d, bn->top, w);
  if (ll) {
-    if (!bn_wexpand(bn, bn->top + 1)) {
+    if (bn_wexpand(bn, bn->top + 1) == NULL) {
      return 0;
    }
    bn->d[bn->top++] = ll;
@@ -819,7 +817,7 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
  }

  max = 2 * al; /* Non-zero (from above) */
-  if (!bn_wexpand(rr, max)) {
+  if (bn_wexpand(rr, max) == NULL) {
    goto err;
  }

@@ -838,12 +836,12 @@ int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
      j = 1 << (j - 1);
      k = j + j;
      if (al == j) {
-        if (!bn_wexpand(tmp, k * 2)) {
+        if (bn_wexpand(tmp, k * 2) == NULL) {
          goto err;
        }
        bn_sqr_recursive(rr->d, a->d, al, tmp->d);
      } else {
-        if (!bn_wexpand(tmp, max)) {
+        if (bn_wexpand(tmp, max) == NULL) {
          goto err;
        }
        bn_sqr_normal(rr->d, a->d, al, tmp->d);
@@ -113,10 +113,29 @@

 #include "internal.h"

+/* number of Miller-Rabin iterations for an error rate of less than 2^-80
+ * for random 'b'-bit input, b >= 100 (taken from table 4.4 in the Handbook
+ * of Applied Cryptography [Menezes, van Oorschot, Vanstone; CRC Press 1996];
+ * original paper: Damgaard, Landrock, Pomerance: Average case error estimates
+ * for the strong probable prime test. -- Math. Comp. 61 (1993) 177-194) */
+#define BN_prime_checks_for_size(b) ((b) >= 1300 ?  2 : \
+                                (b) >=  850 ?  3 : \
+                                (b) >=  650 ?  4 : \
+                                (b) >=  550 ?  5 : \
+                                (b) >=  450 ?  6 : \
+                                (b) >=  400 ?  7 : \
+                                (b) >=  350 ?  8 : \
+                                (b) >=  300 ?  9 : \
+                                (b) >=  250 ? 12 : \
+                                (b) >=  200 ? 15 : \
+                                (b) >=  150 ? 18 : \
+                                /* b >= 100 */ 27)
+
 /* The quick sieve algorithm approach to weeding out primes is Philip
 * Zimmermann's, as implemented in PGP.  I have had a read of his comments and
 * implemented my own version. */

+/* NUMPRIMES is the number of entries in the |primes| table below. */
 #define NUMPRIMES 2048

 /* primes contains all the primes that fit into a uint16_t. */
@@ -310,37 +329,8 @@ static const uint16_t primes[NUMPRIMES] = {
    17851, 17863,
 };

-/* BN_prime_checks_for_size returns the number of Miller-Rabin iterations
- * necessary for a 'bits'-bit prime, in order to maintain an error rate greater
- * than the security level for an RSA prime of that many bits (calculated using
- * the FIPS SP 800-57 security level and 186-4 Section F.1; original paper:
- * Damgaard, Landrock, Pomerance: Average case error estimates for the strong
- * probable prime test. -- Math. Comp. 61 (1993) 177-194) */
-static int BN_prime_checks_for_size(int bits) {
-  if (bits >= 3747) {
-    return 3;
-  }
-  if (bits >= 1345) {
-    return 4;
-  }
-  if (bits >= 476) {
-    return 5;
-  }
-  if (bits >= 400) {
-    return 6;
-  }
-  if (bits >= 308) {
-    return 8;
-  }
-  if (bits >= 205) {
-    return 13;
-  }
-  if (bits >= 155) {
-    return 19;
-  }
-  return 28;
-}
-
+static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
+                   const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
 static int probable_prime(BIGNUM *rnd, int bits);
 static int probable_prime_dh(BIGNUM *rnd, int bits, const BIGNUM *add,
                             const BIGNUM *rem, BN_CTX *ctx);
@@ -481,201 +471,178 @@ int BN_is_prime_ex(const BIGNUM *candidate, int checks, BN_CTX *ctx, BN_GENCB *c
  return BN_is_prime_fasttest_ex(candidate, checks, ctx, 0, cb);
 }

-int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx,
+int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
                            int do_trial_division, BN_GENCB *cb) {
+  int i, j, ret = -1;
+  int k;
+  BN_CTX *ctx = NULL;
+  BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
+  BN_MONT_CTX *mont = NULL;
+  const BIGNUM *A = NULL;
+
  if (BN_cmp(a, BN_value_one()) <= 0) {
    return 0;
  }

+  if (checks == BN_prime_checks) {
+    checks = BN_prime_checks_for_size(BN_num_bits(a));
+  }
+
  /* first look for small factors */
  if (!BN_is_odd(a)) {
    /* a is even => a is prime if and only if a == 2 */
    return BN_is_word(a, 2);
  }

-  /* Enhanced Miller-Rabin does not work for three. */
-  if (BN_is_word(a, 3)) {
-    return 1;
-  }
-
  if (do_trial_division) {
-    for (int i = 1; i < NUMPRIMES; i++) {
+    /* Trial division by the small odd primes in |primes|. Index 0 is skipped
+     * because |a| is already known to be odd at this point. */
+    for (i = 1; i < NUMPRIMES; i++) {
      BN_ULONG mod = BN_mod_word(a, primes[i]);
      if (mod == (BN_ULONG)-1) {
-        return -1;
+        goto err;
      }
      if (mod == 0) {
-        return BN_is_word(a, primes[i]);
+        /* |a| is a multiple of primes[i], so it is prime if and only if it is
+         * primes[i] itself. Returning 0 unconditionally would report every
+         * prime in the table (3, 5, 7, ...) as composite. Nothing has been
+         * allocated yet, so a direct return is safe. */
+        return BN_is_word(a, primes[i]);
      }
    }

    if (!BN_GENCB_call(cb, 1, -1)) {
-      return -1;
+      goto err;
    }
  }

-  int ret = -1;
-  BN_CTX *ctx_allocated = NULL;
-  if (ctx == NULL) {
-    ctx_allocated = BN_CTX_new();
-    if (ctx_allocated == NULL) {
-      return -1;
-    }
-    ctx = ctx_allocated;
-  }
-
-  enum bn_primality_result_t result;
-  if (!BN_enhanced_miller_rabin_primality_test(&result, a, checks, ctx, cb)) {
+  if (ctx_passed != NULL) {
+    ctx = ctx_passed;
+  } else if ((ctx = BN_CTX_new()) == NULL) {
    goto err;
  }
-
-  ret = (result == bn_probably_prime);
-
-err:
-  BN_CTX_free(ctx_allocated);
-  return ret;
-}
-
-int BN_enhanced_miller_rabin_primality_test(
-    enum bn_primality_result_t *out_result, const BIGNUM *w, int iterations,
-    BN_CTX *ctx, BN_GENCB *cb) {
-  /* Enhanced Miller-Rabin is only valid on odd integers greater than 3. */
-  if (!BN_is_odd(w) || BN_cmp_word(w, 3) <= 0) {
-    OPENSSL_PUT_ERROR(BN, BN_R_INVALID_INPUT);
-    return 0;
-  }
-
-  if (iterations == BN_prime_checks) {
-    iterations = BN_prime_checks_for_size(BN_num_bits(w));
-  }
-
-  int ret = 0;
-  BN_MONT_CTX *mont = NULL;
-
  BN_CTX_start(ctx);

-  BIGNUM *w1 = BN_CTX_get(ctx);
-  if (w1 == NULL ||
-      !BN_copy(w1, w) ||
-      !BN_sub_word(w1, 1)) {
+  /* A := abs(a) */
+  if (a->neg) {
+    BIGNUM *t = BN_CTX_get(ctx);
+    if (t == NULL || !BN_copy(t, a)) {
+      goto err;
+    }
+    t->neg = 0;
+    A = t;
+  } else {
+    A = a;
+  }
+
+  A1 = BN_CTX_get(ctx);
+  A1_odd = BN_CTX_get(ctx);
+  check = BN_CTX_get(ctx);
+  if (check == NULL) {
    goto err;
  }

-  /* Write w1 as m*2^a (Steps 1 and 2). */
-  int a = 0;
-  while (!BN_is_bit_set(w1, a)) {
-    a++;
+  /* compute A1 := A - 1 */
+  if (!BN_copy(A1, A)) {
+    goto err;
  }
-  BIGNUM *m = BN_CTX_get(ctx);
-  if (m == NULL ||
-      !BN_rshift(m, w1, a)) {
+  if (!BN_sub_word(A1, 1)) {
+    goto err;
+  }
+  if (BN_is_zero(A1)) {
+    ret = 0;
    goto err;
  }

-  BIGNUM *b = BN_CTX_get(ctx);
-  BIGNUM *g = BN_CTX_get(ctx);
-  BIGNUM *z = BN_CTX_get(ctx);
-  BIGNUM *x = BN_CTX_get(ctx);
-  BIGNUM *x1 = BN_CTX_get(ctx);
-  if (b == NULL ||
-      g == NULL ||
-      z == NULL ||
-      x == NULL ||
-      x1 == NULL) {
+  /* write  A1  as  A1_odd * 2^k */
+  k = 1;
+  while (!BN_is_bit_set(A1, k)) {
+    k++;
+  }
+  if (!BN_rshift(A1_odd, A1, k)) {
    goto err;
  }

  /* Montgomery setup for computations mod A */
  mont = BN_MONT_CTX_new();
-  if (mont == NULL ||
-      !BN_MONT_CTX_set(mont, w, ctx)) {
+  if (mont == NULL) {
+    goto err;
+  }
+  if (!BN_MONT_CTX_set(mont, A, ctx)) {
    goto err;
  }

-  /* The following loop performs in inner iteration of the Enhanced Miller-Rabin
-   * Primality test (Step 4). */
-  for (int i = 1; i <= iterations; i++) {
-    /* Step 4.1-4.2 */
-    if (!BN_rand_range_ex(b, 2, w1)) {
+  for (i = 0; i < checks; i++) {
+    if (!BN_pseudo_rand_range(check, A1)) {
      goto err;
    }
-
-    /* Step 4.3-4.4 */
-    if (!BN_gcd(g, b, w, ctx)) {
+    if (!BN_add_word(check, 1)) {
      goto err;
    }
-    if (BN_cmp_word(g, 1) > 0) {
-      *out_result = bn_composite;
-      ret = 1;
+    /* now 1 <= check < A */
+
+    j = witness(check, A, A1, A1_odd, k, ctx, mont);
+    if (j == -1) {
      goto err;
    }
-
-    /* Step 4.5 */
-    if (!BN_mod_exp_mont(z, b, m, w, ctx, mont)) {
+    if (j) {
+      ret = 0;
      goto err;
    }
-
-    /* Step 4.6 */
-    if (BN_is_one(z) || BN_cmp(z, w1) == 0) {
-      goto loop;
-    }
-
-    /* Step 4.7 */
-    for (int j = 1; j < a; j++) {
-      if (!BN_copy(x, z) || !BN_mod_mul(z, x, x, w, ctx)) {
-        goto err;
-      }
-      if (BN_cmp(z, w1) == 0) {
-        goto loop;
-      }
-      if (BN_is_one(z)) {
-        goto composite;
-      }
-    }
-
-    /* Step 4.8-4.9 */
-    if (!BN_copy(x, z) || !BN_mod_mul(z, x, x, w, ctx)) {
-      goto err;
-    }
-
-    /* Step 4.10-4.11 */
-    if (!BN_is_one(z) && !BN_copy(x, z)) {
-      goto err;
-    }
-
- composite:
-    /* Step 4.12-4.14 */
-    if (!BN_copy(x1, x) ||
-        !BN_sub_word(x1, 1) ||
-        !BN_gcd(g, x1, w, ctx)) {
-      goto err;
-    }
-    if (BN_cmp_word(g, 1) > 0) {
-      *out_result = bn_composite;
-    } else {
-      *out_result = bn_non_prime_power_composite;
-    }
-
-    ret = 1;
-    goto err;
-
- loop:
-    /* Step 4.15 */
    if (!BN_GENCB_call(cb, 1, i)) {
      goto err;
    }
  }
-
-  *out_result = bn_probably_prime;
  ret = 1;

 err:
-  BN_MONT_CTX_free(mont);
-  BN_CTX_end(ctx);
+  if (ctx != NULL) {
+    BN_CTX_end(ctx);
+    if (ctx_passed == NULL) {
+      BN_CTX_free(ctx);
+    }
+  }
+  if (mont != NULL) {
+    BN_MONT_CTX_free(mont);
+  }

  return ret;
 }

+/* witness performs one Miller-Rabin round on |a| using the base held in |w|,
+ * which is overwritten with intermediate powers. The caller supplies
+ * |a1| = |a| - 1 together with its decomposition |a1| = |a1_odd| * 2^|k|. It
+ * returns 1 if the base shows |a| to be composite, 0 if |a| passes this round
+ * (i.e. is probably prime), and -1 if a BIGNUM operation fails. */
+static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
+                   const BIGNUM *a1_odd, int k, BN_CTX *ctx,
+                   BN_MONT_CTX *mont) {
+  /* w := w^a1_odd mod a */
+  if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) {
+    return -1;
+  }
+
+  /* w == +1 or w == -1 (mod a): 'a' is probably prime. */
+  if (BN_is_one(w) || BN_cmp(w, a1) == 0) {
+    return 0;
+  }
+
+  /* Square repeatedly (at most k - 1 times), looking for -1 (mod a). */
+  for (k--; k != 0; k--) {
+    /* w := w^2 mod a */
+    if (!BN_mod_mul(w, w, w, a, ctx)) {
+      return -1;
+    }
+
+    /* Reaching +1 without having passed through -1 means 'a' is composite:
+     * a previous 'w' would otherwise have been == -1 (mod 'a'). */
+    if (BN_is_one(w)) {
+      return 1;
+    }
+
+    /* w == -1 (mod a): 'a' is probably prime. */
+    if (BN_cmp(w, a1) == 0) {
+      return 0;
+    }
+  }
+
+  /* At this point 'w' is the (a-1)/2-th power of the original 'w' and it is
+   * neither -1 nor +1, so 'a' cannot be prime. */
+  return 1;
+}
+
+/* get_word returns the single word of |bn| when |bn| occupies exactly one
+ * word, and zero otherwise (both when |bn| has no words and when it spans
+ * several). The sign of |bn| is not consulted. */
+static BN_ULONG get_word(const BIGNUM *bn) {
+  return bn->top == 1 ? bn->d[0] : 0;
+}
+
 static int probable_prime(BIGNUM *rnd, int bits) {
  int i;
  uint16_t mods[NUMPRIMES];
@@ -702,9 +669,9 @@ again:
    BN_ULONG size_limit;
    if (bits == BN_BITS2) {
      /* Avoid undefined behavior. */
-      size_limit = ~((BN_ULONG)0) - BN_get_word(rnd);
+      size_limit = ~((BN_ULONG)0) - get_word(rnd);
    } else {
-      size_limit = (((BN_ULONG)1) << bits) - BN_get_word(rnd) - 1;
+      size_limit = (((BN_ULONG)1) << bits) - get_word(rnd) - 1;
    }
    if (size_limit < maxdelta) {
      maxdelta = size_limit;
@@ -714,7 +681,7 @@ again:

 loop:
  if (is_single_word) {
-    BN_ULONG rnd_word = BN_get_word(rnd);
+    BN_ULONG rnd_word = get_word(rnd);

    /* In the case that the candidate prime is a single word then
     * we check that:
@@ -114,17 +114,8 @@
 #include <openssl/mem.h>
 #include <openssl/rand.h>
 #include <openssl/sha.h>
-#include <openssl/type_check.h>

-#include "../../internal.h"
-#include "../rand/internal.h"
-
-
-static const uint8_t kDefaultAdditionalData[32] = {0};
-
-static int bn_rand_with_additional_data(BIGNUM *rnd, int bits, int top,
-                                        int bottom,
-                                        const uint8_t additional_data[32]) {
+int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) {
  uint8_t *buf = NULL;
  int ret = 0, bit, bytes, mask;

@@ -159,7 +150,9 @@ static int bn_rand_with_additional_data(BIGNUM *rnd, int bits, int top,
  }

  /* Make a random number and set the top and bottom bits. */
-  RAND_bytes_with_additional_data(buf, bytes, additional_data);
+  if (!RAND_bytes(buf, bytes)) {
+    goto err;
+  }

  if (top != BN_RAND_TOP_ANY) {
    if (top == BN_RAND_TOP_TWO && bits > 1) {
@@ -195,56 +188,68 @@ err:
  return (ret);
 }

-int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) {
-  return bn_rand_with_additional_data(rnd, bits, top, bottom,
-                                      kDefaultAdditionalData);
-}
-
 int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) {
  return BN_rand(rnd, bits, top, bottom);
 }

-static int bn_rand_range_with_additional_data(
-    BIGNUM *r, BN_ULONG min_inclusive, const BIGNUM *max_exclusive,
-    const uint8_t additional_data[32]) {
+int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
+                     const BIGNUM *max_exclusive) {
+  unsigned n;
+  unsigned count = 100;
+
  if (BN_cmp_word(max_exclusive, min_inclusive) <= 0) {
    OPENSSL_PUT_ERROR(BN, BN_R_INVALID_RANGE);
    return 0;
  }

-  /* This function is used to implement steps 4 through 7 of FIPS 186-4
-   * appendices B.4.2 and B.5.2. When called in those contexts, |max_exclusive|
-   * is n and |min_inclusive| is one. */
-  unsigned count = 100;
-  unsigned n = BN_num_bits(max_exclusive); /* n > 0 */
+  n = BN_num_bits(max_exclusive); /* n > 0 */
+
+  /* BN_is_bit_set(range, n - 1) always holds */
+  if (n == 1) {
+    BN_zero(r);
+    return 1;
+  }
+
  do {
    if (!--count) {
      OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_ITERATIONS);
      return 0;
    }

-    if (/* steps 4 and 5 */
-        !bn_rand_with_additional_data(r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY,
-                                      additional_data) ||
-        /* step 7 */
-        !BN_add_word(r, min_inclusive)) {
-      return 0;
-    }
+    if (!BN_is_bit_set(max_exclusive, n - 2) &&
+        !BN_is_bit_set(max_exclusive, n - 3)) {
+      /* range = 100..._2, so 3*range (= 11..._2) is exactly one bit longer
+       * than range. This is a common scenario when generating a random value
+       * modulo an RSA public modulus, e.g. for RSA base blinding. */
+      if (!BN_rand(r, n + 1, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) {
+        return 0;
+      }

-    /* Step 6. This loops if |r| >= |max_exclusive|. This is identical to
-     * checking |r| > |max_exclusive| - 1 or |r| - 1 > |max_exclusive| - 2, the
-     * formulation stated in FIPS 186-4. */
-  } while (BN_cmp(r, max_exclusive) >= 0);
+      /* If r < 3*range, use r := r MOD range (which is either r, r - range, or
+       * r - 2*range). Otherwise, iterate again. Since 3*range = 11..._2, each
+       * iteration succeeds with probability >= .75. */
+      if (BN_cmp(r, max_exclusive) >= 0) {
+        if (!BN_sub(r, r, max_exclusive)) {
+          return 0;
+        }
+        if (BN_cmp(r, max_exclusive) >= 0) {
+          if (!BN_sub(r, r, max_exclusive)) {
+            return 0;
+          }
+        }
+      }
+    } else {
+      /* range = 11..._2  or  range = 101..._2 */
+      if (!BN_rand(r, n, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY)) {
+        return 0;
+      }
+    }
+  } while (BN_cmp_word(r, min_inclusive) < 0 ||
+           BN_cmp(r, max_exclusive) >= 0);

  return 1;
 }

-int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
-                     const BIGNUM *max_exclusive) {
-  return bn_rand_range_with_additional_data(r, min_inclusive, max_exclusive,
-                                            kDefaultAdditionalData);
-}
-
 int BN_rand_range(BIGNUM *r, const BIGNUM *range) {
  return BN_rand_range_ex(r, 0, range);
 }
@@ -256,31 +261,80 @@ int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) {
 int BN_generate_dsa_nonce(BIGNUM *out, const BIGNUM *range, const BIGNUM *priv,
                          const uint8_t *message, size_t message_len,
                          BN_CTX *ctx) {
+  SHA512_CTX sha;
+  /* We use 512 bits of random data per iteration to
+   * ensure that we have at least |range| bits of randomness. */
+  uint8_t random_bytes[64];
+  uint8_t digest[SHA512_DIGEST_LENGTH];
+  size_t done, todo, attempt;
+  const unsigned num_k_bytes = BN_num_bytes(range);
+  const unsigned bits_to_mask = (8 - (BN_num_bits(range) % 8)) % 8;
+  uint8_t private_bytes[96];
+  uint8_t *k_bytes = NULL;
+  int ret = 0;
+
+  if (out == NULL) {
+    return 0;
+  }
+
+  if (BN_is_zero(range)) {
+    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
+    goto err;
+  }
+
+  k_bytes = OPENSSL_malloc(num_k_bytes);
+  if (!k_bytes) {
+    OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
  /* We copy |priv| into a local buffer to avoid further exposing its
   * length. */
-  uint8_t private_bytes[96];
-  size_t todo = sizeof(priv->d[0]) * priv->top;
+  todo = sizeof(priv->d[0]) * priv->top;
  if (todo > sizeof(private_bytes)) {
    /* No reasonable DSA or ECDSA key should have a private key
     * this large and we don't handle this case in order to avoid
     * leaking the length of the private key. */
    OPENSSL_PUT_ERROR(BN, BN_R_PRIVATE_KEY_TOO_LARGE);
-    return 0;
+    goto err;
  }
-  OPENSSL_memcpy(private_bytes, priv->d, todo);
-  OPENSSL_memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);
+  memcpy(private_bytes, priv->d, todo);
+  memset(private_bytes + todo, 0, sizeof(private_bytes) - todo);

-  /* Pass a SHA512 hash of the private key and message as additional data into
-   * the RBG. This is a hardening measure against entropy failure. */
-  OPENSSL_COMPILE_ASSERT(SHA512_DIGEST_LENGTH >= 32,
-                         additional_data_is_too_large_for_sha512);
-  SHA512_CTX sha;
-  uint8_t digest[SHA512_DIGEST_LENGTH];
-  SHA512_Init(&sha);
-  SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
-  SHA512_Update(&sha, message, message_len);
-  SHA512_Final(digest, &sha);
+  for (attempt = 0;; attempt++) {
+    for (done = 0; done < num_k_bytes;) {
+      if (!RAND_bytes(random_bytes, sizeof(random_bytes))) {
+        goto err;
+      }
+      SHA512_Init(&sha);
+      SHA512_Update(&sha, &attempt, sizeof(attempt));
+      SHA512_Update(&sha, &done, sizeof(done));
+      SHA512_Update(&sha, private_bytes, sizeof(private_bytes));
+      SHA512_Update(&sha, message, message_len);
+      SHA512_Update(&sha, random_bytes, sizeof(random_bytes));
+      SHA512_Final(digest, &sha);

-  /* Select a value k from [1, range-1], following FIPS 186-4 appendix B.5.2. */
-  return bn_rand_range_with_additional_data(out, 1, range, digest);
+      todo = num_k_bytes - done;
+      if (todo > SHA512_DIGEST_LENGTH) {
+        todo = SHA512_DIGEST_LENGTH;
+      }
+      memcpy(k_bytes + done, digest, todo);
+      done += todo;
+    }
+
+    k_bytes[0] &= 0xff >> bits_to_mask;
+
+    if (!BN_bin2bn(k_bytes, num_k_bytes, out)) {
+      goto err;
+    }
+    if (BN_cmp(out, range) < 0) {
+      break;
+    }
+  }
+
+  ret = 1;
+
+err:
+  OPENSSL_free(k_bytes);
+  return ret;
 }
@@ -48,7 +48,7 @@

 #include <openssl/mem.h>

-#include "../../internal.h"
+#include "../internal.h"


 /*
@@ -251,4 +251,69 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
 	OPENSSL_cleanse(storage,sizeof(storage));
 }

+/*
+ * See crypto/bn/rsaz-x86_64.pl for further details.
+ */
+void rsaz_512_mul(void *ret,const void *a,const void *b,const void *n,BN_ULONG k);
+void rsaz_512_mul_scatter4(void *ret,const void *a,const void *n,BN_ULONG k,const void *tbl,unsigned int power);
+void rsaz_512_mul_gather4(void *ret,const void *a,const void *tbl,const void *n,BN_ULONG k,unsigned int power);
+void rsaz_512_mul_by_one(void *ret,const void *a,const void *n,BN_ULONG k);
+void rsaz_512_sqr(void *ret,const void *a,const void *n,BN_ULONG k,int cnt);
+void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
+void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
+
+/*
+ * RSAZ_512_mod_exp writes base^exponent mod m to |result|, where every
+ * operand is eight BN_ULONG words. It uses a fixed 4-bit window over a
+ * 16-entry table of powers and delegates the arithmetic to the rsaz_512_*
+ * routines declared above.
+ *
+ * NOTE(review): |k0| is presumably the Montgomery constant for |m| and |RR|
+ * presumably R^2 mod m; both are only forwarded to the rsaz_512_* routines
+ * here -- confirm against the caller.
+ */
+void RSAZ_512_mod_exp(BN_ULONG result[8],
+	const BN_ULONG base[8], const BN_ULONG exponent[8],
+	const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
+{
+	/* Layout: 16*8*8 bytes of power table, followed by two 64-byte scratch
+	 * values (|a_inv|, then |temp|). */
+	alignas(64) uint8_t storage[(16*8*8) + (64 * 2)]; /* 1.2KB */
+	unsigned char	*table = storage;
+	BN_ULONG	*a_inv = (BN_ULONG *)(table+16*8*8),
+			*temp  = (BN_ULONG *)(table+16*8*8+8*8);
+	int index;
+	unsigned int wvalue;
+
+	/* table[0] = 1_inv
+	 * Negating the low word and complementing the rest yields 2^512 - m,
+	 * provided m[0] != 0 (true for any odd modulus). Presumably this is 1 in
+	 * Montgomery form, which assumes the top bit of |m| is set -- TODO
+	 * confirm. */
+	temp[0] = 0-m[0];	temp[1] = ~m[1];
+	temp[2] = ~m[2];	temp[3] = ~m[3];
+	temp[4] = ~m[4];	temp[5] = ~m[5];
+	temp[6] = ~m[6];	temp[7] = ~m[7];
+	rsaz_512_scatter4(table, temp, 0);
+
+	/* table [1] = a_inv^1
+	 * |a_inv| is the product of |base| and |RR| as computed by rsaz_512_mul
+	 * (presumably |base| converted into Montgomery form). */
+	rsaz_512_mul(a_inv, base, RR, m, k0);
+	rsaz_512_scatter4(table, a_inv, 1);
+
+	/* table [2] = a_inv^2 */
+	rsaz_512_sqr(temp, a_inv, m, k0, 1);
+	rsaz_512_scatter4(table, temp, 2);
+
+	/* Fill entries 3..15. Presumably each call multiplies |temp| by |a_inv|
+	 * and stores the product at table[index] -- verify against the assembly. */
+	for (index=3; index<16; index++)
+		rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
+
+	/* The exponent is read one byte at a time, from byte 63 down to byte 0,
+	 * i.e. most significant first given little-endian word storage. */
+	const uint8_t *p_str = (const uint8_t *)exponent;
+
+	/* load first window
+	 * The top byte seeds the accumulator: start from the table entry for its
+	 * high nibble, then fold in the low nibble. */
+	wvalue = p_str[63];
+
+	rsaz_512_gather4(temp, table, wvalue>>4);
+	rsaz_512_sqr(temp, temp, m, k0, 4);
+	rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue&0xf);
+
+	/* Remaining 63 bytes: for each nibble (high, then low), call rsaz_512_sqr
+	 * with a count of 4 and then multiply by the table entry selected by that
+	 * nibble. */
+	for (index=62; index>=0; index--) {
+		wvalue = p_str[index];
+
+		rsaz_512_sqr(temp, temp, m, k0, 4);
+		rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue>>4);
+
+		rsaz_512_sqr(temp, temp, m, k0, 4);
+		rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue&0x0f);
+	}
+
+	/* from Montgomery */
+	rsaz_512_mul_by_one(result, temp, m, k0);
+
+	/* Wipe the power table and scratch values, which were derived from the
+	 * base and exponent. */
+	OPENSSL_cleanse(storage,sizeof(storage));
+}
+
 #endif  /* OPENSSL_X86_64 */
@@ -50,4 +50,7 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
 	const BN_ULONG m_norm[16], const BN_ULONG RR[16], BN_ULONG k0);
 int rsaz_avx2_eligible(void);

+void RSAZ_512_mod_exp(BN_ULONG result[8],
+	const BN_ULONG base_norm[8], const BN_ULONG exponent[8],
+	const BN_ULONG m_norm[8], BN_ULONG k0, const BN_ULONG RR[8]);
 #endif
@@ -75,7 +75,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) {

  r->neg = a->neg;
  nw = n / BN_BITS2;
-  if (!bn_wexpand(r, a->top + nw + 1)) {
+  if (bn_wexpand(r, a->top + nw + 1) == NULL) {
    return 0;
  }
  lb = n % BN_BITS2;
@@ -94,7 +94,7 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) {
      t[nw + i] = (l << lb) & BN_MASK2;
    }
  }
-  OPENSSL_memset(t, 0, nw * sizeof(t[0]));
+  memset(t, 0, nw * sizeof(t[0]));
  r->top = a->top + nw + 1;
  bn_correct_top(r);

@@ -107,12 +107,12 @@ int BN_lshift1(BIGNUM *r, const BIGNUM *a) {

  if (r != a) {
    r->neg = a->neg;
-    if (!bn_wexpand(r, a->top + 1)) {
+    if (bn_wexpand(r, a->top + 1) == NULL) {
      return 0;
    }
    r->top = a->top;
  } else {
-    if (!bn_wexpand(r, a->top + 1)) {
+    if (bn_wexpand(r, a->top + 1) == NULL) {
      return 0;
    }
  }
@@ -152,7 +152,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) {
  i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
  if (r != a) {
    r->neg = a->neg;
-    if (!bn_wexpand(r, i)) {
+    if (bn_wexpand(r, i) == NULL) {
      return 0;
    }
  } else {
@@ -182,10 +182,6 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) {
    }
  }

-  if (r->top == 0) {
-    r->neg = 0;
-  }
-
  return 1;
 }

@@ -201,7 +197,7 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) {
  ap = a->d;
  j = i - (ap[i - 1] == 1);
  if (a != r) {
-    if (!bn_wexpand(r, j)) {
+    if (bn_wexpand(r, j) == NULL) {
      return 0;
    }
    r->neg = a->neg;
@@ -219,10 +215,6 @@ int BN_rshift1(BIGNUM *r, const BIGNUM *a) {
  }
  r->top = j;

-  if (r->top == 0) {
-    r->neg = 0;
-  }
-
  return 1;
 }

@@ -236,7 +228,7 @@ int BN_set_bit(BIGNUM *a, int n) {
  i = n / BN_BITS2;
  j = n % BN_BITS2;
  if (a->top <= i) {
-    if (!bn_wexpand(a, i + 1)) {
+    if (bn_wexpand(a, i + 1) == NULL) {
      return 0;
    }
    for (k = a->top; k < i + 1; k++) {
@@ -56,8 +56,6 @@

 #include <openssl/err.h>

-#include "internal.h"
-

 BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  /* Compute a square root of |a| mod |p| using the Tonelli/Shanks algorithm
@@ -150,7 +148,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
    }
    q->neg = 0;
    if (!BN_add_word(q, 1) ||
-        !BN_mod_exp_mont(ret, A, q, p, ctx, NULL)) {
+        !BN_mod_exp(ret, A, q, p, ctx)) {
      goto end;
    }
    err = 0;
@@ -195,7 +193,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
      goto end;
    }
    q->neg = 0;
-    if (!BN_mod_exp_mont(b, t, q, p, ctx, NULL)) {
+    if (!BN_mod_exp(b, t, q, p, ctx)) {
      goto end;
    }

@@ -255,7 +253,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
      }
    }

-    r = bn_jacobi(y, q, ctx); /* here 'q' is |p| */
+    r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
    if (r < -1) {
      goto end;
    }
@@ -283,7 +281,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {

  /* Now that we have some non-square, we can find an element
   * of order  2^e  by computing its q'th power. */
-  if (!BN_mod_exp_mont(y, y, q, p, ctx, NULL)) {
+  if (!BN_mod_exp(y, y, q, p, ctx)) {
    goto end;
  }
  if (BN_is_one(y)) {
@@ -329,7 +327,7 @@ BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
      goto end;
    }
  } else {
-    if (!BN_mod_exp_mont(x, A, t, p, ctx, NULL)) {
+    if (!BN_mod_exp(x, A, t, p, ctx)) {
      goto end;
    }
    if (BN_is_zero(x)) {
@@ -458,9 +456,7 @@ int BN_sqrt(BIGNUM *out_sqrt, const BIGNUM *in, BN_CTX *ctx) {
  }

  /* We estimate that the square root of an n-bit number is 2^{n/2}. */
-  if (!BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2)) {
-    goto err;
-  }
+  BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2);

  /* This is Newton's method for finding a root of the equation |estimate|^2 -
   * |in| = 0. */
@@ -1,10 +0,0 @@
-include_directories(../../include)
-
-add_library(
-  bn_extra
-
-  OBJECT
-
-  bn_asn1.c
-  convert.c
-)
@@ -61,8 +61,6 @@
 #include <openssl/mem.h>
 #include <openssl/err.h>

-#include "../internal.h"
-

 BUF_MEM *BUF_MEM_new(void) {
  BUF_MEM *ret;
@@ -73,7 +71,7 @@ BUF_MEM *BUF_MEM_new(void) {
    return NULL;
  }

-  OPENSSL_memset(ret, 0, sizeof(BUF_MEM));
+  memset(ret, 0, sizeof(BUF_MEM));
  return ret;
 }

@@ -139,7 +137,7 @@ static size_t buf_mem_grow(BUF_MEM *buf, size_t len, int clean) {
    return 0;
  }
  if (buf->length < len) {
-    OPENSSL_memset(&buf->data[buf->length], 0, len - buf->length);
+    memset(&buf->data[buf->length], 0, len - buf->length);
  }
  buf->length = len;
  return len;
@@ -195,7 +193,7 @@ char *BUF_strndup(const char *buf, size_t size) {
    return NULL;
  }

-  OPENSSL_memcpy(ret, buf, size);
+  memcpy(ret, buf, size);
  ret[size] = '\0';
  return ret;
 }
@@ -236,6 +234,6 @@ void *BUF_memdup(const void *data, size_t dst_size) {
    return NULL;
  }

-  OPENSSL_memcpy(ret, data, dst_size);
+  memcpy(ret, data, dst_size);
  return ret;
 }
@@ -10,3 +10,14 @@ add_library(
  cbs.c
  cbb.c
 )
+
+# bytestring_test is built from bytestring_test.cc together with the object
+# files of the test_support object library.
+add_executable(
+  bytestring_test
+
+  bytestring_test.cc
+
+  $<TARGET_OBJECTS:test_support>
+)
+
+# Link against crypto. NOTE(review): the keyword-less signature is presumably
+# kept because the stated minimum CMake version predates PRIVATE/PUBLIC.
+target_link_libraries(bytestring_test crypto)
+# Make the all_tests target depend on this test so it gets built with it.
+add_dependencies(all_tests bytestring_test)
@@ -22,7 +22,6 @@
 #include <openssl/mem.h>

 #include "internal.h"
-#include "../internal.h"


 int CBB_finish_i2d(CBB *cbb, uint8_t **outp) {
@@ -43,7 +42,7 @@ int CBB_finish_i2d(CBB *cbb, uint8_t **outp) {
      *outp = der;
      der = NULL;
    } else {
-      OPENSSL_memcpy(*outp, der, der_len);
+      memcpy(*outp, der, der_len);
      *outp += der_len;
    }
  }
@@ -18,7 +18,6 @@
 #include <string.h>

 #include "internal.h"
-#include "../internal.h"


 /* kMaxDepth is just a sanity limit. The code should be such that the length
@@ -38,7 +37,7 @@ static int is_string_type(unsigned tag) {
    case CBS_ASN1_UTF8STRING:
    case CBS_ASN1_NUMERICSTRING:
    case CBS_ASN1_PRINTABLESTRING:
-    case CBS_ASN1_T61STRING:
+    case CBS_ASN1_T16STRING:
    case CBS_ASN1_VIDEOTEXSTRING:
    case CBS_ASN1_IA5STRING:
    case CBS_ASN1_GRAPHICSTRING:
@@ -101,7 +100,7 @@ static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
 * |CBS_get_any_ber_asn1_element|, indicate an "end of contents" (EOC) value. */
 static char is_eoc(size_t header_len, CBS *contents) {
  return header_len == 2 && CBS_len(contents) == 2 &&
-         OPENSSL_memcmp(CBS_data(contents), "\x00\x00", 2) == 0;
+         memcmp(CBS_data(contents), "\x00\x00", 2) == 0;
 }

 /* cbs_convert_ber reads BER data from |in| and writes DER data to |out|. If
--- a/Show More
+++ b/Show More