From 71a3a4d7aed7cab7257f73610dc688384dad5112 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 8 May 2026 12:33:05 +0530
Subject: [PATCH] Switch to using directory format for bundled CA certs

This is the preferred format for OpenSSL so use it. Supposedly has
better performance, though given the very small number of certificates
involved I doubt it's even measurable.
---
 .gitignore                  |  2 +-
 bypy/macos/__main__.py      |  2 +-
 bypy/windows/util.c         |  4 +--
 setup/resources.py          | 33 ++++++++++-------------
 setup/unix-ci.py            |  4 +--
 src/calibre/test_build.py   | 10 ++++---
 src/calibre/utils/certgen.c | 54 ++++++++++++++++++++++++++++++++++++-
 src/calibre/utils/https.py  | 12 ++++++---
 8 files changed, 88 insertions(+), 33 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8f6d15ffc0..e3ba411826 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,7 +32,7 @@
 /resources/content-server/locales.zip
 /resources/mathjax
 /resources/fonts/liberation
-/resources/mozilla-ca-certs.pem
+/resources/mozilla-ca-certs
 /resources/user-agent-data.json
 /resources/piper-voices.json
 /icons/icns/*.icns
diff --git a/bypy/macos/__main__.py b/bypy/macos/__main__.py
index 28c6e093ec..7b3e692b23 100644
--- a/bypy/macos/__main__.py
+++ b/bypy/macos/__main__.py
@@ -40,7 +40,7 @@ QT_FRAMEWORKS = [x.replace(f'{QT_MAJOR}', '') for x in QT_DLLS]
 ENV = dict(
     FONTCONFIG_PATH='@executable_path/../Resources/fonts',
     FONTCONFIG_FILE='@executable_path/../Resources/fonts/fonts.conf',
-    SSL_CERT_FILE='@executable_path/../Resources/resources/mozilla-ca-certs.pem',
+    SSL_CERT_DIR='@executable_path/../Resources/resources/mozilla-ca-certs',
     OPENSSL_ENGINES='@executable_path/../Frameworks/engines-3',
     OPENSSL_MODULES='@executable_path/../Frameworks/ossl-modules',
 )
diff --git a/bypy/windows/util.c b/bypy/windows/util.c
index e070a9389e..21e40a29cb 100644
--- a/bypy/windows/util.c
+++ b/bypy/windows/util.c
@@ -52,9 +52,9 @@ get_install_locations(void) {
     // Lots of people have trouble with various websites failing to download
     // because of missing intermediate certificates in the windows store
     // so use the Mozilla certificate bundle
-    _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\mozilla-ca-certs.pem", interpreter_data.resources_path);
+    _snwprintf_s(qt_prefix_dir, MAX_PATH-1, _TRUNCATE, L"%ls\\mozilla-ca-certs", interpreter_data.resources_path);
     const char *s = getenv("CALIBRE_USE_SYSTEM_CERTIFICATES");
-    if (!s || strcmp(s, "1") != 0) _wputenv_s(L"SSL_CERT_FILE", qt_prefix_dir);
+    if (!s || strcmp(s, "1") != 0) _wputenv_s(L"SSL_CERT_DIR", qt_prefix_dir);
 }
 
 static void
diff --git a/setup/resources.py b/setup/resources.py
index fbd918c5df..467d4193f6 100644
--- a/setup/resources.py
+++ b/setup/resources.py
@@ -5,7 +5,6 @@ __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import errno
 import glob
 import json
 import os
@@ -30,35 +29,31 @@ def get_opts_from_parser(parser):
 class CACerts(Command):  # {{{
 
     description = 'Get updated mozilla CA certificate bundle'
-    CA_PATH = os.path.join(Command.RESOURCES, 'mozilla-ca-certs.pem')
+    CA_PATH = os.path.join(Command.RESOURCES, 'mozilla-ca-certs')
 
     def add_options(self, parser):
         parser.add_option('--path-to-cacerts', help='Path to previously downloaded mozilla-ca-certs.pem')
 
     def run(self, opts):
+        import calibre  # needed to ensure calibre_extensions is available
+        _ = calibre
+        from calibre_extensions.certgen import create_CA_dir
         if opts.path_to_cacerts:
-            shutil.copyfile(opts.path_to_cacerts, self.CA_PATH)
-            os.chmod(self.CA_PATH, 0o644)
+            with open(opts.path_to_cacerts, 'rb') as f:
+                raw = f.read()
         else:
-            try:
-                with open(self.CA_PATH, 'rb') as f:
-                    raw = f.read()
-            except OSError as err:
-                if err.errno != errno.ENOENT:
-                    raise
-                raw = b''
-            nraw = download_securely('https://curl.haxx.se/ca/cacert.pem')
-            if not nraw:
+            raw = download_securely('https://curl.haxx.se/ca/cacert.pem')
+            if not raw:
                 raise RuntimeError('Failed to download CA cert bundle')
-            if nraw != raw:
-                self.info('Updating Mozilla CA certificates')
-                with open(self.CA_PATH, 'wb') as f:
-                    f.write(nraw)
-                self.verify_ca_certs()
+        if os.path.exists(self.CA_PATH):
+            shutil.rmtree(self.CA_PATH)
+        os.mkdir(self.CA_PATH)
+        create_CA_dir(raw, self.CA_PATH)
+        self.verify_ca_certs()
 
     def verify_ca_certs(self):
         from calibre.utils.https import get_https_resource_securely
-        get_https_resource_securely('https://calibre-ebook.com', cacerts=self.b(self.CA_PATH))
+        get_https_resource_securely('https://calibre-ebook.com', cadir=self.CA_PATH)
 # }}}
 
 
diff --git a/setup/unix-ci.py b/setup/unix-ci.py
index e25d184add..e4d272262f 100644
--- a/setup/unix-ci.py
+++ b/setup/unix-ci.py
@@ -316,8 +316,8 @@ username = api
         os.environ['OPENSSL_MODULES'] = os.path.join(SW, 'lib', 'ossl-modules')
         os.environ['PIPER_TTS_DIR'] = os.path.join(SW, 'piper')
         if ismacos:
-            os.environ['SSL_CERT_FILE'] = os.path.abspath(
-                'resources/mozilla-ca-certs.pem')
+            os.environ['SSL_CERT_DIR'] = os.path.abspath(
+                'resources/mozilla-ca-certs')
             # needed to ensure correct libxml2 is loaded
             os.environ['DYLD_INSERT_LIBRARIES'] = ':'.join(os.path.join(SW, 'lib', x) for x in 'libxml2.dylib libxslt.dylib libexslt.dylib'.split())
             os.environ['OPENSSL_ENGINES'] = os.path.join(SW, 'lib', 'engines-3')
diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py
index b60e2f1cd6..059bdccd7f 100644
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@@ -521,10 +521,12 @@ class BuildTest(unittest.TestCase):
     def test_openssl(self):
         import ssl
         ssl.PROTOCOL_TLSv1_2
-        if ismacos:
-            cafile = ssl.get_default_verify_paths().cafile
-            if not cafile or not cafile.endswith('/mozilla-ca-certs.pem') or not os.access(cafile, os.R_OK):
-                raise AssertionError('Mozilla CA certs not loaded')
+        if ismacos or iswindows:
+            paths = ssl.get_default_verify_paths()
+            capath = paths.capath
+            if not capath or os.path.basename(capath) != 'mozilla-ca-certs' or not os.path.isdir(capath):
+                cadir_env = os.environ.get('SSL_CERT_DIR')
+                raise AssertionError(f'Mozilla CA certs not loaded ({paths=} {cadir_env=})')
         # On Fedora create_default_context() succeeds in the main thread but
         # not in other threads, because upstream OpenSSL cannot read whatever
         # shit Fedora puts in /etc/ssl, so this check makes sure our bundled
diff --git a/src/calibre/utils/certgen.c b/src/calibre/utils/certgen.c
index abac05501b..7fbce85f09 100644
--- a/src/calibre/utils/certgen.c
+++ b/src/calibre/utils/certgen.c
@@ -41,6 +41,21 @@ set_error(const char *where) {
     return set_error_with_detail(where, NULL);
 }
 
+static PyObject*
+set_openssl_error(PyObject *err_class, const char *msg) {
+    BIO *bio = BIO_new(BIO_s_mem());
+    ERR_print_errors(bio);
+    char *data = NULL;
+    long len = BIO_get_mem_data(bio, &data);
+    PyObject *s = NULL;
+    if (len > 0 && data != NULL) s = PyUnicode_FromStringAndSize(data, len);
+    PyObject *m = PyUnicode_FromString(msg);
+    PyErr_Format(err_class, "%V: %V", m, msg, s, "unknown OpenSSL error");  // each %V consumes a (PyObject*, const char *fallback) pair; the fallback is used when the object is NULL
+    Py_XDECREF(m); Py_XDECREF(s);
+    BIO_free(bio);
+    return NULL;
+}
+
 static void free_rsa_keypair(PyObject *capsule) {
     EVP_PKEY *pkey= PyCapsule_GetPointer(capsule, NULL);
     EVP_PKEY_free(pkey);
@@ -407,6 +422,39 @@ verify_cert(PyObject *self, PyObject *args) {
     Py_RETURN_NONE;
 }
 
+static PyObject*
+create_CA_dir(PyObject *self, PyObject *args) {
+    const char *pem_bundle; Py_ssize_t bundle_sz; const char *output_path;
+    if (!PyArg_ParseTuple(args, "s#s", &pem_bundle, &bundle_sz, &output_path)) return NULL;
+    BIO *mem_in = BIO_new_mem_buf(pem_bundle, bundle_sz);
+    if (!mem_in) return PyErr_NoMemory();
+    X509 *x = NULL; char path[4096];
+    // Iterate through the certificates in memory
+    while ((x = PEM_read_bio_X509(mem_in, NULL, NULL, NULL))) {
+        unsigned long hash = X509_subject_name_hash(x);
+        int suffix = 0;
+
+        // Collision handling: check if .0, .1, etc exists
+        while(1) {
+            snprintf(path, sizeof(path), "%s/%08lx.%d", output_path, hash, suffix);
+            struct stat buffer;
+            if (stat(path, &buffer) != 0) break; // File doesn't exist, we can use this name
+            suffix++;
+        };
+
+        // Write the individual PEM file
+        BIO *out = BIO_new_file(path, "w");
+        int ok = 0;
+        if (out) ok = PEM_write_bio_X509(out, x);
+        if (out) BIO_free(out);
+        X509_free(x);
+        if (!ok) { set_openssl_error(PyExc_ValueError, "failed to write inidividual PEM certificate"); break; }
+    }
+    BIO_free(mem_in); ERR_clear_error();  // the read that ends the loop leaves an error queued; do not leak it to later OpenSSL calls
+    if (PyErr_Occurred()) return NULL;
+    Py_RETURN_NONE;
+}
+
 static PyMethodDef certgen_methods[] = {
     {"create_rsa_keypair", create_rsa_keypair, METH_VARARGS,
         "create_rsa_keypair(size)\n\nCreate a RSA keypair of the specified size"
@@ -433,7 +481,11 @@ static PyMethodDef certgen_methods[] = {
     },
 
     {"verify_cert", verify_cert, METH_VARARGS,
-        "verify_cert(cacert, cert)\n\nVerift cert against CA cert"
+        "verify_cert(cacert, cert)\n\nVerify cert against CA cert"
+    },
+
+    {"create_CA_dir", create_CA_dir, METH_VARARGS,
+        "create_CA_dir(cacerts_as_pem_bundle_string, output_path)\n\nCreate an OpenSSL CA certificate lookup directory. output_path must be an empty directory."
     },
 
     {NULL, NULL, 0, NULL}
diff --git a/src/calibre/utils/https.py b/src/calibre/utils/https.py
index 5e49035beb..1a661d001e 100644
--- a/src/calibre/utils/https.py
+++ b/src/calibre/utils/https.py
@@ -26,8 +26,11 @@ class HTTPSConnection(http.client.HTTPSConnection):
 
     def __init__(self, *args, **kwargs):
         cafile = kwargs.pop('cert_file', None)
-        if cafile is None:
+        capath = kwargs.pop('cadir', None)
+        if cafile is None and not capath:  # get_https_resource_securely passes cadir='' by default; treat it like None
             kwargs['context'] = ssl._create_unverified_context()
+        elif capath:  # prefer capath as it performs better
+            kwargs['context'] = ssl.create_default_context(capath=capath)
         else:
             kwargs['context'] = ssl.create_default_context(cafile=cafile)
         if kwargs.pop('disable_x509_strict_checking', False):
@@ -40,7 +43,10 @@ class HTTPSConnection(http.client.HTTPSConnection):
 
 
 def get_https_resource_securely(
-    url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5, ssl_version=None, headers=None, get_response=False):
+    url, cacerts='calibre-ebook-root-CA.crt', timeout=60, max_redirects=5,
+    ssl_version=None, headers=None, get_response=False,
+    cadir='',
+):
     '''
     Download the resource pointed to by url using https securely (verify server
     certificate). Ensures that redirects, if any, are also downloaded
@@ -72,7 +78,7 @@ def get_https_resource_securely(
                 # Invalid proxy, ignore
                 pass
 
-    c = HTTPSConnection(hostname, port, cert_file=cert_file, timeout=timeout, disable_x509_strict_checking=disable_x509_strict_checking)
+    c = HTTPSConnection(hostname, port, cert_file=cert_file, timeout=timeout, disable_x509_strict_checking=disable_x509_strict_checking, cadir=cadir)
     if has_proxy:
         c.set_tunnel(p.hostname, p.port)
 