diff --git a/SPECS/pytorch/CVE-2024-27318.patch b/SPECS/pytorch/CVE-2024-27318.patch new file mode 100644 index 00000000000..761b46cdf90 --- /dev/null +++ b/SPECS/pytorch/CVE-2024-27318.patch @@ -0,0 +1,376 @@ +Modified patch to apply to vendored onnx sources +Modified by: sumsharma@microsoft.com + +From 66b7fb630903fdcf3e83b6b6d56d82e904264a20 Mon Sep 17 00:00:00 2001 +From: liqun Fu +Date: Mon, 19 Feb 2024 11:12:40 -0800 +Subject: [PATCH] Fix path sanitization bypass leading to arbitrary read + (#5917) + +Signed-off-by: liqunfu +Signed-off-by: liqun Fu +Co-authored-by: Justin Chu +--- + third_party/onnx/onnx/checker.cc | 163 ++++++++++-------- + third_party/onnx/onnx/checker.h | 5 +- + third_party/onnx/onnx/common/path.h | 17 +- + third_party/onnx/onnx/cpp2py_export.cc | 2 + + third_party/onnx/onnx/external_data_helper.py | 17 +- + .../onnx/onnx/test/test_external_data.py | 47 +++++ + 6 files changed, 160 insertions(+), 91 deletions(-) + +diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc +index 0c81c87e..38a068dd 100644 +--- a/third_party/onnx/onnx/checker.cc ++++ b/third_party/onnx/onnx/checker.cc +@@ -4,7 +4,6 @@ + + #include "onnx/checker.h" + #include "onnx/common/file_utils.h" +-#include "onnx/common/path.h" + #include "onnx/defs/schema.h" + #include "onnx/defs/tensor_proto_util.h" + #include "onnx/proto_utils.h" +@@ -129,80 +128,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) { + for (const StringStringEntryProto& entry : tensor.external_data()) { + if (entry.has_key() && entry.has_value() && entry.key() == "location") { + has_location = true; +-#ifdef _WIN32 +- auto file_path = std::filesystem::path(utf8str_to_wstring(entry.value())); +- if (file_path.is_absolute()) { +- fail_check( +- "Location of external TensorProto ( tensor name: ", +- tensor.name(), +- ") should be a relative path, but it is an absolute path: ", +- entry.value()); +- } +- auto relative_path = file_path.lexically_normal().make_preferred().wstring(); +- // Check that normalized relative path contains ".." on Windows. +- if (relative_path.find(L"..", 0) != std::string::npos) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be file inside the ", +- ctx.get_model_dir(), +- ", but the '", +- entry.value(), +- "' points outside the directory"); +- } +- std::wstring data_path = path_join(utf8str_to_wstring(ctx.get_model_dir()), relative_path); +- struct _stat buff; +- if (_wstat(data_path.c_str(), &buff) != 0) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- entry.value(), +- ", but it doesn't exist or is not accessible."); +- } +-#else // POSIX +- if (entry.value().empty()) { +- fail_check("Location of external TensorProto ( tensor name: ", tensor.name(), ") should not be empty."); +- } else if (entry.value()[0] == '/') { +- fail_check( +- "Location of external TensorProto ( tensor name: ", +- tensor.name(), +- ") should be a relative path, but it is an absolute path: ", +- entry.value()); +- } +- std::string relative_path = clean_relative_path(entry.value()); +- // Check that normalized relative path contains ".." on POSIX +- if (relative_path.find("..", 0) != std::string::npos) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be file inside the ", +- ctx.get_model_dir(), +- ", but the '", +- entry.value(), +- "' points outside the directory"); +- } +- std::string data_path = path_join(ctx.get_model_dir(), relative_path); +- // use stat to check whether the file exists +- struct stat buffer; +- if (stat((data_path).c_str(), &buffer) != 0) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- data_path, +- ", but it doesn't exist or is not accessible."); +- } +- // Do not allow symlinks or directories. +- if (!S_ISREG(buffer.st_mode)) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- data_path, +- ", but it is not regular file."); +- } +-#endif ++ resolve_external_data_location(ctx.get_model_dir(), entry.value(), tensor.name()); + } + } + if (!has_location) { +@@ -1028,6 +954,93 @@ void check_model(const ModelProto& model, bool full_check) { + } + } + ++std::string resolve_external_data_location( ++ const std::string& base_dir, ++ const std::string& location, ++ const std::string& tensor_name) { ++#ifdef _WIN32 ++ auto file_path = std::filesystem::path(utf8str_to_wstring(location)); ++ if (file_path.is_absolute()) { ++ fail_check( ++ "Location of external TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be a relative path, but it is an absolute path: ", ++ location); ++ } ++ auto relative_path = file_path.lexically_normal().make_preferred().wstring(); ++ // Check that normalized relative path contains ".." on Windows. ++ if (relative_path.find(L"..", 0) != std::string::npos) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be file inside the ", ++ base_dir, ++ ", but the '", ++ location, ++ "' points outside the directory"); ++ } ++ std::wstring data_path = path_join(utf8str_to_wstring(base_dir), relative_path); ++ struct _stat64 buff; ++ if (data_path.empty() || (data_path[0] != '#' && _wstat64(data_path.c_str(), &buff) != 0)) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ location, ++ ", but it doesn't exist or is not accessible."); ++ } ++ return wstring_to_utf8str(data_path); ++#else // POSIX ++ if (location.empty()) { ++ fail_check("Location of external TensorProto ( tensor name: ", tensor_name, ") should not be empty."); ++ } else if (location[0] == '/') { ++ fail_check( ++ "Location of external TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be a relative path, but it is an absolute path: ", ++ location); ++ } ++ std::string relative_path = clean_relative_path(location); ++ // Check that normalized relative path contains ".." on POSIX ++ if (relative_path.find("..", 0) != std::string::npos) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be file inside the ", ++ base_dir, ++ ", but the '", ++ location, ++ "' points outside the directory"); ++ } ++ std::string data_path = path_join(base_dir, relative_path); ++ // use stat64 to check whether the file exists ++#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__) ++ struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64 ++ if (data_path.empty() || (data_path[0] != '#' && stat((data_path).c_str(), &buffer) != 0)) { ++#else ++ struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64 ++ if (data_path.empty() || (data_path[0] != '#' && stat64((data_path).c_str(), &buffer) != 0)) { ++#endif ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ data_path, ++ ", but it doesn't exist or is not accessible."); ++ } ++ // Do not allow symlinks or directories. ++ if (data_path.empty() || (data_path[0] != '#' && !S_ISREG(buffer.st_mode))) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ data_path, ++ ", but it is not regular file."); ++ } ++ return data_path; ++#endif ++} ++ + std::set experimental_ops = { + "ATen", + "Affine", +diff --git a/third_party/onnx/onnx/checker.h b/third_party/onnx/onnx/checker.h +index 05eeaad0..50381aae 100644 +--- a/third_party/onnx/onnx/checker.h ++++ b/third_party/onnx/onnx/checker.h +@@ -148,7 +148,10 @@ void check_model_local_functions( + + void check_model(const ModelProto& model, bool full_check = false); + void check_model(const std::string& model_path, bool full_check = false); +- ++std::string resolve_external_data_location( ++ const std::string& base_dir, ++ const std::string& location, ++ const std::string& tensor_name); + bool check_is_experimental_op(const NodeProto& node); + + } // namespace checker +diff --git a/third_party/onnx/onnx/common/path.h b/third_party/onnx/onnx/common/path.h +index f71b5b12..3d69e448 100644 +--- a/third_party/onnx/onnx/common/path.h ++++ b/third_party/onnx/onnx/common/path.h +@@ -30,12 +30,23 @@ inline std::wstring utf8str_to_wstring(const std::string& utf8str) { + if (utf8str.size() > INT_MAX) { + fail_check("utf8str_to_wstring: string is too long for converting to wstring."); + } +- int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), NULL, 0); ++ int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast(utf8str.size()), NULL, 0); + std::wstring ws_str(size_required, 0); +- MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), &ws_str[0], size_required); ++ MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast(utf8str.size()), &ws_str[0], size_required); + return ws_str; + } +- ++inline std::string wstring_to_utf8str(const std::wstring& ws_str) { ++ if (ws_str.size() > INT_MAX) { ++ fail_check("wstring_to_utf8str: string is too long for converting to UTF-8."); ++ } ++ int size_required = ++ WideCharToMultiByte(CP_UTF8, 0, ws_str.c_str(), static_cast(ws_str.size()), NULL, 0, NULL, NULL); ++ std::string utf8str(size_required, 0); ++ WideCharToMultiByte( ++ CP_UTF8, 0, ws_str.c_str(), static_cast(ws_str.size()), &utf8str[0], size_required, NULL, NULL); ++ return utf8str; ++} ++ + #else + std::string path_join(const std::string& origin, const std::string& append); + // TODO: also use std::filesystem::path for clean_relative_path after ONNX has supported C++17 for POSIX +diff --git a/third_party/onnx/onnx/cpp2py_export.cc b/third_party/onnx/onnx/cpp2py_export.cc +index f6e1738e..02aabcab 100644 +--- a/third_party/onnx/onnx/cpp2py_export.cc ++++ b/third_party/onnx/onnx/cpp2py_export.cc +@@ -395,6 +395,8 @@ PYBIND11_MODULE(onnx_cpp2py_export, onnx_cpp2py_export) { + "path"_a, + "full_check"_a = false); + ++ checker.def("_resolve_external_data_location", &checker::resolve_external_data_location); ++ + // Submodule `version_converter` + auto version_converter = onnx_cpp2py_export.def_submodule("version_converter"); + version_converter.doc() = "VersionConverter submodule"; +diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py +index 54c0f403..6763b11d 100644 +--- a/third_party/onnx/onnx/external_data_helper.py ++++ b/third_party/onnx/onnx/external_data_helper.py +@@ -5,7 +5,8 @@ import sys + import uuid + from itertools import chain + from typing import Callable, Iterable, Optional +- ++ ++import onnx.onnx_cpp2py_export.checker as c_checker + from .onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto + + +@@ -37,9 +38,9 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None: + base_dir: directory that contains the external data. + """ + info = ExternalDataInfo(tensor) +- file_location = _sanitize_path(info.location) +- external_data_file_path = os.path.join(base_dir, file_location) +- ++ external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined] ++ base_dir, info.location, tensor.name ++ ) + with open(external_data_file_path, "rb") as data_file: + if info.offset: + data_file.seek(info.offset) +@@ -251,14 +252,6 @@ def _get_attribute_tensors(onnx_model_proto: ModelProto) -> Iterable[TensorProto + yield from _get_attribute_tensors_from_graph(onnx_model_proto.graph) + + +-def _sanitize_path(path: str) -> str: +- """Remove path components which would allow traversing up a directory tree from a base path. +- +- Note: This method is currently very basic and should be expanded. +- """ +- return path.lstrip("/.") +- +- + def _is_valid_filename(filename: str) -> bool: + """Utility to check whether the provided filename is valid.""" + exp = re.compile('^[^<>:;,?"*|/]+$') +diff --git a/third_party/onnx/onnx/test/test_external_data.py b/third_party/onnx/onnx/test/test_external_data.py +index 5ba6b9cf..6e06312b 100644 +--- a/third_party/onnx/onnx/test/test_external_data.py ++++ b/third_party/onnx/onnx/test/test_external_data.py +@@ -1,4 +1,5 @@ + # SPDX-License-Identifier: Apache-2.0 ++import itertools + import os + import os.path as Path + import shutil +@@ -186,6 +187,52 @@ class TestLoadExternalDataSingleFile(TestLoadExternalDataBase): + attribute_tensor = new_model.graph.node[0].attribute[0].t + self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value)) + ++ @parameterized.parameterized.expand(itertools.product((True, False), (True, False))) ++ def test_save_external_invalid_single_file_data_and_check( ++ self, use_absolute_path: bool, use_model_path: bool ++ ) -> None: ++ model = onnx.load_model(self.model_filename, self.serialization_format) ++ ++ model_dir = os.path.join(self.temp_dir, "save_copy") ++ os.mkdir(model_dir) ++ ++ traversal_external_data_dir = os.path.join( ++ self.temp_dir, "invlid_external_data" ++ ) ++ os.mkdir(traversal_external_data_dir) ++ ++ if use_absolute_path: ++ traversal_external_data_location = os.path.join( ++ traversal_external_data_dir, "tensors.bin" ++ ) ++ else: ++ traversal_external_data_location = "../invlid_external_data/tensors.bin" ++ ++ external_data_dir = os.path.join(self.temp_dir, "external_data") ++ os.mkdir(external_data_dir) ++ new_model_filepath = os.path.join(model_dir, "model.onnx") ++ ++ def convert_model_to_external_data_no_check(model: ModelProto, location: str): ++ for tensor in model.graph.initializer: ++ if tensor.HasField("raw_data"): ++ set_external_data(tensor, location) ++ ++ convert_model_to_external_data_no_check( ++ model, ++ location=traversal_external_data_location, ++ ) ++ ++ onnx.save_model(model, new_model_filepath, self.serialization_format) ++ if use_model_path: ++ with self.assertRaises(onnx.checker.ValidationError): ++ _ = onnx.load_model(new_model_filepath, self.serialization_format) ++ else: ++ onnx_model = onnx.load_model( ++ new_model_filepath, self.serialization_format, load_external_data=False ++ ) ++ with self.assertRaises(onnx.checker.ValidationError): ++ load_external_data_for_model(onnx_model, external_data_dir) ++ + + class TestSaveAllTensorsAsExternalData(TestLoadExternalDataBase): + def setUp(self) -> None: +-- +2.25.1 diff --git a/SPECS/pytorch/pytorch.spec b/SPECS/pytorch/pytorch.spec index cf4db56fd85..8fbd4d3fa73 100644 --- a/SPECS/pytorch/pytorch.spec +++ b/SPECS/pytorch/pytorch.spec @@ -2,7 +2,7 @@ Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration. Name: pytorch Version: 2.2.2 -Release: 1%{?dist} +Release: 2%{?dist} License: BSD-3-Clause Vendor: Microsoft Corporation Distribution: Azure Linux @@ -24,6 +24,8 @@ BuildRequires: python3-setuptools BuildRequires: python3-typing-extensions BuildRequires: python3-six +Patch1: CVE-2024-27318.patch + %description PyTorch is a Python package that provides two high-level features: - Tensor computation (like NumPy) with strong GPU acceleration @@ -56,7 +58,7 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy and Cython to extend PyTorch when needed. %prep -%autosetup -a 1 -n %{name}-v%{version} +%autosetup -a 1 -p 1 -n %{name}-v%{version} %build export USE_CUDA=0 @@ -84,6 +86,9 @@ cp -arf docs %{buildroot}/%{_pkgdocdir} %{_docdir}/* %changelog +* Tue Sep 17 2024 Archana Choudhary - 2.2.2-2 +- patch for CVE-2024-27318 + * Tue Apr 02 2024 Riken Maharjan - 2.2.2-1 - Upgrade to pytorch 2.2.2