-
Notifications
You must be signed in to change notification settings - Fork 522
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pytorch: add patch for CVE-2024-27318
- Loading branch information
Showing
2 changed files
with
383 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,376 @@ | ||
Modified patch to apply to vendored onnx sources | ||
Modified by: [email protected] | ||
|
||
From 66b7fb630903fdcf3e83b6b6d56d82e904264a20 Mon Sep 17 00:00:00 2001 | ||
From: liqun Fu <[email protected]> | ||
Date: Mon, 19 Feb 2024 11:12:40 -0800 | ||
Subject: [PATCH] Fix path sanitization bypass leading to arbitrary read | ||
(#5917) | ||
|
||
Signed-off-by: liqunfu <[email protected]> | ||
Signed-off-by: liqun Fu <[email protected]> | ||
Co-authored-by: Justin Chu <[email protected]> | ||
--- | ||
third_party/onnx/onnx/checker.cc | 163 ++++++++++-------- | ||
third_party/onnx/onnx/checker.h | 5 +- | ||
third_party/onnx/onnx/common/path.h | 17 +- | ||
third_party/onnx/onnx/cpp2py_export.cc | 2 + | ||
third_party/onnx/onnx/external_data_helper.py | 17 +- | ||
.../onnx/onnx/test/test_external_data.py | 47 +++++ | ||
6 files changed, 160 insertions(+), 91 deletions(-) | ||
|
||
diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc | ||
index 0c81c87e..38a068dd 100644 | ||
--- a/third_party/onnx/onnx/checker.cc | ||
+++ b/third_party/onnx/onnx/checker.cc | ||
@@ -4,7 +4,6 @@ | ||
|
||
#include "onnx/checker.h" | ||
#include "onnx/common/file_utils.h" | ||
-#include "onnx/common/path.h" | ||
#include "onnx/defs/schema.h" | ||
#include "onnx/defs/tensor_proto_util.h" | ||
#include "onnx/proto_utils.h" | ||
@@ -129,80 +128,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) { | ||
for (const StringStringEntryProto& entry : tensor.external_data()) { | ||
if (entry.has_key() && entry.has_value() && entry.key() == "location") { | ||
has_location = true; | ||
-#ifdef _WIN32 | ||
- auto file_path = std::filesystem::path(utf8str_to_wstring(entry.value())); | ||
- if (file_path.is_absolute()) { | ||
- fail_check( | ||
- "Location of external TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be a relative path, but it is an absolute path: ", | ||
- entry.value()); | ||
- } | ||
- auto relative_path = file_path.lexically_normal().make_preferred().wstring(); | ||
- // Check that normalized relative path contains ".." on Windows. | ||
- if (relative_path.find(L"..", 0) != std::string::npos) { | ||
- fail_check( | ||
- "Data of TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be file inside the ", | ||
- ctx.get_model_dir(), | ||
- ", but the '", | ||
- entry.value(), | ||
- "' points outside the directory"); | ||
- } | ||
- std::wstring data_path = path_join(utf8str_to_wstring(ctx.get_model_dir()), relative_path); | ||
- struct _stat buff; | ||
- if (_wstat(data_path.c_str(), &buff) != 0) { | ||
- fail_check( | ||
- "Data of TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be stored in ", | ||
- entry.value(), | ||
- ", but it doesn't exist or is not accessible."); | ||
- } | ||
-#else // POSIX | ||
- if (entry.value().empty()) { | ||
- fail_check("Location of external TensorProto ( tensor name: ", tensor.name(), ") should not be empty."); | ||
- } else if (entry.value()[0] == '/') { | ||
- fail_check( | ||
- "Location of external TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be a relative path, but it is an absolute path: ", | ||
- entry.value()); | ||
- } | ||
- std::string relative_path = clean_relative_path(entry.value()); | ||
- // Check that normalized relative path contains ".." on POSIX | ||
- if (relative_path.find("..", 0) != std::string::npos) { | ||
- fail_check( | ||
- "Data of TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be file inside the ", | ||
- ctx.get_model_dir(), | ||
- ", but the '", | ||
- entry.value(), | ||
- "' points outside the directory"); | ||
- } | ||
- std::string data_path = path_join(ctx.get_model_dir(), relative_path); | ||
- // use stat to check whether the file exists | ||
- struct stat buffer; | ||
- if (stat((data_path).c_str(), &buffer) != 0) { | ||
- fail_check( | ||
- "Data of TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be stored in ", | ||
- data_path, | ||
- ", but it doesn't exist or is not accessible."); | ||
- } | ||
- // Do not allow symlinks or directories. | ||
- if (!S_ISREG(buffer.st_mode)) { | ||
- fail_check( | ||
- "Data of TensorProto ( tensor name: ", | ||
- tensor.name(), | ||
- ") should be stored in ", | ||
- data_path, | ||
- ", but it is not regular file."); | ||
- } | ||
-#endif | ||
+ resolve_external_data_location(ctx.get_model_dir(), entry.value(), tensor.name()); | ||
} | ||
} | ||
if (!has_location) { | ||
@@ -1028,6 +954,93 @@ void check_model(const ModelProto& model, bool full_check) { | ||
} | ||
} | ||
|
||
+std::string resolve_external_data_location( | ||
+ const std::string& base_dir, | ||
+ const std::string& location, | ||
+ const std::string& tensor_name) { | ||
+#ifdef _WIN32 | ||
+ auto file_path = std::filesystem::path(utf8str_to_wstring(location)); | ||
+ if (file_path.is_absolute()) { | ||
+ fail_check( | ||
+ "Location of external TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be a relative path, but it is an absolute path: ", | ||
+ location); | ||
+ } | ||
+ auto relative_path = file_path.lexically_normal().make_preferred().wstring(); | ||
+ // Check that normalized relative path contains ".." on Windows. | ||
+ if (relative_path.find(L"..", 0) != std::string::npos) { | ||
+ fail_check( | ||
+ "Data of TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be file inside the ", | ||
+ base_dir, | ||
+ ", but the '", | ||
+ location, | ||
+ "' points outside the directory"); | ||
+ } | ||
+ std::wstring data_path = path_join(utf8str_to_wstring(base_dir), relative_path); | ||
+ struct _stat64 buff; | ||
+ if (data_path.empty() || (data_path[0] != '#' && _wstat64(data_path.c_str(), &buff) != 0)) { | ||
+ fail_check( | ||
+ "Data of TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be stored in ", | ||
+ location, | ||
+ ", but it doesn't exist or is not accessible."); | ||
+ } | ||
+ return wstring_to_utf8str(data_path); | ||
+#else // POSIX | ||
+ if (location.empty()) { | ||
+ fail_check("Location of external TensorProto ( tensor name: ", tensor_name, ") should not be empty."); | ||
+ } else if (location[0] == '/') { | ||
+ fail_check( | ||
+ "Location of external TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be a relative path, but it is an absolute path: ", | ||
+ location); | ||
+ } | ||
+ std::string relative_path = clean_relative_path(location); | ||
+ // Check that normalized relative path contains ".." on POSIX | ||
+ if (relative_path.find("..", 0) != std::string::npos) { | ||
+ fail_check( | ||
+ "Data of TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be file inside the ", | ||
+ base_dir, | ||
+ ", but the '", | ||
+ location, | ||
+ "' points outside the directory"); | ||
+ } | ||
+ std::string data_path = path_join(base_dir, relative_path); | ||
+ // use stat64 to check whether the file exists | ||
+#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__) | ||
+ struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64 | ||
+ if (data_path.empty() || (data_path[0] != '#' && stat((data_path).c_str(), &buffer) != 0)) { | ||
+#else | ||
+ struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64 | ||
+ if (data_path.empty() || (data_path[0] != '#' && stat64((data_path).c_str(), &buffer) != 0)) { | ||
+#endif | ||
+ fail_check( | ||
+ "Data of TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be stored in ", | ||
+ data_path, | ||
+ ", but it doesn't exist or is not accessible."); | ||
+ } | ||
+ // Do not allow symlinks or directories. | ||
+ if (data_path.empty() || (data_path[0] != '#' && !S_ISREG(buffer.st_mode))) { | ||
+ fail_check( | ||
+ "Data of TensorProto ( tensor name: ", | ||
+ tensor_name, | ||
+ ") should be stored in ", | ||
+ data_path, | ||
+ ", but it is not regular file."); | ||
+ } | ||
+ return data_path; | ||
+#endif | ||
+} | ||
+ | ||
std::set<std::string> experimental_ops = { | ||
"ATen", | ||
"Affine", | ||
diff --git a/third_party/onnx/onnx/checker.h b/third_party/onnx/onnx/checker.h | ||
index 05eeaad0..50381aae 100644 | ||
--- a/third_party/onnx/onnx/checker.h | ||
+++ b/third_party/onnx/onnx/checker.h | ||
@@ -148,7 +148,10 @@ void check_model_local_functions( | ||
|
||
void check_model(const ModelProto& model, bool full_check = false); | ||
void check_model(const std::string& model_path, bool full_check = false); | ||
- | ||
+std::string resolve_external_data_location( | ||
+ const std::string& base_dir, | ||
+ const std::string& location, | ||
+ const std::string& tensor_name); | ||
bool check_is_experimental_op(const NodeProto& node); | ||
|
||
} // namespace checker | ||
diff --git a/third_party/onnx/onnx/common/path.h b/third_party/onnx/onnx/common/path.h | ||
index f71b5b12..3d69e448 100644 | ||
--- a/third_party/onnx/onnx/common/path.h | ||
+++ b/third_party/onnx/onnx/common/path.h | ||
@@ -30,12 +30,23 @@ inline std::wstring utf8str_to_wstring(const std::string& utf8str) { | ||
if (utf8str.size() > INT_MAX) { | ||
fail_check("utf8str_to_wstring: string is too long for converting to wstring."); | ||
} | ||
- int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), NULL, 0); | ||
+ int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), NULL, 0); | ||
std::wstring ws_str(size_required, 0); | ||
- MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), &ws_str[0], size_required); | ||
+ MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), &ws_str[0], size_required); | ||
return ws_str; | ||
} | ||
- | ||
+inline std::string wstring_to_utf8str(const std::wstring& ws_str) { | ||
+ if (ws_str.size() > INT_MAX) { | ||
+ fail_check("wstring_to_utf8str: string is too long for converting to UTF-8."); | ||
+ } | ||
+ int size_required = | ||
+ WideCharToMultiByte(CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), NULL, 0, NULL, NULL); | ||
+ std::string utf8str(size_required, 0); | ||
+ WideCharToMultiByte( | ||
+ CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), &utf8str[0], size_required, NULL, NULL); | ||
+ return utf8str; | ||
+} | ||
+ | ||
#else | ||
std::string path_join(const std::string& origin, const std::string& append); | ||
// TODO: also use std::filesystem::path for clean_relative_path after ONNX has supported C++17 for POSIX | ||
diff --git a/third_party/onnx/onnx/cpp2py_export.cc b/third_party/onnx/onnx/cpp2py_export.cc | ||
index f6e1738e..02aabcab 100644 | ||
--- a/third_party/onnx/onnx/cpp2py_export.cc | ||
+++ b/third_party/onnx/onnx/cpp2py_export.cc | ||
@@ -395,6 +395,8 @@ PYBIND11_MODULE(onnx_cpp2py_export, onnx_cpp2py_export) { | ||
"path"_a, | ||
"full_check"_a = false); | ||
|
||
+ checker.def("_resolve_external_data_location", &checker::resolve_external_data_location); | ||
+ | ||
// Submodule `version_converter` | ||
auto version_converter = onnx_cpp2py_export.def_submodule("version_converter"); | ||
version_converter.doc() = "VersionConverter submodule"; | ||
diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py | ||
index 54c0f403..6763b11d 100644 | ||
--- a/third_party/onnx/onnx/external_data_helper.py | ||
+++ b/third_party/onnx/onnx/external_data_helper.py | ||
@@ -5,7 +5,8 @@ import sys | ||
import uuid | ||
from itertools import chain | ||
from typing import Callable, Iterable, Optional | ||
- | ||
+ | ||
+import onnx.onnx_cpp2py_export.checker as c_checker | ||
from .onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto | ||
|
||
|
||
@@ -37,9 +38,9 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None: | ||
base_dir: directory that contains the external data. | ||
""" | ||
info = ExternalDataInfo(tensor) | ||
- file_location = _sanitize_path(info.location) | ||
- external_data_file_path = os.path.join(base_dir, file_location) | ||
- | ||
+ external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined] | ||
+ base_dir, info.location, tensor.name | ||
+ ) | ||
with open(external_data_file_path, "rb") as data_file: | ||
if info.offset: | ||
data_file.seek(info.offset) | ||
@@ -251,14 +252,6 @@ def _get_attribute_tensors(onnx_model_proto: ModelProto) -> Iterable[TensorProto | ||
yield from _get_attribute_tensors_from_graph(onnx_model_proto.graph) | ||
|
||
|
||
-def _sanitize_path(path: str) -> str: | ||
- """Remove path components which would allow traversing up a directory tree from a base path. | ||
- | ||
- Note: This method is currently very basic and should be expanded. | ||
- """ | ||
- return path.lstrip("/.") | ||
- | ||
- | ||
def _is_valid_filename(filename: str) -> bool: | ||
"""Utility to check whether the provided filename is valid.""" | ||
exp = re.compile('^[^<>:;,?"*|/]+$') | ||
diff --git a/third_party/onnx/onnx/test/test_external_data.py b/third_party/onnx/onnx/test/test_external_data.py | ||
index 5ba6b9cf..6e06312b 100644 | ||
--- a/third_party/onnx/onnx/test/test_external_data.py | ||
+++ b/third_party/onnx/onnx/test/test_external_data.py | ||
@@ -1,4 +1,5 @@ | ||
# SPDX-License-Identifier: Apache-2.0 | ||
+import itertools | ||
import os | ||
import os.path as Path | ||
import shutil | ||
@@ -186,6 +187,52 @@ class TestLoadExternalDataSingleFile(TestLoadExternalDataBase): | ||
attribute_tensor = new_model.graph.node[0].attribute[0].t | ||
self.assertTrue(np.allclose(to_array(attribute_tensor), self.attribute_value)) | ||
|
||
+ @parameterized.parameterized.expand(itertools.product((True, False), (True, False))) | ||
+ def test_save_external_invalid_single_file_data_and_check( | ||
+ self, use_absolute_path: bool, use_model_path: bool | ||
+ ) -> None: | ||
+ model = onnx.load_model(self.model_filename, self.serialization_format) | ||
+ | ||
+ model_dir = os.path.join(self.temp_dir, "save_copy") | ||
+ os.mkdir(model_dir) | ||
+ | ||
+ traversal_external_data_dir = os.path.join( | ||
+ self.temp_dir, "invlid_external_data" | ||
+ ) | ||
+ os.mkdir(traversal_external_data_dir) | ||
+ | ||
+ if use_absolute_path: | ||
+ traversal_external_data_location = os.path.join( | ||
+ traversal_external_data_dir, "tensors.bin" | ||
+ ) | ||
+ else: | ||
+ traversal_external_data_location = "../invlid_external_data/tensors.bin" | ||
+ | ||
+ external_data_dir = os.path.join(self.temp_dir, "external_data") | ||
+ os.mkdir(external_data_dir) | ||
+ new_model_filepath = os.path.join(model_dir, "model.onnx") | ||
+ | ||
+ def convert_model_to_external_data_no_check(model: ModelProto, location: str): | ||
+ for tensor in model.graph.initializer: | ||
+ if tensor.HasField("raw_data"): | ||
+ set_external_data(tensor, location) | ||
+ | ||
+ convert_model_to_external_data_no_check( | ||
+ model, | ||
+ location=traversal_external_data_location, | ||
+ ) | ||
+ | ||
+ onnx.save_model(model, new_model_filepath, self.serialization_format) | ||
+ if use_model_path: | ||
+ with self.assertRaises(onnx.checker.ValidationError): | ||
+ _ = onnx.load_model(new_model_filepath, self.serialization_format) | ||
+ else: | ||
+ onnx_model = onnx.load_model( | ||
+ new_model_filepath, self.serialization_format, load_external_data=False | ||
+ ) | ||
+ with self.assertRaises(onnx.checker.ValidationError): | ||
+ load_external_data_for_model(onnx_model, external_data_dir) | ||
+ | ||
|
||
class TestSaveAllTensorsAsExternalData(TestLoadExternalDataBase): | ||
def setUp(self) -> None: | ||
-- | ||
2.25.1 |
Oops, something went wrong.