From 6afd1d5386e8cbcb95d6dcae8ad3df7bc4bbc255 Mon Sep 17 00:00:00 2001 From: Archana Choudhary Date: Tue, 17 Sep 2024 12:16:35 +0000 Subject: [PATCH] pytorch: add patch for CVE-2024-27318, CVE-2022-1941 --- SPECS/pytorch/CVE-2022-1941.patch | 352 +++++++++++++++++++++++++++ SPECS/pytorch/CVE-2024-27318.patch | 375 +++++++++++++++++++++++++++++ SPECS/pytorch/pytorch.spec | 10 +- 3 files changed, 735 insertions(+), 2 deletions(-) create mode 100644 SPECS/pytorch/CVE-2022-1941.patch create mode 100644 SPECS/pytorch/CVE-2024-27318.patch diff --git a/SPECS/pytorch/CVE-2022-1941.patch b/SPECS/pytorch/CVE-2022-1941.patch new file mode 100644 index 00000000000..cd961581a22 --- /dev/null +++ b/SPECS/pytorch/CVE-2022-1941.patch @@ -0,0 +1,352 @@ +# Patch generated by Archana Choudhary +# Source: https://github.com/protocolbuffers/protobuf/commit/55815e423bb82cc828836bbd60c79c1f9a195763 + +diff --color -ruN a/third_party/protobuf/src/google/protobuf/extension_set_inl.h b/third_party/protobuf/src/google/protobuf/extension_set_inl.h +--- a/third_party/protobuf/src/google/protobuf/extension_set_inl.h 2024-03-27 22:28:55.000000000 +0000 ++++ b/third_party/protobuf/src/google/protobuf/extension_set_inl.h 2024-09-18 11:49:16.390834276 +0000 +@@ -206,16 +206,21 @@ + const char* ptr, const Msg* containing_type, + internal::InternalMetadata* metadata, internal::ParseContext* ctx) { + std::string payload; +- uint32 type_id = 0; +- bool payload_read = false; ++ uint32 type_id; ++ enum class State { kNoTag, kHasType, kHasPayload, kDone }; ++ State state = State::kNoTag; ++ + while (!ctx->Done(&ptr)) { + uint32 tag = static_cast(*ptr++); + if (tag == WireFormatLite::kMessageSetTypeIdTag) { + uint64 tmp; + ptr = ParseBigVarint(ptr, &tmp); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); +- type_id = tmp; +- if (payload_read) { ++ if (state == State::kNoTag) { ++ type_id = tmp; ++ state = State::kHasType; ++ } else if (state == State::kHasPayload) { ++ type_id = tmp; + ExtensionInfo extension; + bool 
was_packed_on_wire; + if (!FindExtension(2, type_id, containing_type, ctx, &extension, +@@ -241,20 +246,24 @@ + GOOGLE_PROTOBUF_PARSER_ASSERT(value->_InternalParse(p, &tmp_ctx) && + tmp_ctx.EndedAtLimit()); + } +- type_id = 0; ++ state = State::kDone; + } + } else if (tag == WireFormatLite::kMessageSetMessageTag) { +- if (type_id != 0) { ++ if (state == State::kHasType) { + ptr = ParseFieldMaybeLazily(static_cast(type_id) * 8 + 2, ptr, + containing_type, metadata, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); +- type_id = 0; ++ state = State::kDone; + } else { ++ std::string tmp; + int32 size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); +- ptr = ctx->ReadString(ptr, size, &payload); ++ ptr = ctx->ReadString(ptr, size, &tmp); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); +- payload_read = true; ++ if (state == State::kNoTag) { ++ payload = std::move(tmp); ++ state = State::kHasPayload; ++ } + } + } else { + ptr = ReadTag(ptr - 1, &tag); +diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format.cc b/third_party/protobuf/src/google/protobuf/wire_format.cc +--- a/third_party/protobuf/src/google/protobuf/wire_format.cc 2024-03-27 22:28:55.000000000 +0000 ++++ b/third_party/protobuf/src/google/protobuf/wire_format.cc 2024-09-18 11:49:16.390834276 +0000 +@@ -659,9 +659,11 @@ + const char* _InternalParse(const char* ptr, internal::ParseContext* ctx) { + // Parse a MessageSetItem + auto metadata = reflection->MutableInternalMetadata(msg); ++ enum class State { kNoTag, kHasType, kHasPayload, kDone }; ++ State state = State::kNoTag; ++ + std::string payload; + uint32 type_id = 0; +- bool payload_read = false; + while (!ctx->Done(&ptr)) { + // We use 64 bit tags in order to allow typeid's that span the whole + // range of 32 bit numbers. 
+@@ -670,8 +672,11 @@ + uint64 tmp; + ptr = ParseBigVarint(ptr, &tmp); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); +- type_id = tmp; +- if (payload_read) { ++ if (state == State::kNoTag) { ++ type_id = tmp; ++ state = State::kHasType; ++ } else if (state == State::kHasPayload) { ++ type_id = tmp; + const FieldDescriptor* field; + if (ctx->data().pool == nullptr) { + field = reflection->FindKnownExtensionByNumber(type_id); +@@ -698,17 +703,17 @@ + GOOGLE_PROTOBUF_PARSER_ASSERT(value->_InternalParse(p, &tmp_ctx) && + tmp_ctx.EndedAtLimit()); + } +- type_id = 0; ++ state = State::kDone; + } + continue; + } else if (tag == WireFormatLite::kMessageSetMessageTag) { +- if (type_id == 0) { ++ if (state == State::kNoTag) { + int32 size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + ptr = ctx->ReadString(ptr, size, &payload); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); +- payload_read = true; +- } else { ++ state = State::kHasPayload; ++ } else if (state == State::kHasType) { + // We're now parsing the payload + const FieldDescriptor* field = nullptr; + if (descriptor->IsExtensionNumber(type_id)) { +@@ -722,7 +727,12 @@ + ptr = WireFormat::_InternalParseAndMergeField( + msg, ptr, ctx, static_cast(type_id) * 8 + 2, reflection, + field); +- type_id = 0; ++ state = State::kDone; ++ } else { ++ int32 size = ReadSize(&ptr); ++ GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); ++ ptr = ctx->Skip(ptr, size); ++ GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + } + } else { + // An unknown field in MessageSetItem. +diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format_lite.h b/third_party/protobuf/src/google/protobuf/wire_format_lite.h +--- a/third_party/protobuf/src/google/protobuf/wire_format_lite.h 2024-03-27 22:28:55.000000000 +0000 ++++ b/third_party/protobuf/src/google/protobuf/wire_format_lite.h 2024-09-18 11:49:16.390834276 +0000 +@@ -1798,6 +1798,9 @@ + // we can parse it later. 
+ std::string message_data; + ++ enum class State { kNoTag, kHasType, kHasPayload, kDone }; ++ State state = State::kNoTag; ++ + while (true) { + const uint32 tag = input->ReadTagNoLastTag(); + if (tag == 0) return false; +@@ -1806,26 +1809,34 @@ + case WireFormatLite::kMessageSetTypeIdTag: { + uint32 type_id; + if (!input->ReadVarint32(&type_id)) return false; +- last_type_id = type_id; +- +- if (!message_data.empty()) { ++ if (state == State::kNoTag) { ++ last_type_id = type_id; ++ state = State::kHasType; ++ } else if (state == State::kHasPayload) { + // We saw some message data before the type_id. Have to parse it + // now. + io::CodedInputStream sub_input( + reinterpret_cast(message_data.data()), + static_cast(message_data.size())); + sub_input.SetRecursionLimit(input->RecursionBudget()); +- if (!ms.ParseField(last_type_id, &sub_input)) { ++ if (!ms.ParseField(type_id, &sub_input)) { + return false; + } + message_data.clear(); ++ state = State::kDone; + } + + break; + } + + case WireFormatLite::kMessageSetMessageTag: { +- if (last_type_id == 0) { ++ if (state == State::kHasType) { ++ // Already saw type_id, so we can parse this directly. ++ if (!ms.ParseField(last_type_id, input)) { ++ return false; ++ } ++ state = State::kDone; ++ } else if (state == State::kNoTag) { + // We haven't seen a type_id yet. Append this data to message_data. + uint32 length; + if (!input->ReadVarint32(&length)) return false; +@@ -1836,11 +1847,9 @@ + auto ptr = reinterpret_cast(&message_data[0]); + ptr = io::CodedOutputStream::WriteVarint32ToArray(length, ptr); + if (!input->ReadRaw(ptr, length)) return false; ++ state = State::kHasPayload; + } else { +- // Already saw type_id, so we can parse this directly. 
+- if (!ms.ParseField(last_type_id, input)) { +- return false; +- } ++ if (!ms.SkipField(tag, input)) return false; + } + + break; +diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc b/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc +--- a/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc 2024-03-27 22:28:55.000000000 +0000 ++++ b/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc 2024-09-18 11:49:16.394834273 +0000 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -585,30 +586,56 @@ + EXPECT_EQ(message_set.DebugString(), dynamic_message_set.DebugString()); + } + +-TEST(WireFormatTest, ParseMessageSetWithReverseTagOrder) { ++namespace { ++std::string BuildMessageSetItemStart() { + std::string data; + { +- unittest::TestMessageSetExtension1 message; +- message.set_i(123); +- // Build a MessageSet manually with its message content put before its +- // type_id. + io::StringOutputStream output_stream(&data); + io::CodedOutputStream coded_output(&output_stream); + coded_output.WriteTag(WireFormatLite::kMessageSetItemStartTag); ++ } ++ return data; ++} ++std::string BuildMessageSetItemEnd() { ++ std::string data; ++ { ++ io::StringOutputStream output_stream(&data); ++ io::CodedOutputStream coded_output(&output_stream); ++ coded_output.WriteTag(WireFormatLite::kMessageSetItemEndTag); ++ } ++ return data; ++} ++std::string BuildMessageSetTestExtension1(int value = 123) { ++ std::string data; ++ { ++ unittest::TestMessageSetExtension1 message; ++ message.set_i(value); ++ io::StringOutputStream output_stream(&data); ++ io::CodedOutputStream coded_output(&output_stream); + // Write the message content first. 
+ WireFormatLite::WriteTag(WireFormatLite::kMessageSetMessageNumber, + WireFormatLite::WIRETYPE_LENGTH_DELIMITED, + &coded_output); + coded_output.WriteVarint32(message.ByteSizeLong()); + message.SerializeWithCachedSizes(&coded_output); +- // Write the type id. +- uint32 type_id = message.GetDescriptor()->extension(0)->number(); ++ } ++ return data; ++} ++std::string BuildMessageSetItemTypeId(int extension_number) { ++ std::string data; ++ { ++ io::StringOutputStream output_stream(&data); ++ io::CodedOutputStream coded_output(&output_stream); + WireFormatLite::WriteUInt32(WireFormatLite::kMessageSetTypeIdNumber, +- type_id, &coded_output); +- coded_output.WriteTag(WireFormatLite::kMessageSetItemEndTag); ++ extension_number, &coded_output); + } ++ return data; ++} ++void ValidateTestMessageSet(const std::string& test_case, ++ const std::string& data) { ++ SCOPED_TRACE(test_case); + { +- proto2_wireformat_unittest::TestMessageSet message_set; ++ ::proto2_wireformat_unittest::TestMessageSet message_set; + ASSERT_TRUE(message_set.ParseFromString(data)); + + EXPECT_EQ(123, +@@ -616,10 +643,15 @@ + .GetExtension( + unittest::TestMessageSetExtension1::message_set_extension) + .i()); ++ ++ // Make sure it does not contain anything else. ++ message_set.ClearExtension( ++ unittest::TestMessageSetExtension1::message_set_extension); ++ EXPECT_EQ(message_set.SerializeAsString(), ""); + } + { + // Test parse the message via Reflection. +- proto2_wireformat_unittest::TestMessageSet message_set; ++ ::proto2_wireformat_unittest::TestMessageSet message_set; + io::CodedInputStream input(reinterpret_cast(data.data()), + data.size()); + EXPECT_TRUE(WireFormat::ParseAndMergePartial(&input, &message_set)); +@@ -631,6 +663,61 @@ + unittest::TestMessageSetExtension1::message_set_extension) + .i()); + } ++ { ++ // Test parse the message via DynamicMessage. 
++ DynamicMessageFactory factory; ++ std::unique_ptr msg( ++ factory ++ .GetPrototype( ++ ::proto2_wireformat_unittest::TestMessageSet::descriptor()) ++ ->New()); ++ msg->ParseFromString(data); ++ auto* reflection = msg->GetReflection(); ++ std::vector fields; ++ reflection->ListFields(*msg, &fields); ++ ASSERT_EQ(fields.size(), 1); ++ const auto& sub = reflection->GetMessage(*msg, fields[0]); ++ reflection = sub.GetReflection(); ++ EXPECT_EQ(123, reflection->GetInt32( ++ sub, sub.GetDescriptor()->FindFieldByName("i"))); ++ } ++} ++} // namespace ++ ++TEST(WireFormatTest, ParseMessageSetWithAnyTagOrder) { ++ std::string start = BuildMessageSetItemStart(); ++ std::string end = BuildMessageSetItemEnd(); ++ std::string id = BuildMessageSetItemTypeId( ++ unittest::TestMessageSetExtension1::descriptor()->extension(0)->number()); ++ std::string message = BuildMessageSetTestExtension1(); ++ ++ ValidateTestMessageSet("id + message", start + id + message + end); ++ ValidateTestMessageSet("message + id", start + message + id + end); ++} ++ ++TEST(WireFormatTest, ParseMessageSetWithDuplicateTags) { ++ std::string start = BuildMessageSetItemStart(); ++ std::string end = BuildMessageSetItemEnd(); ++ std::string id = BuildMessageSetItemTypeId( ++ unittest::TestMessageSetExtension1::descriptor()->extension(0)->number()); ++ std::string other_id = BuildMessageSetItemTypeId(123456); ++ std::string message = BuildMessageSetTestExtension1(); ++ std::string other_message = BuildMessageSetTestExtension1(321); ++ ++ // Double id ++ ValidateTestMessageSet("id + other_id + message", ++ start + id + other_id + message + end); ++ ValidateTestMessageSet("id + message + other_id", ++ start + id + message + other_id + end); ++ ValidateTestMessageSet("message + id + other_id", ++ start + message + id + other_id + end); ++ // Double message ++ ValidateTestMessageSet("id + message + other_message", ++ start + id + message + other_message + end); ++ ValidateTestMessageSet("message + id + 
other_message", ++ start + message + id + other_message + end); ++ ValidateTestMessageSet("message + other_message + id", ++ start + message + other_message + id + end); + } + + void SerializeReverseOrder( diff --git a/SPECS/pytorch/CVE-2024-27318.patch b/SPECS/pytorch/CVE-2024-27318.patch new file mode 100644 index 00000000000..40c1c7a7bff --- /dev/null +++ b/SPECS/pytorch/CVE-2024-27318.patch @@ -0,0 +1,375 @@ +From 4458baf0be43d07acc2adab99d48689f78ff1fe1 Mon Sep 17 00:00:00 2001 +From: liqun Fu +Date: Mon, 19 Feb 2024 11:12:40 -0800 +Subject: [PATCH] Fix path sanitization bypass leading to arbitrary read + (#5917) + +Signed-off-by: liqunfu +Signed-off-by: liqun Fu +Co-authored-by: Justin Chu +(cherry picked from commit 66b7fb630903fdcf3e83b6b6d56d82e904264a20) +--- + onnx/checker.cc | 168 +++++++++++++++++--------------- + onnx/checker.h | 5 +- + onnx/common/path.h | 15 ++- + onnx/cpp2py_export.cc | 2 + + onnx/external_data_helper.py | 15 +-- + onnx/test/test_external_data.py | 47 +++++++++ + 6 files changed, 158 insertions(+), 94 deletions(-) + +diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc +index fac56f5655f..66716e97f92 100644 +--- a/third_party/onnx/onnx/checker.cc ++++ b/third_party/onnx/onnx/checker.cc +@@ -13,7 +13,6 @@ + #include + + #include "onnx/common/file_utils.h" +-#include "onnx/common/path.h" + #include "onnx/defs/schema.h" + #include "onnx/defs/tensor_proto_util.h" + #include "onnx/proto_utils.h" +@@ -135,85 +134,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) { + for (const StringStringEntryProto& entry : tensor.external_data()) { + if (entry.has_key() && entry.has_value() && entry.key() == "location") { + has_location = true; +-#ifdef _WIN32 +- auto file_path = std::filesystem::path(utf8str_to_wstring(entry.value())); +- if (file_path.is_absolute()) { +- fail_check( +- "Location of external TensorProto ( tensor name: ", +- tensor.name(), +- ") should be a relative path, but it is 
an absolute path: ", +- entry.value()); +- } +- auto relative_path = file_path.lexically_normal().make_preferred().wstring(); +- // Check that normalized relative path contains ".." on Windows. +- if (relative_path.find(L"..", 0) != std::string::npos) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be file inside the ", +- ctx.get_model_dir(), +- ", but the '", +- entry.value(), +- "' points outside the directory"); +- } +- std::wstring data_path = path_join(utf8str_to_wstring(ctx.get_model_dir()), relative_path); +- struct _stat64 buff; +- if (_wstat64(data_path.c_str(), &buff) != 0) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- entry.value(), +- ", but it doesn't exist or is not accessible."); +- } +-#else // POSIX +- if (entry.value().empty()) { +- fail_check("Location of external TensorProto ( tensor name: ", tensor.name(), ") should not be empty."); +- } else if (entry.value()[0] == '/') { +- fail_check( +- "Location of external TensorProto ( tensor name: ", +- tensor.name(), +- ") should be a relative path, but it is an absolute path: ", +- entry.value()); +- } +- std::string relative_path = clean_relative_path(entry.value()); +- // Check that normalized relative path contains ".." 
on POSIX +- if (relative_path.find("..", 0) != std::string::npos) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be file inside the ", +- ctx.get_model_dir(), +- ", but the '", +- entry.value(), +- "' points outside the directory"); +- } +- std::string data_path = path_join(ctx.get_model_dir(), relative_path); +- // use stat64 to check whether the file exists +-#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__) +- struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64 +- if (stat((data_path).c_str(), &buffer) != 0) { +-#else +- struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64 +- if (stat64((data_path).c_str(), &buffer) != 0) { +-#endif +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- data_path, +- ", but it doesn't exist or is not accessible."); +- } +- // Do not allow symlinks or directories. +- if (!S_ISREG(buffer.st_mode)) { +- fail_check( +- "Data of TensorProto ( tensor name: ", +- tensor.name(), +- ") should be stored in ", +- data_path, +- ", but it is not regular file."); +- } +-#endif ++ resolve_external_data_location(ctx.get_model_dir(), entry.value(), tensor.name()); + } + } + if (!has_location) { +@@ -1054,6 +975,93 @@ void check_model(const ModelProto& model, bool full_check, bool skip_opset_compa + } + } + ++std::string resolve_external_data_location( ++ const std::string& base_dir, ++ const std::string& location, ++ const std::string& tensor_name) { ++#ifdef _WIN32 ++ auto file_path = std::filesystem::path(utf8str_to_wstring(location)); ++ if (file_path.is_absolute()) { ++ fail_check( ++ "Location of external TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be a relative path, but it is an absolute path: ", ++ location); ++ } ++ auto relative_path = file_path.lexically_normal().make_preferred().wstring(); ++ // Check that normalized relative path contains ".." on Windows. 
++ if (relative_path.find(L"..", 0) != std::string::npos) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be file inside the ", ++ base_dir, ++ ", but the '", ++ location, ++ "' points outside the directory"); ++ } ++ std::wstring data_path = path_join(utf8str_to_wstring(base_dir), relative_path); ++ struct _stat64 buff; ++ if (data_path.empty() || (data_path[0] != '#' && _wstat64(data_path.c_str(), &buff) != 0)) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ location, ++ ", but it doesn't exist or is not accessible."); ++ } ++ return wstring_to_utf8str(data_path); ++#else // POSIX ++ if (location.empty()) { ++ fail_check("Location of external TensorProto ( tensor name: ", tensor_name, ") should not be empty."); ++ } else if (location[0] == '/') { ++ fail_check( ++ "Location of external TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be a relative path, but it is an absolute path: ", ++ location); ++ } ++ std::string relative_path = clean_relative_path(location); ++ // Check that normalized relative path contains ".." 
on POSIX ++ if (relative_path.find("..", 0) != std::string::npos) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be file inside the ", ++ base_dir, ++ ", but the '", ++ location, ++ "' points outside the directory"); ++ } ++ std::string data_path = path_join(base_dir, relative_path); ++ // use stat64 to check whether the file exists ++#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__) ++ struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64 ++ if (data_path.empty() || (data_path[0] != '#' && stat((data_path).c_str(), &buffer) != 0)) { ++#else ++ struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64 ++ if (data_path.empty() || (data_path[0] != '#' && stat64((data_path).c_str(), &buffer) != 0)) { ++#endif ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ data_path, ++ ", but it doesn't exist or is not accessible."); ++ } ++ // Do not allow symlinks or directories. 
++ if (data_path.empty() || (data_path[0] != '#' && !S_ISREG(buffer.st_mode))) { ++ fail_check( ++ "Data of TensorProto ( tensor name: ", ++ tensor_name, ++ ") should be stored in ", ++ data_path, ++ ", but it is not regular file."); ++ } ++ return data_path; ++#endif ++} ++ + std::set experimental_ops = { + "ATen", + "Affine", +diff --git a/third_party/onnx/onnx/checker.h b/third_party/onnx/onnx/checker.h +index 6796acab222..83012213469 100644 +--- a/third_party/onnx/onnx/checker.h ++++ b/third_party/onnx/onnx/checker.h +@@ -160,7 +160,10 @@ void check_model_local_functions( + + void check_model(const ModelProto& model, bool full_check = false, bool skip_opset_compatibility_check = false); + void check_model(const std::string& model_path, bool full_check = false, bool skip_opset_compatibility_check = false); +- ++std::string resolve_external_data_location( ++ const std::string& base_dir, ++ const std::string& location, ++ const std::string& tensor_name); + bool check_is_experimental_op(const NodeProto& node); + + } // namespace checker +diff --git a/third_party/onnx/onnx/common/path.h b/third_party/onnx/onnx/common/path.h +index 6eaf5e67baf..09212747f7f 100644 +--- a/third_party/onnx/onnx/common/path.h ++++ b/third_party/onnx/onnx/common/path.h +@@ -31,11 +31,22 @@ inline std::wstring utf8str_to_wstring(const std::string& utf8str) { + if (utf8str.size() > INT_MAX) { + fail_check("utf8str_to_wstring: string is too long for converting to wstring."); + } +- int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), NULL, 0); ++ int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast(utf8str.size()), NULL, 0); + std::wstring ws_str(size_required, 0); +- MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), &ws_str[0], size_required); ++ MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast(utf8str.size()), &ws_str[0], size_required); + return ws_str; + } ++inline std::string 
wstring_to_utf8str(const std::wstring& ws_str) { ++ if (ws_str.size() > INT_MAX) { ++ fail_check("wstring_to_utf8str: string is too long for converting to UTF-8."); ++ } ++ int size_required = ++ WideCharToMultiByte(CP_UTF8, 0, ws_str.c_str(), static_cast(ws_str.size()), NULL, 0, NULL, NULL); ++ std::string utf8str(size_required, 0); ++ WideCharToMultiByte( ++ CP_UTF8, 0, ws_str.c_str(), static_cast(ws_str.size()), &utf8str[0], size_required, NULL, NULL); ++ return utf8str; ++} + + #else + std::string path_join(const std::string& origin, const std::string& append); +diff --git a/third_party/onnx/onnx/cpp2py_export.cc b/third_party/onnx/onnx/cpp2py_export.cc +index bc2594db0db..83cea68f3eb 100644 +--- a/third_party/onnx/onnx/cpp2py_export.cc ++++ b/third_party/onnx/onnx/cpp2py_export.cc +@@ -545,6 +545,8 @@ PYBIND11_MODULE(onnx_cpp2py_export, onnx_cpp2py_export) { + "full_check"_a = false, + "skip_opset_compatibility_check"_a = false); + ++ checker.def("_resolve_external_data_location", &checker::resolve_external_data_location); ++ + // Submodule `version_converter` + auto version_converter = onnx_cpp2py_export.def_submodule("version_converter"); + version_converter.doc() = "VersionConverter submodule"; +diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py +index bbc2717fb4c..05c486c621a 100644 +--- a/third_party/onnx/onnx/external_data_helper.py ++++ b/third_party/onnx/onnx/external_data_helper.py +@@ -8,6 +8,7 @@ + from itertools import chain + from typing import Callable, Iterable, Optional + ++import onnx.onnx_cpp2py_export.checker as c_checker + from onnx.onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto + + +@@ -39,9 +40,9 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None: + base_dir: directory that contains the external data. 
+ """ + info = ExternalDataInfo(tensor) +- file_location = _sanitize_path(info.location) +- external_data_file_path = os.path.join(base_dir, file_location) +- ++ external_data_file_path = c_checker._resolve_external_data_location( # type: ignore[attr-defined] ++ base_dir, info.location, tensor.name ++ ) + with open(external_data_file_path, "rb") as data_file: + if info.offset: + data_file.seek(info.offset) +@@ -259,14 +260,6 @@ def _get_attribute_tensors(onnx_model_proto: ModelProto) -> Iterable[TensorProto + yield from _get_attribute_tensors_from_graph(onnx_model_proto.graph) + + +-def _sanitize_path(path: str) -> str: +- """Remove path components which would allow traversing up a directory tree from a base path. +- +- Note: This method is currently very basic and should be expanded. +- """ +- return path.lstrip("/.") +- +- + def _is_valid_filename(filename: str) -> bool: + """Utility to check whether the provided filename is valid.""" + exp = re.compile('^[^<>:;,?"*|/]+$') +diff --git a/third_party/onnx/onnx/test/test_external_data.py b/third_party/onnx/onnx/test/test_external_data.py +index 63f6b4efedd..bb14d279aff 100644 +--- a/third_party/onnx/onnx/test/test_external_data.py ++++ b/third_party/onnx/onnx/test/test_external_data.py +@@ -3,6 +3,7 @@ + # SPDX-License-Identifier: Apache-2.0 + from __future__ import annotations + ++import itertools + import os + import pathlib + import tempfile +@@ -204,6 +205,52 @@ def test_save_external_single_file_data(self) -> None: + attribute_tensor = new_model.graph.node[0].attribute[0].t + np.testing.assert_allclose(to_array(attribute_tensor), self.attribute_value) + ++ @parameterized.parameterized.expand(itertools.product((True, False), (True, False))) ++ def test_save_external_invalid_single_file_data_and_check( ++ self, use_absolute_path: bool, use_model_path: bool ++ ) -> None: ++ model = onnx.load_model(self.model_filename, self.serialization_format) ++ ++ model_dir = os.path.join(self.temp_dir, "save_copy") ++ 
os.mkdir(model_dir) ++ ++ traversal_external_data_dir = os.path.join( ++ self.temp_dir, "invlid_external_data" ++ ) ++ os.mkdir(traversal_external_data_dir) ++ ++ if use_absolute_path: ++ traversal_external_data_location = os.path.join( ++ traversal_external_data_dir, "tensors.bin" ++ ) ++ else: ++ traversal_external_data_location = "../invlid_external_data/tensors.bin" ++ ++ external_data_dir = os.path.join(self.temp_dir, "external_data") ++ os.mkdir(external_data_dir) ++ new_model_filepath = os.path.join(model_dir, "model.onnx") ++ ++ def convert_model_to_external_data_no_check(model: ModelProto, location: str): ++ for tensor in model.graph.initializer: ++ if tensor.HasField("raw_data"): ++ set_external_data(tensor, location) ++ ++ convert_model_to_external_data_no_check( ++ model, ++ location=traversal_external_data_location, ++ ) ++ ++ onnx.save_model(model, new_model_filepath, self.serialization_format) ++ if use_model_path: ++ with self.assertRaises(onnx.checker.ValidationError): ++ _ = onnx.load_model(new_model_filepath, self.serialization_format) ++ else: ++ onnx_model = onnx.load_model( ++ new_model_filepath, self.serialization_format, load_external_data=False ++ ) ++ with self.assertRaises(onnx.checker.ValidationError): ++ load_external_data_for_model(onnx_model, external_data_dir) ++ + + @parameterized.parameterized_class( + [ diff --git a/SPECS/pytorch/pytorch.spec b/SPECS/pytorch/pytorch.spec index cf4db56fd85..4256781ebbf 100644 --- a/SPECS/pytorch/pytorch.spec +++ b/SPECS/pytorch/pytorch.spec @@ -2,7 +2,7 @@ Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration. 
Name: pytorch Version: 2.2.2 -Release: 1%{?dist} +Release: 2%{?dist} License: BSD-3-Clause Vendor: Microsoft Corporation Distribution: Azure Linux @@ -24,6 +24,9 @@ BuildRequires: python3-setuptools BuildRequires: python3-typing-extensions BuildRequires: python3-six +Patch1: CVE-2024-27318.patch +Patch2: CVE-2022-1941.patch + %description PyTorch is a Python package that provides two high-level features: - Tensor computation (like NumPy) with strong GPU acceleration @@ -56,7 +59,7 @@ PyTorch is a Python package that provides two high-level features: You can reuse your favorite Python packages such as NumPy, SciPy and Cython to extend PyTorch when needed. %prep -%autosetup -a 1 -n %{name}-v%{version} +%autosetup -a 1 -p 1 -n %{name}-v%{version} %build export USE_CUDA=0 @@ -84,6 +87,9 @@ cp -arf docs %{buildroot}/%{_pkgdocdir} %{_docdir}/* %changelog +* Tue Sep 17 2024 Archana Choudhary - 2.2.2-2 +- Add patches for CVE-2024-27318 and CVE-2022-1941 + * Tue Apr 02 2024 Riken Maharjan - 2.2.2-1 - Upgrade to pytorch 2.2.2