From 6afd1d5386e8cbcb95d6dcae8ad3df7bc4bbc255 Mon Sep 17 00:00:00 2001
From: Archana Choudhary <archana1@microsoft.com>
Date: Tue, 17 Sep 2024 12:16:35 +0000
Subject: [PATCH] pytorch: add patch for CVE-2024-27318, CVE-2022-1941

---
 SPECS/pytorch/CVE-2022-1941.patch  | 352 +++++++++++++++++++++++++++
 SPECS/pytorch/CVE-2024-27318.patch | 375 +++++++++++++++++++++++++++++
 SPECS/pytorch/pytorch.spec         |  10 +-
 3 files changed, 735 insertions(+), 2 deletions(-)
 create mode 100644 SPECS/pytorch/CVE-2022-1941.patch
 create mode 100644 SPECS/pytorch/CVE-2024-27318.patch
diff --git a/SPECS/pytorch/CVE-2022-1941.patch b/SPECS/pytorch/CVE-2022-1941.patch
new file mode 100644
index 00000000000..cd961581a22
--- /dev/null
+++ b/SPECS/pytorch/CVE-2022-1941.patch
@@ -0,0 +1,352 @@
+# Patch generated by Archana Choudhary <archana1@microsoft.com>
+# Source: https://github.com/protocolbuffers/protobuf/commit/55815e423bb82cc828836bbd60c79c1f9a195763
+
+diff --color -ruN a/third_party/protobuf/src/google/protobuf/extension_set_inl.h b/third_party/protobuf/src/google/protobuf/extension_set_inl.h
+--- a/third_party/protobuf/src/google/protobuf/extension_set_inl.h	2024-03-27 22:28:55.000000000 +0000
++++ b/third_party/protobuf/src/google/protobuf/extension_set_inl.h	2024-09-18 11:49:16.390834276 +0000
+@@ -206,16 +206,21 @@
+     const char* ptr, const Msg* containing_type,
+     internal::InternalMetadata* metadata, internal::ParseContext* ctx) {
+   std::string payload;
+-  uint32 type_id = 0;
+-  bool payload_read = false;
++  uint32 type_id;
++  enum class State { kNoTag, kHasType, kHasPayload, kDone };
++  State state = State::kNoTag;
++
+   while (!ctx->Done(&ptr)) {
+     uint32 tag = static_cast<uint8>(*ptr++);
+     if (tag == WireFormatLite::kMessageSetTypeIdTag) {
+       uint64 tmp;
+       ptr = ParseBigVarint(ptr, &tmp);
+       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+-      type_id = tmp;
+-      if (payload_read) {
++      if (state == State::kNoTag) {
++        type_id = tmp;
++        state = State::kHasType;
++      } else if (state == State::kHasPayload) {
++        type_id = tmp;
+         ExtensionInfo extension;
+         bool was_packed_on_wire;
+         if (!FindExtension(2, type_id, containing_type, ctx, &extension,
+@@ -241,20 +246,24 @@
+           GOOGLE_PROTOBUF_PARSER_ASSERT(value->_InternalParse(p, &tmp_ctx) &&
+                                          tmp_ctx.EndedAtLimit());
+         }
+-        type_id = 0;
++        state = State::kDone;
+       }
+     } else if (tag == WireFormatLite::kMessageSetMessageTag) {
+-      if (type_id != 0) {
++      if (state == State::kHasType) {
+         ptr = ParseFieldMaybeLazily(static_cast<uint64>(type_id) * 8 + 2, ptr,
+                                     containing_type, metadata, ctx);
+         GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
+-        type_id = 0;
++        state = State::kDone;
+       } else {
++        std::string tmp;
+         int32 size = ReadSize(&ptr);
+         GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+-        ptr = ctx->ReadString(ptr, size, &payload);
++        ptr = ctx->ReadString(ptr, size, &tmp);
+         GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+-        payload_read = true;
++        if (state == State::kNoTag) {
++          payload = std::move(tmp);
++          state = State::kHasPayload;
++        }
+       }
+     } else {
+       ptr = ReadTag(ptr - 1, &tag);
+diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format.cc b/third_party/protobuf/src/google/protobuf/wire_format.cc
+--- a/third_party/protobuf/src/google/protobuf/wire_format.cc	2024-03-27 22:28:55.000000000 +0000
++++ b/third_party/protobuf/src/google/protobuf/wire_format.cc	2024-09-18 11:49:16.390834276 +0000
+@@ -659,9 +659,11 @@
+   const char* _InternalParse(const char* ptr, internal::ParseContext* ctx) {
+     // Parse a MessageSetItem
+     auto metadata = reflection->MutableInternalMetadata(msg);
++    enum class State { kNoTag, kHasType, kHasPayload, kDone };
++    State state = State::kNoTag;
++
+     std::string payload;
+     uint32 type_id = 0;
+-    bool payload_read = false;
+     while (!ctx->Done(&ptr)) {
+       // We use 64 bit tags in order to allow typeid's that span the whole
+       // range of 32 bit numbers.
+@@ -670,8 +672,11 @@
+         uint64 tmp;
+         ptr = ParseBigVarint(ptr, &tmp);
+         GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+-        type_id = tmp;
+-        if (payload_read) {
++        if (state == State::kNoTag) {
++          type_id = tmp;
++          state = State::kHasType;
++        } else if (state == State::kHasPayload) {
++          type_id = tmp;
+           const FieldDescriptor* field;
+           if (ctx->data().pool == nullptr) {
+             field = reflection->FindKnownExtensionByNumber(type_id);
+@@ -698,17 +703,17 @@
+             GOOGLE_PROTOBUF_PARSER_ASSERT(value->_InternalParse(p, &tmp_ctx) &&
+                                            tmp_ctx.EndedAtLimit());
+           }
+-          type_id = 0;
++          state = State::kDone;
+         }
+         continue;
+       } else if (tag == WireFormatLite::kMessageSetMessageTag) {
+-        if (type_id == 0) {
++        if (state == State::kNoTag) {
+           int32 size = ReadSize(&ptr);
+           GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+           ptr = ctx->ReadString(ptr, size, &payload);
+           GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+-          payload_read = true;
+-        } else {
++          state = State::kHasPayload;
++        } else if (state == State::kHasType) {
+           // We're now parsing the payload
+           const FieldDescriptor* field = nullptr;
+           if (descriptor->IsExtensionNumber(type_id)) {
+@@ -722,7 +727,12 @@
+           ptr = WireFormat::_InternalParseAndMergeField(
+               msg, ptr, ctx, static_cast<uint64>(type_id) * 8 + 2, reflection,
+               field);
+-          type_id = 0;
++          state = State::kDone;
++        } else {
++          int32 size = ReadSize(&ptr);
++          GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
++          ptr = ctx->Skip(ptr, size);
++          GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
+         }
+       } else {
+         // An unknown field in MessageSetItem.
+diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format_lite.h b/third_party/protobuf/src/google/protobuf/wire_format_lite.h
+--- a/third_party/protobuf/src/google/protobuf/wire_format_lite.h	2024-03-27 22:28:55.000000000 +0000
++++ b/third_party/protobuf/src/google/protobuf/wire_format_lite.h	2024-09-18 11:49:16.390834276 +0000
+@@ -1798,6 +1798,9 @@
+   // we can parse it later.
+   std::string message_data;
+ 
++  enum class State { kNoTag, kHasType, kHasPayload, kDone };
++  State state = State::kNoTag;
++
+   while (true) {
+     const uint32 tag = input->ReadTagNoLastTag();
+     if (tag == 0) return false;
+@@ -1806,26 +1809,34 @@
+       case WireFormatLite::kMessageSetTypeIdTag: {
+         uint32 type_id;
+         if (!input->ReadVarint32(&type_id)) return false;
+-        last_type_id = type_id;
+-
+-        if (!message_data.empty()) {
++        if (state == State::kNoTag) {
++          last_type_id = type_id;
++          state = State::kHasType;
++        } else if (state == State::kHasPayload) {
+           // We saw some message data before the type_id.  Have to parse it
+           // now.
+           io::CodedInputStream sub_input(
+               reinterpret_cast<const uint8*>(message_data.data()),
+               static_cast<int>(message_data.size()));
+           sub_input.SetRecursionLimit(input->RecursionBudget());
+-          if (!ms.ParseField(last_type_id, &sub_input)) {
++          if (!ms.ParseField(type_id, &sub_input)) {
+             return false;
+           }
+           message_data.clear();
++          state = State::kDone;
+         }
+ 
+         break;
+       }
+ 
+       case WireFormatLite::kMessageSetMessageTag: {
+-        if (last_type_id == 0) {
++        if (state == State::kHasType) {
++          // Already saw type_id, so we can parse this directly.
++          if (!ms.ParseField(last_type_id, input)) {
++            return false;
++          }
++          state = State::kDone;
++        } else if (state == State::kNoTag) {
+           // We haven't seen a type_id yet.  Append this data to message_data.
+           uint32 length;
+           if (!input->ReadVarint32(&length)) return false;
+@@ -1836,11 +1847,9 @@
+           auto ptr = reinterpret_cast<uint8*>(&message_data[0]);
+           ptr = io::CodedOutputStream::WriteVarint32ToArray(length, ptr);
+           if (!input->ReadRaw(ptr, length)) return false;
++          state = State::kHasPayload;
+         } else {
+-          // Already saw type_id, so we can parse this directly.
+-          if (!ms.ParseField(last_type_id, input)) {
+-            return false;
+-          }
++          if (!ms.SkipField(tag, input)) return false;
+         }
+ 
+         break;
+diff --color -ruN a/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc b/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc
+--- a/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc	2024-03-27 22:28:55.000000000 +0000
++++ b/third_party/protobuf/src/google/protobuf/wire_format_unittest.cc	2024-09-18 11:49:16.394834273 +0000
+@@ -47,6 +47,7 @@
+ #include <google/protobuf/io/zero_copy_stream_impl.h>
+ #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+ #include <google/protobuf/descriptor.h>
++#include <google/protobuf/dynamic_message.h>
+ #include <google/protobuf/wire_format_lite.h>
+ #include <google/protobuf/testing/googletest.h>
+ #include <gmock/gmock.h>
+@@ -585,30 +586,56 @@
+   EXPECT_EQ(message_set.DebugString(), dynamic_message_set.DebugString());
+ }
+ 
+-TEST(WireFormatTest, ParseMessageSetWithReverseTagOrder) {
++namespace {
++std::string BuildMessageSetItemStart() {
+   std::string data;
+   {
+-    unittest::TestMessageSetExtension1 message;
+-    message.set_i(123);
+-    // Build a MessageSet manually with its message content put before its
+-    // type_id.
+     io::StringOutputStream output_stream(&data);
+     io::CodedOutputStream coded_output(&output_stream);
+     coded_output.WriteTag(WireFormatLite::kMessageSetItemStartTag);
++  }
++  return data;
++}
++std::string BuildMessageSetItemEnd() {
++  std::string data;
++  {
++    io::StringOutputStream output_stream(&data);
++    io::CodedOutputStream coded_output(&output_stream);
++    coded_output.WriteTag(WireFormatLite::kMessageSetItemEndTag);
++  }
++  return data;
++}
++std::string BuildMessageSetTestExtension1(int value = 123) {
++  std::string data;
++  {
++    unittest::TestMessageSetExtension1 message;
++    message.set_i(value);
++    io::StringOutputStream output_stream(&data);
++    io::CodedOutputStream coded_output(&output_stream);
+     // Write the message content first.
+     WireFormatLite::WriteTag(WireFormatLite::kMessageSetMessageNumber,
+                              WireFormatLite::WIRETYPE_LENGTH_DELIMITED,
+                              &coded_output);
+     coded_output.WriteVarint32(message.ByteSizeLong());
+     message.SerializeWithCachedSizes(&coded_output);
+-    // Write the type id.
+-    uint32 type_id = message.GetDescriptor()->extension(0)->number();
++  }
++  return data;
++}
++std::string BuildMessageSetItemTypeId(int extension_number) {
++  std::string data;
++  {
++    io::StringOutputStream output_stream(&data);
++    io::CodedOutputStream coded_output(&output_stream);
+     WireFormatLite::WriteUInt32(WireFormatLite::kMessageSetTypeIdNumber,
+-                                type_id, &coded_output);
+-    coded_output.WriteTag(WireFormatLite::kMessageSetItemEndTag);
++                                extension_number, &coded_output);
+   }
++  return data;
++}
++void ValidateTestMessageSet(const std::string& test_case,
++                            const std::string& data) {
++  SCOPED_TRACE(test_case);
+   {
+-    proto2_wireformat_unittest::TestMessageSet message_set;
++    ::proto2_wireformat_unittest::TestMessageSet message_set;
+     ASSERT_TRUE(message_set.ParseFromString(data));
+ 
+     EXPECT_EQ(123,
+@@ -616,10 +643,15 @@
+                   .GetExtension(
+                       unittest::TestMessageSetExtension1::message_set_extension)
+                   .i());
++
++    // Make sure it does not contain anything else.
++    message_set.ClearExtension(
++        unittest::TestMessageSetExtension1::message_set_extension);
++    EXPECT_EQ(message_set.SerializeAsString(), "");
+   }
+   {
+     // Test parse the message via Reflection.
+-    proto2_wireformat_unittest::TestMessageSet message_set;
++    ::proto2_wireformat_unittest::TestMessageSet message_set;
+     io::CodedInputStream input(reinterpret_cast<const uint8*>(data.data()),
+                                data.size());
+     EXPECT_TRUE(WireFormat::ParseAndMergePartial(&input, &message_set));
+@@ -631,6 +663,61 @@
+                       unittest::TestMessageSetExtension1::message_set_extension)
+                   .i());
+   }
++  {
++    // Test parse the message via DynamicMessage.
++    DynamicMessageFactory factory;
++    std::unique_ptr<Message> msg(
++        factory
++            .GetPrototype(
++                ::proto2_wireformat_unittest::TestMessageSet::descriptor())
++            ->New());
++    msg->ParseFromString(data);
++    auto* reflection = msg->GetReflection();
++    std::vector<const FieldDescriptor*> fields;
++    reflection->ListFields(*msg, &fields);
++    ASSERT_EQ(fields.size(), 1);
++    const auto& sub = reflection->GetMessage(*msg, fields[0]);
++    reflection = sub.GetReflection();
++    EXPECT_EQ(123, reflection->GetInt32(
++                       sub, sub.GetDescriptor()->FindFieldByName("i")));
++  }
++}
++}  // namespace
++
++TEST(WireFormatTest, ParseMessageSetWithAnyTagOrder) {
++  std::string start = BuildMessageSetItemStart();
++  std::string end = BuildMessageSetItemEnd();
++  std::string id = BuildMessageSetItemTypeId(
++      unittest::TestMessageSetExtension1::descriptor()->extension(0)->number());
++  std::string message = BuildMessageSetTestExtension1();
++
++  ValidateTestMessageSet("id + message", start + id + message + end);
++  ValidateTestMessageSet("message + id", start + message + id + end);
++}
++
++TEST(WireFormatTest, ParseMessageSetWithDuplicateTags) {
++  std::string start = BuildMessageSetItemStart();
++  std::string end = BuildMessageSetItemEnd();
++  std::string id = BuildMessageSetItemTypeId(
++      unittest::TestMessageSetExtension1::descriptor()->extension(0)->number());
++  std::string other_id = BuildMessageSetItemTypeId(123456);
++  std::string message = BuildMessageSetTestExtension1();
++  std::string other_message = BuildMessageSetTestExtension1(321);
++
++  // Double id
++  ValidateTestMessageSet("id + other_id + message",
++                         start + id + other_id + message + end);
++  ValidateTestMessageSet("id + message + other_id",
++                         start + id + message + other_id + end);
++  ValidateTestMessageSet("message + id + other_id",
++                         start + message + id + other_id + end);
++  // Double message
++  ValidateTestMessageSet("id + message + other_message",
++                         start + id + message + other_message + end);
++  ValidateTestMessageSet("message + id + other_message",
++                         start + message + id + other_message + end);
++  ValidateTestMessageSet("message + other_message + id",
++                         start + message + other_message + id + end);
+ }
+ 
+ void SerializeReverseOrder(
diff --git a/SPECS/pytorch/CVE-2024-27318.patch b/SPECS/pytorch/CVE-2024-27318.patch
new file mode 100644
index 00000000000..40c1c7a7bff
--- /dev/null
+++ b/SPECS/pytorch/CVE-2024-27318.patch
@@ -0,0 +1,375 @@
+From 4458baf0be43d07acc2adab99d48689f78ff1fe1 Mon Sep 17 00:00:00 2001
+From: liqun Fu <liqfu@microsoft.com>
+Date: Mon, 19 Feb 2024 11:12:40 -0800
+Subject: [PATCH] Fix path sanitization bypass leading to arbitrary read
+ (#5917)
+
+Signed-off-by: liqunfu <liqun.fu@microsoft.com>
+Signed-off-by: liqun Fu <liqun.fu@microsoft.com>
+Co-authored-by: Justin Chu <justinchuby@users.noreply.github.com>
+(cherry picked from commit 66b7fb630903fdcf3e83b6b6d56d82e904264a20)
+---
+ onnx/checker.cc                 | 168 +++++++++++++++++---------------
+ onnx/checker.h                  |   5 +-
+ onnx/common/path.h              |  15 ++-
+ onnx/cpp2py_export.cc           |   2 +
+ onnx/external_data_helper.py    |  15 +--
+ onnx/test/test_external_data.py |  47 +++++++++
+ 6 files changed, 158 insertions(+), 94 deletions(-)
+
+diff --git a/third_party/onnx/onnx/checker.cc b/third_party/onnx/onnx/checker.cc
+index fac56f5655f..66716e97f92 100644
+--- a/third_party/onnx/onnx/checker.cc
++++ b/third_party/onnx/onnx/checker.cc
+@@ -13,7 +13,6 @@
+ #include <vector>
+ 
+ #include "onnx/common/file_utils.h"
+-#include "onnx/common/path.h"
+ #include "onnx/defs/schema.h"
+ #include "onnx/defs/tensor_proto_util.h"
+ #include "onnx/proto_utils.h"
+@@ -135,85 +134,7 @@ void check_tensor(const TensorProto& tensor, const CheckerContext& ctx) {
+     for (const StringStringEntryProto& entry : tensor.external_data()) {
+       if (entry.has_key() && entry.has_value() && entry.key() == "location") {
+         has_location = true;
+-#ifdef _WIN32
+-        auto file_path = std::filesystem::path(utf8str_to_wstring(entry.value()));
+-        if (file_path.is_absolute()) {
+-          fail_check(
+-              "Location of external TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be a relative path, but it is an absolute path: ",
+-              entry.value());
+-        }
+-        auto relative_path = file_path.lexically_normal().make_preferred().wstring();
+-        // Check that normalized relative path contains ".." on Windows.
+-        if (relative_path.find(L"..", 0) != std::string::npos) {
+-          fail_check(
+-              "Data of TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be file inside the ",
+-              ctx.get_model_dir(),
+-              ", but the '",
+-              entry.value(),
+-              "' points outside the directory");
+-        }
+-        std::wstring data_path = path_join(utf8str_to_wstring(ctx.get_model_dir()), relative_path);
+-        struct _stat64 buff;
+-        if (_wstat64(data_path.c_str(), &buff) != 0) {
+-          fail_check(
+-              "Data of TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be stored in ",
+-              entry.value(),
+-              ", but it doesn't exist or is not accessible.");
+-        }
+-#else // POSIX
+-        if (entry.value().empty()) {
+-          fail_check("Location of external TensorProto ( tensor name: ", tensor.name(), ") should not be empty.");
+-        } else if (entry.value()[0] == '/') {
+-          fail_check(
+-              "Location of external TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be a relative path, but it is an absolute path: ",
+-              entry.value());
+-        }
+-        std::string relative_path = clean_relative_path(entry.value());
+-        // Check that normalized relative path contains ".." on POSIX
+-        if (relative_path.find("..", 0) != std::string::npos) {
+-          fail_check(
+-              "Data of TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be file inside the ",
+-              ctx.get_model_dir(),
+-              ", but the '",
+-              entry.value(),
+-              "' points outside the directory");
+-        }
+-        std::string data_path = path_join(ctx.get_model_dir(), relative_path);
+-        // use stat64 to check whether the file exists
+-#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__)
+-        struct stat buffer; // APPLE, wasm and non-glic stdlibs do not have stat64
+-        if (stat((data_path).c_str(), &buffer) != 0) {
+-#else
+-        struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64
+-        if (stat64((data_path).c_str(), &buffer) != 0) {
+-#endif
+-          fail_check(
+-              "Data of TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be stored in ",
+-              data_path,
+-              ", but it doesn't exist or is not accessible.");
+-        }
+-        // Do not allow symlinks or directories.
+-        if (!S_ISREG(buffer.st_mode)) {
+-          fail_check(
+-              "Data of TensorProto ( tensor name: ",
+-              tensor.name(),
+-              ") should be stored in ",
+-              data_path,
+-              ", but it is not regular file.");
+-        }
+-#endif
++        resolve_external_data_location(ctx.get_model_dir(), entry.value(), tensor.name());
+       }
+     }
+     if (!has_location) {
+@@ -1054,6 +975,93 @@ void check_model(const ModelProto& model, bool full_check, bool skip_opset_compa
+   }
+ }
+ 
++std::string resolve_external_data_location(
++    const std::string& base_dir,
++    const std::string& location,
++    const std::string& tensor_name) {
++#ifdef _WIN32
++  auto file_path = std::filesystem::path(utf8str_to_wstring(location));
++  if (file_path.is_absolute()) {
++    fail_check(
++        "Location of external TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be a relative path, but it is an absolute path: ",
++        location);
++  }
++  auto relative_path = file_path.lexically_normal().make_preferred().wstring();
+  // Reject a normalized relative path that still contains ".." on Windows.
++  if (relative_path.find(L"..", 0) != std::string::npos) {
++    fail_check(
++        "Data of TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be file inside the ",
++        base_dir,
++        ", but the '",
++        location,
++        "' points outside the directory");
++  }
++  std::wstring data_path = path_join(utf8str_to_wstring(base_dir), relative_path);
++  struct _stat64 buff;
++  if (data_path.empty() || (data_path[0] != '#' && _wstat64(data_path.c_str(), &buff) != 0)) {
++    fail_check(
++        "Data of TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be stored in ",
++        location,
++        ", but it doesn't exist or is not accessible.");
++  }
++  return wstring_to_utf8str(data_path);
++#else // POSIX
++  if (location.empty()) {
++    fail_check("Location of external TensorProto ( tensor name: ", tensor_name, ") should not be empty.");
++  } else if (location[0] == '/') {
++    fail_check(
++        "Location of external TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be a relative path, but it is an absolute path: ",
++        location);
++  }
++  std::string relative_path = clean_relative_path(location);
+  // Reject a normalized relative path that still contains ".." on POSIX.
++  if (relative_path.find("..", 0) != std::string::npos) {
++    fail_check(
++        "Data of TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be file inside the ",
++        base_dir,
++        ", but the '",
++        location,
++        "' points outside the directory");
++  }
++  std::string data_path = path_join(base_dir, relative_path);
++  // use stat64 to check whether the file exists
++#if defined(__APPLE__) || defined(__wasm__) || !defined(__GLIBC__)
+  struct stat buffer; // APPLE, wasm and non-glibc stdlibs do not have stat64
++  if (data_path.empty() || (data_path[0] != '#' && stat((data_path).c_str(), &buffer) != 0)) {
++#else
++  struct stat64 buffer; // All POSIX under glibc except APPLE and wasm have stat64
++  if (data_path.empty() || (data_path[0] != '#' && stat64((data_path).c_str(), &buffer) != 0)) {
++#endif
++    fail_check(
++        "Data of TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be stored in ",
++        data_path,
++        ", but it doesn't exist or is not accessible.");
++  }
++  // Do not allow symlinks or directories.
++  if (data_path.empty() || (data_path[0] != '#' && !S_ISREG(buffer.st_mode))) {
++    fail_check(
++        "Data of TensorProto ( tensor name: ",
++        tensor_name,
++        ") should be stored in ",
++        data_path,
++        ", but it is not regular file.");
++  }
++  return data_path;
++#endif
++}
++
+ std::set<std::string> experimental_ops = {
+     "ATen",
+     "Affine",
+diff --git a/third_party/onnx/onnx/checker.h b/third_party/onnx/onnx/checker.h
+index 6796acab222..83012213469 100644
+--- a/third_party/onnx/onnx/checker.h
++++ b/third_party/onnx/onnx/checker.h
+@@ -160,7 +160,10 @@ void check_model_local_functions(
+ 
+ void check_model(const ModelProto& model, bool full_check = false, bool skip_opset_compatibility_check = false);
+ void check_model(const std::string& model_path, bool full_check = false, bool skip_opset_compatibility_check = false);
+-
++std::string resolve_external_data_location(
++    const std::string& base_dir,
++    const std::string& location,
++    const std::string& tensor_name);
+ bool check_is_experimental_op(const NodeProto& node);
+ 
+ } // namespace checker
+diff --git a/third_party/onnx/onnx/common/path.h b/third_party/onnx/onnx/common/path.h
+index 6eaf5e67baf..09212747f7f 100644
+--- a/third_party/onnx/onnx/common/path.h
++++ b/third_party/onnx/onnx/common/path.h
+@@ -31,11 +31,22 @@ inline std::wstring utf8str_to_wstring(const std::string& utf8str) {
+   if (utf8str.size() > INT_MAX) {
+     fail_check("utf8str_to_wstring: string is too long for converting to wstring.");
+   }
+-  int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), NULL, 0);
++  int size_required = MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), NULL, 0);
+   std::wstring ws_str(size_required, 0);
+-  MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), (int)utf8str.size(), &ws_str[0], size_required);
++  MultiByteToWideChar(CP_UTF8, 0, utf8str.c_str(), static_cast<int>(utf8str.size()), &ws_str[0], size_required);
+   return ws_str;
+ }
++inline std::string wstring_to_utf8str(const std::wstring& ws_str) {
++  if (ws_str.size() > INT_MAX) {
++    fail_check("wstring_to_utf8str: string is too long for converting to UTF-8.");
++  }
++  int size_required =
++      WideCharToMultiByte(CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), NULL, 0, NULL, NULL);
++  std::string utf8str(size_required, 0);
++  WideCharToMultiByte(
++      CP_UTF8, 0, ws_str.c_str(), static_cast<int>(ws_str.size()), &utf8str[0], size_required, NULL, NULL);
++  return utf8str;
++}
+ 
+ #else
+ std::string path_join(const std::string& origin, const std::string& append);
+diff --git a/third_party/onnx/onnx/cpp2py_export.cc b/third_party/onnx/onnx/cpp2py_export.cc
+index bc2594db0db..83cea68f3eb 100644
+--- a/third_party/onnx/onnx/cpp2py_export.cc
++++ b/third_party/onnx/onnx/cpp2py_export.cc
+@@ -545,6 +545,8 @@ PYBIND11_MODULE(onnx_cpp2py_export, onnx_cpp2py_export) {
+       "full_check"_a = false,
+       "skip_opset_compatibility_check"_a = false);
+ 
++  checker.def("_resolve_external_data_location", &checker::resolve_external_data_location);
++
+   // Submodule `version_converter`
+   auto version_converter = onnx_cpp2py_export.def_submodule("version_converter");
+   version_converter.doc() = "VersionConverter submodule";
+diff --git a/third_party/onnx/onnx/external_data_helper.py b/third_party/onnx/onnx/external_data_helper.py
+index bbc2717fb4c..05c486c621a 100644
+--- a/third_party/onnx/onnx/external_data_helper.py
++++ b/third_party/onnx/onnx/external_data_helper.py
+@@ -8,6 +8,7 @@
+ from itertools import chain
+ from typing import Callable, Iterable, Optional
+ 
++import onnx.onnx_cpp2py_export.checker as c_checker
+ from onnx.onnx_pb import AttributeProto, GraphProto, ModelProto, TensorProto
+ 
+ 
+@@ -39,9 +40,9 @@ def load_external_data_for_tensor(tensor: TensorProto, base_dir: str) -> None:
+         base_dir: directory that contains the external data.
+     """
+     info = ExternalDataInfo(tensor)
+-    file_location = _sanitize_path(info.location)
+-    external_data_file_path = os.path.join(base_dir, file_location)
+-
++    external_data_file_path = c_checker._resolve_external_data_location(  # type: ignore[attr-defined]
++        base_dir, info.location, tensor.name
++    )
+     with open(external_data_file_path, "rb") as data_file:
+         if info.offset:
+             data_file.seek(info.offset)
+@@ -259,14 +260,6 @@ def _get_attribute_tensors(onnx_model_proto: ModelProto) -> Iterable[TensorProto
+     yield from _get_attribute_tensors_from_graph(onnx_model_proto.graph)
+ 
+ 
+-def _sanitize_path(path: str) -> str:
+-    """Remove path components which would allow traversing up a directory tree from a base path.
+-
+-    Note: This method is currently very basic and should be expanded.
+-    """
+-    return path.lstrip("/.")
+-
+-
+ def _is_valid_filename(filename: str) -> bool:
+     """Utility to check whether the provided filename is valid."""
+     exp = re.compile('^[^<>:;,?"*|/]+$')
+diff --git a/third_party/onnx/onnx/test/test_external_data.py b/third_party/onnx/onnx/test/test_external_data.py
+index 63f6b4efedd..bb14d279aff 100644
+--- a/third_party/onnx/onnx/test/test_external_data.py
++++ b/third_party/onnx/onnx/test/test_external_data.py
+@@ -3,6 +3,7 @@
+ # SPDX-License-Identifier: Apache-2.0
+ from __future__ import annotations
+ 
++import itertools
+ import os
+ import pathlib
+ import tempfile
+@@ -204,6 +205,52 @@ def test_save_external_single_file_data(self) -> None:
+         attribute_tensor = new_model.graph.node[0].attribute[0].t
+         np.testing.assert_allclose(to_array(attribute_tensor), self.attribute_value)
+ 
++    @parameterized.parameterized.expand(itertools.product((True, False), (True, False)))
++    def test_save_external_invalid_single_file_data_and_check(
++        self, use_absolute_path: bool, use_model_path: bool
++    ) -> None:
++        model = onnx.load_model(self.model_filename, self.serialization_format)
++
++        model_dir = os.path.join(self.temp_dir, "save_copy")
++        os.mkdir(model_dir)
++
++        traversal_external_data_dir = os.path.join(
++            self.temp_dir, "invlid_external_data"
++        )
++        os.mkdir(traversal_external_data_dir)
++
++        if use_absolute_path:
++            traversal_external_data_location = os.path.join(
++                traversal_external_data_dir, "tensors.bin"
++            )
++        else:
++            traversal_external_data_location = "../invlid_external_data/tensors.bin"
++
++        external_data_dir = os.path.join(self.temp_dir, "external_data")
++        os.mkdir(external_data_dir)
++        new_model_filepath = os.path.join(model_dir, "model.onnx")
++
++        def convert_model_to_external_data_no_check(model: ModelProto, location: str):
++            for tensor in model.graph.initializer:
++                if tensor.HasField("raw_data"):
++                    set_external_data(tensor, location)
++
++        convert_model_to_external_data_no_check(
++            model,
++            location=traversal_external_data_location,
++        )
++
++        onnx.save_model(model, new_model_filepath, self.serialization_format)
++        if use_model_path:
++            with self.assertRaises(onnx.checker.ValidationError):
++                _ = onnx.load_model(new_model_filepath, self.serialization_format)
++        else:
++            onnx_model = onnx.load_model(
++                new_model_filepath, self.serialization_format, load_external_data=False
++            )
++            with self.assertRaises(onnx.checker.ValidationError):
++                load_external_data_for_model(onnx_model, external_data_dir)
++
+ 
+ @parameterized.parameterized_class(
+     [
diff --git a/SPECS/pytorch/pytorch.spec b/SPECS/pytorch/pytorch.spec
index cf4db56fd85..4256781ebbf 100644
--- a/SPECS/pytorch/pytorch.spec
+++ b/SPECS/pytorch/pytorch.spec
@@ -2,7 +2,7 @@
 Summary:        Tensors and Dynamic neural networks in Python with strong GPU acceleration.
 Name:           pytorch
 Version:        2.2.2
-Release:        1%{?dist}
+Release:        2%{?dist}
 License:        BSD-3-Clause
 Vendor:         Microsoft Corporation
 Distribution:   Azure Linux
@@ -24,6 +24,9 @@ BuildRequires:  python3-setuptools
 BuildRequires:  python3-typing-extensions
 BuildRequires:  python3-six
 
+Patch1:         CVE-2024-27318.patch
+Patch2:         CVE-2022-1941.patch
+
 %description
 PyTorch is a Python package that provides two high-level features:
 - Tensor computation (like NumPy) with strong GPU acceleration
@@ -56,7 +59,7 @@ PyTorch is a Python package that provides two high-level features:
 You can reuse your favorite Python packages such as NumPy, SciPy and Cython to extend PyTorch when needed.
 
 %prep
-%autosetup -a 1 -n %{name}-v%{version}
+%autosetup -a 1 -p 1 -n %{name}-v%{version}
 
 %build
 export USE_CUDA=0
@@ -84,6 +87,9 @@ cp -arf docs %{buildroot}/%{_pkgdocdir}
 %{_docdir}/*
 
 %changelog
+* Tue Sep 17 2024 Archana Choudhary <archana1@microsoft.com> - 2.2.2-2
+- Add patches for CVE-2024-27318 and CVE-2022-1941
+
 * Tue Apr 02 2024 Riken Maharjan <rmaharjan@microsoft.com> - 2.2.2-1
 - Upgrade to pytorch 2.2.2