Fix computation of image IDs

- Take TOC digests into account even if we are converting from OCI to another format - Compute the image ID based on whether we _used_ the TOC, not whether it just exists. - Also don't forget to include the config digest in the ID computation... Signed-off-by: Miloslav Trmač <[email protected]>
containers · Feb 8, 2024 · d8d3103 · d8d3103
1 parent 00df874
commit d8d3103
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 51 deletions.
diff --git a/manifest/oci.go b/manifest/oci.go
@@ -9,7 +9,6 @@ import (
 	compressiontypes "github.com/containers/image/v5/pkg/compression/types"
 	"github.com/containers/image/v5/types"
 	ociencspec "github.com/containers/ocicrypt/spec"
-	chunkedToc "github.com/containers/storage/pkg/chunked/toc"
 	"github.com/opencontainers/go-digest"
 	"github.com/opencontainers/image-spec/specs-go"
 	imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
@@ -260,44 +259,9 @@ func (m *OCI1) ImageID(diffIDs []digest.Digest) (string, error) {
 	if err := m.Config.Digest.Validate(); err != nil {
 		return "", err
 	}
-
-	// If there is any layer that is using partial content, we calculate the image ID
-	// in a different way since the diffID cannot be validated as for regular pulled images.
-	for _, layer := range m.Layers {
-		toc, err := chunkedToc.GetTOCDigest(layer.Annotations)
-		if err != nil {
-			return "", fmt.Errorf("error looking up annotation for layer %q: %w", layer.Digest, err)
-		}
-		if toc != nil {
-			return m.calculateImageIDForPartialImage(diffIDs)
-		}
-	}
-
 	return m.Config.Digest.Hex(), nil
 }
 
-func (m *OCI1) calculateImageIDForPartialImage(diffIDs []digest.Digest) (string, error) {
-	newID := digest.Canonical.Digester()
-	for i, layer := range m.Layers {
-		diffID := diffIDs[i]
-		_, err := newID.Hash().Write([]byte(diffID.Hex()))
-		if err != nil {
-			return "", fmt.Errorf("error writing diffID %q: %w", diffID, err)
-		}
-		toc, err := chunkedToc.GetTOCDigest(layer.Annotations)
-		if err != nil {
-			return "", fmt.Errorf("error looking up annotation for layer %q: %w", layer.Digest, err)
-		}
-		if toc != nil {
-			_, err = newID.Hash().Write([]byte(toc.Hex()))
-			if err != nil {
-				return "", fmt.Errorf("error writing TOC %q: %w", toc, err)
-			}
-		}
-	}
-	return newID.Digest().Hex(), nil
-}
-
 // CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image
 // (and the code can handle that).
 // NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted

diff --git a/storage/storage_dest.go b/storage/storage_dest.go
@@ -488,29 +488,49 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string {
 			}
 			diffIDs = append([]digest.Digest{diffID}, diffIDs...)
 		}
-	case *manifest.Schema2:
+	case *manifest.Schema2, *manifest.OCI1:
 		// We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate
 		// the diffID list.
-	case *manifest.OCI1:
-		for i, l := range m.Layers {
-			if l.Digest != "" {
-				// if a layer was pulled using a partial blob, we need to use the TOC digest
-				// to calculate the image ID, since the layer digest was not validated.
-				if tocDigest, found := s.indexToTOCDigest[i]; found {
-					diffIDs = append(diffIDs, tocDigest)
-				} else {
-					diffIDs = append(diffIDs, l.Digest)
-				}
-			}
-		}
 	default:
 		return ""
 	}
-	id, err := m.ImageID(diffIDs)
+
+	// We want to use the same ID for “the same” images, but without risking unwanted sharing / malicious image corruption.
+	//
+	// Traditionally that means the same ~config digest, as computed by m.ImageID;
+	// but if we pull a layer by TOC, we verify the layer against neither the (compressed) blob digest in the manifest,
+	// nor against the config’s RootFS.DiffIDs. We don’t really want to do either, to allow partial layer pulls where we never see
+	// most of the data.
+	//
+	// So, if a layer is pulled by TOC (and we do validate against the TOC), the fact that we used the TOC, and the value of the TOC,
+	// must enter into the image ID computation.
+	// But for images where no TOC was used, continue to use IDs computed the traditional way, to maximize image reuse on upgrades,
+	// and to introduce the changed behavior only when partial pulls are used.
+	//
+	// Note that it’s not 100% guaranteed that an image pulled by TOC uses an OCI manifest; consider
+	// (skopeo copy --format v2s2 docker://…/zstd-chunked-image containers-storage:… ). So this is not happening only in the OCI case above.
+	ordinaryImageID, err := m.ImageID(diffIDs)
 	if err != nil {
 		return ""
 	}
-	return id
+	tocIDInput := ""
+	hasLayerPulledByTOC := false
+	for i := range m.LayerInfos() {
+		layerValue := ""                       // An empty string is not a valid digest, so this is unambiguous with the TOC case.
+		tocDigest, ok := s.indexToTOCDigest[i] // "" if not a TOC
+		if ok {
+			hasLayerPulledByTOC = true
+			layerValue = tocDigest.String()
+		}
+		tocIDInput += layerValue + "|" // "|" can not be present in a TOC digest, so this is an unambiguous separator.
+	}
+
+	if !hasLayerPulledByTOC {
+		return ordinaryImageID
+	}
+	// ordinaryImageID is a digest of a config, which is a JSON value.
+	// To avoid the risk of collisions, start the input with @ so that the input is not a valid JSON.
+	return digest.FromString("@With TOC:" + tocIDInput).Hex()
 }
 
 // getConfigBlob exists only to let us retrieve the configuration blob so that the manifest package can dig