Skip to content

Commit

Permalink
Fix computation of image IDs
Browse files Browse the repository at this point in the history
- Take TOC digests into account even if we are converting from OCI
  to another format
- Compute the image ID based on whether we _used_ the TOC, not whether
  it just exists.
- Also don't forget to include the config digest in the ID computation...

Signed-off-by: Miloslav Trmač <[email protected]>
  • Loading branch information
mtrmac committed Feb 8, 2024
1 parent 00df874 commit d8d3103
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 51 deletions.
36 changes: 0 additions & 36 deletions manifest/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
compressiontypes "github.com/containers/image/v5/pkg/compression/types"
"github.com/containers/image/v5/types"
ociencspec "github.com/containers/ocicrypt/spec"
chunkedToc "github.com/containers/storage/pkg/chunked/toc"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go"
imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
Expand Down Expand Up @@ -260,44 +259,9 @@ func (m *OCI1) ImageID(diffIDs []digest.Digest) (string, error) {
if err := m.Config.Digest.Validate(); err != nil {
return "", err
}

// If there is any layer that is using partial content, we calculate the image ID
// in a different way since the diffID cannot be validated as for regular pulled images.
for _, layer := range m.Layers {
toc, err := chunkedToc.GetTOCDigest(layer.Annotations)
if err != nil {
return "", fmt.Errorf("error looking up annotation for layer %q: %w", layer.Digest, err)
}
if toc != nil {
return m.calculateImageIDForPartialImage(diffIDs)
}
}

return m.Config.Digest.Hex(), nil
}

func (m *OCI1) calculateImageIDForPartialImage(diffIDs []digest.Digest) (string, error) {
newID := digest.Canonical.Digester()
for i, layer := range m.Layers {
diffID := diffIDs[i]
_, err := newID.Hash().Write([]byte(diffID.Hex()))
if err != nil {
return "", fmt.Errorf("error writing diffID %q: %w", diffID, err)
}
toc, err := chunkedToc.GetTOCDigest(layer.Annotations)
if err != nil {
return "", fmt.Errorf("error looking up annotation for layer %q: %w", layer.Digest, err)
}
if toc != nil {
_, err = newID.Hash().Write([]byte(toc.Hex()))
if err != nil {
return "", fmt.Errorf("error writing TOC %q: %w", toc, err)
}
}
}
return newID.Digest().Hex(), nil
}

// CanChangeLayerCompression returns true if we can compress/decompress layers with mimeType in the current image
// (and the code can handle that).
// NOTE: Even if this returns true, the relevant format might not accept all compression algorithms; the set of accepted
Expand Down
50 changes: 35 additions & 15 deletions storage/storage_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,29 +488,49 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string {
}
diffIDs = append([]digest.Digest{diffID}, diffIDs...)
}
case *manifest.Schema2:
case *manifest.Schema2, *manifest.OCI1:
// We know the ID calculation doesn't actually use the diffIDs, so we don't need to populate
// the diffID list.
case *manifest.OCI1:
for i, l := range m.Layers {
if l.Digest != "" {
// if a layer was pulled using a partial blob, we need to use the TOC digest
// to calculate the image ID, since the layer digest was not validated.
if tocDigest, found := s.indexToTOCDigest[i]; found {
diffIDs = append(diffIDs, tocDigest)
} else {
diffIDs = append(diffIDs, l.Digest)
}
}
}
default:
return ""
}
id, err := m.ImageID(diffIDs)

// We want to use the same ID for “the same” images, but without risking unwanted sharing / malicious image corruption.
//
// Traditionally that means the same ~config digest, as computed by m.ImageID;
// but if we pull a layer by TOC, we verify the layer against neither the (compressed) blob digest in the manifest,
// nor against the config’s RootFS.DiffIDs. We don’t really want to do either, to allow partial layer pulls where we never see
// most of the data.
//
// So, if a layer is pulled by TOC (and we do validate against the TOC), the fact that we used the TOC, and the value of the TOC,
// must enter into the image ID computation.
// But for images where no TOC was used, continue to use IDs computed the traditional way, to maximize image reuse on upgrades,
// and to introduce the changed behavior only when partial pulls are used.
//
// Note that it’s not 100% guaranteed that an image pulled by TOC uses an OCI manifest; consider
// (skopeo copy --format v2s2 docker://…/zstd-chunked-image containers-storage:… ). So this is not happening only in the OCI case above.
ordinaryImageID, err := m.ImageID(diffIDs)
if err != nil {
return ""
}
return id
tocIDInput := ""
hasLayerPulledByTOC := false
for i := range m.LayerInfos() {
layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case.
tocDigest, ok := s.indexToTOCDigest[i] // "" if not a TOC
if ok {
hasLayerPulledByTOC = true
layerValue = tocDigest.String()
}
tocIDInput += layerValue + "|" // "|" can not be present in a TOC digest, so this is an unambiguous separator.
}

if !hasLayerPulledByTOC {
return ordinaryImageID
}
// ordinaryImageID is a digest of a config, which is a JSON value.
// To avoid the risk of collisions, start the input with @ so that the input is not a valid JSON.
return digest.FromString("@With TOC:" + tocIDInput).Hex()
}

// getConfigBlob exists only to let us retrieve the configuration blob so that the manifest package can dig
Expand Down

0 comments on commit d8d3103

Please sign in to comment.