Skip to content

Commit

Permalink
Parse: If transfer syntax is missing, attempt to infer it by peeking …
Browse files Browse the repository at this point in the history
…next 100 bytes. (#330)

This PR attempts to infer missing transfer syntax in dicoms during Parse.

Specifically: 
* When transfer syntax is missing in dicom metadata, attempt to infer the correct transfer syntax by peeking the next 100bytes and trying to read an element without an error. This isn't foolproof, but one option to start with.
* This also makes test updates to support testfiles/ that may not have PixelData.
* This also introduces a write option to write dicoms without transfer syntax elements, in order to write some "roundtrip" unit tests for this behavior on Parse.

I was able to successfully test using some test data from #327, but I need to do some more investigation to see if we can safely add those to our test files (licensing and otherwise).

Things to consider in the future:
* Try deflated little endian explicit as well. 
* Peek more/less than the initial 100 bytes, or move away from a fixed peek.
  • Loading branch information
suyashkumar authored Jun 10, 2024
1 parent 0b4bb9f commit b8bf692
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 42 deletions.
64 changes: 58 additions & 6 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ package dicom

import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"os"

Expand Down Expand Up @@ -170,14 +172,64 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame,
if tsStr == uid.DeflatedExplicitVRLittleEndian {
p.reader.rawReader.SetDeflate()
}
} else {
// No transfer syntax found, warn the user we're proceeding with the
// default Little Endian implicit.
debug.Log("WARN: could not find transfer syntax uid in metadata, proceeding with little endian implicit")
p.SetTransferSyntax(bo, implicit)
return &p, nil
}
p.SetTransferSyntax(bo, implicit)

return &p, nil
// No transfer syntax found, so let's try to infer the transfer syntax by
// trying to read the next element under various transfer syntaxes.
next100, err := p.reader.rawReader.Peek(100)
if errors.Is(err, io.EOF) {
// DICOM is shorter than 100 bytes.
return nil, fmt.Errorf("dicom with missing transfer syntax metadata is shorter than 100 bytes, so cannot infer transfer syntax")
}

syntaxes := []struct {
name string
bo binary.ByteOrder
implicit bool
}{
{
name: "Little Endian Implicit",
bo: binary.LittleEndian,
implicit: true,
},
{
name: "Big Endian Explicit",
bo: binary.BigEndian,
implicit: false,
},
{
name: "Little Endian Explicit",
bo: binary.LittleEndian,
implicit: false,
},
}

for _, syntax := range syntaxes {
if canReadElementFromBytes(next100, optSet, syntax.bo, syntax.implicit) {
debug.Logf("WARN: could not find transfer syntax uid in metadata, proceeding with %v", syntax.name)
p.SetTransferSyntax(syntax.bo, syntax.implicit)
return &p, nil
}
}
// TODO(https://github.com/suyashkumar/dicom/issues/329): consider trying
// deflated parsing as a fallback as well.
return &p, errors.New("dicom missing transfer syntax uid in metadata, and it was not possible to successfully infer it using the next 100 bytes of the dicom")
}

func canReadElementFromBytes(buf []byte, optSet parseOptSet, bo binary.ByteOrder, implicit bool) bool {
next100Reader := bytes.NewReader(buf)
subR := &reader{
rawReader: dicomio.NewReader(bufio.NewReader(next100Reader), bo, int64(len(buf))),
opts: optSet,
}
subR.rawReader.SetTransferSyntax(bo, implicit)
_, err := subR.readElement(nil, nil)
if err == nil {
return true
}
return false
}

// Next parses and returns the next top-level element from the DICOM this Parser points to.
Expand Down
59 changes: 59 additions & 0 deletions parse_internal_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
package dicom

import (
"bytes"
"errors"
"os"
"strings"
"testing"

"github.com/suyashkumar/dicom/pkg/tag"
)

// parse_internal_test.go holds tests that must exist in the dicom package (as
// opposed to dicom_test), in order to access internal entities.

// TestParseUntilEOFConformsToParse runs both the dicom.ParseUntilEOF and the dicom.Parse APIs against each
// testdata file and ensures the outputs are the same.
// This test lives in parse_internal_test.go because this test cannot live in the dicom_test package, due
Expand Down Expand Up @@ -48,6 +55,58 @@ func TestParseUntilEOFConformsToParse(t *testing.T) {
}
}

func TestParse_InfersMissingTransferSyntax(t *testing.T) {
dsWithMissingTS := Dataset{Elements: []*Element{
mustNewElement(tag.MediaStorageSOPClassUID, []string{"1.2.840.10008.5.1.4.1.1.1.2"}),
mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.2.3.4.5.6.7"}),
mustNewElement(tag.PatientName, []string{"Bob", "Jones"}),
mustNewElement(tag.Rows, []int{128}),
mustNewElement(tag.FloatingPointValue, []float64{128.10}),
mustNewElement(tag.DimensionIndexPointer, []int{32, 36950}),
mustNewElement(tag.RedPaletteColorLookupTableData, make([]byte, 200)),
}}

cases := []struct {
name string
overrideTransferSyntax string
}{
{
name: "Little Endian Implicit",
overrideTransferSyntax: "1.2.840.10008.1.2",
},
{
name: "Little Endian Explicit",
overrideTransferSyntax: "1.2.840.10008.1.2.1",
},
{
name: "Big Endian Explicit",
overrideTransferSyntax: "1.2.840.10008.1.2.2",
},
}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// Write out Dataset with OverrideMissingTransferSyntax option _and_
// the skipWritingTransferSyntaxForTests to ensure no Transfer Syntax
// element is written to the test dicom. The test later verifies
// that no Transfer Syntax element was written to the metadata.
writtenDICOM := &bytes.Buffer{}
if err := Write(writtenDICOM, dsWithMissingTS, OverrideMissingTransferSyntax(tc.overrideTransferSyntax), skipWritingTransferSyntaxForTests()); err != nil {
t.Errorf("Write(OverrideMissingTransferSyntax(%v)) returned unexpected error: %v", tc.overrideTransferSyntax, err)
}

parsedDS, err := ParseUntilEOF(writtenDICOM, nil)
if err != nil {
t.Fatalf("ParseUntilEOF returned unexpected error when reading written dataset back in: %v", err)
}
_, err = parsedDS.FindElementByTag(tag.TransferSyntaxUID)
if !errors.Is(err, ErrorElementNotFound) {
t.Fatalf("expected test dicom dataset to be missing explicit TransferSyntaxUID tag, but found one. got: %v, want: ErrorElementNotFound", err)
}
})
}
}

func readTestdataFile(t *testing.T, name string) *os.File {
dcm, err := os.Open("./testdata/" + name)
if err != nil {
Expand Down
46 changes: 26 additions & 20 deletions parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,37 +100,43 @@ func TestParseFile_SkipPixelData(t *testing.T) {
runForEveryTestFile(t, func(t *testing.T, filename string) {
dataset, err := dicom.ParseFile(filename, nil, dicom.SkipPixelData())
if err != nil {
t.Errorf("Unexpected error parsing dataset: %v", dataset)
t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset)
}
// If PixelData present in this DICOM, check if it's populated
// correctly. The current test assumption is that if PixelData is
// missing, it was not originally in the dicom (which we should
// consider revisiting).
el, err := dataset.FindElementByTag(tag.PixelData)
if err != nil {
t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err)
}
pixelData := dicom.MustGetPixelDataInfo(el.Value)
if !pixelData.IntentionallySkipped {
t.Errorf("Expected pixelData.IntentionallySkipped=true, got false")
}
if got := len(pixelData.Frames); got != 0 {
t.Errorf("unexpected frames length. got: %v, want: %v", got, 0)
if err == nil {
pixelData := dicom.MustGetPixelDataInfo(el.Value)
if !pixelData.IntentionallySkipped {
t.Errorf("Expected pixelData.IntentionallySkipped=true, got false")
}
if got := len(pixelData.Frames); got != 0 {
t.Errorf("unexpected frames length. got: %v, want: %v", got, 0)
}
}
})
})
t.Run("WithNOSkipPixelData", func(t *testing.T) {
runForEveryTestFile(t, func(t *testing.T, filename string) {
dataset, err := dicom.ParseFile(filename, nil)
if err != nil {
t.Errorf("Unexpected error parsing dataset: %v", dataset)
t.Errorf("Unexpected error parsing dataset: %v, dataset: %v", err, dataset)
}
// If PixelData present in this DICOM, check if it's populated
// correctly. The current test assumption is that if PixelData is
// missing, it was not originally in the dicom (which we should
// consider revisiting).
el, err := dataset.FindElementByTag(tag.PixelData)
if err != nil {
t.Errorf("Unexpected error when finding PixelData in Dataset: %v", err)
}
pixelData := dicom.MustGetPixelDataInfo(el.Value)
if pixelData.IntentionallySkipped {
t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true")
}
if len(pixelData.Frames) == 0 {
t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames))
if err == nil {
pixelData := dicom.MustGetPixelDataInfo(el.Value)
if pixelData.IntentionallySkipped {
t.Errorf("Expected pixelData.IntentionallySkipped=false when SkipPixelData option not present, got true")
}
if len(pixelData.Frames) == 0 {
t.Errorf("unexpected frames length when SkipPixelData=false. got: %v, want: >0", len(pixelData.Frames))
}
}
})
})
Expand Down
46 changes: 30 additions & 16 deletions write.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,24 @@ func OverrideMissingTransferSyntax(transferSyntaxUID string) WriteOption {
}
}

// skipWritingTransferSyntaxForTests is a test WriteOption that cause Write to skip
// writing the transfer syntax uid element in the DICOM metadata. When used in
// combination with OverrideMissingTransferSyntax, this can be used to set the
// TransferSyntax for the written dataset without writing the actual transfer
// syntax element to the metadata.
func skipWritingTransferSyntaxForTests() WriteOption {
return func(set *writeOptSet) {
set.skipWritingTransferSyntaxForTests = true
}
}

// writeOptSet represents the flattened option set after all WriteOptions have been applied.
type writeOptSet struct {
skipVRVerification bool
skipValueTypeVerification bool
defaultMissingTransferSyntax bool
overrideMissingTransferSyntaxUID string
skipVRVerification bool
skipValueTypeVerification bool
defaultMissingTransferSyntax bool
overrideMissingTransferSyntaxUID string
skipWritingTransferSyntaxForTests bool
}

func (w *writeOptSet) validate() error {
Expand Down Expand Up @@ -203,21 +215,23 @@ func writeFileHeader(w *dicomio.Writer, ds *Dataset, metaElems []*Element, opts
return err
}

err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts)
if !opts.skipWritingTransferSyntaxForTests {
err = writeMetaElem(subWriter, tag.TransferSyntaxUID, ds, &tagsUsed, opts)

if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax {
// Write the default transfer syntax
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil {
return err
}
} else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" {
// Write the override transfer syntax
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil {
if errors.Is(err, ErrorElementNotFound) && opts.defaultMissingTransferSyntax {
// Write the default transfer syntax
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), opts); err != nil {
return err
}
} else if errors.Is(err, ErrorElementNotFound) && opts.overrideMissingTransferSyntaxUID != "" {
// Write the override transfer syntax
if err = writeElement(subWriter, mustNewElement(tag.TransferSyntaxUID, []string{opts.overrideMissingTransferSyntaxUID}), opts); err != nil {
return err
}
} else if err != nil {
// Return the error if none of the above conditions/overrides apply.
return err
}
} else if err != nil {
// Return the error if none of the above conditions/overrides apply.
return err
}

for _, elem := range metaElems {
Expand Down

0 comments on commit b8bf692

Please sign in to comment.