From e7dfaa40625eb17be94965a39add2e88b39e9dd8 Mon Sep 17 00:00:00 2001 From: Suyash Kumar Date: Sat, 1 Apr 2023 22:48:09 -0400 Subject: [PATCH] Add basic Parser.Next based benchmark --- parse.go | 11 ++++++----- read.go | 59 +++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/parse.go b/parse.go index ef15c863..3034a750 100644 --- a/parse.go +++ b/parse.go @@ -72,7 +72,7 @@ func parseInternal(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, } for !p.reader.rawReader.IsLimitExhausted() { - _, err := p.Next() + elem, err := p.Next() if err != nil { if errors.Is(err, io.EOF) { // exiting on EOF @@ -83,6 +83,7 @@ func parseInternal(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, // exit on error return p.dataset, err } + p.dataset.Elements = append(p.dataset.Elements, elem) } // Close the frameChannel if needed @@ -130,8 +131,9 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame, optSet := toParseOptSet(opts...) p := Parser{ reader: &reader{ - rawReader: dicomio.NewReader(bufio.NewReader(in), binary.LittleEndian, bytesToRead), - opts: optSet, + rawReader: dicomio.NewReader(bufio.NewReader(in), binary.LittleEndian, bytesToRead), + opts: optSet, + datasetCtx: &Dataset{}, }, frameChannel: frameChannel, } @@ -182,7 +184,7 @@ func (p *Parser) Next() (*Element, error) { } return nil, ErrorEndOfDICOM } - elem, err := p.reader.readElement(&p.dataset, p.frameChannel) + elem, err := p.reader.ReadElement(p.frameChannel) if err != nil { // TODO: tolerate some kinds of errors and continue parsing return nil, err @@ -201,7 +203,6 @@ func (p *Parser) Next() (*Element, error) { p.reader.rawReader.SetCodingSystem(cs) } - p.dataset.Elements = append(p.dataset.Elements, elem) return elem, nil } diff --git a/read.go b/read.go index 337735f9..f8cfb74b 100644 --- a/read.go +++ b/read.go @@ -41,6 +41,11 @@ var ( type reader struct { rawReader dicomio.Reader opts parseOptSet + // datasetCtx is the top-level context Dataset holding only elements that + // may be needed to parse future elements (e.g. like tag.Rows). See + // datasetContextElementPassList for elements that will be automatically + // included in this context. + datasetCtx *Dataset } func (r *reader) readTag() (*tag.Tag, error) { @@ -162,7 +167,7 @@ func (r *reader) readHeader() ([]*Element, error) { // Must read metadata as LittleEndian explicit VR // Read the length of the metadata elements: (0002,0000) MetaElementGroupLength - maybeMetaLen, err := r.readElement(nil, nil) + maybeMetaLen, err := r.readElementInternal(nil, nil) if err != nil { return nil, err } @@ -187,7 +192,7 @@ func (r *reader) readHeader() ([]*Element, error) { } defer r.rawReader.PopLimit() for !r.rawReader.IsLimitExhausted() { - elem, err := r.readElement(nil, nil) + elem, err := r.readElementInternal(nil, nil) if err != nil { // TODO: see if we can skip over malformed elements somehow return nil, err @@ -214,7 +219,7 @@ func (r *reader) readHeader() ([]*Element, error) { if group != 0x0002 { break } - elem, err := r.readElement(nil, nil) + elem, err := r.readElementInternal(nil, nil) if err != nil { // TODO: see if we can skip over malformed elements somehow return nil, err @@ -506,7 +511,7 @@ func (r *reader) readSequence(t tag.Tag, vr string, vl uint32, d *Dataset) (Valu seqElements := &Dataset{} if vl == tag.VLUndefinedLength { for { - subElement, err := r.readElement(seqElements, nil) + subElement, err := r.readElementInternal(seqElements, nil) if err != nil { // Stop reading due to error log.Println("error reading subitem, ", err) @@ -533,7 +538,7 @@ func (r *reader) readSequence(t tag.Tag, vr string, vl uint32, d *Dataset) (Valu return nil, err } for !r.rawReader.IsLimitExhausted() { - subElement, err := r.readElement(seqElements, nil) + subElement, err := r.readElementInternal(seqElements, nil) if err != nil { // TODO: option to ignore errors parsing subelements? return nil, err @@ -559,7 +564,7 @@ func (r *reader) readSequenceItem(t tag.Tag, vr string, vl uint32, d *Dataset) ( if vl == tag.VLUndefinedLength { for { - subElem, err := r.readElement(&seqElements, nil) + subElem, err := r.readElementInternal(&seqElements, nil) if err != nil { return nil, err } @@ -577,7 +582,7 @@ func (r *reader) readSequenceItem(t tag.Tag, vr string, vl uint32, d *Dataset) ( } for !r.rawReader.IsLimitExhausted() { - subElem, err := r.readElement(&seqElements, nil) + subElem, err := r.readElementInternal(&seqElements, nil) if err != nil { return nil, err } @@ -733,17 +738,24 @@ func (r *reader) readInt(t tag.Tag, vr string, vl uint32) (Value, error) { return retVal, err } -// readElement reads the next element. If the next element is a sequence element, +// ReadElement reads the next element. This is the top level function that should +// be called to read elements. If the next element is a sequence element, +// it may result in reading a collection of Elements. +func (r *reader) ReadElement(fc chan<- *frame.Frame) (*Element, error) { + return r.readElementInternal(r.datasetCtx, fc) +} + +// readElementInternal reads the next element. If the next element is a sequence element, // it may result in a collection of Elements. It takes a pointer to the Dataset of // elements read so far, since previously read elements may be needed to parse // certain Elements (like native PixelData). If the Dataset is nil, it is // treated as an empty Dataset. -func (r *reader) readElement(d *Dataset, fc chan<- *frame.Frame) (*Element, error) { +func (r *reader) readElementInternal(datasetCtx *Dataset, fc chan<- *frame.Frame) (*Element, error) { t, err := r.readTag() if err != nil { return nil, err } - debug.Logf("readElement: tag: %s", t.String()) + debug.Logf("readElementInternal: tag: %s", t.String()) readImplicit := r.rawReader.IsImplicit() if *t == tag.Item { @@ -755,20 +767,24 @@ func (r *reader) readElement(d *Dataset, fc chan<- *frame.Frame) (*Element, erro if err != nil { return nil, err } - debug.Logf("readElement: vr: %s", vr) + debug.Logf("readElementInternal: vr: %s", vr) vl, err := r.readVL(readImplicit, *t, vr) if err != nil { return nil, err } - debug.Logf("readElement: vl: %d", vl) + debug.Logf("readElementInternal: vl: %datasetCtx", vl) - val, err := r.readValue(*t, vr, vl, readImplicit, d, fc) + val, err := r.readValue(*t, vr, vl, readImplicit, datasetCtx, fc) if err != nil { log.Println("error reading value ", err) return nil, err } + elem := &Element{Tag: *t, ValueRepresentation: tag.GetVRKind(*t, vr), RawValueRepresentation: vr, ValueLength: vl, Value: val} + addToContextIfNeeded(datasetCtx, elem) + return elem, nil + return &Element{Tag: *t, ValueRepresentation: tag.GetVRKind(*t, vr), RawValueRepresentation: vr, ValueLength: vl, Value: val}, nil } @@ -829,3 +845,20 @@ func (r *reader) readRawItem(shouldSkip bool) ([]byte, bool, error) { func (r *reader) moreToRead() bool { return !r.rawReader.IsLimitExhausted() } + +// datasetContextElementPassList holds the set of DICOM Element Tags that should +// be included in the context Dataset. These are elements that may be needed to +// read downstream elements in the future. +var datasetContextElementPassList = tag.Tags{ + &tag.Rows, + &tag.Columns, + &tag.NumberOfFrames, + &tag.BitsAllocated, + &tag.SamplesPerPixel, +} + +func addToContextIfNeeded(datasetCtx *Dataset, e *Element) { + if datasetContextElementPassList.Contains(&e.Tag) { + datasetCtx.Elements = append(datasetCtx.Elements, e) + } +}