From 12b5fd641b602c842e584a75d2905947e277daa4 Mon Sep 17 00:00:00 2001 From: Martin Fenner Date: Sat, 6 Apr 2024 18:24:39 +0200 Subject: [PATCH] adding API calls with tests --- crossref/crossref.go | 60 ++++++++++++++++++++++++++ crossref/crossref_test.go | 39 +++++++++++++++++ datacite/datacite.go | 81 +++++++++++++++++++++++++++++++++++ datacite/datacite_test.go | 41 ++++++++++++++++++ dateutils/dateutils_test.go | 2 +- doiutils/doiutils.go | 54 +++++++++++++++++++++++ doiutils/doiutils_test.go | 25 +++++++++++ inveniordm/inveniordm.go | 42 ++++++++++++++++++ inveniordm/inveniordm_test.go | 39 +++++++++++++++++ jsonfeed/jsonfeed.go | 43 +++++++++++++++++++ jsonfeed/jsonfeed_test.go | 37 ++++++++++++++++ metadata/metadata_test.go | 2 +- utils/utils.go | 24 ++++++++++- utils/utils_test.go | 24 +++++++++++ 14 files changed, 510 insertions(+), 3 deletions(-) create mode 100644 crossref/crossref.go create mode 100644 crossref/crossref_test.go create mode 100644 datacite/datacite.go create mode 100644 datacite/datacite_test.go create mode 100644 doiutils/doiutils.go create mode 100644 doiutils/doiutils_test.go create mode 100644 inveniordm/inveniordm.go create mode 100644 inveniordm/inveniordm_test.go create mode 100644 jsonfeed/jsonfeed.go create mode 100644 jsonfeed/jsonfeed_test.go diff --git a/crossref/crossref.go b/crossref/crossref.go new file mode 100644 index 0000000..9791a1c --- /dev/null +++ b/crossref/crossref.go @@ -0,0 +1,60 @@ +package crossref + +import ( + "commonmeta/doiutils" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// the envelope for the JSON response from the Crossref API +type Result struct { + Status string `json:"status"` + MessageType string `json:"message-type"` + MessageVersion string `json:"message-version"` + Message Record `json:"message"` +} + +// the JSON response containing the metadata for the DOI +type Record struct { + URL string `json:"URL"` + DOI string `json:"DOI"` + Type string `json:"type"` + Title []string 
`json:"title"` + Publisher string `json:"publisher"` + Volume string `json:"volume"` + Issue string `json:"issue"` + Page string `json:"page"` +} + +var result Result + +func GetCrossref(pid string) (Record, error) { + doi, err := doiutils.DOIFromUrl(pid) + if err != nil { + return Record{}, err + } + url := "https://api.crossref.org/works/" + doi + client := http.Client{ + Timeout: time.Second * 10, + } + resp, err := client.Get(url) + if err != nil { + return Record{}, err + } + if resp.StatusCode >= 400 { + return Record{}, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return Record{}, err + } + err = json.Unmarshal(body, &result) + if err != nil { + fmt.Println("error:", err) + } + return result.Message, err +} diff --git a/crossref/crossref_test.go b/crossref/crossref_test.go new file mode 100644 index 0000000..123d2f4 --- /dev/null +++ b/crossref/crossref_test.go @@ -0,0 +1,39 @@ +package crossref_test + +import ( + "commonmeta/crossref" + "testing" +) + +func TestGetCrossref(t *testing.T) { + t.Parallel() + + type testCase struct { + doi string + want string + err error + } + + journalArticle := crossref.Record{ + URL: "https://api.crossref.org/works/10.7554/elife.01567", + DOI: "10.7554/elife.01567", + Publisher: "eLife Sciences Publications, Ltd", + } + postedContent := crossref.Record{ + URL: "https://api.crossref.org/works/10.1101/097196", + DOI: "10.1101/097196", + Publisher: "Cold Spring Harbor Laboratory", + } + + testCases := []testCase{ + {doi: journalArticle.DOI, want: journalArticle.Publisher, err: nil}, + {doi: postedContent.DOI, want: postedContent.Publisher, err: nil}, + } + for _, tc := range testCases { + got, err := crossref.GetCrossref(tc.doi) + if tc.want != got.Publisher { + t.Errorf("Get Crossref(%v): want %v, got %v, error %v", + tc.doi, tc.want, got, err) + } + } +} diff --git a/datacite/datacite.go b/datacite/datacite.go new 
file mode 100644 index 0000000..062e0bd --- /dev/null +++ b/datacite/datacite.go @@ -0,0 +1,81 @@ +package datacite + +import ( + "commonmeta/doiutils" + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// the envelope for the JSON response from the DataCite API +type Result struct { + Data Record `json:"data"` +} + +// the JSON response containing the DataCite metadata +type Record struct { + ID string `json:"id"` + Type string `json:"type"` + Attributes Attributes `json:"attributes"` +} + +type Attributes struct { + DOI string `json:"doi"` + Prefix string `json:"prefix"` + Suffix string `json:"suffix"` + Creators []Creator `json:"creators"` + Publisher string `json:"publisher"` + Container Container `json:"container"` + PublicationYear int `json:"publicationYear"` + Titles []Title `json:"titles"` + Url string `json:"url"` +} + +type Container struct { + ID string `json:"id"` + Name string `json:"name"` +} + +type Creator struct { + Type string `json:"type"` + Identifier string `json:"identifier"` + IdentifierType string `json:"identifierType"` + Name string `json:"name"` +} + +type Title struct { + Title string `json:"title"` + Language string `json:"language"` +} + +var result Result + +func GetDatacite(pid string) (Record, error) { + doi, err := doiutils.DOIFromUrl(pid) + if err != nil { + return Record{}, err + } + url := "https://api.datacite.org/dois/" + doi + client := http.Client{ + Timeout: time.Second * 10, + } + resp, err := client.Get(url) + if err != nil { + return Record{}, err + } + if resp.StatusCode != 200 { + return Record{}, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if err != nil { + return Record{}, err + } + err = json.Unmarshal(body, &result) + if err != nil { + fmt.Println("error:", err) + } + return result.Data, err +} diff --git a/datacite/datacite_test.go b/datacite/datacite_test.go new file mode 100644 index 0000000..c85c181 --- 
// DOIFromUrl extracts a DOI from str, which is either a bare DOI or a URL on
// the DOI resolver.
//
//   - A string that parses without a host (for example "10.1101/097196") is
//     returned unchanged; it is not checked for DOI syntax.
//   - A URL whose host is doi.org or dx.doi.org (compared case-insensitively,
//     any port ignored) and whose path starts with "/10." yields that path
//     without the leading slash.
//   - Any other URL yields an empty string and a nil error.
//
// An error is returned only when str cannot be parsed as a URL.
func DOIFromUrl(str string) (string, error) {
	u, err := url.Parse(str)
	if err != nil {
		return "", err
	}
	if u.Host == "" {
		return str, nil
	}

	// url.Parse keeps the host exactly as written and u.Host still carries
	// the port, so normalise both before comparing.
	switch strings.ToLower(u.Hostname()) {
	case "doi.org", "dx.doi.org":
	default:
		return "", nil
	}
	if !strings.HasPrefix(u.Path, "/10.") {
		return "", nil
	}
	return strings.TrimPrefix(u.Path, "/"), nil
}
// Record holds the fields decoded from an InvenioRDM record response.
type Record struct {
	ID    string `json:"id"`
	DOI   string `json:"doi"`
	Title string `json:"title"`
}

// httpClient is shared by every request of this package. The ten second
// timeout bounds the whole exchange, including reading the body.
var httpClient = &http.Client{Timeout: 10 * time.Second}

// GetInvenioRDM fetches the record with identifier pid from
// https://zenodo.org/api/records/.
//
// It returns an error when the request fails or when the API answers with
// anything other than 200; the Record is the zero value in those cases.
//
// If the response body cannot be decoded cleanly the decoding error is
// returned together with whatever fields were decoded: json.Unmarshal carries
// on past a type mismatch, and callers may still want the remaining fields.
// NOTE(review): ID is declared as a string; if the API serves "id" as a JSON
// number every call reports such a type error — confirm against the API.
func GetInvenioRDM(pid string) (Record, error) {
	resp, err := httpClient.Get("https://zenodo.org/api/records/" + pid)
	if err != nil {
		return Record{}, err
	}
	// Deferred before the status check so the body is closed on every path.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return Record{}, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return Record{}, err
	}

	// Decode into a fresh value: a variable shared between calls would keep
	// fields from an earlier response whenever the current one omits them.
	var record Record
	err = json.Unmarshal(body, &record)
	return record, err
}
// Record holds the fields decoded from a JSON Feed item response.
type Record struct {
	ID    string `json:"id"`
	DOI   string `json:"doi"`
	Title string `json:"title"`
	Url   string `json:"url"`
}

// httpClient is shared by every request of this package. The ten second
// timeout bounds the whole exchange, including reading the body.
var httpClient = &http.Client{Timeout: 10 * time.Second}

// GetJsonfeedItem fetches the post with identifier pid from
// https://api.rogue-scholar.org/posts/.
//
// It returns an error when the request fails or when the API answers with
// anything other than 200; the Record is the zero value in those cases.
//
// If the response body cannot be decoded cleanly the decoding error is
// returned together with whatever fields were decoded: json.Unmarshal carries
// on past a type mismatch, and callers may still want the remaining fields.
func GetJsonfeedItem(pid string) (Record, error) {
	resp, err := httpClient.Get("https://api.rogue-scholar.org/posts/" + pid)
	if err != nil {
		return Record{}, err
	}
	// Deferred before the status check so the body is closed on every path.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return Record{}, fmt.Errorf("status code error: %d %s", resp.StatusCode, resp.Status)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return Record{}, err
	}

	// Decode into a fresh value: a variable shared between calls would keep
	// fields from an earlier response whenever the current one omits them.
	var record Record
	err = json.Unmarshal(body, &record)
	return record, err
}
// NormalizeUrl parses str as a URL and returns it in normalized form.
//
// One trailing slash is removed from the path. When secure is true an "http"
// scheme is replaced by "https". When lower is true the complete resulting
// URL string is lowercased, not just scheme and host.
//
// If str cannot be parsed the parse error is returned with an empty string.
func NormalizeUrl(str string, secure bool, lower bool) (string, error) {
	u, err := url.Parse(str)
	if err != nil {
		return "", err
	}

	// TrimSuffix is a no-op on an empty path. Indexing the last byte of the
	// path directly panics for inputs without one, such as
	// "https://example.org".
	u.Path = strings.TrimSuffix(u.Path, "/")

	if secure && u.Scheme == "http" {
		u.Scheme = "https"
	}

	normalized := u.String()
	if lower {
		normalized = strings.ToLower(normalized)
	}
	return normalized, nil
}
false, lower: true, want: "http://elifesciences.org/articles/91729"}, + {input: "https://elifesciences.org/Articles/91729/", secure: true, lower: false, want: "https://elifesciences.org/Articles/91729"}, + {input: "http://elifesciences.org/Articles/91729/", secure: false, lower: false, want: "http://elifesciences.org/Articles/91729"}, + } + for _, tc := range testCases { + got, err := utils.NormalizeUrl(tc.input, tc.secure, tc.lower) + if tc.want != got { + t.Errorf("Normalize URL(%v): want %v, got %v, error %v", + tc.input, tc.want, got, err) + } + } +} + func TestIssnAsUrl(t *testing.T) { t.Parallel() type testCase struct {