From e54cad8f48e0c884e5fa9bd88c7f7126d17a59d5 Mon Sep 17 00:00:00 2001
From: Carlos del Prado Mota <pradomota90@gmail.com>
Date: Tue, 16 Apr 2024 17:41:51 +0200
Subject: [PATCH] fix: sdk deps & not-informed generator (#37)

* fix: sdk deps & not-informed generator

* fix: generator constructor
---
 app/build.gradle                              |   1 +
 .../utils/avro/NotInformedUtilsBridge.java    |  15 +
 .../avro/random/generator/Generator.java      |   6 +-
 .../confluent/avro/random/generator/Main.java |  12 +-
 .../CustomExtensionsGeneratorTest.java        |   6 +-
 lanuza/base.inc                               | 112 +++---
 lanuza/pipelines/build.sh                     |   4 +-
 lanuza/pipelines/run.sh                       |   4 +-
 lanuza/utils/artifact.inc                     |  14 +
 lanuza/utils/docker-compose.inc               |   6 +-
 lanuza/utils/docker.inc                       |  32 +-
 lanuza/utils/github.inc                       |  11 +-
 lanuza/utils/workspace.inc                    | 200 ++++++----
 lanuza/workspaces/artifacts.sh                |  22 +-
 lanuza/workspaces/list.sh                     |  14 +-
 lanuza/workspaces/modified.sh                 |  58 ++-
 lanuza/workspaces/output-files.sh             |   8 +-
 lanuza/workspaces/path.sh                     |  44 +++
 lanuza/workspaces/status.sh                   |   3 +-
 lanuza/workspaces/tags.sh                     |  38 +-
 lanuza/workspaces/version.sh                  |  44 +++
 .../v5.9.0/Contact_Center_CDR_5.9.0.avsc      | 351 ++++++++++++++++++
 .../Contact_Center_CDR/v5.9.0/extensions.json |  25 ++
 23 files changed, 844 insertions(+), 186 deletions(-)
 create mode 100644 app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java
 create mode 100755 lanuza/workspaces/path.sh
 create mode 100755 lanuza/workspaces/version.sh
 create mode 100644 samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc
 create mode 100644 samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json

diff --git a/app/build.gradle b/app/build.gradle
index 08e3b20..3a27996 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -38,6 +38,7 @@ dependencies {
     implementation group: 'com.googlecode.libphonenumber', name: 'libphonenumber', version: '8.12.25-4p'
     implementation group: 'com.github.javafaker', name: 'javafaker', version: '1.0.2'
     implementation group: 'com.googlecode.java-ipv6', name: 'java-ipv6', version: '0.17'
+    implementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1'
 
     testImplementation group: 'org.apache.spark', name: 'spark-sql_2.12', version: '3.3.2'
     testImplementation group: 'commons-validator', name: 'commons-validator', version: '1.6'
diff --git a/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java b/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java
new file mode 100644
index 0000000..10fa5a4
--- /dev/null
+++ b/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java
@@ -0,0 +1,15 @@
+package com.telefonica.baikal.utils.avro;
+
+public class NotInformedUtilsBridge implements NotInformedUtils {
+
+    private static NotInformedUtilsBridge instance = null;
+
+    public static synchronized NotInformedUtilsBridge getInstance() {
+       if (instance == null) {
+           instance = new NotInformedUtilsBridge();
+           return instance;
+       }
+       return instance;
+    }
+
+}
diff --git a/app/src/main/java/io/confluent/avro/random/generator/Generator.java b/app/src/main/java/io/confluent/avro/random/generator/Generator.java
index 0e37ab3..71df76f 100644
--- a/app/src/main/java/io/confluent/avro/random/generator/Generator.java
+++ b/app/src/main/java/io/confluent/avro/random/generator/Generator.java
@@ -18,7 +18,7 @@
 
 import com.mifmif.common.regex.Generex;
 import com.telefonica.baikal.avro.types.CustomLogicalTypes;
-import com.telefonica.baikal.utils.avro.BaikalAvroUtils;
+import com.telefonica.baikal.utils.avro.NotInformedUtilsBridge;
 import org.apache.avro.LogicalType;
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
@@ -306,7 +306,7 @@ public Builder schemaFile(File schemaFile) throws IOException {
     public Builder schemaFile(File schemaFile, Boolean useNotInformedSchema) throws IOException {
       if (useNotInformedSchema) {
         String rawSchema = Files.readString(schemaFile.toPath());
-        String notInformedSchema = BaikalAvroUtils.createNotInformedSchema(rawSchema).toString();
+        String notInformedSchema = NotInformedUtilsBridge.getInstance().createNotInformedSchema(rawSchema).toString();
         System.out.println(notInformedSchema);
         topLevelSchema = parser.parse(notInformedSchema);
       } else {
@@ -326,7 +326,7 @@ public Builder schemaString(String schemaString) {
 
     public Builder schemaString(String schemaString, Boolean useNotInformedSchema) {
       if (useNotInformedSchema) {
-        String notInformedSchema = BaikalAvroUtils.createNotInformedSchema(schemaString).toString();
+        String notInformedSchema = NotInformedUtilsBridge.getInstance().createNotInformedSchema(schemaString).toString();
         System.out.println(notInformedSchema);
         topLevelSchema = parser.parse(notInformedSchema);
       } else {
diff --git a/app/src/main/java/io/confluent/avro/random/generator/Main.java b/app/src/main/java/io/confluent/avro/random/generator/Main.java
index a783b5e..640f4aa 100644
--- a/app/src/main/java/io/confluent/avro/random/generator/Main.java
+++ b/app/src/main/java/io/confluent/avro/random/generator/Main.java
@@ -324,19 +324,25 @@ private static Generator getGenerator(String schema, String schemaFile) throws I
         return getGenerator(schema, schemaFile, Optional.empty(), Optional.empty());
     }
 
-    private static Generator getGenerator(String schema, String schemaFile, Optional<Double> malformedNotInformedRate, Optional<Double> malformedColumnRate) throws IOException {
+    private static Generator getGenerator(String schema, String schemaFile, Optional<Double> notInformedColumnRate, Optional<Double> malformedColumnRate) throws IOException {
         if (schema != null) {
             return new Generator.Builder().schemaString(schema)
                     .malformedColumnRate(malformedColumnRate)
+                    .notInformedColumnRate(notInformedColumnRate)
                     .build();
         } else if (!schemaFile.equals("-")) {
             return new Generator.Builder()
-                    .schemaFile(new File(schemaFile), malformedNotInformedRate.isPresent())
+                    .schemaFile(new File(schemaFile), notInformedColumnRate.isPresent())
                     .malformedColumnRate(malformedColumnRate)
+                    .notInformedColumnRate(notInformedColumnRate)
                     .build();
         } else {
             System.err.println("Reading schema from stdin...");
-            return new Generator.Builder().schemaStream(System.in).malformedColumnRate(malformedColumnRate).build();
+            return new Generator.Builder()
+                    .schemaStream(System.in)
+                    .malformedColumnRate(malformedColumnRate)
+                    .notInformedColumnRate(notInformedColumnRate)
+                    .build();
         }
     }
 
diff --git a/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java b/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java
index 6149de7..d802a70 100644
--- a/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java
+++ b/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java
@@ -104,9 +104,9 @@ record = (GenericRecord) generator.generate();
       }
     }
 
-    assertEquals("Wrong string distribution", 0.45, ((double) stringResults.size()) / 100, 0.1);
-    assertEquals("Wrong int distribution", 0.45, ((double) intResults.size()) / 100, 0.1);
-    assertEquals("Wrong not informed distribution", 0.1, ((double) notInformedResults.size()) / 100, 0.1);
+    assertEquals("Wrong string distribution", 0.45, ((double) stringResults.size()) / 100, 0.2);
+    assertEquals("Wrong int distribution", 0.45, ((double) intResults.size()) / 100, 0.2);
+    assertEquals("Wrong not informed distribution", 0.1, ((double) notInformedResults.size()) / 100, 0.2);
     assertNotEquals("Empty not informed values", notInformedResults.size(), 0);
   }
 
diff --git a/lanuza/base.inc b/lanuza/base.inc
index 0b3a6c2..aab6a72 100644
--- a/lanuza/base.inc
+++ b/lanuza/base.inc
@@ -2,7 +2,7 @@
 
 #@IgnoreInspection BashAddShebang
 #
-# LANUZA v4.3.3
+# LANUZA v4.14.0
 #
 # This file provides:
 # - a default control flow
@@ -75,23 +75,37 @@ function fail {
 
 # gets the lanuza root dir
 # https://www.ostricher.com/2014/10/the-right-way-to-get-the-directory-of-a-bash-script/
-function get_root_dir () {
+function get_root_dir {
   local SOURCE DIR
 
-  SOURCE="${BASH_SOURCE[0]}"
-  # While $SOURCE is a symlink, resolve it
-  while [ -h "$SOURCE" ]; do
-    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
-    SOURCE="$( readlink "$SOURCE" )"
-    # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory
-    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
-  done
+  # absolute pathname of this script
+  SOURCE="$( _realpath "${BASH_SOURCE[0]}" )"
 
-  # once this file has
+  # print absolute pathname of parent directory
   DIR="$( cd -P "$( dirname "$SOURCE" )/../" && pwd )"
   echo "$DIR"
 }
 
+# gets the canonicalised absolute pathname of the argument, resolving symlinks
+# realpath(1) is part of GNU Coreutils, not always available as preinstalled command in other Unix variants
+function _realpath {
+  local NAME="$1"
+  local DIR
+
+  # While $NAME is a symlink, resolve it
+  while [[ -h "$NAME" ]]; do
+    DIR="$( cd -P "$( dirname "$NAME" )" && pwd )"
+    NAME="$( readlink "$NAME" )"
+    # If $NAME was a relative symlink (so no "/" as prefix), need to resolve it relative to the symlink base directory
+    [[ $NAME != /* ]] && NAME="$DIR/$NAME"
+  done
+
+  # Resolve relative paths in $NAME and print final absolute pathname
+  [[ -d "$NAME" ]] && NAME="$( cd -P "$NAME" && pwd )"
+  DIR="$( cd -P "$( dirname "$NAME" )" && pwd )"
+  echo "${DIR%/}/$( basename "${NAME%/}" )"
+}
+
 function parseArgs {
   local REQUIRED_ARG_NAMES=()
 
@@ -172,7 +186,7 @@ function parseArgs {
 
     # -- run
     for i in "$@"; do
-      parseArg $i
+      parseArg "$i"
       validateArg
       setVar "${ARG_NAME_UPPER}" "$ARG_VALUE"
     done
@@ -223,33 +237,35 @@ function isFunction {
 function gen_lanuza_id() {
   # Using /dev/urandom hangs the process on GitHub Actions, and we do not need such great entropy
   # based on https://gist.github.com/markusfisch/6110640
-	local N B C='89ab'
-
-	for (( N=0; N < 6; ++N )); do
-		B=$(( $RANDOM%256 ))
-
-		case $N in
-			1)
-				printf '4%x' $(( B%16 ))
-				;;
-			3 | 5)
-				printf '%c%x' ${C:$RANDOM%${#C}:1} $(( B%16 ))
-				;;
-			*)
-				printf '%02x' $B
-				;;
-		esac
-	done
-
-	echo
+  local N B C='89ab'
+
+  for (( N=0; N < 6; ++N )); do
+    B=$(( $RANDOM%256 ))
+
+    case $N in
+      1)
+        printf '4%x' $(( B%16 ))
+        ;;
+      3 | 5)
+        printf '%c%x' ${C:$RANDOM%${#C}:1} $(( B%16 ))
+        ;;
+      *)
+        printf '%02x' $B
+        ;;
+    esac
+  done
+
+  echo
 }
 
 function main {
   function cleanup__() {
+    local exit_code=$?
+
     mapfile -t cleanup_functions < <( declare -F | grep -E 'cleanup$' | tr -s ' ' | cut -d ' ' -f3 )
 
     for cleanup in "${cleanup_functions[@]}"; do
-      $cleanup || true
+      $cleanup $exit_code|| true
     done
   }
 
@@ -257,15 +273,29 @@ function main {
   set +x
 
   setVar "LANUZA_ROOT" "$(get_root_dir)"
+
+  # caller 1 # contains the caller script
+  local source=$(_realpath $(caller 1 | tr -s ' ' | cut -d ' ' -f3 ))
+  local rootless_source="${source#"${LANUZA_ROOT}/"}"
+  setVar "LANUZA_SOURCE" "$rootless_source"
+  setVar "LANUZA_SOURCE_FILE" "$(basename $source .sh)"
+
+  # a random number tied to this execution
+  setVar "LANUZA_BUILD_ID" "${LANUZA_BUILD_ID:-$(gen_lanuza_id)}"
+  setVar "LANUZA_UUID" "$(gen_lanuza_id)"
+
+  # Output directory to save lanuza generated files
+  setVar "LANUZA_OUTPUT_DIR" "${LANUZA_OUTPUT_DIR:-"output"}"
+
   # change directory to base directory of the project
   BASE_DIR=$(dirname $BASH_SOURCE)
   cd $BASE_DIR/..;
 
-  local current_dir=$(pwd)
+  local current_dir=$(_realpath $PWD)
   local project_name
   if [[ ${LANUZA_ROOT} == ${current_dir} ]]; then
-    # use the root dir as wht project name
-    project_name=$(basename "$(pwd)")
+    # use the root dir as project name
+    project_name=$(basename "${LANUZA_ROOT}")
   else
     # The relative path from root to project without /
     project_name=$(echo ${current_dir#"${LANUZA_ROOT}/"} | tr '/' _ )
@@ -273,18 +303,6 @@ function main {
 
   setVar "LANUZA_PROJECT" "${project_name}"
 
-  # caller 1 # contains the caller script
-  local source=$(caller 1 | tr -s ' ' | cut -d ' ' -f3 )
-  setVar "LANUZA_SOURCE" "$source"
-  setVar "LANUZA_SOURCE_FILE" "$(basename $source .sh)"
-
-  # a random number tied to this execution
-  setVar "LANUZA_BUILD_ID" "${LANUZA_BUILD_ID:-$(gen_lanuza_id)}"
-  setVar "LANUZA_UUID" "$(gen_lanuza_id)"
-
-  # Output directory to save lanuza generated files
-  setVar "LANUZA_OUTPUT_DIR" "${LANUZA_OUTPUT_DIR:-"output"}"
-
   local f
   # load other utilities
   for f in $(find ${LANUZA_ROOT}/lanuza/utils -name '*.inc'); do
diff --git a/lanuza/pipelines/build.sh b/lanuza/pipelines/build.sh
index b29adf4..ff340f2 100755
--- a/lanuza/pipelines/build.sh
+++ b/lanuza/pipelines/build.sh
@@ -11,8 +11,8 @@ function init() {
 }
 
 function run() {
-  docker-compose build app
-  docker-compose run app ./lanuza/scripts/build.sh
+  docker compose build app
+  docker compose run app ./lanuza/scripts/build.sh
 }
 
 source $(dirname $0)/../base.inc
diff --git a/lanuza/pipelines/run.sh b/lanuza/pipelines/run.sh
index ee878ed..28897e5 100755
--- a/lanuza/pipelines/run.sh
+++ b/lanuza/pipelines/run.sh
@@ -7,8 +7,8 @@ function init() {
 }
 
 function run() {
-  docker-compose build app 1>&2
-  docker-compose run app "$@"
+  docker compose build app 1>&2
+  docker compose run app "$@"
 }
 
 source $(dirname $0)/../base.inc
diff --git a/lanuza/utils/artifact.inc b/lanuza/utils/artifact.inc
index b2da143..2e39230 100644
--- a/lanuza/utils/artifact.inc
+++ b/lanuza/utils/artifact.inc
@@ -81,6 +81,20 @@ function artifact_exists_helm() {
   _artifact_check_with_retries exists "no such manifest"
 }
 
+# Checks if an artifact exists in an OCI registry
+# uses the docker cli to check the manifest
+function artifact_exists_oci() {
+  local image="${1}"
+
+  function exists() {
+    docker manifest inspect "${image}"
+  }
+
+  # Docker error on missing artifact is exit code == 1 and stderr:
+  # no such manifest: ${image}
+  _artifact_check_with_retries exists "no such manifest"
+}
+
 # Retries an exists command a with backoff.
 #
 # The retry count is given by LANUZA_ARTIFACT_RETRIES (default 5), the
diff --git a/lanuza/utils/docker-compose.inc b/lanuza/utils/docker-compose.inc
index 5004e8e..72b84cb 100644
--- a/lanuza/utils/docker-compose.inc
+++ b/lanuza/utils/docker-compose.inc
@@ -146,7 +146,8 @@ function _docker-compose_build() {
     debug "Using the following docker-compose file to specify labels"
     debug "$yaml"
     # Include new yaml in the docker-compose files
-    temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX)
+    temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX")
+
     # To cleanup at exit
     temp_files+=( $temp_compose )
     echo "$yaml" > $temp_compose
@@ -218,7 +219,8 @@ function _docker-compose_build() {
     debug "$yaml"
 
     # Include new yaml in the docker-compose files
-    temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.cache.$LANUZA_BUILD_ID.XXXXXXXXX)
+    temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.cache.$LANUZA_BUILD_ID.XXXXXXXXX")
+
     # To cleanup at exit
     temp_files+=( $temp_compose )
     echo "$yaml" > $temp_compose
diff --git a/lanuza/utils/docker.inc b/lanuza/utils/docker.inc
index b57807d..453b8ca 100644
--- a/lanuza/utils/docker.inc
+++ b/lanuza/utils/docker.inc
@@ -34,6 +34,9 @@ fi
 # lanuza builder name to use when building via docker compose build
 export LANUZA_BUILDX_DRIVER=${LANUZA_BUILDX_DRIVER:-lanuza}
 
+# lanuza builder image to use when building via docker compose build
+export LANUZA_BUILDX_DRIVER_IMAGE=${LANUZA_BUILDX_DRIVER_IMAGE:-moby/buildkit:v0.11.6}
+
 # flag to know when should we autocleanup docker compose
 LANUZA_DOCKER_COMPOSE__CLEANUP=0
 
@@ -138,13 +141,19 @@ function _docker_compose_build() {
       local branch
       branch=$(git_get_branch)
       if [[ ! -z ${CI+x} ]] && [[ " ${branches[*]} " =~ " ${branch} " ]]; then
-        build_cache_to[${service}]+="        - ${dest_image}:${branch}"$'\n'
+        local tag_to=$(_docker_compose_normalize_tag "$branch")
+        build_cache_to[${service}]+="        - ${dest_image}:${tag_to}"$'\n'
       fi
     done
   fi
 
   # Only build it there are buildable services
   if [[ ${#services[@]} -ne 0 ]]; then
+    # build automatically with VERSION
+    # TODO: VERSION should be 0.0.0-snapshot, but this is a breaking change
+    set -- "build" "--build-arg" "VERSION=${VERSION:-snapshot}" "${@:2}"
+    set -- "build" "--build-arg" "DOCKER_UID=${DOCKER_UID}" "${@:2}"
+    set -- "build" "--build-arg" "DOCKER_GID=${DOCKER_GID}" "${@:2}"
 
     declare -A build_labels
     local date version revision
@@ -176,21 +185,20 @@ function _docker_compose_build() {
     done
 
     # Include new yaml in the docker-compose files
-    temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX)
+    temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX")
+
     # To cleanup at exit
     temp_files+=( $temp_compose )
-    COMPOSE_FILE=${COMPOSE_FILE:-docker-compose.yml}
-    export COMPOSE_FILE="${COMPOSE_FILE}${COMPOSE_PATH_SEPARATOR}${temp_compose}"
 
     debug "Using the following docker compose file to customize build"
     debug "$yaml"
     echo "$yaml" > $temp_compose
 
-    # build automatically with VERSION
-    # TODO: VERSION should be 0.0.0-snapshot, but this is breaking change
-    set -- "build" "--build-arg" "VERSION=${VERSION:-snapshot}" "${@:2}"
-    set -- "build" "--build-arg" "DOCKER_UID=${DOCKER_UID}" "${@:2}"
-    set -- "build" "--build-arg" "DOCKER_GID=${DOCKER_GID}" "${@:2}"
+    IFS="${COMPOSE_PATH_SEPARATOR}" read -r -a compose_files <<< "${COMPOSE_FILE:-docker-compose.yml}${COMPOSE_PATH_SEPARATOR}${temp_compose}"
+    # reverse order to keep the original COMPOSE_FILE order
+    for ((i=${#compose_files[@]}-1; i>=0; i--)); do
+      set -- "-f" "${compose_files[$i]}" "${@:1}"
+    done
     _docker_setup_builder
   fi
   _docker_compose "$@"
@@ -276,10 +284,14 @@ function _docker_setup_builder() {
   local use="docker buildx use ${LANUZA_BUILDX_DRIVER}"
   debug "${use}"
   if ! command ${use} &> /dev/null ; then
-    local create="docker buildx create --name ${LANUZA_BUILDX_DRIVER} --use --bootstrap"
+    local create="docker buildx create --name ${LANUZA_BUILDX_DRIVER} --use --bootstrap --driver-opt=image=${LANUZA_BUILDX_DRIVER_IMAGE}"
     debug "${create}"
     command ${create} &> /dev/null
   fi
+  # Booting builder before execution to avoid failures when building multiple services in docker compose.
+  local inspect="docker buildx inspect --bootstrap"
+  debug "${inspect}"
+  command ${inspect} &> /dev/null
 }
 
 # Checks for failed containers and writes logs for them the the provided dir
diff --git a/lanuza/utils/github.inc b/lanuza/utils/github.inc
index dbbff87..e2e8f3f 100644
--- a/lanuza/utils/github.inc
+++ b/lanuza/utils/github.inc
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-# Gets the filed modified by a pull request
+# Gets the files modified by a pull request
 function github_get_pr_files() {
   local pr=$1
   # TODO: no more than 3000 files can be identified using GitHub api
@@ -19,3 +19,12 @@ function github_get_pr_files() {
     }
   }' --jq '.data.repository.pullRequest.files.nodes.[].path'
 }
+
+# Gets the files modified between two refs (branches, tags, commits...)
+# Usage: 
+#   github_get_branches_files main feature1 
+function github_get_branches_files() {
+  local BASE=$1
+  local HEAD=$2
+  PAGER= gh.sh api --paginate "/repos/{owner}/{repo}/compare/${BASE}...${HEAD}" --jq '.files.[].filename'
+}
diff --git a/lanuza/utils/workspace.inc b/lanuza/utils/workspace.inc
index 7192d2d..280daff 100644
--- a/lanuza/utils/workspace.inc
+++ b/lanuza/utils/workspace.inc
@@ -17,20 +17,24 @@ function _workspace_init() {
   # compute only if an empty array
   if [[ "${#lanuza_ws_projects[@]}" == 0 ]]; then
     function get_workspaces() {
-      debug "Finding all workspaces..."
-
-      local workspace
-      while IFS= read -r -d '' workspace; do
-        local path name
-        path=$(dirname "${workspace}")
-        name=$(jq -r -e '.name' "${workspace}") || fail "Cannot get mandatory \"name\" property for ${workspace}"
-        debug "  //${name}: ${path}"
-        echo "${name} ${path}"
-      done < <(fs_list --null -u -g 'workspace.lanuza')
+      (
+        set -euo pipefail
+        cd "${LANUZA_ROOT}"
+        debug "Finding all workspaces..."
+
+        local workspace
+        fs_list --no-ignore-dot -g 'workspace.lanuza' | canonical.sh | sort -u | while read -r workspace; do
+          local path name
+          path=$(dirname "${workspace}")
+          name=$(jq -r -e '.name' "${workspace}") || fail "Cannot get mandatory \"name\" property for ${workspace}"
+          debug "  //${name}: ${path}"
+          echo "${name} ${path}"
+        done
+      )
     }
 
     local workspaces
-    workspaces=$(_workspace_cache_get "workspaces" "get_workspaces")
+    workspaces=$(_workspace_cache_get "workspaces" "get_workspaces") || fail "Cannot get workspaces"
 
     # load to our internal associative array
     local line
@@ -48,34 +52,40 @@ function _workspace_init() {
 function _workspace_init_tags() {
   if [[ "${#lanuza_ws_tags[@]}" == 0 ]]; then
     function get_tags() {
-      debug "Finding all tags..."
-
-      for name in "${!lanuza_ws_projects[@]}"; do
-        local path=${lanuza_ws_projects[${name}]}
-
-        local btag=""
-        if [[ -f "${path}/lanuza/pipelines/publish.sh" ]]; then
-          btag="lanuza:pipelines:publish"
-        fi
-
-        local ptag=""
-        if [[ -f "${path}/lanuza/pipelines/build.sh" ]]; then
-          ptag="lanuza:pipelines:build"
-        fi
-
-        tags=$(jq -r  \
-          --arg btag "${btag}" \
-          --arg ptag "${ptag}" \
-          '[ $btag, $ptag ] + try .tags | [ .[] |  select(length > 0) ] | sort | join(",")' \
-          "${path}/workspace.lanuza"
-        )
-        debug "  //${name}: ${tags}"
-        echo "${name} ${tags}"
-      done
+      (
+        set -euo pipefail
+        cd "${LANUZA_ROOT}"
+        debug "Finding all tags..."
+
+        for name in "${!lanuza_ws_projects[@]}"; do
+          local path=${lanuza_ws_projects[${name}]}
+
+          local btag=""
+          if [[ -f "${path}/lanuza/pipelines/publish.sh" ]]; then
+            btag="lanuza:pipelines:publish"
+          fi
+
+          local ptag=""
+          if [[ -f "${path}/lanuza/pipelines/build.sh" ]]; then
+            ptag="lanuza:pipelines:build"
+          fi
+
+          local tags
+          tags=$(jq -r  \
+            --arg btag "${btag}" \
+            --arg ptag "${ptag}" \
+            '[ $btag, $ptag ] + try .tags | [ .[] |  select(length > 0) ] | sort | join(",")' \
+            "${path}/workspace.lanuza"
+          ) || return 1
+
+          debug "  //${name}: ${tags}"
+          echo "${name} ${tags}"
+        done
+      )
     }
 
     local tags
-    tags=$(_workspace_cache_get "tags" "get_tags")
+    tags=$(_workspace_cache_get "tags" "get_tags") || fail "Cannot get tags"
 
     # load to our internal associative array
     local line
@@ -91,9 +101,10 @@ function _workspace_init_tags() {
 # print the names of all the available projects with a workspace
 function workspace_list() {
   local tagString="${1:-""}"
-  # # make the intersection for both ordered sets to get the modified files
+  # make the intersection for both ordered sets to get the modified files
   (
-    cd "$LANUZA_ROOT"
+    set -euo pipefail
+    cd "${LANUZA_ROOT}"
     _workspace_init
     if [[ "$tagString" != "" ]]; then
       _workspace_init_tags
@@ -123,7 +134,8 @@ function workspace_get_sha() {
   name=${1:-$(workspace_name)}
   mode=${2:-"standard"}
   (
-    cd "$LANUZA_ROOT"
+    set -euo pipefail
+    cd "${LANUZA_ROOT}"
 
     local sha
 
@@ -151,7 +163,8 @@ function workspace_path() {
 # Runs a command in all workspaces
 function workspace_run() {
   (
-    cd "$LANUZA_ROOT"
+    set -euo pipefail
+    cd "${LANUZA_ROOT}"
 
     local pipeline=$1
 
@@ -176,7 +189,8 @@ function workspace_exec() {
   local pipeline=$2
   local path
   (
-    cd "$LANUZA_ROOT"
+    set -euo pipefail
+    cd "${LANUZA_ROOT}"
 
     _workspace_init
     _workspace_assert_workspace_exists ${name}
@@ -191,9 +205,26 @@ function workspace_exec() {
   )
 }
 
+# echoes the VERSION env var declared for a publish pipeline in a workspace
+function workspace_version() {
+  local name path 
+  name=${1:-$(workspace_name)}
+  _workspace_init
+  _workspace_assert_workspace_exists ${name}
+  path=${lanuza_ws_projects[${name}]}
+  (
+    set -e
+    cd "${path}"
+    if [[ -f "./lanuza/pipelines/publish.sh" ]]; then
+      _workspace_load_publish_init
+    fi
+    echo "${VERSION:-""}"
+  )
+}
+
 # echoes the current namespace manifest with env from publish pipeline interpolated
 function workspace_manifest() {
-  local name manifest path local_path
+  local name manifest path
   name=${1:-$(workspace_name)}
   _workspace_init
   _workspace_assert_workspace_exists ${name}
@@ -201,36 +232,41 @@ function workspace_manifest() {
   (
     set -e
     cd "${path}"
-    manifest=$(cat "./workspace.lanuza")
-    # try to laod the publish pipeline to get its env vars
     if [[ -f "./lanuza/pipelines/publish.sh" ]]; then
-      debug "Loading \"${name}\" publish pipeline init function for manifest env var interpolation"
-      # remove any init function that may have been defined because this method
-      # is called from another lanuza script
-      unset -f init
-      # remove the lanuza base source to only get function definitions
-      # and load the pipeline in the subshell
-      source <(cat ./lanuza/pipelines/publish.sh | sed '/source $(dirname $0)\/..\/base.inc/d')
-      # call the init function to get all en vars in our shell session
-      if isFunction "init"; then
-        # list of env vars to clean to avoid side-effects when calling from
-        # componentes
-        # TODO: This list should be created on-demand by parsing init contents and see what
-        # variables are getting the default or maybe exposing a configuration env_var per pipeline
-        local clean=("VERSION" "TAG")
-        local env_var
-        for env_var in "${clean[@]}"; do
-          unset "${env_var}"
-        done
-        init
-      fi
+      _workspace_load_publish_init
     fi
     # load manifest and interpolate the env vars in the current shell
     # TODO: inherited values will be interpolated also
+    manifest=$(cat "./workspace.lanuza")
     echo $(envsubst <<< "${manifest}")
   )
 }
 
+
+# try to load the publish pipeline to get its env vars
+function _workspace_load_publish_init() {
+  debug "Loading \"${name}\" publish pipeline init function"
+  # remove any init function that may have been defined because this method
+  # is called from another lanuza script
+  unset -f init
+  # remove the lanuza base source to only get function definitions
+  # and load the pipeline in the subshell
+  source <(cat ./lanuza/pipelines/publish.sh | sed '/source $(dirname $0)\/..\/base.inc/d' | sed '/source "$(dirname $0)"\/..\/base.inc/d' | sed '/source "$(dirname "$0")"\/..\/base.inc/d')
+  # call the init function to get all env vars in our shell session
+  if isFunction "init"; then
+    # list of env vars to clean to avoid side-effects when calling from
+    # components
+    # TODO: This list should be created on-demand by parsing init contents and see what
+    # variables are getting the default or maybe exposing a configuration env_var per pipeline
+    local clean=("VERSION" "TAG")
+    local env_var
+    for env_var in "${clean[@]}"; do
+      unset "${env_var}"
+    done
+    init
+  fi
+}
+
 # echoes the current namespace tags
 function workspace_tags() {
   local name
@@ -286,7 +322,7 @@ function workspace_files() {
   mode=${2:-"standard"}
   circular=${3:-""} # simple recursive call stack
 
-  local path
+  local path list_path
   local stack="${circular} -> //${name}"
   # check if our manual simple call stack contains the current workspace
   # to detect circular dependencies
@@ -304,21 +340,37 @@ function workspace_files() {
 
       path=${lanuza_ws_projects[${name}]}
 
+      # if path == '.' we are in the root dir, and we need to get all files without telling
+      # rg that the path is ".", because it will prefix all results with ./
+      if [[ "${path}" == "." ]]; then
+        list_path=""
+      else
+        list_path="${path}"
+      fi
+
       debug " ${stack}"
       if [[ "${mode}" == "pr" ]]; then
-        # In "pr" mode, we get all workspace files. This way we can get all component files that maybe be ignored (test, etc... )
-        # and allow CI processes to run testing because it may exists test modified files that are ignored
-        fs_list -u ${path} | canonical.sh
+        # In "pr" mode, we get all workspace files, not using the .ignore files, in a canonical (symlink-resolved) format,
+        # to allow comparing them with the modified files in the PR.
+        # This way we get all component files that may be ignored (tests, etc.) by users
+        # and allow CI processes to run tests: modified test files may exist that are ignored when
+        # computing the SHA version of a published artifact, but they must not be ignored to run tests
+        fs_list --no-ignore-dot ${list_path} | canonical.sh
         # switch to canonical mode to process deps, where we dont care about test modified files
         mode="canonical"
       elif [[ "${mode}" == "component" ]]; then
-        fs_list -u ${path}
+        # In "component" mode, we get all workspace files, not using the .ignore files.
+        # This way we get all component files that may be ignored (tests, etc.) by users
+        # and can compute things like the whole component sha (including test files)
+        fs_list --no-ignore-dot ${list_path}
         mode="standard"
       elif [[ "${mode}" == "canonical" ]]; then
-        fs_list ${path} | canonical.sh
+        # In "canonical" mode, we get all workspace files resolving symlinks
+        fs_list ${list_path} | canonical.sh
         mode="canonical"
       else
-        fs_list ${path}
+        # In "standard" mode, we get all workspace files honoring the .***ignore files
+        fs_list ${list_path}
         mode="standard"
       fi
 
@@ -342,9 +394,9 @@ function workspace_files() {
         # deactivate .ignore files, to allow referencing any file
         debug " ${stack} -> ${globs[@]}"
         if [[ "${mode}" == "canonical" ]]; then
-          fs_list -u "${globs[@]}" | canonical.sh || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps"
+          fs_list -u --hidden "${globs[@]}" | canonical.sh || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps"
         else
-          fs_list -u "${globs[@]}" || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps"
+          fs_list -u --hidden "${globs[@]}" || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps"
         fi
       fi
     ) | sort -u # return sorted and without dupes
@@ -383,7 +435,7 @@ function _workspace_cache_get() {
     mkdir -p "${LANUZA_WORKSPACE_CACHE_DIR}"
     # to avoid creating the file when the $getter fails
     local output
-    output=$($getter)
+    output=$($getter) || return 1
     echo "$output" > "${file}"
   fi
   cat "${file}"
diff --git a/lanuza/workspaces/artifacts.sh b/lanuza/workspaces/artifacts.sh
index 832295d..79f6a9a 100755
--- a/lanuza/workspaces/artifacts.sh
+++ b/lanuza/workspaces/artifacts.sh
@@ -6,8 +6,8 @@
 # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
 
 ARG_DEFS=(
-  # comma separated list of workspaces
-  "[--workspace=(.+)]"
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
   # comma separated list of tags to filter workspaces (OR)
   "[--tags=(.+)]"
   "[--format=(text|json)]"
@@ -19,17 +19,19 @@ function init() {
 }
 
 function run() {
-  local workspaces
+  local workspaces 
 
   if [[ -z ${WORKSPACE+x} ]]; then
     # no WORKSPACE specified, use builtin API to filter by tags
     workspaces=$(workspace_list "${TAGS}")
   elif [[ "${TAGS}" == "" ]]; then
     # no TAGS. use the specified WORKSPACE
-    workspaces=$(echo ${WORKSPACE} | tr ',' '\n' | sort -u)
+    workspaces=$(read_workspaces)
   else
     # both TAGS and WORKSPACE specified. filter manually
-    workspaces=$(comm -12 <(workspace_list "${TAGS}") <(echo ${WORKSPACE} | tr ',' '\n' | sort -u ))
+    local input_workspaces
+    input_workspaces=$(read_workspaces)
+    workspaces=$(comm -12 <(workspace_list "${TAGS}") <(echo "${input_workspaces}" | sort -u ))
   fi
 
   local workspace
@@ -52,4 +54,14 @@ function for_workspace() {
         'try .artifacts[] + {"workspace": $WORKSPACE }'
 }
 
+function read_workspaces() {
+  # Print the requested workspaces one per line, sorted and without duplicates.
+  # WORKSPACE is "-" (read the list from stdin) or a comma separated list.
+  if [[ "${WORKSPACE}" == "-" ]]; then
+    cat
+  else
+    printf '%s\n' "${WORKSPACE}"
+  fi | tr ',' '\n' | sort -u
+}
+
 source $(dirname $0)/../base.inc
diff --git a/lanuza/workspaces/list.sh b/lanuza/workspaces/list.sh
index 0f4816e..03074b7 100755
--- a/lanuza/workspaces/list.sh
+++ b/lanuza/workspaces/list.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env -S -u TAGS bash
+#!/usr/bin/env -S -u TAGS -u FORMAT bash
 
 # list all workspace names
 #
@@ -8,10 +8,20 @@
 ARG_DEFS=(
   # comma separated list of tags to filter workspaces (OR)
   "[--tags=(.+)]"
+  "[--format=(text|json)]"
 )
 
+function init() {
+  TAGS="${TAGS:-""}"
+  FORMAT="${FORMAT:-text}"
+}
+
 function run() {
-  workspace_list "${TAGS:-""}"
+  # json: fold the non-empty names into one compact array; otherwise print one name per line
+  case "${FORMAT}" in
+    json) workspace_list "${TAGS}" | jq -cnR '[inputs | select(length>0)]' ;;
+    *) workspace_list "${TAGS}" ;;
+  esac
 }
 
 source $(dirname $0)/../base.inc
diff --git a/lanuza/workspaces/modified.sh b/lanuza/workspaces/modified.sh
index a108b2d..838e96d 100755
--- a/lanuza/workspaces/modified.sh
+++ b/lanuza/workspaces/modified.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env -S -u PR -u TAGS bash
+#!/usr/bin/env -S -u PR -u TAGS -u FORMAT -u WS_MODE bash
 
 # list all workspaces modified in a pull request
 #
@@ -7,17 +7,43 @@
 # but keep GITHUB_TOKEN as it's a credential and not a parameter
 
 ARG_DEFS=(
-  "--pr=(.+)"
+  # GitHub PR number. Ex: 123
+  "[--pr=(.+)]"
+  # can be any valid git ref (branch, tag, commit SHA) pushed to GitHub. Ex: main
+  "[--base=(.+)]"
+  # can be any valid git ref (branch, tag, commit SHA) pushed to GitHub. Ex: feature1
+  "[--head=(.+)]"
   # comma separated list of tags to filter workspaces (OR)
   "[--tags=(.+)]"
   "--github-token=(.+)"
+  "[--format=(text|json)]"
+  "[--ws-mode=(canonical|pr|component|standard)]"
 )
 
+function init() {
+  WS_MODE=${WS_MODE:-"pr"}
+  FORMAT="${FORMAT:-text}"
+}
+
 function run() {
  # declare local and use later to allow bubbling up errors in bash.
-  local pr_files
-  echoerr "Getting files for PR ${PR}"
-  pr_files=$(github_get_pr_files "${PR}")
+  local remote_files
+
+  # check if PR is Defined
+  if [[ -z ${PR+x} ]]; then
+    # no PR specified, if no BASE and HEAD are specified, fail
+    if [[ -z ${BASE+x} || -z ${HEAD+x} ]]; then
+      fail "Either --pr or --base and --head must be provided"
+    fi
+    remote_files=$(github_get_branches_files "${BASE}" "${HEAD}")
+  else
+    # PR specified. use the specified PR
+    # if PR and BASE or HEAD are specified, fail
+    if [[ -n ${BASE+x} || -n ${HEAD+x} ]]; then
+      fail "Either --pr or --base and --head must be provided"
+    fi
+    remote_files=$(github_get_pr_files "${PR}")
+  fi
 
   # GitHub, when a submodule is updated, only returns the submodule folder modified
   # When a submodule is updated, get all the current modified folder files and add to the
@@ -37,15 +63,15 @@ function run() {
         local submodule_files=$(fs_list "${submodule}")
         # remove the submodule from the list.
         # Use # as sed separator as submodule can be a path with /, the default separator
-        pr_files=$(echo "${pr_files}" | sed "s#${submodule}##" )
+        remote_files=$(echo "${remote_files}" | sed "s#${submodule}##" )
         # add all the submodule files to the list
-        pr_files="${pr_files}"$'\n'"${submodule_files}"
+        remote_files="${remote_files}"$'\n'"${submodule_files}"
       fi
     done <<< "${submodules}"
-  done <<< "${pr_files}"
+  done <<< "${remote_files}"
 
   # remove possible empty lines and sort, to prepare for later comparison
-  pr_files=$(echo "${pr_files}" | sed '/^$/d' | sort)
+  remote_files=$(echo "${remote_files}" | sed '/^$/d' | sort)
 
   # get and echo later to bubble up errors up
   local workspaces
@@ -53,6 +79,7 @@ function run() {
 
   # keep in the main shell with process substitution to allow writting vars from main shell
   local workspace
+  local output=""
   while read workspace; do
     [[ "${workspace}" == "" ]] && continue
     local ws_files modified_files count
@@ -60,10 +87,10 @@ function run() {
     # GitHub does not know about symlinks, so we have to translate our
     # files to its canonical name to check if they have been modified (canonical)
     # and also get files without honoring .ignore files for that specific component
-    ws_files=$(workspace_files "${workspace}" "pr")
+    ws_files=$(workspace_files "${workspace}" "${WS_MODE}")
 
     # make the intersection for both ordered sets to get the modified files
-    modified_files=$(comm -12 <(echo "${ws_files}") <(echo "${pr_files}"))
+    modified_files=$(comm -12 <(echo "${ws_files}") <(echo "${remote_files}"))
 
     if [[ "${modified_files}" != "" ]]; then
       # echo "" | wc -l => returs 1, so
@@ -77,10 +104,17 @@ function run() {
     echoerr "Found ${count} modified files in workspace \"${workspace}\":" ${modified_files}
 
     if [[ ${count} != "0" ]]; then
-      echo "${workspace}"
+      output="${output}${workspace}"$'\n'
     fi
 
   done <<< "${workspaces}"
+
+  if [[ "${FORMAT}" == "json" ]]; then
+    # sort and remove empty lines and convert to json array
+    echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' | jq -cnR '[inputs | select(length>0)]'
+  else
+    echo "${output}" | sort -u | sed '/^[[:space:]]*$/d'
+  fi
 }
 
 source $(dirname $0)/../base.inc
diff --git a/lanuza/workspaces/output-files.sh b/lanuza/workspaces/output-files.sh
index 3ea0694..194b6e5 100755
--- a/lanuza/workspaces/output-files.sh
+++ b/lanuza/workspaces/output-files.sh
@@ -6,8 +6,8 @@
 # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
 
 ARG_DEFS=(
-  # workspaces
-  "[--workspace=(.+)]"
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
   # the artifact type files to return.
   "[--type=(output|.+)]"
   # compress the involved files and return the final filename
@@ -17,9 +17,11 @@ ARG_DEFS=(
 
 function run() {
   local artifacts workspaces files
+
   if [[ -z ${WORKSPACE+x} ]]; then
-    # no WORKSPACE specified
     workspaces=$(workspace_list)
+  elif [[ "${WORKSPACE}" == "-" ]]; then
+    workspaces=$(cat)
   else
     workspaces=$(echo ${WORKSPACE} | tr ',' '\n' | sort -u)
   fi
diff --git a/lanuza/workspaces/path.sh b/lanuza/workspaces/path.sh
new file mode 100755
index 0000000..6e3bedd
--- /dev/null
+++ b/lanuza/workspaces/path.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash
+
+# returns the path for a set of workspaces
+#
+# Uses the shebang line to unset env vars that may cause interference before base.inc sourcing
+# https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
+
+ARG_DEFS=(
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
+  "[--format=(text|json)]"
+)
+
+function init() {
+  export FORMAT="${FORMAT:-text}"
+}
+
+function run() {
+  # Print each workspace path: one {"name": "path"} object for --format=json, a table otherwise
+  local workspaces workspace ws_path output data=""
+  if [[ -z ${WORKSPACE+x} ]]; then
+    workspaces=$(workspace_list)
+  elif [[ "${WORKSPACE}" == "-" ]]; then
+    workspaces=$(cat)
+  else
+    workspaces=$(printf '%s\n' "${WORKSPACE}" | tr ',' '\n')
+  fi
+
+  while read -r workspace; do
+    [[ "${workspace}" == "" ]] && continue
+    ws_path=$(workspace_path "${workspace}")
+    # let jq quote names/paths (valid JSON even with quotes or backslashes)
+    data+=$(jq -cn --arg k "${workspace}" --arg v "${ws_path}" '{($k): $v}')$'\n'
+  done <<< "$workspaces"
+  # "add // {}" turns an empty workspace list into {} instead of null
+  output=$(printf '%s' "${data}" | jq -cs 'add // {}')
+  if [[ "${FORMAT}" == "json" ]]; then
+    echo "${output}"
+  else
+    echo "${output}" | jq -r 'to_entries[] | [.key, .value] | @tsv' | column -s$'\t' -t
+  fi
+}
+
+source $(dirname $0)/../base.inc
diff --git a/lanuza/workspaces/status.sh b/lanuza/workspaces/status.sh
index 154a9d2..f065dfe 100755
--- a/lanuza/workspaces/status.sh
+++ b/lanuza/workspaces/status.sh
@@ -7,7 +7,8 @@
 # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
 
 ARG_DEFS=(
-  "[--workspace=(.+)]"
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
   # comma separated list of tags to filter workspaces (OR)
   "[--tags=(.+)]"
   "[--filter=(published|missing|unknown|failed)]"
diff --git a/lanuza/workspaces/tags.sh b/lanuza/workspaces/tags.sh
index 73b0f04..a988fae 100755
--- a/lanuza/workspaces/tags.sh
+++ b/lanuza/workspaces/tags.sh
@@ -1,4 +1,4 @@
-#!/usr/bin/env -S -u WORKSPACE bash
+#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash
 
 # gets all tags for a set of workspaces
 #
@@ -6,32 +6,38 @@
 # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
 
 ARG_DEFS=(
-  # comma separated list of workspaces
-  "[--workspace=(.+)]"
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
+  "[--format=(text|json)]"
 )
 
+function init() {
+  FORMAT="${FORMAT:-text}"
+}
+
 function run() {
   local workspaces
 
   if [[ -z ${WORKSPACE+x} ]]; then
     workspaces=$(workspace_list)
+  elif [[ "${WORKSPACE}" == "-" ]]; then
+    workspaces=$(cat)
   else
     workspaces=$(echo ${WORKSPACE} | tr ',' '\n')
   fi
 
-  (
-    set -e
-    local workspace
-    while read workspace; do
-      if [[ "${workspace}" != "" ]]; then
-        local tags
-        tags=$(workspace_tags "${workspace}")
-        if [[ "${tags}" != "" ]]; then
-          echo "${tags}"
-        fi
-      fi
-    done <<< "$workspaces"
-  ) | sort -u
+  local workspace output=""
+  while read -r workspace; do
+    # the trailing newline keeps the tags of consecutive workspaces on separate lines
+    [[ "${workspace}" != "" ]] && output="${output}$(workspace_tags "${workspace}")"$'\n'
+  done <<< "$workspaces"
+
+  if [[ "${FORMAT}" == "json" ]]; then
+    # sorted, de-duplicated, blank lines dropped, then folded into a JSON array
+    echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' | jq -cnR '[inputs | select(length>0)]'
+  else
+    echo "${output}" | sort -u | sed '/^[[:space:]]*$/d'
+  fi
 }
 
 source $(dirname $0)/../base.inc
diff --git a/lanuza/workspaces/version.sh b/lanuza/workspaces/version.sh
new file mode 100755
index 0000000..d94298b
--- /dev/null
+++ b/lanuza/workspaces/version.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash
+
+# returns VERSION for a set of workspaces
+#
+# Uses the shebang line to unset env vars that may cause interference before base.inc sourcing
+# https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation
+
+ARG_DEFS=(
+  # comma separated list of workspaces or - to read from stdin
+  "[--workspace=(.+|-)]"
+  "[--format=(text|json)]"
+)
+
+function init() {
+  export FORMAT="${FORMAT:-text}"
+}
+
+function run() {
+  # Print each workspace version: one {"name": "version"} object for --format=json, a table otherwise
+  local workspaces workspace version output data=""
+  if [[ -z ${WORKSPACE+x} ]]; then
+    workspaces=$(workspace_list)
+  elif [[ "${WORKSPACE}" == "-" ]]; then
+    workspaces=$(cat)
+  else
+    workspaces=$(printf '%s\n' "${WORKSPACE}" | tr ',' '\n')
+  fi
+
+  while read -r workspace; do
+    [[ "${workspace}" == "" ]] && continue
+    version=$(workspace_version "${workspace}")
+    # let jq quote names/versions (valid JSON even with quotes or backslashes)
+    data+=$(jq -cn --arg k "${workspace}" --arg v "${version}" '{($k): $v}')$'\n'
+  done <<< "$workspaces"
+  # workspaces without a version are dropped; "add // {}" maps an empty list to {} instead of null
+  output=$(printf '%s' "${data}" | jq -cs '(add // {}) | with_entries(select(.value != ""))')
+  if [[ "${FORMAT}" == "json" ]]; then
+    echo "${output}"
+  else
+    echo "${output}" | jq -r 'to_entries[] | [.key, .value] | @tsv' | column -s$'\t' -t
+  fi
+}
+
+source $(dirname $0)/../base.inc
diff --git a/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc b/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc
new file mode 100644
index 0000000..7250817
--- /dev/null
+++ b/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc
@@ -0,0 +1,351 @@
+{
+  "namespace": "com.plainAVRO",
+  "name": "Contact_Center_CDR",
+  "type": "record",
+  "doc": "Data records that contain information about each interaction (such as calls or interactions/contacts through other channels/apps) received by a Contact Center",
+  "x-fp-version": "5.9.0",
+  "fields": [
+    {
+      "name": "OPERATOR_ID",
+      "aliases": [
+        "operator_id"
+      ],
+      "type": "string",
+      "doc": "Global Operator Identifier"
+    },
+    {
+      "name": "INTERACTION_TM",
+      "aliases": [
+        "interaction_tm"
+      ],
+      "type": {
+        "type": "string",
+        "logicalType": "datetime"
+      },
+      "doc": "Year, month, day and time of the interaction (contact action)"
+    },
+    {
+      "name": "CUSTOMER_ID",
+      "aliases": [
+        "customer_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Customer ID (if known when the call occurs).\nAs much as possible, each customer will maintain the same identifier within any business (fixed, mobile, cloud, IoT, ...), which will allow to build a complete vision of the customer. If a client has a prepaid number and a postpaid and/or hybrid number, the client ID should be unique (common for both lines)."
+    },
+    {
+      "name": "SUBSCRIBER_ID",
+      "aliases": [
+        "subscriber_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Unique identifier of the line associated to the call (in the case of lines already registered), as it comes in the origin system"
+    },
+    {
+      "name": "INTERACTION_ID",
+      "aliases": [
+        "interaction_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "ID/Ref number of the \"event\"/contact action (as it is registered in call center systems). Typically used for later tracking/monitoring of status"
+    },
+    {
+      "name": "ACTIVATION_DT",
+      "aliases": [
+        "activation_dt"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "iso-date"
+        }
+      ],
+      "doc": "(Fixed or mobile) Line activation date"
+    },
+    {
+      "name": "ADMINISTRA_ID",
+      "aliases": [
+        "administra_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Unique identifier of the Broadband or TV circuit code"
+    },
+    {
+      "name": "SOURCE_SYSTEM_ID",
+      "aliases": [
+        "source_system_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "System or origin/type of the event, as registered in the call center platform (eg: Tkt_Remedy, Novum, etc.)"
+    },
+    {
+      "name": "AGENT_ID",
+      "aliases": [
+        "agent_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Id of the \"agent\" handling the call (or contact action)"
+    },
+    {
+      "name": "AGENT_GROUP_ID",
+      "aliases": [
+        "agent_group_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Id of the \"agent group\" (to which the agent handling the call belongs)"
+    },
+    {
+      "name": "EXT_USER_ID",
+      "aliases": [
+        "ext_user_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Id of the \"user\" who makes the \"call\"/contact in the external/origin system (in the case of clients from Novum, TV platform, etc. which may have internal IDs of clients). It can be also the \"username\" or e-mail"
+    },
+    {
+      "name": "CALLER_PHONE_ID",
+      "aliases": [
+        "caller_phone_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Phone number (in the case of contact via phone call), without international prefix, used in the contact action. It may not coincide with the phone number associated to the line or SUBSCRIBER_ID/CUSTOMER_ID"
+    },
+    {
+      "name": "IMEI_ID",
+      "aliases": [
+        "imei_id"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "imei"
+        }
+      ],
+      "doc": "If the user interaction is a call from a mobile phone, the IMEI of the device used to make the call.\nInternational Mobile Equipment Identity\nIMPORTANT: IMEI must be normalized, which means:\n* Invalid imeis must be removed (ie. Imeis including letters)\n* Text format\n* Complete to 15 digits by adding zeros at the left\n* Remove control digit (position #15)"
+    },
+    {
+      "name": "DEST_PHONE_ID",
+      "aliases": [
+        "dest_phone_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "In the case of a Contact Center call, the telephone number that receives the call (the number associated with the call center)"
+    },
+    {
+      "name": "DEST_BASE_PHONE_ID",
+      "aliases": [
+        "dest_base_phone_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "In the case of a Contact Center call, the \"base telephone number\" that receives the call (the \"base number\" associated with DEST_PHONE_ID)."
+    },
+    {
+      "name": "LINE_PHONE_ID",
+      "aliases": [
+        "line_phone_id"
+      ],
+      "type": "string",
+      "doc": "Phone number to which the call/interaction is associated (may be different to the phone number used by the caller). It is the phone number associated to the line or CUSTOMER_ID/SUBSCRIBER_ID.\nIn the case of mobile lines, it will be the MSISDN without international prefix"
+    },
+    {
+      "name": "DURATION_QT",
+      "aliases": [
+        "duration_qt"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "duration"
+        }
+      ],
+      "doc": "Duration of the interaction (ISO-8601 value that contains a complete duration representation). In the case of a Contact Center calls, the duration of the call. In other cases, the session duration, etc."
+    },
+    {
+      "name": "CONTACT_IND",
+      "aliases": [
+        "contact_ind"
+      ],
+      "type": [
+        "null",
+        "boolean"
+      ],
+      "doc": "In the case of a Contact Center call, indicates whether the call was or not answered. I.e.: If it lasts less than X seconds, it is considered that the call was not answered, if lasts more or equal than X seconds, it is considered that the customer spoke with someone."
+    },
+    {
+      "name": "HOLD_TIME_QT",
+      "aliases": [
+        "hold_time_qt"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "duration"
+        }
+      ],
+      "doc": "Hold time of the interaction (ISO-8601 value that contains a complete duration representation). In the case of call center calls, hold time during the call."
+    },
+    {
+      "name": "INTERACTION_TYPE_ID",
+      "aliases": [
+        "interaction_type_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Type/topic of the call (i.e: a complaint about billing, purchasing a bundle, etc)"
+    },
+    {
+      "name": "TROUBLE_TICKET_ID",
+      "aliases": [
+        "trouble_ticket_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Identifier of the trouble ticket associated with the interaction, when necessary."
+    },
+    {
+      "name": "REASON_ID",
+      "aliases": [
+        "reason_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "Code of the reason of the interaction (in the case of contact center calls, there is a hierarchy)"
+    },
+    {
+      "name": "CSI_QT",
+      "aliases": [
+        "csi_qt"
+      ],
+      "type": [
+        "null",
+        "float"
+      ],
+      "doc": "The result of the customer satisfaction survey where applicable"
+    },
+    {
+      "name": "CHANNEL_ID",
+      "aliases": [
+        "channel_id"
+      ],
+      "type": [
+        "null",
+        "string"
+      ],
+      "doc": "In the case of multi-channel contact center, identifies the channel used by the user to interact with the Company"
+    },
+    {
+      "name": "BILLABLE_CALL_IND",
+      "aliases": [
+        "billable_call_ind"
+      ],
+      "type": [
+        "null",
+        {
+          "name": "BILLABLE_CALL_IND_ENUM",
+          "type": "enum",
+          "symbols": [
+            "Free",
+            "Billable"
+          ]
+        }
+      ],
+      "doc": "Indicates if the call is billable or free (if known)"
+    },
+    {
+      "name": "USER_4P_ID",
+      "aliases": [
+        "user_4p_id"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "x-fp-user-id": true
+        }
+      ],
+      "doc": "Identifier of the user in 4th Platform (as returned by the OB in the 4th Platform APIs)"
+    },
+    {
+      "name": "CALLER_PHONE_WITH_PREFIX_ID",
+      "aliases": [
+        "caller_phone_with_prefix_id"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "phone-number"
+        }
+      ],
+      "doc": "Phone number (in the case of contact via phone call), WITH_INTERNATIONAL_PREFIX, used in the contact action. It may not coincide with the phone number associated to the line or SUBSCRIBER_ID/CUSTOMER_ID"
+    },
+    {
+      "name": "LINE_PHONE_WITH_PREFIX_ID",
+      "aliases": [
+        "line_phone_with_prefix_id"
+      ],
+      "type": [
+        "null",
+        {
+          "type": "string",
+          "logicalType": "phone-number",
+          "x-fp-identifier": "phone-number"
+        }
+      ],
+      "doc": "Phone number for which the call/interaction is associated (may be different to phone used by the caller). It is the phone number associated to the line or CUSTOMER_ID/SUBSCRIBER_ID.\n       In the case of mobile lines, it will be the MSISDN WITH international prefix."
+    },
+    {
+      "name": "DAY_DT",
+      "aliases": [
+        "day_dt"
+      ],
+      "type": {
+        "type": "string",
+        "logicalType": "iso-date"
+      },
+      "doc": "Year, month and day of the interaction"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json b/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json
new file mode 100644
index 0000000..c22845d
--- /dev/null
+++ b/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json
@@ -0,0 +1,25 @@
+{
+  "namespace": "com.plainAVRO",
+  "name": "Contact_Center_CDR",
+  "type": "record",
+  "doc": "Data records that contain information about each interaction (such as calls or interactions/contacts through other channels/apps) received by a Contact Center",
+  "x-fp-version": "5.9.0",
+  "fields": [
+    {
+      "name": "DAY_DT",
+      "aliases": [
+        "day_dt"
+      ],
+      "type": {
+        "type": "string",
+        "logicalType": "iso-date",
+        "arg.properties": {
+          "range": {
+            "start": "${DATE_RANGE_START}",
+            "end": "${DATE_RANGE_END}"
+          }
+        }
+      }
+    }
+  ]
+}
\ No newline at end of file