From e54cad8f48e0c884e5fa9bd88c7f7126d17a59d5 Mon Sep 17 00:00:00 2001 From: Carlos del Prado Mota Date: Tue, 16 Apr 2024 17:41:51 +0200 Subject: [PATCH] fix: sdk deps & nor-informed generator (#37) * fix: sdk deps & nor-informed generator * fix: generator constructor --- app/build.gradle | 1 + .../utils/avro/NotInformedUtilsBridge.java | 15 + .../avro/random/generator/Generator.java | 6 +- .../confluent/avro/random/generator/Main.java | 12 +- .../CustomExtensionsGeneratorTest.java | 6 +- lanuza/base.inc | 112 +++--- lanuza/pipelines/build.sh | 4 +- lanuza/pipelines/run.sh | 4 +- lanuza/utils/artifact.inc | 14 + lanuza/utils/docker-compose.inc | 6 +- lanuza/utils/docker.inc | 32 +- lanuza/utils/github.inc | 11 +- lanuza/utils/workspace.inc | 200 ++++++---- lanuza/workspaces/artifacts.sh | 22 +- lanuza/workspaces/list.sh | 14 +- lanuza/workspaces/modified.sh | 58 ++- lanuza/workspaces/output-files.sh | 8 +- lanuza/workspaces/path.sh | 44 +++ lanuza/workspaces/status.sh | 3 +- lanuza/workspaces/tags.sh | 38 +- lanuza/workspaces/version.sh | 44 +++ .../v5.9.0/Contact_Center_CDR_5.9.0.avsc | 351 ++++++++++++++++++ .../Contact_Center_CDR/v5.9.0/extensions.json | 25 ++ 23 files changed, 844 insertions(+), 186 deletions(-) create mode 100644 app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java create mode 100755 lanuza/workspaces/path.sh create mode 100755 lanuza/workspaces/version.sh create mode 100644 samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc create mode 100644 samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json diff --git a/app/build.gradle b/app/build.gradle index 08e3b20..3a27996 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -38,6 +38,7 @@ dependencies { implementation group: 'com.googlecode.libphonenumber', name: 'libphonenumber', version: '8.12.25-4p' implementation group: 'com.github.javafaker', name: 'javafaker', version: '1.0.2' implementation group: 'com.googlecode.java-ipv6', name: 'java-ipv6', version: '0.17' + implementation group: 'com.google.code.gson', name: 'gson', version: '2.10.1' testImplementation group: 'org.apache.spark', name: 'spark-sql_2.12', version: '3.3.2' testImplementation group: 'commons-validator', name: 'commons-validator', version: '1.6' diff --git a/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java b/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java new file mode 100644 index 0000000..10fa5a4 --- /dev/null +++ b/app/src/main/java/com/telefonica/baikal/utils/avro/NotInformedUtilsBridge.java @@ -0,0 +1,15 @@ +package com.telefonica.baikal.utils.avro; + +public class NotInformedUtilsBridge implements NotInformedUtils { + + private static NotInformedUtilsBridge instance = null; + + public static synchronized NotInformedUtilsBridge getInstance() { + if (instance == null) { + instance = new NotInformedUtilsBridge(); + return instance; + } + return instance; + } + +} diff --git a/app/src/main/java/io/confluent/avro/random/generator/Generator.java b/app/src/main/java/io/confluent/avro/random/generator/Generator.java index 0e37ab3..71df76f 100644 --- a/app/src/main/java/io/confluent/avro/random/generator/Generator.java +++ b/app/src/main/java/io/confluent/avro/random/generator/Generator.java @@ -18,7 +18,7 @@ import com.mifmif.common.regex.Generex; import com.telefonica.baikal.avro.types.CustomLogicalTypes; -import com.telefonica.baikal.utils.avro.BaikalAvroUtils; +import com.telefonica.baikal.utils.avro.NotInformedUtilsBridge; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; @@ -306,7 +306,7 @@ public Builder schemaFile(File schemaFile) throws IOException { public Builder schemaFile(File schemaFile, Boolean useNotInformedSchema) throws IOException { if (useNotInformedSchema) { String rawSchema = Files.readString(schemaFile.toPath()); - String notInformedSchema = BaikalAvroUtils.createNotInformedSchema(rawSchema).toString(); + String notInformedSchema = NotInformedUtilsBridge.getInstance().createNotInformedSchema(rawSchema).toString(); System.out.println(notInformedSchema); topLevelSchema = parser.parse(notInformedSchema); } else { @@ -326,7 +326,7 @@ public Builder schemaString(String schemaString) { public Builder schemaString(String schemaString, Boolean useNotInformedSchema) { if (useNotInformedSchema) { - String notInformedSchema = BaikalAvroUtils.createNotInformedSchema(schemaString).toString(); + String notInformedSchema = NotInformedUtilsBridge.getInstance().createNotInformedSchema(schemaString).toString(); System.out.println(notInformedSchema); topLevelSchema = parser.parse(notInformedSchema); } else { diff --git a/app/src/main/java/io/confluent/avro/random/generator/Main.java b/app/src/main/java/io/confluent/avro/random/generator/Main.java index a783b5e..640f4aa 100644 --- a/app/src/main/java/io/confluent/avro/random/generator/Main.java +++ b/app/src/main/java/io/confluent/avro/random/generator/Main.java @@ -324,19 +324,25 @@ private static Generator getGenerator(String schema, String schemaFile) throws I return getGenerator(schema, schemaFile, Optional.empty(), Optional.empty()); } - private static Generator getGenerator(String schema, String schemaFile, Optional malformedNotInformedRate, Optional malformedColumnRate) throws IOException { + private static Generator getGenerator(String schema, String schemaFile, Optional notInformedColumnRate, Optional malformedColumnRate) throws IOException { if (schema != null) { return new Generator.Builder().schemaString(schema) .malformedColumnRate(malformedColumnRate) + .notInformedColumnRate(notInformedColumnRate) .build(); } else if (!schemaFile.equals("-")) { return new Generator.Builder() - .schemaFile(new File(schemaFile), malformedNotInformedRate.isPresent()) + .schemaFile(new File(schemaFile), notInformedColumnRate.isPresent()) .malformedColumnRate(malformedColumnRate) + .notInformedColumnRate(notInformedColumnRate) .build(); } else { System.err.println("Reading schema from stdin..."); - return new Generator.Builder().schemaStream(System.in).malformedColumnRate(malformedColumnRate).build(); + return new Generator.Builder() + .schemaStream(System.in) + .malformedColumnRate(malformedColumnRate) + .notInformedColumnRate(notInformedColumnRate) + .build(); } } diff --git a/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java b/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java index 6149de7..d802a70 100644 --- a/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java +++ b/app/src/test/java/io/confluent/avro/random/generator/CustomExtensionsGeneratorTest.java @@ -104,9 +104,9 @@ record = (GenericRecord) generator.generate(); } } - assertEquals("Wrong string distribution", 0.45, ((double) stringResults.size()) / 100, 0.1); - assertEquals("Wrong int distribution", 0.45, ((double) intResults.size()) / 100, 0.1); - assertEquals("Wrong not informed distribution", 0.1, ((double) notInformedResults.size()) / 100, 0.1); + assertEquals("Wrong string distribution", 0.45, ((double) stringResults.size()) / 100, 0.2); + assertEquals("Wrong int distribution", 0.45, ((double) intResults.size()) / 100, 0.2); + assertEquals("Wrong not informed distribution", 0.1, ((double) notInformedResults.size()) / 100, 0.2); assertNotEquals("Empty not informed values", notInformedResults.size(), 0); } diff --git a/lanuza/base.inc b/lanuza/base.inc index 0b3a6c2..aab6a72 100644 --- a/lanuza/base.inc +++ b/lanuza/base.inc @@ -2,7 +2,7 @@ #@IgnoreInspection BashAddShebang # -# LANUZA v4.3.3 +# LANUZA v4.14.0 # # This file provides: # - a default control flow @@ -75,23 +75,37 @@ function fail { # gets the lanuza root dir # https://www.ostricher.com/2014/10/the-right-way-to-get-the-directory-of-a-bash-script/ -function get_root_dir () { +function get_root_dir { local SOURCE DIR - SOURCE="${BASH_SOURCE[0]}" - # While $SOURCE is a symlink, resolve it - while [ -h "$SOURCE" ]; do - DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" - SOURCE="$( readlink "$SOURCE" )" - # If $SOURCE was a relative symlink (so no "/" as prefix, need to resolve it relative to the symlink base directory - [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" - done + # absolute pathname of this script + SOURCE="$( _realpath "${BASH_SOURCE[0]}" )" - # once this file has + # print absolute pathname of parent directory DIR="$( cd -P "$( dirname "$SOURCE" )/../" && pwd )" echo "$DIR" } +# gets the canonicalised absolute pathname of the argument, resolving symlinks +# realpath(1) is part of GNU Coreutils, not always available as preinstalled command in other Unix variants +function _realpath { + local NAME="$1" + local DIR + + # While $NAME is a symlink, resolve it + while [[ -h "$NAME" ]]; do + DIR="$( cd -P "$( dirname "$NAME" )" && pwd )" + NAME="$( readlink "$NAME" )" + # If $NAME was a relative symlink (so no "/" as prefix), need to resolve it relative to the symlink base directory + [[ $NAME != /* ]] && NAME="$DIR/$NAME" + done + + # Resolve relative paths in $NAME and print final absolute pathname + [[ -d "$NAME" ]] && NAME="$( cd -P "$NAME" && pwd )" + DIR="$( cd -P "$( dirname "$NAME" )" && pwd )" + echo "${DIR%/}/$( basename "${NAME%/}" )" +} + function parseArgs { local REQUIRED_ARG_NAMES=() @@ -172,7 +186,7 @@ function parseArgs { # -- run for i in "$@"; do - parseArg $i + parseArg "$i" validateArg setVar "${ARG_NAME_UPPER}" "$ARG_VALUE" done @@ -223,33 +237,35 @@ function isFunction { function gen_lanuza_id() { # Using /dev/urandom hangs the process on GitHub Actions, and we do not need such great entropy # based on https://gist.github.com/markusfisch/6110640 - local N B C='89ab' - - for (( N=0; N < 6; ++N )); do - B=$(( $RANDOM%256 )) - - case $N in - 1) - printf '4%x' $(( B%16 )) - ;; - 3 | 5) - printf '%c%x' ${C:$RANDOM%${#C}:1} $(( B%16 )) - ;; - *) - printf '%02x' $B - ;; - esac - done - - echo + local N B C='89ab' + + for (( N=0; N < 6; ++N )); do + B=$(( $RANDOM%256 )) + + case $N in + 1) + printf '4%x' $(( B%16 )) + ;; + 3 | 5) + printf '%c%x' ${C:$RANDOM%${#C}:1} $(( B%16 )) + ;; + *) + printf '%02x' $B + ;; + esac + done + + echo } function main { function cleanup__() { + local exit_code=$? + mapfile -t cleanup_functions < <( declare -F | grep -E 'cleanup$' | tr -s ' ' | cut -d ' ' -f3 ) for cleanup in "${cleanup_functions[@]}"; do - $cleanup || true + $cleanup $exit_code|| true done } @@ -257,15 +273,29 @@ function main { set +x setVar "LANUZA_ROOT" "$(get_root_dir)" + + # caller 1 # contains the caller script + local source=$(_realpath $(caller 1 | tr -s ' ' | cut -d ' ' -f3 )) + local rootless_source="${source#"${LANUZA_ROOT}/"}" + setVar "LANUZA_SOURCE" "$rootless_source" + setVar "LANUZA_SOURCE_FILE" "$(basename $source .sh)" + + # a random number tied to this execution + setVar "LANUZA_BUILD_ID" "${LANUZA_BUILD_ID:-$(gen_lanuza_id)}" + setVar "LANUZA_UUID" "$(gen_lanuza_id)" + + # Output directory to save lanuza generated files + setVar "LANUZA_OUTPUT_DIR" "${LANUZA_OUTPUT_DIR:-"output"}" + # change directory to base directory of the project BASE_DIR=$(dirname $BASH_SOURCE) cd $BASE_DIR/..; - local current_dir=$(pwd) + local current_dir=$(_realpath $PWD) local project_name if [[ ${LANUZA_ROOT} == ${current_dir} ]]; then - # use the root dir as wht project name - project_name=$(basename "$(pwd)") + # use the root dir as project name + project_name=$(basename "${LANUZA_ROOT}") else # The relative path from root to project without / project_name=$(echo ${current_dir#"${LANUZA_ROOT}/"} | tr '/' _ ) @@ -273,18 +303,6 @@ function main { setVar "LANUZA_PROJECT" "${project_name}" - # caller 1 # contains the caller script - local source=$(caller 1 | tr -s ' ' | cut -d ' ' -f3 ) - setVar "LANUZA_SOURCE" "$source" - setVar "LANUZA_SOURCE_FILE" "$(basename $source .sh)" - - # a random number tied to this execution - setVar "LANUZA_BUILD_ID" "${LANUZA_BUILD_ID:-$(gen_lanuza_id)}" - setVar "LANUZA_UUID" "$(gen_lanuza_id)" - - # Output directory to save lanuza generated files - setVar "LANUZA_OUTPUT_DIR" "${LANUZA_OUTPUT_DIR:-"output"}" - local f # load other utilities for f in $(find ${LANUZA_ROOT}/lanuza/utils -name '*.inc'); do diff --git a/lanuza/pipelines/build.sh b/lanuza/pipelines/build.sh index b29adf4..ff340f2 100755 --- a/lanuza/pipelines/build.sh +++ b/lanuza/pipelines/build.sh @@ -11,8 +11,8 @@ function init() { } function run() { - docker-compose build app - docker-compose run app ./lanuza/scripts/build.sh + docker compose build app + docker compose run app ./lanuza/scripts/build.sh } source $(dirname $0)/../base.inc diff --git a/lanuza/pipelines/run.sh b/lanuza/pipelines/run.sh index ee878ed..28897e5 100755 --- a/lanuza/pipelines/run.sh +++ b/lanuza/pipelines/run.sh @@ -7,8 +7,8 @@ function init() { } function run() { - docker-compose build app 1>&2 - docker-compose run app "$@" + docker compose build app 1>&2 + docker compose run app "$@" } source $(dirname $0)/../base.inc diff --git a/lanuza/utils/artifact.inc b/lanuza/utils/artifact.inc index b2da143..2e39230 100644 --- a/lanuza/utils/artifact.inc +++ b/lanuza/utils/artifact.inc @@ -81,6 +81,20 @@ function artifact_exists_helm() { _artifact_check_with_retries exists "no such manifest" } +# Checks if an artifact exists in an OCI registry +# uses the docker cli to check the manifest +function artifact_exists_oci() { + local image="${1}" + + function exists() { + docker manifest inspect "${image}" + } + + # Docker error on missing artifact is exit code == 1 and stderr: + # no such manifest: ${image} + _artifact_check_with_retries exists "no such manifest" +} + # Retries an exists command a with backoff. # # The retry count is given by LANUZA_ARTIFACT_RETRIES (default 5), the diff --git a/lanuza/utils/docker-compose.inc b/lanuza/utils/docker-compose.inc index 5004e8e..72b84cb 100644 --- a/lanuza/utils/docker-compose.inc +++ b/lanuza/utils/docker-compose.inc @@ -146,7 +146,8 @@ function _docker-compose_build() { debug "Using the following docker-compose file to specify labels" debug "$yaml" # Include new yaml in the docker-compose files - temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX) + temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX") + # To cleanup at exit temp_files+=( $temp_compose ) echo "$yaml" > $temp_compose @@ -218,7 +219,8 @@ function _docker-compose_build() { debug "$yaml" # Include new yaml in the docker-compose files - temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.cache.$LANUZA_BUILD_ID.XXXXXXXXX) + temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.cache.$LANUZA_BUILD_ID.XXXXXXXXX") + # To cleanup at exit temp_files+=( $temp_compose ) echo "$yaml" > $temp_compose diff --git a/lanuza/utils/docker.inc b/lanuza/utils/docker.inc index b57807d..453b8ca 100644 --- a/lanuza/utils/docker.inc +++ b/lanuza/utils/docker.inc @@ -34,6 +34,9 @@ fi # lanuza builder name to use when building via docker compose build export LANUZA_BUILDX_DRIVER=${LANUZA_BUILDX_DRIVER:-lanuza} +# lanuza builder image to use when building via docker compose build +export LANUZA_BUILDX_DRIVER_IMAGE=${LANUZA_BUILDX_DRIVER_IMAGE:-moby/buildkit:v0.11.6} + # flag to know when should we autocleanup docker compose LANUZA_DOCKER_COMPOSE__CLEANUP=0 @@ -138,13 +141,19 @@ function _docker_compose_build() { local branch branch=$(git_get_branch) if [[ ! -z ${CI+x} ]] && [[ " ${branches[*]} " =~ " ${branch} " ]]; then - build_cache_to[${service}]+=" - ${dest_image}:${branch}"$'\n' + local tag_to=$(_docker_compose_normalize_tag "$branch") + build_cache_to[${service}]+=" - ${dest_image}:${tag_to}"$'\n' fi done fi # Only build it there are buildable services if [[ ${#services[@]} -ne 0 ]]; then + # build automatically with VERSION + # TODO: VERSION should be 0.0.0-snapshot, but this is breaking change + set -- "build" "--build-arg" "VERSION=${VERSION:-snapshot}" "${@:2}" + set -- "build" "--build-arg" "DOCKER_UID=${DOCKER_UID}" "${@:2}" + set -- "build" "--build-arg" "DOCKER_GID=${DOCKER_GID}" "${@:2}" declare -A build_labels local date version revision @@ -176,21 +185,20 @@ function _docker_compose_build() { done # Include new yaml in the docker-compose files - temp_compose=$(mktemp "${TMPDIR:-/tmp/}"docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX) + temp_compose=$(: ${TMPDIR:=/tmp}; mktemp "${TMPDIR%/}/docker-compose.labels.$LANUZA_BUILD_ID.XXXXXXXXX") + # To cleanup at exit temp_files+=( $temp_compose ) - COMPOSE_FILE=${COMPOSE_FILE:-docker-compose.yml} - export COMPOSE_FILE="${COMPOSE_FILE}${COMPOSE_PATH_SEPARATOR}${temp_compose}" debug "Using the following docker compose file to customize build" debug "$yaml" echo "$yaml" > $temp_compose - # build automatically with VERSION - # TODO: VERSION should be 0.0.0-snapshot, but this is breaking change - set -- "build" "--build-arg" "VERSION=${VERSION:-snapshot}" "${@:2}" - set -- "build" "--build-arg" "DOCKER_UID=${DOCKER_UID}" "${@:2}" - set -- "build" "--build-arg" "DOCKER_GID=${DOCKER_GID}" "${@:2}" + IFS="${COMPOSE_PATH_SEPARATOR}" read -r -a compose_files <<< "${COMPOSE_FILE:-docker-compose.yml}${COMPOSE_PATH_SEPARATOR}${temp_compose}" + # reverse order to keep the original COMPOSE_FILE order + for ((i=${#compose_files[@]}-1; i>=0; i--)); do + set -- "-f" "${compose_files[$i]}" "${@:1}" + done _docker_setup_builder fi _docker_compose "$@" @@ -276,10 +284,14 @@ function _docker_setup_builder() { local use="docker buildx use ${LANUZA_BUILDX_DRIVER}" debug "${use}" if ! command ${use} &> /dev/null ; then - local create="docker buildx create --name ${LANUZA_BUILDX_DRIVER} --use --bootstrap" + local create="docker buildx create --name ${LANUZA_BUILDX_DRIVER} --use --bootstrap --driver-opt=image=${LANUZA_BUILDX_DRIVER_IMAGE}" debug "${create}" command ${create} &> /dev/null fi + # Booting builder before execution to avoid failures when building multiple services in docker compose. + local inspect="docker buildx inspect --bootstrap" + debug "${inspect}" + command ${inspect} &> /dev/null } # Checks for failed containers and writes logs for them the the provided dir diff --git a/lanuza/utils/github.inc b/lanuza/utils/github.inc index dbbff87..e2e8f3f 100644 --- a/lanuza/utils/github.inc +++ b/lanuza/utils/github.inc @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Gets the filed modified by a pull request +# Gets the files modified by a pull request function github_get_pr_files() { local pr=$1 # TODO: no more than 3000 files can be identified using GitHub api @@ -19,3 +19,12 @@ function github_get_pr_files() { } }' --jq '.data.repository.pullRequest.files.nodes.[].path' } + +# Gets the files modified between two refs (branches, tags, commits...) +# Usage: +# github_get_branches_files main feature1 +function github_get_branches_files() { + local BASE=$1 + local HEAD=$2 + PAGER= gh.sh api --paginate "/repos/{owner}/{repo}/compare/${BASE}...${HEAD}" --jq '.files.[].filename' +} diff --git a/lanuza/utils/workspace.inc b/lanuza/utils/workspace.inc index 7192d2d..280daff 100644 --- a/lanuza/utils/workspace.inc +++ b/lanuza/utils/workspace.inc @@ -17,20 +17,24 @@ function _workspace_init() { # compute only if an empty array if [[ "${#lanuza_ws_projects[@]}" == 0 ]]; then function get_workspaces() { - debug "Finding all workspaces..." - - local workspace - while IFS= read -r -d '' workspace; do - local path name - path=$(dirname "${workspace}") - name=$(jq -r -e '.name' "${workspace}") || fail "Cannot get mandatory \"name\" property for ${workspace}" - debug " //${name}: ${path}" - echo "${name} ${path}" - done < <(fs_list --null -u -g 'workspace.lanuza') + ( + set -euo pipefail + cd "${LANUZA_ROOT}" + debug "Finding all workspaces..." + + local workspace + fs_list --no-ignore-dot -g 'workspace.lanuza' | canonical.sh | sort -u | while read -r workspace; do + local path name + path=$(dirname "${workspace}") + name=$(jq -r -e '.name' "${workspace}") || fail "Cannot get mandatory \"name\" property for ${workspace}" + debug " //${name}: ${path}" + echo "${name} ${path}" + done + ) } local workspaces - workspaces=$(_workspace_cache_get "workspaces" "get_workspaces") + workspaces=$(_workspace_cache_get "workspaces" "get_workspaces") || fail "Cannot get workspaces" # load to our internal associative array local line @@ -48,34 +52,40 @@ function _workspace_init() { function _workspace_init_tags() { if [[ "${#lanuza_ws_tags[@]}" == 0 ]]; then function get_tags() { - debug "Finding all tags..." - - for name in "${!lanuza_ws_projects[@]}"; do - local path=${lanuza_ws_projects[${name}]} - - local btag="" - if [[ -f "${path}/lanuza/pipelines/publish.sh" ]]; then - btag="lanuza:pipelines:publish" - fi - - local ptag="" - if [[ -f "${path}/lanuza/pipelines/build.sh" ]]; then - ptag="lanuza:pipelines:build" - fi - - tags=$(jq -r \ - --arg btag "${btag}" \ - --arg ptag "${ptag}" \ - '[ $btag, $ptag ] + try .tags | [ .[] | select(length > 0) ] | sort | join(",")' \ - "${path}/workspace.lanuza" - ) - debug " //${name}: ${tags}" - echo "${name} ${tags}" - done + ( + set -euo pipefail + cd "${LANUZA_ROOT}" + debug "Finding all tags..." + + for name in "${!lanuza_ws_projects[@]}"; do + local path=${lanuza_ws_projects[${name}]} + + local btag="" + if [[ -f "${path}/lanuza/pipelines/publish.sh" ]]; then + btag="lanuza:pipelines:publish" + fi + + local ptag="" + if [[ -f "${path}/lanuza/pipelines/build.sh" ]]; then + ptag="lanuza:pipelines:build" + fi + + local tags + tags=$(jq -r \ + --arg btag "${btag}" \ + --arg ptag "${ptag}" \ + '[ $btag, $ptag ] + try .tags | [ .[] | select(length > 0) ] | sort | join(",")' \ + "${path}/workspace.lanuza" + ) || return 1 + + debug " //${name}: ${tags}" + echo "${name} ${tags}" + done + ) } local tags - tags=$(_workspace_cache_get "tags" "get_tags") + tags=$(_workspace_cache_get "tags" "get_tags") || fail "Cannot get tags" # load to our internal associative array local line @@ -91,9 +101,10 @@ function _workspace_init_tags() { # print the names of all the available projects with a workspace function workspace_list() { local tagString="${1:-""}" - # # make the intersection for both ordered sets to get the modified files + # make the intersection for both ordered sets to get the modified files ( - cd "$LANUZA_ROOT" + set -euo pipefail + cd "${LANUZA_ROOT}" _workspace_init if [[ "$tagString" != "" ]]; then _workspace_init_tags @@ -123,7 +134,8 @@ function workspace_get_sha() { name=${1:-$(workspace_name)} mode=${2:-"standard"} ( - cd "$LANUZA_ROOT" + set -euo pipefail + cd "${LANUZA_ROOT}" local sha @@ -151,7 +163,8 @@ function workspace_path() { # Runs a command in all workspaces function workspace_run() { ( - cd "$LANUZA_ROOT" + set -euo pipefail + cd "${LANUZA_ROOT}" local pipeline=$1 @@ -176,7 +189,8 @@ function workspace_exec() { local pipeline=$2 local path ( - cd "$LANUZA_ROOT" + set -euo pipefail + cd "${LANUZA_ROOT}" _workspace_init _workspace_assert_workspace_exists ${name} @@ -191,9 +205,26 @@ function workspace_exec() { ) } +# echoes the VERSION env var declared for a publish pipeline in a workspace +function workspace_version() { + local name path + name=${1:-$(workspace_name)} + _workspace_init + _workspace_assert_workspace_exists ${name} + path=${lanuza_ws_projects[${name}]} + ( + set -e + cd "${path}" + if [[ -f "./lanuza/pipelines/publish.sh" ]]; then + _workspace_load_publish_init + fi + echo "${VERSION:-""}" + ) +} + # echoes the current namespace manifest with env from publish pipeline interpolated function workspace_manifest() { - local name manifest path local_path + local name manifest path name=${1:-$(workspace_name)} _workspace_init _workspace_assert_workspace_exists ${name} @@ -201,36 +232,41 @@ function workspace_manifest() { ( set -e cd "${path}" - manifest=$(cat "./workspace.lanuza") - # try to laod the publish pipeline to get its env vars if [[ -f "./lanuza/pipelines/publish.sh" ]]; then - debug "Loading \"${name}\" publish pipeline init function for manifest env var interpolation" - # remove any init function that may have been defined because this method - # is called from another lanuza script - unset -f init - # remove the lanuza base source to only get function definitions - # and load the pipeline in the subshell - source <(cat ./lanuza/pipelines/publish.sh | sed '/source $(dirname $0)\/..\/base.inc/d') - # call the init function to get all en vars in our shell session - if isFunction "init"; then - # list of env vars to clean to avoid side-effects when calling from - # componentes - # TODO: This list should be created on-demand by parsing init contents and see what - # variables are getting the default or maybe exposing a configuration env_var per pipeline - local clean=("VERSION" "TAG") - local env_var - for env_var in "${clean[@]}"; do - unset "${env_var}" - done - init - fi + _workspace_load_publish_init fi # load manifest and interpolate the env vars in the current shell # TODO: inherited values will be interpolated also + manifest=$(cat "./workspace.lanuza") echo $(envsubst <<< "${manifest}") ) } + +# try to load the publish pipeline to get its env vars +function _workspace_load_publish_init() { + debug "Loading \"${name}\" publish pipeline init function" + # remove any init function that may have been defined because this method + # is called from another lanuza script + unset -f init + # remove the lanuza base source to only get function definitions + # and load the pipeline in the subshell + source <(cat ./lanuza/pipelines/publish.sh | sed '/source $(dirname $0)\/..\/base.inc/d' | sed '/source "$(dirname $0)"\/..\/base.inc/d' | sed '/source "$(dirname "$0")"\/..\/base.inc/d') + # call the init function to get all en vars in our shell session + if isFunction "init"; then + # list of env vars to clean to avoid side-effects when calling from + # componentes + # TODO: This list should be created on-demand by parsing init contents and see what + # variables are getting the default or maybe exposing a configuration env_var per pipeline + local clean=("VERSION" "TAG") + local env_var + for env_var in "${clean[@]}"; do + unset "${env_var}" + done + init + fi +} + # echoes the current namespace tags function workspace_tags() { local name @@ -286,7 +322,7 @@ function workspace_files() { mode=${2:-"standard"} circular=${3:-""} # simple recursive call stack - local path + local path list_path local stack="${circular} -> //${name}" # check if our manual simple call stack contains the current workspace # to detect circular dependencies @@ -304,21 +340,37 @@ function workspace_files() { path=${lanuza_ws_projects[${name}]} + # if path == '.' we are in the root dir, and we need to get all files without telling + # rg that the path is ".", because it will prefix all results with ./ + if [[ "${path}" == "." ]]; then + list_path="" + else + list_path="${path}" + fi + debug " ${stack}" if [[ "${mode}" == "pr" ]]; then - # In "pr" mode, we get all workspace files. This way we can get all component files that maybe be ignored (test, etc... ) - # and allow CI processes to run testing because it may exists test modified files that are ignored - fs_list -u ${path} | canonical.sh + # In "pr" mode, we get all workspace files not using the .ignore files in a canonical (resolving symlinks) format, + # to allow comparing with the modified files in the PR + # This way we can get all component files that maybe be ignored (test, etc... ) by users + # and allow CI processes to run testing because it may exists test modified files that are ignored to + # compute the SHA version in a published artifact, but must not be ignored to run tests + fs_list --no-ignore-dot ${list_path} | canonical.sh # switch to canonical mode to process deps, where we dont care about test modified files mode="canonical" elif [[ "${mode}" == "component" ]]; then - fs_list -u ${path} + # In "component" mode, we get all workspace files not using the .ignore files + # This way we can get all component files that maybe be ignored (test, etc... ) by users + # and compute things like the whole component sha (including test files) + fs_list --no-ignore-dot ${list_path} mode="standard" elif [[ "${mode}" == "canonical" ]]; then - fs_list ${path} | canonical.sh + # In "canonical" mode, we get all workspace files resolving symlinks + fs_list ${list_path} | canonical.sh mode="canonical" else - fs_list ${path} + # In "standard" mode, we get all workspace files honoring the .***ignore files + fs_list ${list_path} mode="standard" fi @@ -342,9 +394,9 @@ function workspace_files() { # deactivate .ignore files, to allow referencing any file debug " ${stack} -> ${globs[@]}" if [[ "${mode}" == "canonical" ]]; then - fs_list -u "${globs[@]}" | canonical.sh || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps" + fs_list -u --hidden "${globs[@]}" | canonical.sh || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps" else - fs_list -u "${globs[@]}" || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps" + fs_list -u --hidden "${globs[@]}" || fail "Invalid glob \"${globs[@]}\" in \"${name}\" workspace deps" fi fi ) | sort -u # return sorted and without dupes @@ -383,7 +435,7 @@ function _workspace_cache_get() { mkdir -p "${LANUZA_WORKSPACE_CACHE_DIR}" # to avoid creating the file when the $getter fails local output - output=$($getter) + output=$($getter) || return 1 echo "$output" > "${file}" fi cat "${file}" diff --git a/lanuza/workspaces/artifacts.sh b/lanuza/workspaces/artifacts.sh index 832295d..79f6a9a 100755 --- a/lanuza/workspaces/artifacts.sh +++ b/lanuza/workspaces/artifacts.sh @@ -6,8 +6,8 @@ # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation ARG_DEFS=( - # comma separated list of workspaces - "[--workspace=(.+)]" + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" # comma separated list of tags to filter workspaces (OR) "[--tags=(.+)]" "[--format=(text|json)]" @@ -19,17 +19,19 @@ function init() { } function run() { - local workspaces + local workspaces if [[ -z ${WORKSPACE+x} ]]; then # no WORKSPACE specified, use builtin API to filter by tags workspaces=$(workspace_list "${TAGS}") elif [[ "${TAGS}" == "" ]]; then # no TAGS. use the specified WORKSPACE - workspaces=$(echo ${WORKSPACE} | tr ',' '\n' | sort -u) + workspaces=$(read_workspaces) else # both TAGS and WORKSPACE specified. filter manually - workspaces=$(comm -12 <(workspace_list "${TAGS}") <(echo ${WORKSPACE} | tr ',' '\n' | sort -u )) + local input_workspaces + input_workspaces=$(read_workspaces) + workspaces=$(comm -12 <(workspace_list "${TAGS}") <(echo "${input_workspaces}" | sort -u )) fi local workspace @@ -52,4 +54,14 @@ function for_workspace() { 'try .artifacts[] + {"workspace": $WORKSPACE }' } +function read_workspaces() { + local workspaces + if [[ "${WORKSPACE}" == "-" ]]; then + workspaces=$(cat) + else + workspaces=$(echo ${WORKSPACE} | tr ',' '\n' | sort -u) + fi + echo "${workspaces}" +} + source $(dirname $0)/../base.inc diff --git a/lanuza/workspaces/list.sh b/lanuza/workspaces/list.sh index 0f4816e..03074b7 100755 --- a/lanuza/workspaces/list.sh +++ b/lanuza/workspaces/list.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env -S -u TAGS bash +#!/usr/bin/env -S -u TAGS -u FORMAT bash # list all workspace names # @@ -8,10 +8,20 @@ ARG_DEFS=( # comma separated list of tags to filter workspaces (OR) "[--tags=(.+)]" + "[--format=(text|json)]" ) +function init() { + TAGS="${TAGS:-""}" + FORMAT="${FORMAT:-text}" +} + function run() { - workspace_list "${TAGS:-""}" + if [[ "${FORMAT}" == "json" ]]; then + workspace_list "${TAGS}" | jq -cnR '[inputs | select(length>0)]' + else + workspace_list "${TAGS}" + fi } source $(dirname $0)/../base.inc diff --git a/lanuza/workspaces/modified.sh b/lanuza/workspaces/modified.sh index a108b2d..838e96d 100755 --- a/lanuza/workspaces/modified.sh +++ b/lanuza/workspaces/modified.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env -S -u PR -u TAGS bash +#!/usr/bin/env -S -u PR -u TAGS -u FORMAT -u WS_MODE bash # list all workspaces modified in a pull request # @@ -7,17 +7,43 @@ # but keep GITHUB_TOKEN as it's a credential and not a parameter ARG_DEFS=( - "--pr=(.+)" + # GitHub PR number. Ex: 123 + "[--pr=(.+)]" + # can be any valid git ref (branch, tag, commit SHA) pushed to GitHub. Ex: main + "[--base=(.+)]" + # can be any valid git ref (branch, tag, commit SHA) pushed to GitHub. Ex: feature1 + "[--head=(.+)]" # comma separated list of tags to filter workspaces (OR) "[--tags=(.+)]" "--github-token=(.+)" + "[--format=(text|json)]" + "[--ws-mode=(canonical|pr|component|standard)]" ) +function init() { + WS_MODE=${WS_MODE:-"pr"} + FORMAT="${FORMAT:-text}" +} + function run() { # declare local and use later to allow bubbling up errors in bash. - local pr_files - echoerr "Getting files for PR ${PR}" - pr_files=$(github_get_pr_files "${PR}") + local remote_files + + # check if PR is Defined + if [[ -z ${PR+x} ]]; then + # no PR specified, if no BASE and HEAD are specified, fail + if [[ -z ${BASE+x} || -z ${HEAD+x} ]]; then + fail "Either --pr or --base and --head must be provided" + fi + remote_files=$(github_get_branches_files "${BASE}" "${HEAD}") + else + # PR specified. use the specified PR + # if PR and BASE or HEAD are specified, fail + if [[ -n ${BASE+x} || -n ${HEAD+x} ]]; then + fail "Either --pr or --base and --head must be provided" + fi + remote_files=$(github_get_pr_files "${PR}") + fi # GitHub, when a submodule is updated, only returns the submodule folder modified # When a submodule is updated, get all the current modified folder files and add to the @@ -37,15 +63,15 @@ function run() { local submodule_files=$(fs_list "${submodule}") # remove the submodule from the list. # Use # as sed separator as submodule can be a path with /, the default separator - pr_files=$(echo "${pr_files}" | sed "s#${submodule}##" ) + remote_files=$(echo "${remote_files}" | sed "s#${submodule}##" ) # add all the submodule files to the list - pr_files="${pr_files}"$'\n'"${submodule_files}" + remote_files="${remote_files}"$'\n'"${submodule_files}" fi done <<< "${submodules}" - done <<< "${pr_files}" + done <<< "${remote_files}" # remove possible empty lines and sort, to prepare for later comparison - pr_files=$(echo "${pr_files}" | sed '/^$/d' | sort) + remote_files=$(echo "${remote_files}" | sed '/^$/d' | sort) # get and echo later to bubble up errors up local workspaces @@ -53,6 +79,7 @@ function run() { # keep in the main shell with process substitution to allow writting vars from main shell local workspace + local output="" while read workspace; do [[ "${workspace}" == "" ]] && continue local ws_files modified_files count @@ -60,10 +87,10 @@ function run() { # GitHub does not know about symlinks, so we have to translate our # files to its canonical name to check if they have been modified (canonical) # and also get files without honoring .ignore files for that specific component - ws_files=$(workspace_files "${workspace}" "pr") + ws_files=$(workspace_files "${workspace}" "${WS_MODE}") # make the intersection for both ordered sets to get the modified files - modified_files=$(comm -12 <(echo "${ws_files}") <(echo "${pr_files}")) + modified_files=$(comm -12 <(echo "${ws_files}") <(echo "${remote_files}")) if [[ "${modified_files}" != "" ]]; then # echo "" | wc -l => returs 1, so @@ -77,10 +104,17 @@ function run() { echoerr "Found ${count} modified files in workspace \"${workspace}\":" ${modified_files} if [[ ${count} != "0" ]]; then - echo "${workspace}" + output="${output}${workspace}"$'\n' fi done <<< "${workspaces}" + + if [[ "${FORMAT}" == "json" ]]; then + # sort and remove empty lines and convert to json array + echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' | jq -cnR '[inputs | select(length>0)]' + else + echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' + fi } source $(dirname $0)/../base.inc diff --git a/lanuza/workspaces/output-files.sh b/lanuza/workspaces/output-files.sh index 3ea0694..194b6e5 100755 --- a/lanuza/workspaces/output-files.sh +++ b/lanuza/workspaces/output-files.sh @@ -6,8 +6,8 @@ # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation ARG_DEFS=( - # workspaces - "[--workspace=(.+)]" + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" # the artifact type files to return. "[--type=(output|.+)]" # compress the involved files and return the final filename @@ -17,9 +17,11 @@ ARG_DEFS=( function run() { local artifacts workspaces files + if [[ -z ${WORKSPACE+x} ]]; then - # no WORKSPACE specified workspaces=$(workspace_list) + elif [[ "${WORKSPACE}" == "-" ]]; then + workspaces=$(cat) else workspaces=$(echo ${WORKSPACE} | tr ',' '\n' | sort -u) fi diff --git a/lanuza/workspaces/path.sh b/lanuza/workspaces/path.sh new file mode 100755 index 0000000..6e3bedd --- /dev/null +++ b/lanuza/workspaces/path.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash + +# returns the path for a set of workspaces +# +# Uses the shebang line to unset env vars that may cause interference before base.inc sourcing +# https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation + +ARG_DEFS=( + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" + "[--format=(text|json)]" +) + +function init() { + export FORMAT="${FORMAT:-text}" +} + +function run() { + local workspaces + + if [[ -z ${WORKSPACE+x} ]]; then + workspaces=$(workspace_list) + elif [[ "${WORKSPACE}" == "-" ]]; then + workspaces=$(cat) + else + workspaces=$(echo ${WORKSPACE} | tr ',' '\n') + fi + + local data="" + while read workspace; do + # add a newline between workspaces tags + [[ "${workspace}" != "" ]] && data="${data}{\"${workspace}\": \"$(workspace_path "${workspace}")\"}"$'\n' + done <<< "$workspaces" + + local output + output=$(echo "${data}" | jq -rcs add) + if [[ "${FORMAT}" == "json" ]]; then + echo "${output}" + else + echo "${output}" | jq -r 'to_entries[] | [.key, .value] | @tsv' | column -s$'\t' -t + fi +} + +source $(dirname $0)/../base.inc diff --git a/lanuza/workspaces/status.sh b/lanuza/workspaces/status.sh index 154a9d2..f065dfe 100755 --- a/lanuza/workspaces/status.sh +++ b/lanuza/workspaces/status.sh @@ -7,7 +7,8 @@ # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation ARG_DEFS=( - "[--workspace=(.+)]" + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" # comma separated list of tags to filter workspaces (OR) "[--tags=(.+)]" "[--filter=(published|missing|unknown|failed)]" diff --git a/lanuza/workspaces/tags.sh b/lanuza/workspaces/tags.sh index 73b0f04..a988fae 100755 --- a/lanuza/workspaces/tags.sh +++ b/lanuza/workspaces/tags.sh @@ -1,4 +1,4 @@ -#!/usr/bin/env -S -u WORKSPACE bash +#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash # gets all tags for a set of workspaces # @@ -6,32 +6,38 @@ # https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation ARG_DEFS=( - # comma separated list of workspaces - "[--workspace=(.+)]" + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" + "[--format=(text|json)]" ) +function init() { + FORMAT="${FORMAT:-text}" +} + function run() { local workspaces if [[ -z ${WORKSPACE+x} ]]; then workspaces=$(workspace_list) + elif [[ "${WORKSPACE}" == "-" ]]; then + workspaces=$(cat) else workspaces=$(echo ${WORKSPACE} | tr ',' '\n') fi - ( - set -e - local workspace - while read workspace; do - if [[ "${workspace}" != "" ]]; then - local tags - tags=$(workspace_tags "${workspace}") - if [[ "${tags}" != "" ]]; then - echo "${tags}" - fi - fi - done <<< "$workspaces" - ) | sort -u + local output="" + while read workspace; do + # add a newline between workspaces tags + [[ "${workspace}" != "" ]] && output="${output}$(workspace_tags "${workspace}")"$'\n' + done <<< "$workspaces" + + if [[ "${FORMAT}" == "json" ]]; then + # sort and remove empty lines and convert to json array + echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' | jq -cnR '[inputs | select(length>0)]' + else + echo "${output}" | sort -u | sed '/^[[:space:]]*$/d' + fi } source $(dirname $0)/../base.inc diff --git a/lanuza/workspaces/version.sh b/lanuza/workspaces/version.sh new file mode 100755 index 0000000..d94298b --- /dev/null +++ b/lanuza/workspaces/version.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env -S -u WORKSPACE -u FORMAT bash + +# returns VERSION for a set of workspaces +# +# Uses the shebang line to unset env vars that may cause interference before base.inc sourcing +# https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#env-invocation + +ARG_DEFS=( + # comma separated list of workspaces or - to read from stdin + "[--workspace=(.+|-)]" + "[--format=(text|json)]" +) + +function init() { + export FORMAT="${FORMAT:-text}" +} + +function run() { + local workspaces + + if [[ -z ${WORKSPACE+x} ]]; then + workspaces=$(workspace_list) + elif [[ "${WORKSPACE}" == "-" ]]; then + workspaces=$(cat) + else + workspaces=$(echo ${WORKSPACE} | tr ',' '\n') + fi + + local data="" + while read workspace; do + # add a newline between workspaces tags + [[ "${workspace}" != "" ]] && data="${data}{\"${workspace}\": \"$(workspace_version "${workspace}")\"}"$'\n' + done <<< "$workspaces" + + local output + output=$(echo "${data}" | jq -rcs 'add | with_entries( select( .value != "" ) )') + if [[ "${FORMAT}" == "json" ]]; then + echo "${output}" + else + echo "${output}" | jq -r 'to_entries[] | [.key, .value] | @tsv' | column -s$'\t' -t + fi +} + +source $(dirname $0)/../base.inc diff --git a/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc b/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc new file mode 100644 index 0000000..7250817 --- /dev/null +++ b/samples/datasets/Contact_Center_CDR/v5.9.0/Contact_Center_CDR_5.9.0.avsc @@ -0,0 +1,351 @@ +{ + "namespace": "com.plainAVRO", + "name": "Contact_Center_CDR", + "type": "record", + "doc": "Data records that contain information about each interaction (such as calls or interactions/contacts through other channels/apps) received by a Contact Center", + "x-fp-version": "5.9.0", + "fields": [ + { + "name": "OPERATOR_ID", + "aliases": [ + "operator_id" + ], + "type": "string", + "doc": "Global Operator Identifier" + }, + { + "name": "INTERACTION_TM", + "aliases": [ + "interaction_tm" + ], + "type": { + "type": "string", + "logicalType": "datetime" + }, + "doc": "Year, month, day and time of the interaction (contact action)" + }, + { + "name": "CUSTOMER_ID", + "aliases": [ + "customer_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Customer ID (if known when the call occurs).\nAs much as possible, each customer will maintain the same identifier within any business (fixed, mobile, cloud, IoT, ...), which will allow to build a complete vision of the customer. If a client has a prepaid number and a postpaid and/or hybrid number, the client ID should be unique (common for both lines)." + }, + { + "name": "SUBSCRIBER_ID", + "aliases": [ + "subscriber_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Unique identifier of the line associated to the call (in the case of lines already registered), as it comes in the origin system" + }, + { + "name": "INTERACTION_ID", + "aliases": [ + "interaction_id" + ], + "type": [ + "null", + "string" + ], + "doc": "ID/Ref number of the \"event\"/contact action (as it is registered in call center systems). Typically used for later tracking/monitoring of status" + }, + { + "name": "ACTIVATION_DT", + "aliases": [ + "activation_dt" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "iso-date" + } + ], + "doc": "(Fixed or mobile) Line activation date" + }, + { + "name": "ADMINISTRA_ID", + "aliases": [ + "administra_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Unique identifier of the Broadband or TV circuit code" + }, + { + "name": "SOURCE_SYSTEM_ID", + "aliases": [ + "source_system_id" + ], + "type": [ + "null", + "string" + ], + "doc": "System or origin/type of the event, as registered in the call center platform (eg: Tkt_Remedy, Novum, etc.)" + }, + { + "name": "AGENT_ID", + "aliases": [ + "agent_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Id of the \"agent\" handling the call (or contact action)" + }, + { + "name": "AGENT_GROUP_ID", + "aliases": [ + "agent_group_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Id of the \"agent group\" (to which the agent handling the call belongs)" + }, + { + "name": "EXT_USER_ID", + "aliases": [ + "ext_user_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Id of the \"user\" who makes the \"call\"/contact in the external/origin system (in the case of clients from Novum, TV platform, etc. which may have internal IDs of clients). It can be also the \"username\" or e-mail" + }, + { + "name": "CALLER_PHONE_ID", + "aliases": [ + "caller_phone_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Phone number (in the case of contact via phone call), without international prefix, used in the contact action. It may not coincide with the phone number associated to the line or SUBSCRIBER_ID/CUSTOMER_ID" + }, + { + "name": "IMEI_ID", + "aliases": [ + "imei_id" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "imei" + } + ], + "doc": "If the user interaction is a call from a mobile phone, the IMEI of the device used to make the call.\nInternational Mobile Equipment Identity\nIMPORTANT: IMEI must be normalized, which means:\n* Invalid imeis must be removed (ie. Imeis including letters)\n* Text format\n* Complete to 15 digits by adding zeros at the left\n* Remove control digit (position #15)" + }, + { + "name": "DEST_PHONE_ID", + "aliases": [ + "dest_phone_id" + ], + "type": [ + "null", + "string" + ], + "doc": "In the case of a Contact Center call, the telephone number that receives the call (the number associated with the call center)" + }, + { + "name": "DEST_BASE_PHONE_ID", + "aliases": [ + "dest_base_phone_id" + ], + "type": [ + "null", + "string" + ], + "doc": "In the case of a Contact Center call, the \"base telephone number\" that receives the call (the \"base number\" associated with DEST_PHONE_ID)." + }, + { + "name": "LINE_PHONE_ID", + "aliases": [ + "line_phone_id" + ], + "type": "string", + "doc": "Phone number to which the call/interaction is associated (may be different to the phone number used by the caller). It is the phone number associated to the line or CUSTOMER_ID/SUBSCRIBER_ID.\nIn the case of mobile lines, it will be the MSISDN without international prefix" + }, + { + "name": "DURATION_QT", + "aliases": [ + "duration_qt" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "duration" + } + ], + "doc": "Duration of the interaction (ISO-8601 value that contains a complete duration representation). In the case of a Contact Center calls, the duration of the call. In other cases, the session duration, etc." + }, + { + "name": "CONTACT_IND", + "aliases": [ + "contact_ind" + ], + "type": [ + "null", + "boolean" + ], + "doc": "In the case of a Contact Center call, indicates whether the call was or not answered. I.e.: If it lasts less than X seconds, it is considered that the call was not answered, if lasts more or equal than X seconds, it is considered that the customer spoke with someone." + }, + { + "name": "HOLD_TIME_QT", + "aliases": [ + "hold_time_qt" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "duration" + } + ], + "doc": "Hold time of the interaction (ISO-8601 value that contains a complete duration representation). In the case of call center calls, hold time during the call." + }, + { + "name": "INTERACTION_TYPE_ID", + "aliases": [ + "interaction_type_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Type/topic of the call (i.e: a complaint about billing, purchasing a bundle, etc)" + }, + { + "name": "TROUBLE_TICKET_ID", + "aliases": [ + "trouble_ticket_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Identifier of the trouble ticket associated with the interaction, when necessary." + }, + { + "name": "REASON_ID", + "aliases": [ + "reason_id" + ], + "type": [ + "null", + "string" + ], + "doc": "Code of the reason of the interaction (in the case of contact center calls, there is a hierarchy)" + }, + { + "name": "CSI_QT", + "aliases": [ + "csi_qt" + ], + "type": [ + "null", + "float" + ], + "doc": "The result of the customer satisfaction survey where applicable" + }, + { + "name": "CHANNEL_ID", + "aliases": [ + "channel_id" + ], + "type": [ + "null", + "string" + ], + "doc": "In the case of multi-channel contact center, identifies the channel used by the user to interact with the Company" + }, + { + "name": "BILLABLE_CALL_IND", + "aliases": [ + "billable_call_ind" + ], + "type": [ + "null", + { + "name": "BILLABLE_CALL_IND_ENUM", + "type": "enum", + "symbols": [ + "Free", + "Billable" + ] + } + ], + "doc": "Indicates if if the call is billable or free (if known)" + }, + { + "name": "USER_4P_ID", + "aliases": [ + "user_4p_id" + ], + "type": [ + "null", + { + "type": "string", + "x-fp-user-id": true + } + ], + "doc": "Identifier of the user in 4th Platform (as returned by the OB in the 4th Platform APIs)" + }, + { + "name": "CALLER_PHONE_WITH_PREFIX_ID", + "aliases": [ + "caller_phone_with_prefix_id" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "phone-number" + } + ], + "doc": "Phone number (in the case of contact via phone call), WITH_INTERNATIONAL_PREFIX, used in the contact action. It may not coincide with the phone number associated to the line or SUBSCRIBER_ID/CUSTOMER_ID" + }, + { + "name": "LINE_PHONE_WITH_PREFIX_ID", + "aliases": [ + "line_phone_with_prefix_id" + ], + "type": [ + "null", + { + "type": "string", + "logicalType": "phone-number", + "x-fp-identifier": "phone-number" + } + ], + "doc": "Phone number for which the call/interaction is associated (may be different to phone used by the caller). It is the phone number associated to the line or CUSTOMER_ID/SUBSCRIBER_ID.\n In the case of mobile lines, it will be the MSISDN WITH international prefix." + }, + { + "name": "DAY_DT", + "aliases": [ + "day_dt" + ], + "type": { + "type": "string", + "logicalType": "iso-date" + }, + "doc": "Year, month and day of the interaction" + } + ] +} \ No newline at end of file diff --git a/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json b/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json new file mode 100644 index 0000000..c22845d --- /dev/null +++ b/samples/extensions/Contact_Center_CDR/v5.9.0/extensions.json @@ -0,0 +1,25 @@ +{ + "namespace": "com.plainAVRO", + "name": "Contact_Center_CDR", + "type": "record", + "doc": "Data records that contain information about each interaction (such as calls or interactions/contacts through other channels/apps) received by a Contact Center", + "x-fp-version": "5.9.0", + "fields": [ + { + "name": "DAY_DT", + "aliases": [ + "day_dt" + ], + "type": { + "type": "string", + "logicalType": "iso-date", + "arg.properties": { + "range": { + "start": "${DATE_RANGE_START}", + "end": "${DATE_RANGE_END}" + } + } + } + } + ] +} \ No newline at end of file