From aba760d22d86c6d8d3212c785b0d6b2931ad0763 Mon Sep 17 00:00:00 2001 From: Kendra Swanson Date: Tue, 13 Feb 2024 11:16:13 -0500 Subject: [PATCH 1/2] rename cross-lib to batch-builder --- .gitignore | 4 +- batch-builder/Dockerfile.ruby_env | 7 ++ batch-builder/README.md | 45 ++++++++++++ batch-builder/run_enki_on_all.sh | 109 ++++++++++++++++++++++++++++++ batch-builder/update_books.rb | 53 +++++++++++++++ batch-builder/updater.sh | 11 +++ 6 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 batch-builder/Dockerfile.ruby_env create mode 100644 batch-builder/README.md create mode 100755 batch-builder/run_enki_on_all.sh create mode 100755 batch-builder/update_books.rb create mode 100755 batch-builder/updater.sh diff --git a/.gitignore b/.gitignore index f8d083a6..67ee1df2 100644 --- a/.gitignore +++ b/.gitignore @@ -8,8 +8,8 @@ /build-concourse/webhosting-*.yml /build-concourse/corgi-*.yml -cross-lib/book-data -cross-lib/logs +batch-builder/book-data +batch-builder/logs /bakery-src/scripts/build /bakery-src/scripts/cops_bakery_scripts.egg-info diff --git a/batch-builder/Dockerfile.ruby_env b/batch-builder/Dockerfile.ruby_env new file mode 100644 index 00000000..8f5b37b1 --- /dev/null +++ b/batch-builder/Dockerfile.ruby_env @@ -0,0 +1,7 @@ +FROM ruby:3.2-slim as base +WORKDIR /code + +RUN apt-get update && \ + apt-get install -y curl + +COPY [batch-builder, ./batch-builder] diff --git a/batch-builder/README.md b/batch-builder/README.md new file mode 100644 index 00000000..1a2806b3 --- /dev/null +++ b/batch-builder/README.md @@ -0,0 +1,45 @@ +## Set of files involved in running enki on all books + + +### Data + +`book-data/USER_ubl.txt` - Unapproved Book List (UBL). User-maintained file. Gives user the option to supply books not in the ABL. When `update_books.rb/updater.sh` runs, it draws from this file as well as the [ABL](https://github.com/openstax/content-manager-approved-books/blob/main/approved-book-list.json). 
+ +Format: +``` +repo-name-1 +repo-name-2 +... +``` +To ensure data in the UBL is included in `AUTO_books.txt`, be sure to rerun the updater after editing this file. + +`book-data/AUTO_books.txt` - List of books. Format: same as `USER_ubl.txt`. Autogenerated file. Generated by `update_books.rb`/`updater.sh`; combines data from the ABL and the UBL. + +Note: All data files should end with a single newline. + +### Scripts + +All scripts are meant to be run from the Enki root (e.g., by calling `./batch-builder/updater.sh` from the enki folder). + +`update_books.rb` - Updates or creates the list of books in `AUTO_books.txt`. Requires Ruby to be installed. + +`updater.sh` - Runs `update_books.rb` without dependencies installed (bundles Docker `build` and `run`). + +`run_enki_on_all.sh` - Iterates over books in `AUTO_books.txt`, running enki to generate each. Prints the status and build time of each run to stdout. Book files are found in `<data-dir>/<repo>-<command>`. + +Options: +- --command: A [step or set of steps](../step-config.json) to run on all books. Example: `all-pdf` +- --data-dir: A file location where you want your finished books to go. Defaults to `./data`. Example: `./data/v1` +- --echo: Writes enki's regular output to the terminal as well as to the log +- --continue: Skips books that already have an associated log file (useful if a run was interrupted) + +Examples: +``` +./batch-builder/run_enki_on_all.sh --command all-pdf +./batch-builder/run_enki_on_all.sh --command all-epub --data-dir ./data/v1 --echo --continue +STOP_AT_STEP=step-bake ./batch-builder/run_enki_on_all.sh --command all-web +``` + +## Logs + +When `run_enki_on_all.sh` is called, it redirects the output from each book's run to a log file in `batch-builder/logs/`. Log files are named `<repo>-<command>.txt`. 
diff --git a/batch-builder/run_enki_on_all.sh b/batch-builder/run_enki_on_all.sh new file mode 100755 index 00000000..28240a9f --- /dev/null +++ b/batch-builder/run_enki_on_all.sh @@ -0,0 +1,109 @@ +#!/bin/bash + +set -e + +trap 'exit 1' INT + +# Setup +while [ -n "$1" ]; do + case "$1" in + --command) + shift; arg_command=$1 + ;; + --data-dir) + shift; arg_data_dir=$1 + ;; + --echo) do_echo=true ;; + --continue) do_continue=true ;; + *) + echo "Invalid argument $1" + exit 1 + ;; + esac + shift +done + +[[ $arg_command ]] || ( echo "ERROR: A command was not provided. Typical examples are 'all-pdf' or 'all-web' or 'all-epub'" && exit 1 ) + +[[ $arg_data_dir ]] || arg_data_dir="./data" + +root="batch-builder" +all_books="$root/book-data/AUTO_books.txt" +test -f $all_books || ( echo "ERROR: Book list not found at ${all_books}" && exit 1 ) + +mkdir -p $root/logs/ + +# Helpers +# https://stackoverflow.com/a/20983251 +echo_green() { echo -e "$(tput setaf 2)$*$(tput sgr0)"; } +echo_red() { echo -e "$(tput setaf 1)$*$(tput sgr0)"; } + +format_time() { + if [[ $(uname -s) = "Darwin" ]]; then + echo "$(date -u -r $1 +%T)" + elif [[ $(uname -s) = "Linux" ]]; then + echo "$(date --date="@$1" +%H:%M:%S)" + else + echo "WARNING: Unrecognized operating system. Unable to format datetime." + fi +} + +get_slug_list_for_repo () { + meta_inf=$(curl -Ss "https://raw.githubusercontent.com/openstax/$1/main/META-INF/books.xml") + filtered=$(echo $meta_inf | grep -oE 'slug="([a-zA-Z0-9\-]+)"' | sed 's/slug=//g' | sed 's/\"//g') + echo $filtered +} + +# Nicely handle an enki run +run_and_log_enki () { + start_time=$(date +%s) + book_destination=$arg_data_dir/$2-$1 + cmd="./enki --data-dir $book_destination --command $1 --repo $2 --ref main" + echo "running: $cmd" + log="$root/logs/$2-$1.txt" + echo "volumes are: $(get_slug_list_for_repo $2)" + if [[ $do_continue && -f "$log" ]]; then + echo "Skipping because log file exists. To build anyway, unset --continue flag." 
+ return 0 + fi + if [[ $do_echo ]]; then + SKIP_DOCKER_BUILD=1 $cmd 2>&1 | tee "$log" + exit=${PIPESTATUS[0]} + else + SKIP_DOCKER_BUILD=1 $cmd &> "$log" + exit=$? + fi + stop_time=$(date +%s) + elapsed_formatted=$( format_time $(($stop_time-$start_time)) ) + echo " time to build $elapsed_formatted" + if [ $exit == 0 ]; then + echo_green " ==> SUCCESS: $2" + echo " Book can be found at $book_destination" + else + echo_red " ==> FAILED with $exit: $2" + echo " For more information see $log" + fi + return $exit +} + +# Read book list & collect data on runs +total_start=$(date +%s) +failed_count=0 +success_count=0 +# Build container once +DOCKER_DEFAULT_PLATFORM=linux/amd64 DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 docker build --tag enki-for-builder --file ./Dockerfile ./. +while read -r line; do + repo=${line%%' '*} + # https://stackoverflow.com/a/35208546 + echo "" | run_and_log_enki $arg_command $repo \ + && success_count=$(($success_count+1)) || failed_count=$(($failed_count+1)) +done < <(cat "$all_books") +total_end=$(date +%s) +elapsed_formatted=$( format_time $(($total_end-$total_start)) ) + +# Final report: +echo " +Yeehaw! You've built E V E R Y T H I N G. 
Total runtime was $elapsed_formatted +You can find the built books in $arg_data_dir +Successes $success_count failures $failed_count +" diff --git a/batch-builder/update_books.rb b/batch-builder/update_books.rb new file mode 100755 index 00000000..74f0f01e --- /dev/null +++ b/batch-builder/update_books.rb @@ -0,0 +1,53 @@ +#!/usr/bin/env ruby + +require 'json' +require 'set' + +# Files +base_dir = 'batch-builder/book-data' +out_file = "#{base_dir}/AUTO_books.txt" +ubl = "#{base_dir}/USER_ubl.txt" +`mkdir -p #{base_dir}; touch #{out_file}; touch #{ubl}` + + +# Refresh ABL (bash) +abl_hash = JSON.parse( + `curl -Ss https://raw.githubusercontent.com/openstax/content-manager-approved-books/main/approved-book-list.json` +) + +books=Set[] +# Write unapproved books +if File.exist?(ubl) + File.readlines(ubl).each do |line| + books.add(line) + end +end + +# Write ABL +def write_abl_with_slugs(abl_hash:, books:) # TODO: delete this method? + abl_hash['approved_books'].each do |book| + repo = book['repository_name'] + book['versions'].sort_by{ |version| + version['commit_metadata']['committed_at'] + }.reverse!.first['commit_metadata']['books'].each do |volume| + books.add("#{repo} #{volume['slug']}\n") + end + end +end + +def write_abl_without_slugs(abl_hash:, books:) + abl_hash['approved_books'].each do |book| + books.add("#{book['repository_name']}\n") + end +end + +# Exchange for write_abl_with_slugs if we need slugs in future? +write_abl_without_slugs(abl_hash: abl_hash, books: books) + +# Add files +File.delete(out_file) +books.each do |book| + File.write(out_file, book, mode: 'a') +end + + diff --git a/batch-builder/updater.sh b/batch-builder/updater.sh new file mode 100755 index 00000000..5163b980 --- /dev/null +++ b/batch-builder/updater.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Run this to update the book list without having ruby installed. +root_dir=batch-builder +docker build --tag batch-builder --file $root_dir/Dockerfile.ruby_env $root_dir/. 
+docker run \ + --rm -it \ + --mount type=bind,source=$(pwd)/batch-builder,target=/code/batch-builder \ + --name batch-builder-builder \ + batch-builder \ + $root_dir/update_books.rb From b4fb591b418607e32c2414755a5c94e7b69cfcd8 Mon Sep 17 00:00:00 2001 From: Kendra Swanson Date: Tue, 13 Feb 2024 11:19:34 -0500 Subject: [PATCH 2/2] commit deleted files too :upside_down_smiley_face: --- cross-lib/Dockerfile.ruby_env | 7 --- cross-lib/README.md | 45 -------------- cross-lib/run_enki_on_all.sh | 109 ---------------------------------- cross-lib/update_books.rb | 53 ----------------- cross-lib/updater.sh | 11 ---- 5 files changed, 225 deletions(-) delete mode 100644 cross-lib/Dockerfile.ruby_env delete mode 100644 cross-lib/README.md delete mode 100755 cross-lib/run_enki_on_all.sh delete mode 100755 cross-lib/update_books.rb delete mode 100755 cross-lib/updater.sh diff --git a/cross-lib/Dockerfile.ruby_env b/cross-lib/Dockerfile.ruby_env deleted file mode 100644 index 00e2ffda..00000000 --- a/cross-lib/Dockerfile.ruby_env +++ /dev/null @@ -1,7 +0,0 @@ -FROM ruby:3.2-slim as base -WORKDIR /code - -RUN apt-get update && \ - apt-get install -y curl - -COPY [cross-lib, ./cross-lib] diff --git a/cross-lib/README.md b/cross-lib/README.md deleted file mode 100644 index 023bf097..00000000 --- a/cross-lib/README.md +++ /dev/null @@ -1,45 +0,0 @@ -## Set of files involved in running enki on all books - - -### Data - -`book-data/USER_ubl.txt` - Unapproved Book List (UBL). User-maintained file. Gives user the option to supply books not in the ABL. When `update_books.rb/updater.sh` runs, it draws from this file as well as the [ABL](https://github.com/openstax/content-manager-approved-books/blob/main/approved-book-list.json). - -Format: -``` -repo-name-1 -repo-name-1 -... -``` -To ensure data in the UBL is included in `AUTO_books.txt`, be sure to rerun the updater after editing this file. - -`book-data/AUTO_books.txt` - List of books. Format: same as `USER_ubl.txt`. 
Autogenerated file. Generated by `update_books.rb/updater.sh`, and combines data from ABL and UBL. - -Note: All data files should end with a single newline. - -### Scripts - -All scripts are meant to be run from Enki root (eg. by calling `./cross-lib/updater.sh` from enki folder). - -`update_books.rb` - Updates or creates the list of books in `AUTO_books.txt`. Requires Ruby to be installed. - -`updater.sh` - Runs `update_books.rb` without dependencies installed (bundles Docker `build` and `run`). - -`run_enki_on_all.sh` - Iterates over books in `AUTO_books.txt`, running enki to generate each. Outputs status of each run & the time to stdout. Book files are found in `/data/-`. - -Options: -- --command: A [step or set of steps](../step-config.json) to run on all books . Example: `all-pdf` -- --data-dir: A file location where you want your finished books to go. Defaults to `./data` . Example: `./data/v1` -- --echo: Outputs to the terminal as well as the log the regular output from enki -- --continue: Skips books that have an associated log file (useful for if a run was interrupted) - -Examples: -``` -./cross-lib/run_enki_on_all.sh --command all-pdf -./cross-lib/run_enki_on_all.sh --command all-epub --data-dir ./data/v1 --echo --continue -STOP_AT_STEP=step-bake ./cross-lib/run_enki_on_all.sh --command all-web -``` - -## Logs - -When `run_enki_on_all.sh` is called, it redirects the output from each book's run to a log file. The naming system for log files is `-.txt`. 
diff --git a/cross-lib/run_enki_on_all.sh b/cross-lib/run_enki_on_all.sh deleted file mode 100755 index e512bb6d..00000000 --- a/cross-lib/run_enki_on_all.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash - -set -e - -trap 'exit 1' INT - -# Setup -while [ -n "$1" ]; do - case "$1" in - --command) - shift; arg_command=$1 - ;; - --data-dir) - shift; arg_data_dir=$1 - ;; - --echo) do_echo=true ;; - --continue) do_continue=true ;; - *) - echo "Invalid argument $1" - exit 1 - ;; - esac - shift -done - -[[ $arg_command ]] || ( echo "ERROR: A command was not provided. Typical examples are 'all-pdf' or 'all-web' or 'all-epub'" && exit 1 ) - -[[ $arg_data_dir ]] || arg_data_dir="./data" - -root="cross-lib" -all_books="$root/book-data/AUTO_books.txt" -test -f $all_books || ( echo "ERROR: Book list not found at ${all_books}" && exit 1 ) - -mkdir -p $root/logs/ - -# Helpers -# https://stackoverflow.com/a/20983251 -echo_green() { echo -e "$(tput setaf 2)$*$(tput sgr0)"; } -echo_red() { echo -e "$(tput setaf 1)$*$(tput sgr0)"; } - -format_time() { - if [[ $(uname -s) = "Darwin" ]]; then - echo "$(date -u -r $1 +%T)" - elif [[ $(uname -s) = "Linux" ]]; then - echo "$(date --date="@$1" +%H:%M:%S)" - else - echo "WARNING: Unrecognized operating system. Unable to format datetime." - fi -} - -get_slug_list_for_repo () { - meta_inf=$(curl -Ss "https://raw.githubusercontent.com/openstax/$1/main/META-INF/books.xml") - filtered=$(echo $meta_inf | grep -oE 'slug="([a-zA-Z0-9\-]+)"' | sed 's/slug=//g' | sed 's/\"//g') - echo $filtered -} - -# Nicely handle an enki run -run_and_log_enki () { - start_time=$(date +%s) - book_destination=$arg_data_dir/$2-$1 - cmd="./enki --data-dir $book_destination --command $1 --repo $2 --ref main" - echo "running: $cmd" - log="$root/logs/$2-$1.txt" - echo "volumes are: $(get_slug_list_for_repo $2)" - if [[ $do_continue && -f "$log" ]]; then - echo "Skipping because log file exists. To build anyway, unset --continue flag." 
- return 0 - fi - if [[ $do_echo ]]; then - SKIP_DOCKER_BUILD=1 $cmd 2>&1 | tee "$log" - exit=${PIPESTATUS[0]} - else - SKIP_DOCKER_BUILD=1 $cmd &> "$log" - exit=$? - fi - stop_time=$(date +%s) - elapsed_formatted=$( format_time $(($stop_time-$start_time)) ) - echo " time to build $elapsed_formatted" - if [ $exit == 0 ]; then - echo_green " ==> SUCCESS: $2" - echo " Book can be found at $book_destination" - else - echo_red " ==> FAILED with $exit: $2" - echo " For more information see $log" - fi - return $exit -} - -# Read book list & collect data on runs -total_start=$(date +%s) -failed_count=0 -success_count=0 -# Build container once -DOCKER_DEFAULT_PLATFORM=linux/amd64 DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 docker build --tag enki-for-builder --file ./Dockerfile ./. -while read -r line; do - repo=${line%%' '*} - # https://stackoverflow.com/a/35208546 - echo "" | run_and_log_enki $arg_command $repo \ - && success_count=$(($success_count+1)) || failed_count=$(($failed_count+1)) -done < <(cat "$all_books") -total_end=$(date +%s) -elapsed_formatted=$( format_time $(($total_end-$total_start)) ) - -# Final report: -echo " -Yeehaw! You've built E V E R Y T H I N G. 
Total runtime was $elapsed_formatted -You can find the built books in $arg_data_dir -Successes $success_count failures $failed_count -" diff --git a/cross-lib/update_books.rb b/cross-lib/update_books.rb deleted file mode 100755 index 734318d4..00000000 --- a/cross-lib/update_books.rb +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env ruby - -require 'json' -require 'set' - -# Files -base_dir = 'cross-lib/book-data' -out_file = "#{base_dir}/AUTO_books.txt" -ubl = "#{base_dir}/USER_ubl.txt" -`mkdir -p #{base_dir}; touch #{out_file}; touch #{ubl}` - - -# Refresh ABL (bash) -abl_hash = JSON.parse( - `curl -Ss https://raw.githubusercontent.com/openstax/content-manager-approved-books/main/approved-book-list.json` -) - -books=Set[] -# Write unapproved books -if File.exist?(ubl) - File.readlines(ubl).each do |line| - books.add(line) - end -end - -# Write ABL -def write_abl_with_slugs(abl_hash:, books:) # TODO: delete this method? - abl_hash['approved_books'].each do |book| - repo = book['repository_name'] - book['versions'].sort_by{ |version| - version['commit_metadata']['committed_at'] - }.reverse!.first['commit_metadata']['books'].each do |volume| - books.add("#{repo} #{volume['slug']}\n") - end - end -end - -def write_abl_without_slugs(abl_hash:, books:) - abl_hash['approved_books'].each do |book| - books.add("#{book['repository_name']}\n") - end -end - -# Exchange for write_abl_with_slugs if we need slugs in future? -write_abl_without_slugs(abl_hash: abl_hash, books: books) - -# Add files -File.delete(out_file) -books.each do |book| - File.write(out_file, book, mode: 'a') -end - - diff --git a/cross-lib/updater.sh b/cross-lib/updater.sh deleted file mode 100755 index 9fe9ab48..00000000 --- a/cross-lib/updater.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -# Run this to update the book list without having ruby installed. -root_dir=cross-lib -docker build --tag cross-lib --file $root_dir/Dockerfile.ruby_env $root_dir/. 
-docker run \ - --rm -it \ - --mount type=bind,source=$(pwd)/cross-lib,target=/code/cross-lib \ - --name cross-lib-builder \ - cross-lib \ - $root_dir/update_books.rb