Skip to content

Commit

Permalink
add/update scripts for export flat jobs
Browse files Browse the repository at this point in the history
  • Loading branch information
trautmane committed Jun 1, 2024
1 parent ebe6e2c commit 4e56074
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 5 deletions.
2 changes: 1 addition & 1 deletion run_scripts/multi-sem/wafer-53-center7/00_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export FLINTSTONE="/groups/flyTEM/flyTEM/render/spark/spark-janelia/flintstone.s
# --------------------------------------------------------------------
export N5_SAMPLE_PATH="/nrs/hess/data/hess_wafer_53/export/${RENDER_OWNER}.n5"
export N5_HEIGHT_FIELDS_DOWNSAMPLING_FACTORS="2,2,1"
export N5_FLAT_DATASET_ROOT="/flat/${RAW_SLAB}"
export N5_FLAT_DATASET_ROOT="/flat_clahe/${RAW_SLAB}"
export N5_FLAT_RAW_DATASET="${N5_FLAT_DATASET_ROOT}/raw/s0"
export N5_SURFACE_ROOT="/surface-align/run_${RUN_TIMESTAMP}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@ if (( $# != 2 )); then
fi

RAW_SLAB="${1}"
N_NODES="${2}" # wafer 53, s070 took 17 minutes with 30 nodes
N_NODES="${2}" # wafer_53_center7: slab s251 took 49 minutes with 3 nodes, slab s402 took 19 minutes with 9 nodes

ABSOLUTE_SCRIPT=$(readlink -m "${0}")
SCRIPT_DIR=$(dirname "${ABSOLUTE_SCRIPT}")
source "${SCRIPT_DIR}/00_config.sh" "${RAW_SLAB}"

validateDirectoriesExist "${N5_SAMPLE_PATH}${N5_ALIGNED_SLAB_DATASET}/s0" "${N5_SAMPLE_PATH}${N5_HEIGHT_FIELDS_FIX_DATASET}"
N5_RAW_S0_DATASET="${N5_ALIGNED_SLAB_DATASET}_norm-layer-clahe/s0"
validateDirectoriesExist "${N5_SAMPLE_PATH}${N5_RAW_S0_DATASET}" "${N5_SAMPLE_PATH}${N5_HEIGHT_FIELDS_FIX_DATASET}"

FULL_FLAT_DATASET_PATH="${N5_SAMPLE_PATH}${N5_FLAT_RAW_DATASET}"
if [[ -d ${FULL_FLAT_DATASET_PATH} ]]; then
echo "
ERROR: ${FULL_FLAT_DATASET_PATH} exists
For runs after new height field fixes, move the existing data to be deleted like this:
mv ${N5_SAMPLE_PATH}${N5_FLAT_DATASET_ROOT} /nrs/flyem/render/n5/delete_me
mv ${N5_SAMPLE_PATH}${N5_HEIGHT_FIELDS_FIX_DATASET} /nrs/hess/data/hess_wafer_53/export/hess_wafer_53_center7.n5/delete_me
"
exit 1
fi
Expand All @@ -34,7 +35,7 @@ ARGV="\
--n5RawPath=${N5_SAMPLE_PATH} \
--n5FieldPath=${N5_SAMPLE_PATH} \
--n5OutputPath=${N5_SAMPLE_PATH} \
--n5RawDataset=${N5_ALIGNED_SLAB_DATASET}/s0 \
--n5RawDataset=${N5_RAW_S0_DATASET} \
--n5FieldGroup=${N5_HEIGHT_FIELDS_FIX_DATASET} \
--n5OutDataset=${N5_FLAT_RAW_DATASET} \
--padding=3 \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# Launches org.janelia.saalfeldlab.hotknife.SparkExportFlattenedVolumeMultiSEMBatch
# through flintstone.sh for the datasets listed in a CSV file.
#
# Usage: <this script> <dataset csv file>
#
# N5_SAMPLE_PATH and HOT_KNIFE_JAR are not defined here; they are expected to be
# provided by the sourced 00_config.sh.
# Everything printed by the launch is appended to a time-stamped file in
# logs/71_export_flat (relative to the current working directory).

set -e

if (( $# < 1 )); then
  echo "USAGE: $0 <dataset csv file>"
  exit 1
fi

ABSOLUTE_SCRIPT=$(readlink -m "$0")
SCRIPT_DIR=$(dirname "${ABSOLUTE_SCRIPT}")

# "NA" is passed where other scripts pass a slab name
source "${SCRIPT_DIR}"/00_config.sh "NA"

# create log files and directories as group writable
umask 0002

DATASET_CSV="$1"
N_NODES="20"
export RUNTIME="233:59" # batches with ? slabs took between ? and ? hours to complete

if [[ ! -f "${DATASET_CSV}" ]]; then
  echo "ERROR: csv file ${DATASET_CSV} not found"
  exit 1
fi

#-----------------------------------------------------------
# Spark executor setup with 11 cores per worker ...
# (2 executors * 5 cores per executor + 1 overhead core)
# NOTE(review): these exported values are presumably read by flintstone.sh - confirm.

export N_EXECUTORS_PER_NODE=2
export N_CORES_PER_EXECUTOR=5
# To distribute work evenly, recommended number of tasks/partitions is 3 times the number of cores.
#N_TASKS_PER_EXECUTOR_CORE=3
export N_OVERHEAD_CORES_PER_WORKER=1
#N_CORES_PER_WORKER=$(( (N_EXECUTORS_PER_NODE * N_CORES_PER_EXECUTOR) + N_OVERHEAD_CORES_PER_WORKER ))
export N_CORES_DRIVER=1

#-----------------------------------------------------------
RUN_TIME=$(date +"%Y%m%d_%H%M%S")
CLASS="org.janelia.saalfeldlab.hotknife.SparkExportFlattenedVolumeMultiSEMBatch"

# Keep each argument as its own array element so that a path containing
# whitespace or glob characters is passed to the job unchanged.
ARGV=(
  "--n5RootPath=${N5_SAMPLE_PATH}"
  "--datasetCsv=${DATASET_CSV}"
  "--padding=3"
  "--blockSize=128,128,64"
  "--downsample"
)

LOG_DIR="logs/71_export_flat"
LOG_FILE="${LOG_DIR}/export_flat.${RUN_TIME}.out"
mkdir -p "${LOG_DIR}"

# Without pipefail the pipeline below would return the exit status of tee,
# so a failed flintstone.sh launch would look like success to the caller.
# It is enabled here (after 00_config.sh has been sourced) so that it only
# affects this pipeline.
set -o pipefail

# use shell group to tee all output to log file
{

  echo "Running with arguments:
${ARGV[*]}
"
  /groups/flyTEM/flyTEM/render/spark/spark-janelia/flintstone.sh "${N_NODES}" "${HOT_KNIFE_JAR}" "${CLASS}" "${ARGV[@]}"
} 2>&1 | tee -a "${LOG_FILE}"

Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash

# Writes flat_dataset.<count>.csv files into the current working directory.
# Each file holds up to SLABS_PER_FILE rows, one row per slab, of the form:
#   <raw dataset>,<height fields fix dataset>,<output dataset>
# Before a row is written, the raw and height field datasets must exist and the
# output dataset must not exist under N5_SAMPLE_PATH; otherwise the script
# prints an error and exits with status 1.
#
# ALL_SLABS, N5_SAMPLE_PATH and the per-slab dataset variables are not defined
# here; they are expected to be provided by the sourced ../00_config.sh.

set -e

ABSOLUTE_SCRIPT=$(readlink -m "$0")
SCRIPT_DIR=$(dirname "${ABSOLUTE_SCRIPT}")

# Exits with an error unless the directory given as $1 exists.
flatCsvRequireDirectory() {
  local fullPath="$1"
  if [[ -d "${fullPath}" ]]; then
    return 0
  fi
  echo "ERROR: ${fullPath} does not exist"
  exit 1
}

# Exits with an error if the directory given as $1 already exists.
flatCsvRejectDirectory() {
  local fullPath="$1"
  if [[ ! -d "${fullPath}" ]]; then
    return 0
  fi
  echo "ERROR: ${fullPath} already exists"
  exit 1
}

# initial load with a placeholder slab name
source "${SCRIPT_DIR}"/../00_config.sh "NA"

SLABS_PER_FILE=31 # 31 slabs should take about 6 hours to complete, 402 / 31 = 13 files
COUNT=0

for SLAB in ${ALL_SLABS}; do

  # reload the configuration so the dataset variables refer to this slab
  source "${SCRIPT_DIR}"/../00_config.sh "${SLAB}"

  # start (and truncate) a new csv file every SLABS_PER_FILE slabs
  if (( COUNT % SLABS_PER_FILE == 0 )); then
    printf -v C_VAL '%05d' "${COUNT}"
    CSV_FILE="flat_dataset.${C_VAL}.csv"
    : > "${CSV_FILE}"
  fi

  # /render/slab_000_to_009/s002_m395_align_no35_horiz_avgshd_ic___20240504_084955_norm-layer-clahe/s0
  RAW_DATASET="${N5_ALIGNED_SLAB_DATASET}_norm-layer-clahe/s0"
  flatCsvRequireDirectory "${N5_SAMPLE_PATH}${RAW_DATASET}"

  # /heightfields_fix/slab_000_to_009/s002_m395
  flatCsvRequireDirectory "${N5_SAMPLE_PATH}${N5_HEIGHT_FIELDS_FIX_DATASET}"

  # /flat_clahe/s002_m395/raw/s0
  OUT_DATASET="${N5_FLAT_DATASET_ROOT}/raw/s0"
  flatCsvRejectDirectory "${N5_SAMPLE_PATH}${OUT_DATASET}"

  printf '%s,%s,%s\n' "${RAW_DATASET}" "${N5_HEIGHT_FIELDS_FIX_DATASET}" "${OUT_DATASET}" >> "${CSV_FILE}"

  COUNT=$(( COUNT + 1 ))

done

# show the generated files
ls -alh flat_dataset.*.csv

0 comments on commit 4e56074

Please sign in to comment.