Merge pull request #114 from flux-framework/feature/unittests
Changes to add unit tests and internal testing
JaeseungYeom committed Jul 30, 2024
2 parents fac9b4d + f83e792 commit b889f38
Showing 79 changed files with 4,467 additions and 105 deletions.
3 changes: 2 additions & 1 deletion .clang-format
@@ -22,4 +22,5 @@ SpaceBeforeParens: Always
TabWidth: '4'
UseTab: Never
AlwaysBreakAfterReturnType: None
AlwaysBreakAfterDefinitionReturnType: None
ContinuationIndentWidth: Always
17 changes: 14 additions & 3 deletions .gitignore
@@ -97,9 +97,10 @@ compile_flags.txt

# local editor config dirs
.vscode
.idea
.idea*
.clangd

.cache
cmake-build-debug
# ignore installable version of dyadrun
dyadrun
flux_barrier
@@ -112,7 +113,17 @@ flux_barrier

# Python stuff
**/__pycache__/
**/build
**/build*
**/*.egg-info
/install/
/dyad-env/
env
hydra_log
docs/demos/ecp_feb_2023/c_cons
docs/demos/ecp_feb_2023/cpp_cons
docs/demos/ecp_feb_2023/c_prod
docs/demos/ecp_feb_2023/cpp_prod
tests/integration/dlio_benchmark/logs
scripts/checkpoints
scripts/logs
tests/integration/dlio_benchmark/perf_analysis/.ipynb_checkpoints
12 changes: 10 additions & 2 deletions CMakeLists.txt
@@ -13,12 +13,12 @@ set(DYAD_PACKAGE_VERSION_MAJOR "${DYAD_VERSION_MAJOR}.${DYAD_VERSION_MINOR}")
set(DYAD_PACKAGE_VERSION_MINOR "${DYAD_VERSION_PATCH}")
set(DYAD_PACKAGE_STRING "${DYAD_PACKAGE_NAME} ${DYAD_PACKAGE_VERSION}")
set(DYAD_PACKAGE_TARNAME "${DYAD_PACKAGE}")

project(dyad LANGUAGES C CXX)

# Convenience defines
string(TOUPPER "${PROJECT_NAME}" UPPER_PROJECT_NAME)
string(TOLOWER "${PROJECT_NAME}" LOWER_PROJECT_NAME)
set(DYAD_PROJECT_DIR ${CMAKE_CURRENT_SOURCE_DIR})

#------------------------------------------------------------------------------
# Internal Includes for header and libraries
@@ -110,7 +110,7 @@ set(DYAD_LOGGER "NONE" CACHE STRING "Logger to use for DYAD")
set_property(CACHE DYAD_LOGGER PROPERTY STRINGS FLUX CPP_LOGGER NONE)
set(DYAD_LOGGER_LEVEL "NONE" CACHE STRING "Logging level to use for DYAD")
set_property(CACHE DYAD_LOGGER_LEVEL PROPERTY STRINGS DEBUG INFO WARN ERROR NONE)

option(DYAD_ENABLE_TESTS "Enable dyad tests" OFF)

#------------------------------------------------------------------------------
# Compiler setup
@@ -459,6 +459,8 @@ string(APPEND _str
" DYAD_ENABLE_UCX_DATA: ${DYAD_ENABLE_UCX_DATA}\n")
string(APPEND _str
" DYAD_ENABLE_UCX_DATA_RMA: ${DYAD_ENABLE_UCX_DATA_RMA}\n")
string(APPEND _str
" DYAD_ENABLE_TESTS: ${DYAD_ENABLE_TESTS}\n")
string(APPEND _str
" DYAD_PROFILER: ${DYAD_PROFILER}\n")
string(APPEND _str
@@ -513,3 +515,9 @@ install(FILES "${CMAKE_BINARY_DIR}/dyad_module.lua.install"
RENAME "${DYAD_MODULEFILE_NAME}"
DESTINATION
"${DYAD_INSTALL_SYSCONFDIR}")


if (DYAD_ENABLE_TESTS)
enable_testing()
add_subdirectory(tests)
endif ()
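With this change the new test suite is opt-in: configuring with -DDYAD_ENABLE_TESTS=ON registers the tests/ subdirectory with CTest through enable_testing(), so after a normal build the tests should be runnable with ctest from the build directory.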
11 changes: 7 additions & 4 deletions pydyad/pydyad/bindings.py
@@ -82,9 +82,12 @@ def __del__(self):
self.dyad_bindings_obj = None


class DTLMode(enum.IntEnum):
DYAD_DTL_UCX = 0
DYAD_DTL_FLUX_RPC = 1
class DTLMode(enum.Enum):
DYAD_DTL_UCX = "UCX"
DYAD_DTL_FLUX_RPC = "FLUX_RPC"

def __str__(self):
return self.value

class DTLCommMode(enum.IntEnum):
DYAD_COMM_NONE = 0
@@ -252,7 +255,7 @@ def init(
prod_managed_path.encode() if prod_managed_path is not None else None,
cons_managed_path.encode() if cons_managed_path is not None else None,
ctypes.c_bool(relative_to_managed_path),
dtl_mode.encode() if dtl_mode is not None else None,
str(dtl_mode).encode() if dtl_mode is not None else None,
ctypes.c_int(dtl_comm_mode),
ctypes.c_void_p(flux_handle)
)
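Because DTLMode is now a plain Enum whose values are the wire-format strings, the new __str__ lets init() hand a mode to the C layer via str(...).encode() instead of an integer. A minimal sketch of the new behavior, using only names visible in this diff:

from pydyad.bindings import DTLMode, DTLCommMode

mode = DTLMode.DYAD_DTL_UCX
print(str(mode))            # "UCX": the enum value, via the new __str__
print(str(mode).encode())   # b"UCX": what init() now passes to the C API
print(int(DTLCommMode.DYAD_COMM_NONE))  # DTLCommMode is still an IntEnum, so 0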
37 changes: 37 additions & 0 deletions pydyad/pydyad/hdf.py
@@ -0,0 +1,37 @@
from pydyad.bindings import Dyad

from pathlib import Path

import h5py


class DyadFile(h5py.File):

def __init__(self, fname, mode, file=None, dyad_ctx=None, metadata_wrapper=None):
# According to H5PY, the first positional argument to File.__init__ is fname
self.fname = fname
if not isinstance(self.fname, Path):
self.fname = Path(fname)
self.fname = self.fname.expanduser().resolve()
self.m = mode
if dyad_ctx is None:
raise NameError("'dyad_ctx' argument not provided to pydyad.hdf.File constructor")
self.dyad_ctx = dyad_ctx
if self.m in ("r"):
if (self.dyad_ctx.cons_path is not None and
self.dyad_ctx.cons_path in self.fname.parents):
if metadata_wrapper:
self.dyad_ctx.consume_w_metadata(str(self.fname), metadata_wrapper)
else:
dyad_ctx.consume(str(self.fname))
if file:
super().__init__(file, mode)
else:
super().__init__(fname, mode)

def close(self):
super().close()
if self.m in ("w", "r+"):
if (self.dyad_ctx.prod_path is not None and
self.dyad_ctx.prod_path in self.fname.parents):
self.dyad_ctx.produce(str(self.fname))
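A minimal producer/consumer sketch for the new wrapper, assuming Dyad() constructs an uninitialized context that is then initialized with managed producer and consumer paths (the init() call is elided and the paths are hypothetical):

import numpy as np
from pydyad.bindings import Dyad
from pydyad.hdf import DyadFile

ctx = Dyad()
# ... ctx.init(...) with managed producer/consumer paths goes here ...

# Producer: close() calls produce() when the file sits under the managed prod path.
f = DyadFile("/managed/prod/data.h5", "w", dyad_ctx=ctx)
f.create_dataset("x", data=np.arange(10))
f.close()

# Consumer: the constructor calls consume() when the file sits under the managed cons path.
g = DyadFile("/managed/cons/data.h5", "r", dyad_ctx=ctx)
x = g["x"][()]
g.close()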
4 changes: 2 additions & 2 deletions pydyad/setup.cfg
@@ -8,5 +8,5 @@ classifier =
python_requires = >=3.7
install_requires =
numpy
# dlio_profiler_py @ git+https://github.com/hariharan-devarajan/dlio-profiler.git
pydftracer==1.0.2
h5py
pydftracer==1.0.2
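The new h5py requirement backs the pydyad.hdf module added in this commit; the commented-out dlio_profiler dependency is dropped in favor of the pinned pydftracer.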
1 change: 1 addition & 0 deletions scripts/Testing/Temporary/CTestCostData.txt
@@ -0,0 +1 @@
---
8 changes: 8 additions & 0 deletions scripts/corona.sh
@@ -0,0 +1,8 @@
#!/bin/bash
test_case=$1
NUM_NODES=$2
PPN=$3
source ./setup-env.sh ${test_case} $NUM_NODES $PPN
rm *.core flux.log
rm -rf logs/* profiler/*
flux alloc -q $QUEUE -t $TIME -N $NUM_NODES -o per-resource.count=${BROKERS_PER_NODE} --exclusive --broker-opts=--setattr=log-filename=./logs/flux.log ./run.sh $test_case $NUM_NODES $PPN
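Usage here is ./corona.sh <test_case> <num_nodes> <ppn>; QUEUE, TIME, and BROKERS_PER_NODE are presumably exported by the sourced setup-env.sh, and run.sh executes inside the resulting allocation.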
63 changes: 63 additions & 0 deletions scripts/dspaces/aggregate.py
@@ -0,0 +1,63 @@
from pathlib import Path
import re
import argparse
import pandas as pd


def process_single_run_csvs(dir_path):
dirname = dir_path.name
match_obj = re.match(r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)", dirname)
if match_obj is None:
raise RuntimeError("Cannot parse directory name")
num_nodes = int(match_obj.group("num_nodes"))
ppn = int(match_obj.group("ppn"))
csv_files = list(dir_path.glob("*.csv"))
df = pd.concat(map(pd.read_csv, csv_files), ignore_index=True)
num_ops = len(df)
df = df.drop(columns=["var_name", "version"])
df = df.groupby("rank").agg("sum")
return {
"test_name": match_obj.group("test_name"),
"num_nodes": num_nodes,
"ppn": ppn,
"num_mdata_ops": num_ops,
"data_size": df["data_size"].sum(),
"mdata_time_ns": df["mdata_time_ns"].max(),
"data_time_ns": df["data_time_ns"].max(),
}


def build_result_dataframe(testdir):
top_level_rundir_name = testdir.parent.name
test_dir_name = testdir.name
output_df_name = "{}_{}.csv".format(top_level_rundir_name, test_dir_name)
print("Building", output_df_name)
df_rows = []
for subdir in testdir.iterdir():
if subdir.is_dir():
print("Getting data for", str(subdir))
df_row = process_single_run_csvs(subdir)
df_rows.append(df_row)
output_df = pd.DataFrame(data=df_rows)
return output_df_name, output_df


def main():
parser = argparse.ArgumentParser("Aggregate data for test")
parser.add_argument("testdir", type=Path,
help="Path to the test directory to collect data for")
parser.add_argument("--dump_dir", "-d", type=Path,
help="Directory to dump the resulting CSV into")
args = parser.parse_args()
csv_name, df = build_result_dataframe(args.testdir.expanduser().resolve())
dump_dir = args.dump_dir.expanduser().resolve()
if not dump_dir.is_dir():
print("Creating non-existant dump directory {}".format(str(dump_dir)))
dump_dir.mkdir(parents=True)
full_csv_name = dump_dir / csv_name
df.to_csv(str(full_csv_name))
print("Wrote data to {}".format(str(full_csv_name)))


if __name__ == "__main__":
main()
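Each subdirectory of testdir is expected to be a <test_name>_<num_nodes>_<ppn> run directory holding one CSV per rank (with var_name, version, rank, data_size, mdata_time_ns, and data_time_ns columns); an invocation such as python aggregate.py <testdir> -d results/ then aggregates them into a single <parent>_<testdir>.csv.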
75 changes: 75 additions & 0 deletions scripts/dspaces/collect.py
@@ -0,0 +1,75 @@
from pathlib import Path
import re
import argparse
import json


def validate_log(out_file):
with open(str(out_file), "r") as f:
for line in f:
if line.startswith("[DSPACES_TEST]"):
return line
return None


def validate_dir(path):
dirname = path.name
match_obj = re.match(r"(?P<test_name>[a-zA-Z]+)_(?P<num_nodes>[0-9]+)_(?P<ppn>[0-9]+)", dirname)
if match_obj is None:
raise RuntimeError("Cannot parse directory name")
test_name = match_obj.group("test_name")
num_nodes = int(match_obj.group("num_nodes"))
ppn = int(match_obj.group("ppn"))
# num_tasks = num_nodes * ppn
out_file = path / "run.out"
if not out_file.is_file():
raise RuntimeError("Could not find run.out for {}".format(path))
perf_line = validate_log(out_file)
if perf_line is None:
raise RuntimeError("Run for {} failed because we don't have perf numbers".format(path))
return {
"test_name": test_name,
"num_nodes": num_nodes,
"ppn": ppn,
"perf": perf_line,
}


def validate_rundir(td):
print("Validating tests in {}:".format(td.name))
subdirs = [sd for sd in td.iterdir() if sd.is_dir()]
perf_entries = []
for sd in subdirs:
print(" * Validating {}:".format(sd.name), end=" ")
try:
new_perf = validate_dir(sd)
perf_entries.append(new_perf)
print("GOOD")
except RuntimeError as e:
print("BAD")
raise e
return perf_entries


def main():
parser = argparse.ArgumentParser("Validate runs")
parser.add_argument("testdir", type=Path,
help="Top-level directory representing the results of a single iteration of the testing")
parser.add_argument("--dump_file", "-d", type=Path, default=None,
help="Path to JSON file where we want to dump performance results")
args = parser.parse_args()
perf_entries = validate_rundir(args.testdir.expanduser().resolve())
if args.dump_file is not None:
dump_file = args.dump_file.expanduser().resolve()
if not dump_file.name.endswith(".json"):
raise ValueError("Invalid file suffix for JSON file")
if not dump_file.parent.is_dir():
dump_file.parent.mkdir(parents=True)
with open(str(dump_file), "w") as f:
json.dump(perf_entries, f, indent=4, sort_keys=True)
else:
print(json.dumps(perf_entries, sort_keys=True, indent=4))


if __name__ == "__main__":
main()
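Each run subdirectory must contain a run.out whose [DSPACES_TEST] line carries the performance numbers; running python collect.py <testdir> -d perf.json dumps the validated entries to JSON, while omitting -d prints them to stdout.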
33 changes: 33 additions & 0 deletions scripts/dspaces/corona.sh
@@ -0,0 +1,33 @@
#!/bin/bash

test_case=$1
num_nodes=$2
ppn=$3
timing_root_dir=$4
use_alloc=$5

num_iters=16
num_files=16
request_size=65536
hg_conn_str="ofi+verbs"

extra_flux_flags="--setattr=system.bank=ice4hpc"

source ./setup-env.sh

timing_dir=$timing_root_dir/${test_case}_${num_nodes}_${ppn}

if [ -d $timing_dir ]; then
echo "Dump directory $timing_dir already exists"
exit 1
fi

mkdir -p $timing_dir

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

if $use_alloc; then
flux alloc -q $QUEUE -t $TIME -N $num_nodes --exclusive $extra_flux_flags ./run.sh $test_case $num_nodes $ppn $num_iters $num_files $request_size $hg_conn_str $timing_dir $SCRIPT_DIR
else
flux batch -q $QUEUE -t $TIME -N $num_nodes --output=$timing_dir/run.out --error=$timing_dir/run.err --exclusive $extra_flux_flags ./run.sh $test_case $num_nodes $ppn $num_iters $num_files $request_size $hg_conn_str $timing_dir $SCRIPT_DIR
fi
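The use_alloc flag chooses between an interactive flux alloc and a detached flux batch submission; both forward the same argument list to run.sh, and batch runs capture run.out and run.err in the timing directory that collect.py later validates.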
20 changes: 20 additions & 0 deletions scripts/dspaces/dspaces_start.sh
@@ -0,0 +1,20 @@
#!/bin/bash

echo "## Config file for DataSpaces server
ndim = 1
dims = $1
max_versions = $2
num_apps = 1" > dataspaces.conf

# Use provided number of nodes instead of auto-obtained number
# dspaces_num_nodes=$(flux resource info | grep -oP "\d+ Nodes" | grep -oP "^\d+")

flux submit -N $3 --cores=$(( $3*1 )) --tasks-per-node=$4 dspaces_server $5

# Wait for DataSpaces configuration file to be created.
# If we don't do this, the DataSpaces clients will either crash or hang
sleep 1s
while [ ! -f conf.ds ]; do
sleep 1s
done
sleep 3s
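The positional arguments are, in order: dims, max_versions, the number of server nodes, the tasks per node, and a final argument forwarded to dspaces_server (presumably the Mercury connection string, matching the hg_conn_str set in corona.sh). The polling loop blocks until the server writes conf.ds so clients do not start before the server is up.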
5 changes: 5 additions & 0 deletions scripts/dspaces/dspaces_stop.sh
@@ -0,0 +1,5 @@
#!/bin/bash

flux run --ntasks=1 terminator

# rm conf.ds dataspaces.conf