Notes for configuring UCERF3 ETAS
== Run U3E on Stampede 3 ==
* Use the $WORK/ucerf3 directory.
* Get the event ID from ComCat (the USGS earthquake catalog).
* Update ./gen_qw_config.sh with the correct event ID.
* Request an interactive node with TACC's "idev" command.
* Run ./gen_qw_config.sh. This generates a time-stamped subdirectory.
* Edit the Slurm scripts in that subdirectory before submitting jobs, applying the changes listed in Stampede3_mods.md (shown below; a sketch of the full shell session follows the listing).
<pre>
c454-021[skx](1003)$ cat Stampede3_mods.md
Change 1: stampede3 sbatch
#!/bin/bash
#SBATCH -t 02:00:00
#SBATCH --nodes 14
#SBATCH --ntasks 14
#SBATCH --cpus-per-task=48
#SBATCH -p skx-dev
#SBATCH --mem 0
#SBATCH --job-name=u3etas
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake
# Report on status
NP=${SLURM_NTASKS}
echo "Running on $NP Frontera nodes: $NP"
echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"
echo "Job ID: $SLURM_JOB_ID"
echo "Job name: $SLURM_JOB_NAME"
echo "Node list: $SLURM_NODELIST"
echo "Number of tasks: $SLURM_NTASKS"
echo "Number of CPUs per task: $SLURM_CPUS_PER_TASK"
echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"
Change 2: JAVA and MPJ parameters
## JAVA/MPJ PARAMETERS ##
# maximum memory in gigabytes. should be close to, but not over, total memory available
MEM_GIGS=144
# number of etas threads. should be approximately MEM_GIGS/5, and no more than the total number of threads available
THREADS=18
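# sizing example (assumes ~192 GB Stampede3 SKX nodes -- verify against node specs):
# MEM_GIGS=144 stays well under total memory, and MEM_GIGS/5 is roughly 28, so
# THREADS could be raised toward 28 while staying under the 48 cpus-per-task above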
# MPJ_HOME directory, fine to use mine
MPJ_HOME=$WORK/mpj-express
Change 3: hostfile name from fmpjg to mpj
NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}"
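# assumed original line, shown only for comparison (the change is just the name):
# NEW_NODEFILE="/tmp/${USER}-hostfile-fmpjg-${PBS_JOBID}"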
Change 4: export command
export MPJ_HOME=$WORK/mpj-express
export PATH=$PATH:$MPJ_HOME/bin
Change 5: Run command between the date commands
echo "RUNNING MPJ"
t1=$(date +%s) # epoch start time in seconds
# Run the job
mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$FMPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \
--min-dispatch "$MIN_DISPATCH" \
--max-dispatch "$MAX_DISPATCH" \
--threads "$THREADS" \
$TEMP_OPTION \
$SCRATCH_OPTION \
$CLEAN_OPTION \
--end-time "$(scontrol show job "$SLURM_JOB_ID" | grep -o 'EndTime=[^ ]*' | cut -d= -f2)" \
"$ETAS_CONF_JSON"
ret=$?
date
# End time
t2=$(date +%s)
# Time difference calculation (arithmetic expression directly, no need for bc)
numSec=$((t2 - t1))
# Format as HH:MM:SS
runTime=$(date -ud "@$numSec" +%T)
echo "Time on compute nodes: $runTime ($numSec seconds)"
exit $ret
Change 6: Update plot_.slurm
#SBATCH -t 01:00:00
#SBATCH -N 1
#SBATCH -p skx-dev
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake
</pre>
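Putting the steps above together, a typical session looks roughly like the sketch below. This is illustrative only: the idev arguments, the name of the generated time-stamped subdirectory, and the names of the generated Slurm scripts are assumptions and will differ from run to run.

<pre>
# interactive session on a development node (arguments shown are examples)
idev -p skx-dev -N 1 -m 60

cd $WORK/ucerf3

# put the ComCat event ID into the config generator, then run it
vi gen_qw_config.sh
./gen_qw_config.sh

# the generator writes a time-stamped subdirectory (name here is hypothetical)
cd 2025_07_11-mainshock-example

# apply the edits from Stampede3_mods.md to the generated Slurm scripts, then
# submit the simulation job and, once it finishes, the plotting job
sbatch etas_sim_mpj.slurm      # script name is an assumption; use the generated one
sbatch plot_results.slurm      # likewise
</pre>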
== Common Parameter Settings ==
<pre>
--output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
--event-id "$EVENTID" \
--num-simulations "$NUM_SIM" \
--days-before 7 \
--etas-k-cov 1.5 \
--finite-surf-shakemap \
--finite-surf-shakemap-min-mag 5.0 \
--max-point-src-mag 6 \
--random-seed "$(date +"%Y")" \
--duration-years 1 \
--binary-output \
--hpc-site TACC_FRONTERA \
--nodes "$NUM_NODE" \
--hours 12 \
--queue normal
</pre>
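For context, the options above are the kind of arguments gen_qw_config.sh passes to the UCERF3-ETAS configuration builder. The sketch below is a minimal, hypothetical excerpt assuming the script wraps u3etas_config_builder.sh from the ucerf3-etas-launcher package; the variable values (EVENTID, NUM_SIM, NUM_NODE, ETAS_SIM_DIR) are placeholders, and the builder script name should be verified against the local installation. Note that the option list targets --hpc-site TACC_FRONTERA, which is presumably why the generated Slurm scripts need the Stampede3-specific edits above.

<pre>
#!/bin/bash
# hypothetical excerpt of gen_qw_config.sh (names and values are placeholders)

EVENTID=ci40000000                 # ComCat event ID (placeholder)
NUM_SIM=100000                     # number of ETAS simulations (placeholder)
NUM_NODE=14                        # matches --nodes in the sbatch header above
ETAS_SIM_DIR=$WORK/ucerf3/sims
TimeStamp=$(date +"%Y_%m_%d-%H_%M_%S")

u3etas_config_builder.sh \
  --output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
  --event-id "$EVENTID" \
  --num-simulations "$NUM_SIM" \
  --days-before 7 \
  --etas-k-cov 1.5 \
  --finite-surf-shakemap \
  --finite-surf-shakemap-min-mag 5.0 \
  --max-point-src-mag 6 \
  --random-seed "$(date +"%Y")" \
  --duration-years 1 \
  --binary-output \
  --hpc-site TACC_FRONTERA \
  --nodes "$NUM_NODE" \
  --hours 12 \
  --queue normal
</pre>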