Difference between revisions of "U3ETAS Configurations"
| (4 intermediate revisions by the same user not shown) | |||
| Line 5: | Line 5: | ||
Make sure you can clone the SCECcode event-report repo, and commit results back that repo. Detailed instructions here: | Make sure you can clone the SCECcode event-report repo, and commit results back that repo. Detailed instructions here: | ||
*[Publishing_UCERF3-ETAS_Event_Reports] | *[Publishing_UCERF3-ETAS_Event_Reports] | ||
| + | |||
| + | == Source ./bashrc to setup environment parameters including ETAS_MEM == | ||
| + | |||
| + | <pre> | ||
| + | # Configure UCERF3 ENV | ||
| + | export JAVA_HOME=$WORK/jdk-22.0.2 | ||
| + | export ETAS_LAUNCHER=$WORK/ucerf3/ucerf3-etas-launcher | ||
| + | export ETAS_SIM_DIR=$WORK/ucerf3/u3etas_sims | ||
| + | export ETAS_MEM_GB=25 #this may take precedance over member values in batch scripts for parallel jobs. | ||
| + | export MPJ_HOME=$WORK/mpj-express | ||
| + | export PATH=$ETAS_LAUNCHER/parallel/slurm_sbin/:$ETAS_LAUNCHER/sbin/:$MPJ_HOME/bin:$JAVA_HOME/bin::$PATH | ||
| + | export ETAS_JAR_DISABLE_UPDATE=1 | ||
| + | </pre> | ||
== Run U3E on Stampede 3 == | == Run U3E on Stampede 3 == | ||
| Line 51: | Line 64: | ||
* Edit the two slurm files produced by this script, using the following changes. | * Edit the two slurm files produced by this script, using the following changes. | ||
| + | Change 1: stampede3 sbatch | ||
<pre> | <pre> | ||
| − | |||
| − | |||
| − | |||
#!/bin/bash | #!/bin/bash | ||
| Line 86: | Line 97: | ||
echo "SLURM_NTASKS=$SLURM_NTASKS" | echo "SLURM_NTASKS=$SLURM_NTASKS" | ||
echo "NP=$NP" | echo "NP=$NP" | ||
| + | </pre> | ||
Change 2: JAVA and MPJ parameters | Change 2: JAVA and MPJ parameters | ||
| + | <pre> | ||
## JAVA/MPJ PARAMETERS ## | ## JAVA/MPJ PARAMETERS ## | ||
| Line 97: | Line 110: | ||
THREADS=18 | THREADS=18 | ||
| − | # | + | # MPJ_HOME directory |
MPJ_HOME=$WORK/mpj-express | MPJ_HOME=$WORK/mpj-express | ||
| + | |||
| + | </pre> | ||
Change 3: hostfile name from fmpjg to mpj | Change 3: hostfile name from fmpjg to mpj | ||
| + | |||
| + | <pre> | ||
NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}" | NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}" | ||
| + | |||
| + | </pre> | ||
Change 4: export command | Change 4: export command | ||
| + | <pre> | ||
export MPJ_HOME=$WORK/mpj-express | export MPJ_HOME=$WORK/mpj-express | ||
export PATH=$PATH:$FMPJ_HOME/bin | export PATH=$PATH:$FMPJ_HOME/bin | ||
| + | </pre> | ||
Change 5: Run command between the date commands | Change 5: Run command between the date commands | ||
| + | <pre> | ||
echo "RUNNING MPJ" | echo "RUNNING MPJ" | ||
t1=$(date +%s) # epoch start time in seconds | t1=$(date +%s) # epoch start time in seconds | ||
# Run the job | # Run the job | ||
| − | mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$ | + | mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$MPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \ |
--min-dispatch "$MIN_DISPATCH" \ | --min-dispatch "$MIN_DISPATCH" \ | ||
--max-dispatch "$MAX_DISPATCH" \ | --max-dispatch "$MAX_DISPATCH" \ | ||
| Line 139: | Line 161: | ||
echo "Time on compute nodes: $runTime ($numSec seconds)" | echo "Time on compute nodes: $runTime ($numSec seconds)" | ||
exit $ret | exit $ret | ||
| + | |||
| + | </pre> | ||
Change 6: Update plot_.slurm | Change 6: Update plot_.slurm | ||
| + | <pre> | ||
#SBATCH -t 01:00:00 | #SBATCH -t 01:00:00 | ||
#SBATCH -N 1 | #SBATCH -N 1 | ||
| Line 153: | Line 178: | ||
</pre> | </pre> | ||
| − | == | + | == Run Comcat_report.sh == |
| + | The two scripts above should have created an output directory with the event-specific U3ETAS results. The directory name format is MonthDay_HourMinSec_Nanosecs (Example: 1126_171427_528129713) | ||
| − | + | There is a script in the same directory as gen_qw_config.sh called: |
| − | + | * comcat_report.sh | |
| − | + | ||
| − | + | You want to edit this file to update the event_id, and the appropriate SimID as shown above - like 1126_171427_528129713 | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
<pre> | <pre> | ||
| + | login1.stampede3(1005)$ cat comcat_report.sh | ||
| + | u3etas_jar_wrapper.sh org.opensha.commons.data.comcat.plot.ComcatReportPageGen --event-id nc75269596 -min-mag 0d --radius 50 --output-parent-dir /work/00329/tg456034/stampede3/ucerf3/event-reports --etas-dir $ETAS_SIM_DIR/1126_171427_528129713 --etas-output-dir /work/00329/tg456034/stampede3/ucerf3/event-reports/ucerf3-etas | ||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
</pre> | </pre> | ||
| − | == | + | == Push results to github == |
| − | + | When this comcat_report.sh runs completely, it will write results to an event-reports directory like: /work/00329/tg456034/stampede3/ucerf3/event-reports | |
| − | |||
| − | |||
| − | + | This is a git repo with its main source on SCECcode. Once we generate new results for an event, we commit the updates in this directory, and then push the new commits to GitHub. From there, SCEC's event pages will pick up any new UCERF3 forecast results and post them online. |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
== Complete comcat reports == | == Complete comcat reports == | ||
Now, complete the posting using the Comcat Report Generator: [[Publishing_UCERF3-ETAS_Event_Reports]] | Now, complete the posting using the Comcat Report Generator: [[Publishing_UCERF3-ETAS_Event_Reports]] | ||
Latest revision as of 03:22, 27 November 2025
Notes for running UCERF3-ETAS on Stampede3 and committing results to the SCEC event-report repo. This is part of the process of posting them to the SCEC Event pages.
Contents
Setup git repo access
Make sure you can clone the SCECcode event-report repo, and commit results back to that repo. Detailed instructions here:
- [Publishing_UCERF3-ETAS_Event_Reports]
Source ./bashrc to set up environment parameters including ETAS_MEM_GB
# Configure UCERF3 ENV
# Environment for the UCERF3-ETAS launcher; every path is rooted at $WORK.
export JAVA_HOME="$WORK/jdk-22.0.2"
export ETAS_LAUNCHER="$WORK/ucerf3/ucerf3-etas-launcher"
export ETAS_SIM_DIR="$WORK/ucerf3/u3etas_sims"
# NOTE: this may take precedence over the memory values set in batch scripts
# for parallel jobs (presumably settings such as MEM_GIGS -- confirm).
export ETAS_MEM_GB=25
export MPJ_HOME="$WORK/mpj-express"
# Use single ':' separators only: an empty PATH entry ("::") makes the shell
# search the current working directory for commands.
export PATH="$ETAS_LAUNCHER/parallel/slurm_sbin/:$ETAS_LAUNCHER/sbin/:$MPJ_HOME/bin:$JAVA_HOME/bin:$PATH"
export ETAS_JAR_DISABLE_UPDATE=1
Run U3E on Stampede 3
- use $WORK/ucerf3 directory
- Get event ID from comcat
- Update ./gen_qw_config.sh. Add the correct event id
- request an interactive node with the "idev" command
- ./gen_qw_config.sh
run gen_qw_config.sh
login1.stampede3(1114)$ more gen_qw_config.sh
#!/bin/bash
# Builds a UCERF3-ETAS ComCat event configuration inside a new time-stamped
# run directory under $ETAS_SIM_DIR.
# Needs ETAS_SIM_DIR in the environment and
# u3etas_comcat_event_config_builder.sh reachable through PATH.

# Stop early when the simulation root is not configured; otherwise the run
# directory would be created directly under "/".
: "${ETAS_SIM_DIR:?ETAS_SIM_DIR must be set}"

# Generate a timestamp string in MMDD_HHMMSS_nanoseconds format
TimeStamp=$(date +%m%d_%H%M%S_%N)
echo "Starting RunID: ${TimeStamp}"

# Job Configuration Params
NUM_NODE=14
NUM_SIM=100000
EVENTID=ci41019687

# Directory that receives the generated configuration and slurm scripts.
RUN_DIR="$ETAS_SIM_DIR/${TimeStamp}"

# While in interactive mode, run this
mkdir -p -- "$RUN_DIR" || exit 1
cd "$RUN_DIR" || exit 1

# The random seed is the current four-digit year.
u3etas_comcat_event_config_builder.sh \
  --output-dir "$RUN_DIR" \
  --event-id "$EVENTID" \
  --num-simulations "$NUM_SIM" \
  --days-before 7 \
  --etas-k-cov 1.5 \
  --finite-surf-shakemap \
  --finite-surf-shakemap-min-mag 5.0 \
  --max-point-src-mag 6 \
  --random-seed "$(date +"%Y")" \
  --duration-years 1 \
  --binary-output \
  --hpc-site TACC_FRONTERA \
  --nodes "$NUM_NODE" \
  --hours 12 \
  --queue normal
Review output of gen_qw_config.sh
- This generated a time-stamped subdirectory. Edit the slurm scripts in this directory before submitting jobs
- Edit the two slurm files produced by this script, using the following changes.
Change 1: stampede3 sbatch
#!/bin/bash
# Slurm header and start-up status report for the UCERF3-ETAS simulation job.
#SBATCH -t 02:00:00
#SBATCH --nodes 14
#SBATCH --ntasks 14
#SBATCH --cpus-per-task=48
#SBATCH -p skx-dev
#SBATCH --mem 0
#SBATCH --job-name=u3etas
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

# Report on status
# NP mirrors the Slurm task count; the header requests one task per node.
NP=${SLURM_NTASKS}
echo "Running on $NP Stampede3 nodes"
echo "Job ID: $SLURM_JOB_ID"
echo "Job name: $SLURM_JOB_NAME"
echo "Node list: $SLURM_NODELIST"
echo "Number of tasks: $SLURM_NTASKS"
echo "Number of CPUs per task: $SLURM_CPUS_PER_TASK"
echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"
Change 2: JAVA and MPJ parameters
## JAVA/MPJ PARAMETERS ##

# maximum memory in gigabytes. should be close to, but not over, total memory available
MEM_GIGS=144

# number of etas threads. should be approximately MEM_GIGS/5, and no more than the total number of threads available
THREADS=18

# MPJ_HOME directory
MPJ_HOME=$WORK/mpj-express
Change 3: hostfile name from fmpj to mpj
# Hostfile path under /tmp; the name embeds the user and PBS_JOBID and uses
# the "mpj" tag (this is the part of the name being changed).
NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}"
Change 4: export command
# Export the MPJ install location and append its bin directory to PATH.
# PATH must reference MPJ_HOME; FMPJ_HOME is not set by these scripts.
export MPJ_HOME=$WORK/mpj-express
export PATH=$PATH:$MPJ_HOME/bin
Change 5: Run command between the date commands
# Launch the MPJ ETAS run, report how long it took, and exit with the
# launcher's exit status.
echo "RUNNING MPJ"
t1=$(date +%s) # epoch start time in seconds
# Run the job
# $TEMP_OPTION, $SCRATCH_OPTION and $CLEAN_OPTION are expanded unquoted, so an
# empty value adds no argument (presumably intentional -- confirm).
# --end-time is taken from the EndTime field of `scontrol show job`.
mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$MPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \
--min-dispatch "$MIN_DISPATCH" \
--max-dispatch "$MAX_DISPATCH" \
--threads "$THREADS" \
$TEMP_OPTION \
$SCRATCH_OPTION \
$CLEAN_OPTION \
--end-time "$(scontrol show job "$SLURM_JOB_ID" | grep -o 'EndTime=[^ ]*' | cut -d= -f2)" \
"$ETAS_CONF_JSON"
# Capture the launcher status immediately, before any other command resets $?.
ret=$?
date
# End time
t2=$(date +%s)
# Time difference calculation (arithmetic expression directly, no need for bc)
numSec=$((t2 - t1))
# Format as HH:MM:SS
runTime=$(date -ud "@$numSec" +%T)
echo "Time on compute nodes: $runTime ($numSec seconds)"
exit "$ret"
Change 6: Update plot_.slurm
#SBATCH -t 01:00:00
#SBATCH -N 1
#SBATCH -p skx-dev
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake
Run Comcat_report.sh
The two scripts above should have created an output directory with the event-specific U3ETAS results. The directory name format is MonthDay_HourMinSec_Nanosecs (Example: 1126_171427_528129713)
There is a script in the same directory as gen_qw_config.sh called:
- comcat_report.sh
Edit this file to update the event ID and the appropriate SimID, i.e. the time-stamped directory name described above (for example 1126_171427_528129713).
login1.stampede3(1005)$ cat comcat_report.sh
u3etas_jar_wrapper.sh org.opensha.commons.data.comcat.plot.ComcatReportPageGen --event-id nc75269596 -min-mag 0d --radius 50 --output-parent-dir /work/00329/tg456034/stampede3/ucerf3/event-reports --etas-dir $ETAS_SIM_DIR/1126_171427_528129713 --etas-output-dir /work/00329/tg456034/stampede3/ucerf3/event-reports/ucerf3-etas
Push results to github
When comcat_report.sh runs to completion, it writes its results to an event-reports directory like: /work/00329/tg456034/stampede3/ucerf3/event-reports
This is a git repo with its main source on SCECcode. Once we generate new results for an event, we commit the updates in this directory, and then push the new commits to GitHub. From there, SCEC's event pages will pick up any new UCERF3 forecast results and post them online.
Complete comcat reports
Now, complete the posting using the Comcat Report Generator: Publishing_UCERF3-ETAS_Event_Reports