Difference between revisions of "U3ETAS Configurations"

From SCECpedia
Jump to navigation · Jump to search
Line 1: Line 1:
 
Notes for configuring UCERF3 ETAS  
 
Notes for configuring UCERF3 ETAS  
 +
 +
== Setup git repo access ==
 +
 +
 +
== run gen_qw_config.sh ==
 +
<pre>
 +
login1.stampede3(1114)$ more gen_qw_config.sh
 +
#!/bin/bash
 +
 +
# Generate a timestamp string in MMDD_HHMMSS_nanoseconds format
 +
TimeStamp=$(date +%m%d_%H%M%S_%N)
 +
echo "Starting RunID: ${TimeStamp}"
 +
 +
# Job Configuration Params
 +
NUM_NODE=14
 +
NUM_SIM=100000
 +
EVENTID=ci41019687
 +
 +
# While in interactive mode, run this
 +
mkdir $ETAS_SIM_DIR/${TimeStamp}
 +
cd "$ETAS_SIM_DIR/${TimeStamp}" && u3etas_comcat_event_config_builder.sh \
 +
  --output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
 +
  --event-id "$EVENTID" \
 +
  --num-simulations "$NUM_SIM" \
 +
  --days-before 7 \
 +
  --etas-k-cov 1.5 \
 +
  --finite-surf-shakemap \
 +
  --finite-surf-shakemap-min-mag 5.0 \
 +
  --max-point-src-mag 6 \
 +
  --random-seed "$(date +"%Y")" \
 +
  --duration-years 1 \
 +
  --binary-output \
 +
  --hpc-site TACC_FRONTERA \
 +
  --nodes "$NUM_NODE" \
 +
  --hours 12 \
 +
  --queue normal
 +
</pre>
  
 
== Run U3E on Stampede 3 ==
 
== Run U3E on Stampede 3 ==

Revision as of 21:15, 11 July 2025

Notes for configuring UCERF3 ETAS

== Setup git repo access ==


run gen_qw_config.sh

login1.stampede3(1114)$ more gen_qw_config.sh 
#!/bin/bash

# Generate a timestamp string in MMDD_HHMMSS_nanoseconds format
TimeStamp=$(date +%m%d_%H%M%S_%N)
echo "Starting RunID: ${TimeStamp}"

# Job Configuration Params
NUM_NODE=14
NUM_SIM=100000
EVENTID=ci41019687

# While in interactive mode, run this
mkdir $ETAS_SIM_DIR/${TimeStamp}
cd "$ETAS_SIM_DIR/${TimeStamp}" && u3etas_comcat_event_config_builder.sh \
  --output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
  --event-id "$EVENTID" \
  --num-simulations "$NUM_SIM" \
  --days-before 7 \
  --etas-k-cov 1.5 \
  --finite-surf-shakemap \
  --finite-surf-shakemap-min-mag 5.0 \
  --max-point-src-mag 6 \
  --random-seed "$(date +"%Y")" \
  --duration-years 1 \
  --binary-output \
  --hpc-site TACC_FRONTERA \
  --nodes "$NUM_NODE" \
  --hours 12 \
  --queue normal

Run U3E on Stampede 3

  • use $WORK/ucerf3 directory
  • Get event ID from comcat
  • Update ./gen_qw_config.sh. Add the correct event id
  • request an interactive node "$idev"
  • ./gen_qw_config.sh
  • This generates a time-stamped subdirectory. Edit the slurm scripts in this directory before submitting jobs
  • Edit
c454-021[skx](1003)$ cat Stampede3_mods.md 
Change 1: stampede3 sbatch

#!/bin/bash

#SBATCH -t 02:00:00
#SBATCH --nodes 14
#SBATCH --ntasks 14
#SBATCH --cpus-per-task=48
#SBATCH -p skx-dev
#SBATCH --mem 0
#SBATCH --job-name=u3etas
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

# Report on status

NP=${SLURM_NTASKS}
echo "Running on $NP Frontera nodes: $NP"
echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"

echo "Job ID: $SLURM_JOB_ID"
echo "Job name: $SLURM_JOB_NAME"
echo "Node list: $SLURM_NODELIST"
echo "Number of tasks: $SLURM_NTASKS"
echo "Number of CPUs per task: $SLURM_CPUS_PER_TASK"

echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"

Change 2: JAVA and MPJ parameters

## JAVA/MPJ PARAMETERS ##

# maximum memory in gigabytes. should be close to, but not over, total memory available
MEM_GIGS=144

# number of etas threads. should be approximately MEM_GIGS/5, and no more than the total number of threads available
THREADS=18

# MPJ_HOME directory, fine to use mine
MPJ_HOME=$WORK/mpj-express

Change 3: hostfile name from fmpjg to mpj

NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}"

Change 4: export command

export MPJ_HOME=$WORK/mpj-express
export PATH=$PATH:$FMPJ_HOME/bin

Change 5: Run command between the date commands

echo "RUNNING MPJ"
t1=$(date +%s)  # epoch start time in seconds

# Run the job
mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$FMPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \
    --min-dispatch "$MIN_DISPATCH" \
    --max-dispatch "$MAX_DISPATCH" \
    --threads "$THREADS" \
    $TEMP_OPTION \
    $SCRATCH_OPTION \
    $CLEAN_OPTION \
    --end-time "$(scontrol show job "$SLURM_JOB_ID" | grep -o 'EndTime=[^ ]*' | cut -d= -f2)" \
    "$ETAS_CONF_JSON"
ret=$?

date

# End time
t2=$(date +%s)

# Time difference calculation (arithmetic expression directly, no need for bc)
numSec=$((t2 - t1))

# Format as HH:MM:SS
runTime=$(date -ud "@$numSec" +%T)

echo "Time on compute nodes: $runTime ($numSec seconds)"
exit $ret

Change 6: Update plot_.slurm

#SBATCH -t 01:00:00
#SBATCH -N 1
#SBATCH -p skx-dev
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

Common Parameter Settings

--output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
 --event-id "$EVENTID" \
 --num-simulations "$NUM_SIM" \
 --days-before 7 \
 --etas-k-cov 1.5 \
 --finite-surf-shakemap \
 --finite-surf-shakemap-min-mag 5.0 \
 --max-point-src-mag 6 \
 --random-seed "$(date +"%Y")" \
 --duration-years 1 \
 --binary-output \
 --hpc-site TACC_FRONTERA \
 --nodes "$NUM_NODE" \
 --hours 12 \
 --queue normal

Stampede3 Modifications to Slurm Script


Change 1: stampede3 sbatch

#!/bin/bash

#SBATCH -t 02:00:00
#SBATCH --nodes 14
#SBATCH --ntasks 14
#SBATCH --cpus-per-task=48
#SBATCH -p skx-dev
#SBATCH --mem 0
#SBATCH --job-name=u3etas
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

# Report on status

NP=${SLURM_NTASKS}
echo "Running on $NP Frontera nodes: $NP"
echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"

echo "Job ID: $SLURM_JOB_ID"
echo "Job name: $SLURM_JOB_NAME"
echo "Node list: $SLURM_NODELIST"
echo "Number of tasks: $SLURM_NTASKS"
echo "Number of CPUs per task: $SLURM_CPUS_PER_TASK"

echo "SLURM_NTASKS=$SLURM_NTASKS"
echo "NP=$NP"

Change 2: JAVA and MPJ parameters

## JAVA/MPJ PARAMETERS ##

# maximum memory in gigabytes. should be close to, but not over, total memory available
MEM_GIGS=144

# number of etas threads. should be approximately MEM_GIGS/5, and no more than the total number of threads available
THREADS=18

# MPJ_HOME directory, fine to use mine
MPJ_HOME=$WORK/mpj-express

Change 3: hostfile name from fmpjg to mpj

NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}"

Change 4: export command

export MPJ_HOME=$WORK/mpj-express
export PATH=$PATH:$FMPJ_HOME/bin

Change 5: Run command between the date commands

echo "RUNNING MPJ"
t1=$(date +%s)  # epoch start time in seconds

# Run the job
mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$FMPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \
    --min-dispatch "$MIN_DISPATCH" \
    --max-dispatch "$MAX_DISPATCH" \
    --threads "$THREADS" \
    $TEMP_OPTION \
    $SCRATCH_OPTION \
    $CLEAN_OPTION \
    --end-time "$(scontrol show job "$SLURM_JOB_ID" | grep -o 'EndTime=[^ ]*' | cut -d= -f2)" \
    "$ETAS_CONF_JSON"
ret=$?

date

# End time
t2=$(date +%s)

# Time difference calculation (arithmetic expression directly, no need for bc)
numSec=$((t2 - t1))

# Format as HH:MM:SS
runTime=$(date -ud "@$numSec" +%T)

echo "Time on compute nodes: $runTime ($numSec seconds)"
exit $ret

Change plot slurm

Change 1:

#SBATCH -t 01:00:00
#SBATCH -N 1
#SBATCH -p skx-dev
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

Related Entries