U3ETAS Configurations

Notes for configuring UCERF3 ETAS

Run U3ETAS on Stampede3

  • Use the $WORK/ucerf3 directory
  • Get the event ID for the triggering earthquake from ComCat
  • Update ./gen_qw_config.sh, adding the correct event ID
  • Request an interactive node with "idev"
  • Run ./gen_qw_config.sh
  • This generates a time-stamped subdirectory; edit the SLURM scripts in that directory before submitting jobs
  • Edit the scripts following Stampede3_mods.md, reproduced below after the workflow sketch
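
A minimal sketch of these steps, assuming an example ComCat event ID and placeholder directory and script names (use the names that gen_qw_config.sh actually produces):

<pre>
# work from the UCERF3 area on $WORK
cd $WORK/ucerf3

# edit gen_qw_config.sh and set the ComCat event ID of the triggering event,
# e.g. ci38457511 (2019 M7.1 Ridgecrest), then request an interactive node
idev -p skx-dev

# generate the configuration; this creates a time-stamped subdirectory
./gen_qw_config.sh

# apply the edits from Stampede3_mods.md (below) to the SLURM scripts in the
# new subdirectory, then submit the simulation job
cd <time-stamped directory>
sbatch <simulation>.slurm

# optionally submit the plot job so it runs only after the simulation succeeds
sbatch --dependency=afterok:<simulation job ID> <plot>.slurm
</pre>
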
c454-021[skx](1003)$ cat Stampede3_mods.md 
Change 1: stampede3 sbatch

#!/bin/bash

#SBATCH -t 02:00:00
#SBATCH --nodes 14
#SBATCH --ntasks 14
#SBATCH --cpus-per-task=48
#SBATCH -p skx-dev
#SBATCH --mem 0
#SBATCH --job-name=u3etas
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake

# Report on status

NP=${SLURM_NTASKS}
echo "Running on $NP Stampede3 nodes"

echo "Job ID: $SLURM_JOB_ID"
echo "Job name: $SLURM_JOB_NAME"
echo "Node list: $SLURM_NODELIST"
echo "Number of tasks: $SLURM_NTASKS"
echo "Number of CPUs per task: $SLURM_CPUS_PER_TASK"

Change 2: JAVA and MPJ parameters

## JAVA/MPJ PARAMETERS ##

# maximum memory in gigabytes. should be close to, but not over, total memory available
MEM_GIGS=144

# number of etas threads. should be approximately MEM_GIGS/5, and no more than the total number of threads available
THREADS=18

# MPJ_HOME directory (MPJ Express install), fine to use mine
MPJ_HOME=$WORK/mpj-express

Change 3: hostfile name changed from fmpjg to mpj

NEW_NODEFILE="/tmp/${USER}-hostfile-mpj-${PBS_JOBID}"

Change 4: export command

export MPJ_HOME=$WORK/mpj-express
export PATH=$PATH:$FMPJ_HOME/bin

Change 5: Run command between the date commands

echo "RUNNING MPJ"
t1=$(date +%s)  # epoch start time in seconds

# Run the job
mpjrun_errdetect_wrapper.sh "$PBS_NODEFILE" -dev hybdev -Djava.library.path="$FMPJ_HOME/lib" -Xmx"${MEM_GIGS}G" -cp "$JAR_FILE" scratch.UCERF3.erf.ETAS.launcher.MPJ_ETAS_Launcher \
    --min-dispatch "$MIN_DISPATCH" \
    --max-dispatch "$MAX_DISPATCH" \
    --threads "$THREADS" \
    $TEMP_OPTION \
    $SCRATCH_OPTION \
    $CLEAN_OPTION \
    --end-time "$(scontrol show job "$SLURM_JOB_ID" | grep -o 'EndTime=[^ ]*' | cut -d= -f2)" \
    "$ETAS_CONF_JSON"
ret=$?

date

# End time
t2=$(date +%s)

# Time difference calculation (arithmetic expression directly, no need for bc)
numSec=$((t2 - t1))

# Format as HH:MM:SS
runTime=$(date -ud "@$numSec" +%T)

echo "Time on compute nodes: $runTime ($numSec seconds)"
exit $ret

Change 6: Update plot_.slurm

#SBATCH -t 01:00:00
#SBATCH -N 1
#SBATCH -p skx-dev
#SBATCH --output=%j_%x.out
#SBATCH --error=%j_%x.err
#SBATCH --mail-user=maechlin@usc.edu
#SBATCH --mail-type=ALL
#SBATCH --export=ALL
#SBATCH --account=DS-Cybershake
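
A note on the run command in Change 5 above: the --end-time argument passes the job's scheduled wall-clock end to MPJ_ETAS_Launcher, presumably so the launcher can stop dispatching work before the allocation expires. For reference, the embedded scontrol pipeline evaluates to a plain timestamp (job ID and time below are illustrative):

<pre>
$ scontrol show job 1234567 | grep -o 'EndTime=[^ ]*' | cut -d= -f2
2025-07-11T23:13:00
</pre>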

Common Parameter Settings

--output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
--event-id "$EVENTID" \
--num-simulations "$NUM_SIM" \
--days-before 7 \
--etas-k-cov 1.5 \
--finite-surf-shakemap \
--finite-surf-shakemap-min-mag 5.0 \
--max-point-src-mag 6 \
--random-seed "$(date +"%Y")" \
--duration-years 1 \
--binary-output \
--hpc-site TACC_FRONTERA \
--nodes "$NUM_NODE" \
--hours 12 \
--queue normal
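
These flags are presumably what gen_qw_config.sh passes to the UCERF3-ETAS launcher's ComCat configuration builder. The sketch below is an assumption-laden illustration: the builder script name (u3etas_comcat_event_config_builder.sh from the ucerf3-etas-launcher package) and every variable value are placeholders, not values taken from these notes.

<pre>
#!/bin/bash
# illustrative values; set these for the event being modeled
EVENTID=ci38457511                      # ComCat event ID (example: 2019 M7.1 Ridgecrest)
NUM_SIM=10000                           # number of ETAS catalogs to simulate
NUM_NODE=14                             # nodes to request for the MPJ job
ETAS_SIM_DIR=$WORK/ucerf3/simulations   # assumed output area
TimeStamp=$(date +"%Y_%m_%d-%H_%M_%S")

u3etas_comcat_event_config_builder.sh \
  --output-dir "$ETAS_SIM_DIR/${TimeStamp}" \
  --event-id "$EVENTID" \
  --num-simulations "$NUM_SIM" \
  --days-before 7 \
  --etas-k-cov 1.5 \
  --finite-surf-shakemap \
  --finite-surf-shakemap-min-mag 5.0 \
  --max-point-src-mag 6 \
  --random-seed "$(date +"%Y")" \
  --duration-years 1 \
  --binary-output \
  --hpc-site TACC_FRONTERA \
  --nodes "$NUM_NODE" \
  --hours 12 \
  --queue normal
</pre>

If that is the builder in use, the time-stamped output directory it creates is the subdirectory whose SLURM scripts are then edited per Stampede3_mods.md above.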

Related Entries