From 33c8f080cee6784b5da5564ee53badf0236afc66 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Tue, 27 Jan 2026 14:31:10 -0800 Subject: [PATCH 01/18] chore: update to Julienne 3.6.0 --- demo/fpm.toml | 2 +- fpm.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/fpm.toml b/demo/fpm.toml index 7095b0469..f0a87445d 100644 --- a/demo/fpm.toml +++ b/demo/fpm.toml @@ -1,6 +1,6 @@ name = "Fiats-Demonstration-Applications" [dependencies] -julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.2.1"} +julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.0"} fiats = {path = "../"} netcdf-interfaces = {git = "https://github.com/berkeleylab/netcdf-interfaces.git", rev = "d2bbb71ac52b4e346b62572b1ca1620134481096"} diff --git a/fpm.toml b/fpm.toml index e70424e5b..4e01eb279 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,3 +1,3 @@ name = "fiats" [dependencies] -julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.2.1"} +julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.0"} From 9a234a2bcecd447468503eada46428983bb21286 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Sun, 1 Feb 2026 19:25:54 -0800 Subject: [PATCH 02/18] chore(.gitignore): add file types This commit adds several file types to ignore: * archives & compressed files: *.gz, *.tar, etc. 
* gnuplot & graphics: *.plt, *.png * macos: .DS_Store --- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index f5d6d0983..03319a058 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,11 @@ # Graphics *.plt *.png + +# archives and compressed files +*.gz +*.bz2 +*.tar +*.tbz2 +*.tgz +*.zip From 26293e299b4a1d7280dd0de7179de37fce2ecedb Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 11:07:40 -0800 Subject: [PATCH 03/18] WIP(train-cloud-micro): start file reading update --- demo/app/train-cloud-microphysics.F90 | 95 ++++++++++++++++++--------- 1 file changed, 64 insertions(+), 31 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index f22f4b343..1cbd4f8ec 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -158,9 +158,10 @@ subroutine read_train_write(training_configuration, training_data_files, args, p type(training_data_files_t), intent(in) :: training_data_files type(command_line_arguments_t), intent(in) :: args type(plot_file_t), intent(in), optional :: plot_file - type(NetCDF_variable_t), allocatable :: input_variable(:), output_variable(:) type(time_derivative_t), allocatable :: derivative(:) + type(NetCDF_variable_t), allocatable, dimension(:,:) :: input_variable, output_variable type(NetCDF_variable_t) input_time, output_time + type(NetCDF_file_t) , allocatable, dimension(:) :: NetCDF_input_file, NetCDF_output_file ! 
local variables: type(trainable_network_t) trainable_network @@ -169,14 +170,46 @@ subroutine read_train_write(training_configuration, training_data_files, args, p type(input_output_pair_t), allocatable :: input_output_pairs(:) type(tensor_t), allocatable, dimension(:) :: input_tensors, output_tensors real, allocatable :: cost(:) - integer i, network_unit, io_status, epoch, end_step, t, b, t_end, v + integer f, v, network_unit, io_status, epoch, end_step, t, b, t_end integer(int64) start_training, finish_training logical stop_requested input_names: & associate(input_names => training_configuration%input_variable_names()) - allocate(input_variable(size(input_names))) + associate( & + input_tensor_file_names => training_data_files%fully_qualified_inputs_files() & + ,output_tensor_file_names => training_data_files%fully_qualified_outputs_files() & + ,time_data_file_name => training_data_files%fully_qualified_time_file() & + ,input_component_names => training_configuration%input_variable_names() & + ,output_component_names => training_configuration%output_variable_names() & + ) + allocate(NetCDF_input_file(size(input_tensor_file_names))) + allocate(input_variable(size(input_component_names), size(NetCDF_input_file))) + + input_variable_files: & + associate(num_input_files => size(NetCDF_input_file), num_variables => size(input_variable,1)) + + read_input_files: & + do f = 1, num_input_files + + print '(a)',"Reading physics-based model inputs from " // input_tensor_file_names(f)%string() + NetCDF_input_file(f) = netCDF_file_t(input_tensor_file_names(f)) + + read_variables: & + do v = 1, num_variables + print '(a)',"- reading " // input_component_names(v)%string() // " from " // input_tensor_file_names(f)%string() + call input_variable(v,f)%input(input_component_names(v), NetCDF_input_file(f), rank=4) + call_julienne_assert(input_variable(v,f)%conformable_with(input_variable(1,f))) + end do read_variables + + end do read_input_files + + end associate input_variable_files 
+ + end associate + + stop "-----> WIP <-------" input_file_name: & associate(input_file_name => args%base_name // "_input.nc") @@ -188,11 +221,11 @@ subroutine read_train_write(training_configuration, training_data_files, args, p do v=1, size(input_variable) print *,"- reading ", input_names(v)%string() - call input_variable(v)%input(input_names(v), input_file, rank=4) + !call input_variable(v)%input(input_names(v), input_file, rank=4) end do do v = 2, size(input_variable) - call_julienne_assert(input_variable(v)%conformable_with(input_variable(1))) + !call_julienne_assert(input_variable(v)%conformable_with(input_variable(1))) end do print *,"- reading time" @@ -205,7 +238,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p output_names: & associate(output_names => training_configuration%output_variable_names()) - allocate(output_variable(size(output_names))) + !allocate(output_variable(size(output_names))) output_file_name: & associate(output_file_name => args%base_name // "_output.nc") @@ -217,11 +250,11 @@ subroutine read_train_write(training_configuration, training_data_files, args, p do v=1, size(output_variable) print *,"- reading ", output_names(v)%string() - call output_variable(v)%input(output_names(v), output_file, rank=4) + !call output_variable(v)%input(output_names(v), output_file, rank=4) end do do v = 1, size(output_variable) - call_julienne_assert(output_variable(v)%conformable_with(input_variable(1))) + !call_julienne_assert(output_variable(v)%conformable_with(input_variable(1))) end do print *,"- reading time" @@ -242,7 +275,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p derivative_name: & associate(derivative_name => "d" // output_names(v)%string() // "/dt") print *,"- " // derivative_name - derivative(v) = time_derivative_t(old = input_variable(v), new = output_variable(v), dt=time_data%dt()) + !derivative(v) = time_derivative_t(old = input_variable(v), new = 
output_variable(v), dt=time_data%dt()) call_julienne_assert(.not. derivative(v)%any_nan()) end associate derivative_name end do @@ -252,11 +285,11 @@ subroutine read_train_write(training_configuration, training_data_files, args, p if (allocated(args%end_step)) then end_step = args%end_step else - end_step = input_variable(1)%end_step() + !end_step = input_variable(1)%end_step() end if print *,"Defining input tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride - input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) + !input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride output_tensors = tensors(derivative, step_start = args%start_step, step_end = end_step, step_stride = args%stride) @@ -292,26 +325,26 @@ subroutine read_train_write(training_configuration, training_data_files, args, p print *,"Defining a new network from training_configuration_t and tensor_map_t objects" - activation: & - associate(activation => training_configuration%activation()) - trainable_network = trainable_network_t( & - training_configuration & - ,perturbation_magnitude = 0.05 & - ,metadata = [ & - string_t("ICAR microphysics" ) & - ,string_t("max-entropy-filter") & - ,string_t(date ) & - ,activation%function_name( ) & - ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & - ] & - ,input_map = tensor_map_t( & - layer = "inputs" & - ,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & - ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & - ) & - ,output_map = output_map & - ) - end associate activation + !activation: & + !associate(activation => training_configuration%activation()) + ! trainable_network = trainable_network_t( & + ! 
training_configuration & + ! ,perturbation_magnitude = 0.05 & + ! ,metadata = [ & + ! string_t("ICAR microphysics" ) & + ! ,string_t("max-entropy-filter") & + ! ,string_t(date ) & + ! ,activation%function_name( ) & + ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & + ! ] & + ! ,input_map = tensor_map_t( & + ! layer = "inputs" & + ! ,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & + ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & + ! ) & + ! ,output_map = output_map & + ! ) + !end associate activation end block initialize_network end if read_or_initialize_network From 8f5925bc2009d5d77fff13450c80a63be4ad0af0 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 11:27:16 -0800 Subject: [PATCH 04/18] WIP(train-cloud-micro): start derivative calc --- demo/app/train-cloud-microphysics.F90 | 33 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index 1cbd4f8ec..e96ab9e24 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -174,20 +174,15 @@ subroutine read_train_write(training_configuration, training_data_files, args, p integer(int64) start_training, finish_training logical stop_requested - input_names: & - associate(input_names => training_configuration%input_variable_names()) - + input_variable_files: & associate( & input_tensor_file_names => training_data_files%fully_qualified_inputs_files() & - ,output_tensor_file_names => training_data_files%fully_qualified_outputs_files() & - ,time_data_file_name => training_data_files%fully_qualified_time_file() & ,input_component_names => training_configuration%input_variable_names() & - ,output_component_names => training_configuration%output_variable_names() & ) allocate(NetCDF_input_file(size(input_tensor_file_names))) 
allocate(input_variable(size(input_component_names), size(NetCDF_input_file))) - input_variable_files: & + count_files_and_variables: & associate(num_input_files => size(NetCDF_input_file), num_variables => size(input_variable,1)) read_input_files: & @@ -205,12 +200,34 @@ subroutine read_train_write(training_configuration, training_data_files, args, p end do read_input_files - end associate input_variable_files + end associate count_files_and_variables + end associate input_variable_files + + associate( & + output_tensor_file_names => training_data_files%fully_qualified_outputs_files() & + ,output_component_names => training_configuration%output_variable_names() & + ,time_data_file_name => training_data_files%fully_qualified_time_file() & + ) + allocate(NetCDF_output_file(size(output_tensor_file_names))) + allocate(output_variable(size(output_component_names), size(NetCDF_output_file))) + + output_file_and_variable_count: & + associate(num_output_files => size(NetCDF_output_file), num_output_variables => size(output_variable,1)) + + print '(a)',"- reading time from JSON file" + associate(time_data => time_data_t(file_t(time_data_file_name))) + + end associate + + end associate output_file_and_variable_count end associate stop "-----> WIP <-------" + input_names: & + associate(input_names => training_configuration%input_variable_names()) + input_file_name: & associate(input_file_name => args%base_name // "_input.nc") From 5d06aa8fb14ea807eb323cfe21c43cf66e0f6b76 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 11:35:22 -0800 Subject: [PATCH 05/18] WIP(train-cloud-micro): compute time derivatives --- demo/app/train-cloud-microphysics.F90 | 406 ++++++++++++++------------ 1 file changed, 216 insertions(+), 190 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index e96ab9e24..ef8c50cf0 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -158,7 +158,7 @@ 
subroutine read_train_write(training_configuration, training_data_files, args, p type(training_data_files_t), intent(in) :: training_data_files type(command_line_arguments_t), intent(in) :: args type(plot_file_t), intent(in), optional :: plot_file - type(time_derivative_t), allocatable :: derivative(:) + type(time_derivative_t), allocatable, dimension(:,:) :: derivative type(NetCDF_variable_t), allocatable, dimension(:,:) :: input_variable, output_variable type(NetCDF_variable_t) input_time, output_time type(NetCDF_file_t) , allocatable, dimension(:) :: NetCDF_input_file, NetCDF_output_file @@ -215,9 +215,35 @@ subroutine read_train_write(training_configuration, training_data_files, args, p associate(num_output_files => size(NetCDF_output_file), num_output_variables => size(output_variable,1)) print '(a)',"- reading time from JSON file" + read_times: & associate(time_data => time_data_t(file_t(time_data_file_name))) - end associate + print '(a)',"Calculating the desired neural-network model outputs: time derivatives of the outputs" + allocate(derivative(num_output_variables, num_output_files)) + + read_files: & + do f = 1, num_output_files + + print '(a)',"Reading physics-based model outputs from " // output_tensor_file_names(f)%string() + NetCDF_output_file(f) = netCDF_file_t(output_tensor_file_names(f)) + + read_variables: & + do v = 1, num_output_variables + + print '(a)',"- reading " // output_component_names(v)%string() // " from " // output_tensor_file_names(f)%string() + call output_variable(v,f)%input(output_component_names(v), NetCDF_output_file(f), rank=4) + call_julienne_assert(output_variable(v,f)%conformable_with(output_variable(1,f))) + + derivative_name: & + associate(derivative_name => "d" // output_component_names(v)%string() // "_dt") + print '(a)',"- calculating " // derivative_name + derivative(v,f) = time_derivative_t(old = input_variable(v,1), new = output_variable(v,1), dt=time_data%dt()) + call_julienne_assert(.not. 
derivative(v,f)%any_nan()) + end associate derivative_name + end do read_variables + end do read_files + + end associate read_times end associate output_file_and_variable_count @@ -284,19 +310,19 @@ subroutine read_train_write(training_configuration, training_data_files, args, p print *,"Calculating desired neural-network model outputs" - allocate(derivative(size(output_variable))) - - print '(a)',"- reading time from JSON file" - associate(time_data => time_data_t(file_t(training_data_files%fully_qualified_time_file()))) - do v = 1, size(derivative) - derivative_name: & - associate(derivative_name => "d" // output_names(v)%string() // "/dt") - print *,"- " // derivative_name - !derivative(v) = time_derivative_t(old = input_variable(v), new = output_variable(v), dt=time_data%dt()) - call_julienne_assert(.not. derivative(v)%any_nan()) - end associate derivative_name - end do - end associate + !allocate(derivative(size(output_variable))) + + !print '(a)',"- reading time from JSON file" + !associate(time_data => time_data_t(file_t(training_data_files%fully_qualified_time_file()))) + ! do v = 1, size(derivative) + ! derivative_name: & + ! associate(derivative_name => "d" // output_names(v)%string() // "/dt") + ! print *,"- " // derivative_name + ! !derivative(v) = time_derivative_t(old = input_variable(v), new = output_variable(v), dt=time_data%dt()) + ! call_julienne_assert(.not. derivative(v)%any_nan()) + ! end associate derivative_name + ! 
end do + !end associate end associate output_names if (allocated(args%end_step)) then @@ -309,184 +335,184 @@ subroutine read_train_write(training_configuration, training_data_files, args, p !input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride - output_tensors = tensors(derivative, step_start = args%start_step, step_end = end_step, step_stride = args%stride) - - output_map_and_network_file: & - associate( & - output_map => tensor_map_t( & - layer = "outputs" & - ,minima = [( derivative(v)%minimum(), v=1, size(derivative) )] & - ,maxima = [( derivative(v)%maximum(), v=1, size(derivative) )] & - ), & - network_file => args%base_name // "_network.json" & - ) - check_for_network_file: & - block - logical preexisting_network_file - - inquire(file=network_file, exist=preexisting_network_file) - - read_or_initialize_network: & - if (preexisting_network_file) then - print *,"Reading network from file " // network_file - trainable_network = trainable_network_t(file_t(string_t(network_file))) - close(network_unit) - else - close(network_unit) - - initialize_network: & - block - character(len=len('YYYYMMDD')) date - - call date_and_time(date) - - print *,"Defining a new network from training_configuration_t and tensor_map_t objects" - - !activation: & - !associate(activation => training_configuration%activation()) - ! trainable_network = trainable_network_t( & - ! training_configuration & - ! ,perturbation_magnitude = 0.05 & - ! ,metadata = [ & - ! string_t("ICAR microphysics" ) & - ! ,string_t("max-entropy-filter") & - ! ,string_t(date ) & - ! ,activation%function_name( ) & - ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & - ! ] & - ! ,input_map = tensor_map_t( & - ! layer = "inputs" & - ! 
,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & - ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & - ! ) & - ! ,output_map = output_map & - ! ) - !end associate activation - end block initialize_network - - end if read_or_initialize_network - - end block check_for_network_file - - print *, "Conditionally sampling for a flat distribution of output values" - - flatten_histogram: & - block - integer i - logical occupied(args%num_bins, args%num_bins) - logical keepers(size(output_tensors)) - type(phase_space_bin_t), allocatable :: bin(:) - type(occupancy_t) occupancy - - ! Determine the phase-space bin that holds each output tensor - associate(output_minima => output_map%minima(), output_maxima => output_map%maxima()) - bin = [(phase_space_bin_t(output_tensors(i), output_minima, output_maxima, args%num_bins), i = 1, size(output_tensors))] - end associate - - call occupancy%vacate( dims = [( args%num_bins, i = 1, size(output_variable))] ) - - keepers = .false. - - do i = 1, size(output_tensors) - if (occupancy%occupied(bin(i)%loc)) cycle - call occupancy%occupy(bin(i)%loc) - keepers(i) = .true. - end do - - input_output_pairs = input_output_pair_t(pack(input_tensors, keepers), pack(output_tensors, keepers)) - - print '(*(a,i))' & - ," Keeping " , size(input_output_pairs, kind=int64) & - ," out of " , size(output_tensors, kind=int64) & - ," input/output pairs in ", occupancy%num_occupied() & - ," out of " , occupancy%num_bins() & - ," bins." 
- - end block flatten_histogram - - print *,"Normalizing the remaining input and output tensors" - input_output_pairs = trainable_network%map_to_training_ranges(input_output_pairs) - - training_parameters: & - associate( & - num_pairs => size(input_output_pairs), & - n_bins => training_configuration%mini_batches(), & - adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), & - learning_rate => training_configuration%learning_rate() & - ) - bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)] - - print *,"Training network" - print *, " Epoch Cost (avg)" - - call system_clock(start_training) - - train_write_and_maybe_exit: & - block - integer first_epoch - integer me -#if defined(MULTI_IMAGE_SUPPORT) - me = this_image() + !output_tensors = tensors(derivative, step_start = args%start_step, step_end = end_step, step_stride = args%stride) + + !output_map_and_network_file: & + !associate( & + ! output_map => tensor_map_t( & + ! layer = "outputs" & + ! ,minima = [( derivative(v)%minimum(), v=1, size(derivative) )] & + ! ,maxima = [( derivative(v)%maximum(), v=1, size(derivative) )] & + ! ), & + ! network_file => args%base_name // "_network.json" & + !) + ! check_for_network_file: & + ! block + ! logical preexisting_network_file + + ! inquire(file=network_file, exist=preexisting_network_file) + + ! read_or_initialize_network: & + ! if (preexisting_network_file) then + ! print *,"Reading network from file " // network_file + ! trainable_network = trainable_network_t(file_t(string_t(network_file))) + ! close(network_unit) + ! else + ! close(network_unit) + + ! initialize_network: & + ! block + ! character(len=len('YYYYMMDD')) date + + ! call date_and_time(date) + + ! print *,"Defining a new network from training_configuration_t and tensor_map_t objects" + + ! !activation: & + ! !associate(activation => training_configuration%activation()) + ! ! trainable_network = trainable_network_t( & + ! ! 
training_configuration & + ! ! ,perturbation_magnitude = 0.05 & + ! ! ,metadata = [ & + ! ! string_t("ICAR microphysics" ) & + ! ! ,string_t("max-entropy-filter") & + ! ! ,string_t(date ) & + ! ! ,activation%function_name( ) & + ! ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & + ! ! ] & + ! ! ,input_map = tensor_map_t( & + ! ! layer = "inputs" & + ! ! ,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & + ! ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & + ! ! ) & + ! ! ,output_map = output_map & + ! ! ) + ! !end associate activation + ! end block initialize_network + + ! end if read_or_initialize_network + + ! end block check_for_network_file + + ! print *, "Conditionally sampling for a flat distribution of output values" + + ! flatten_histogram: & + ! block + ! integer i + ! logical occupied(args%num_bins, args%num_bins) + ! logical keepers(size(output_tensors)) + ! type(phase_space_bin_t), allocatable :: bin(:) + ! type(occupancy_t) occupancy + + ! ! Determine the phase-space bin that holds each output tensor + ! associate(output_minima => output_map%minima(), output_maxima => output_map%maxima()) + ! bin = [(phase_space_bin_t(output_tensors(i), output_minima, output_maxima, args%num_bins), i = 1, size(output_tensors))] + ! end associate + + ! call occupancy%vacate( dims = [( args%num_bins, i = 1, size(output_variable))] ) + + ! keepers = .false. + + ! do i = 1, size(output_tensors) + ! if (occupancy%occupied(bin(i)%loc)) cycle + ! call occupancy%occupy(bin(i)%loc) + ! keepers(i) = .true. + ! end do + + ! input_output_pairs = input_output_pair_t(pack(input_tensors, keepers), pack(output_tensors, keepers)) + + ! print '(*(a,i))' & + ! ," Keeping " , size(input_output_pairs, kind=int64) & + ! ," out of " , size(output_tensors, kind=int64) & + ! ," input/output pairs in ", occupancy%num_occupied() & + ! ," out of " , occupancy%num_bins() & + ! ," bins." + + ! 
end block flatten_histogram + + ! print *,"Normalizing the remaining input and output tensors" + ! input_output_pairs = trainable_network%map_to_training_ranges(input_output_pairs) + + ! training_parameters: & + ! associate( & + ! num_pairs => size(input_output_pairs), & + ! n_bins => training_configuration%mini_batches(), & + ! adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), & + ! learning_rate => training_configuration%learning_rate() & + ! ) + ! bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)] + + ! print *,"Training network" + ! print *, " Epoch Cost (avg)" + + ! call system_clock(start_training) + ! + ! train_write_and_maybe_exit: & + ! block + ! integer first_epoch + ! integer me +#if !defined(MULTI_IMAGE_SUPPORT) + ! me = this_image() #else - me = 1 + ! me = 1 #endif - if (me==1) first_epoch = plot_file%previous_epoch + 1 -#if defined(MULTI_IMAGE_SUPPORT) - call co_broadcast(first_epoch, source_image=1) + ! if (me==1) first_epoch = plot_file%previous_epoch + 1 +#if !defined(MULTI_IMAGE_SUPPORT) + ! call co_broadcast(first_epoch, source_image=1) #endif - last_epoch: & - associate(last_epoch => first_epoch + args%num_epochs - 1) - epochs: & - do epoch = first_epoch, last_epoch - - if (size(bins)>1) call shuffle(input_output_pairs) ! set up for stochastic gradient descent - mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - - call trainable_network%train(mini_batches, cost, adam, learning_rate) - - average_cost: & - associate(average_cost => sum(cost)/size(cost)) - converged: & - associate(converged => average_cost <= args%cost_tolerance) - - image_1_maybe_writes: & - if (me==1 .and. 
any([converged, epoch==[first_epoch,last_epoch], mod(epoch,args%report_step)==0])) then - - !print '(*(g0,4x))', epoch, average_cost - write(plot_file%plot_unit,'(*(g0,4x))') epoch, average_cost - - associate(json_file => trainable_network%to_json()) - call json_file%write_lines(string_t(network_file)) - end associate - - end if image_1_maybe_writes - - signal_convergence: & - if (converged) then - block - integer unit - open(newunit=unit, file="converged", status="unknown") ! The train.sh script detects & removes this file. - close(unit) - exit epochs - end block - end if signal_convergence - end associate converged - end associate average_cost - - inquire(file="stop", exist=stop_requested) - - graceful_exit: & - if (stop_requested) then - print *,'Shutting down because a file named "stop" was found.' - return - end if graceful_exit - - end do epochs - end associate last_epoch - end block train_write_and_maybe_exit - - end associate training_parameters - end associate output_map_and_network_file + ! last_epoch: & + ! associate(last_epoch => first_epoch + args%num_epochs - 1) + ! epochs: & + ! do epoch = first_epoch, last_epoch + + ! if (size(bins)>1) call shuffle(input_output_pairs) ! set up for stochastic gradient descent + ! mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] + + ! call trainable_network%train(mini_batches, cost, adam, learning_rate) + + ! average_cost: & + ! associate(average_cost => sum(cost)/size(cost)) + ! converged: & + ! associate(converged => average_cost <= args%cost_tolerance) + + ! image_1_maybe_writes: & + ! if (me==1 .and. any([converged, epoch==[first_epoch,last_epoch], mod(epoch,args%report_step)==0])) then + + ! !print '(*(g0,4x))', epoch, average_cost + ! write(plot_file%plot_unit,'(*(g0,4x))') epoch, average_cost + + ! associate(json_file => trainable_network%to_json()) + ! call json_file%write_lines(string_t(network_file)) + ! end associate + + ! 
end if image_1_maybe_writes + + ! signal_convergence: & + ! if (converged) then + ! block + ! integer unit + ! open(newunit=unit, file="converged", status="unknown") ! The train.sh script detects & removes this file. + ! close(unit) + ! exit epochs + ! end block + ! end if signal_convergence + ! end associate converged + ! end associate average_cost + + ! inquire(file="stop", exist=stop_requested) + + ! graceful_exit: & + ! if (stop_requested) then + ! print *,'Shutting down because a file named "stop" was found.' + ! return + ! end if graceful_exit + + ! end do epochs + ! end associate last_epoch + ! end block train_write_and_maybe_exit + + ! end associate training_parameters + !end associate output_map_and_network_file call system_clock(finish_training) print *,"Training time: ", real(finish_training - start_training, real64)/real(clock_rate, real64),"for", & From f069f7af6497ea1642e814978392f3e83cba0ef6 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 12:12:18 -0800 Subject: [PATCH 06/18] feat(NetCDF_variable): start_step generic binding --- demo/src/NetCDF_variable_m.f90 | 14 ++++++++++++++ demo/src/NetCDF_variable_s.F90 | 30 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/demo/src/NetCDF_variable_m.f90 b/demo/src/NetCDF_variable_m.f90 index f3cf4d022..7ecedc525 100644 --- a/demo/src/NetCDF_variable_m.f90 +++ b/demo/src/NetCDF_variable_m.f90 @@ -26,6 +26,8 @@ module NetCDF_variable_m procedure, private, non_overridable :: default_real_conformable_with, double_precision_conformable_with generic :: rank => default_real_rank , double_precision_rank procedure, private, non_overridable :: default_real_rank , double_precision_rank + generic :: start_step => default_real_start_step , double_precision_start_step + procedure, private, non_overridable :: default_real_start_step , double_precision_start_step generic :: end_step => default_real_end_step , double_precision_end_step procedure, private, non_overridable :: 
default_real_end_step , double_precision_end_step generic :: any_nan => default_real_any_nan , double_precision_any_nan @@ -145,6 +147,18 @@ elemental module function double_precision_rank(self) result(my_rank) integer my_rank end function + elemental module function default_real_start_step(self) result(start_step) + implicit none + class(NetCDF_variable_t), intent(in) :: self + integer start_step + end function + + elemental module function double_precision_start_step(self) result(start_step) + implicit none + class(NetCDF_variable_t(double_precision)), intent(in) :: self + integer start_step + end function + elemental module function default_real_end_step(self) result(end_step) implicit none class(NetCDF_variable_t), intent(in) :: self diff --git a/demo/src/NetCDF_variable_s.F90 b/demo/src/NetCDF_variable_s.F90 index 760286a8b..2457f7aaa 100644 --- a/demo/src/NetCDF_variable_s.F90 +++ b/demo/src/NetCDF_variable_s.F90 @@ -213,6 +213,36 @@ pure function double_precision_components_allocated(NetCDF_variable) result(allo end associate end procedure + module procedure default_real_start_step + select case(self%rank()) + case(1) + start_step = lbound(self%values_1D_,1) + case(2) + start_step = lbound(self%values_2D_,2) + case(3) + start_step = lbound(self%values_3D_,3) + case(4) + start_step = lbound(self%values_4D_,4) + case default + error stop "NetCDF_variable_s(default_real_start_step): unsupported rank" + end select + end procedure + + module procedure double_precision_start_step + select case(self%rank()) + case(1) + start_step = lbound(self%values_1D_,1) + case(2) + start_step = lbound(self%values_2D_,2) + case(3) + start_step = lbound(self%values_3D_,3) + case(4) + start_step = lbound(self%values_4D_,4) + case default + error stop "NetCDF_variable_s(double_precision_start_step): unsupported rank" + end select + end procedure + module procedure default_real_end_step select case(self%rank()) case(1) From 7986846c7f474d50c200dbddb1127ea7ee92c4f3 Mon Sep 17 
00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 12:13:03 -0800 Subject: [PATCH 07/18] chore(train-cloud-micro): use start_step binding --- demo/app/train-cloud-microphysics.F90 | 90 +++------------------------ 1 file changed, 9 insertions(+), 81 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index ef8c50cf0..8b4f07881 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -203,6 +203,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p end associate count_files_and_variables end associate input_variable_files + output_variable_and_time_files: & associate( & output_tensor_file_names => training_data_files%fully_qualified_outputs_files() & ,output_component_names => training_configuration%output_variable_names() & @@ -244,94 +245,21 @@ subroutine read_train_write(training_configuration, training_data_files, args, p end do read_files end associate read_times - end associate output_file_and_variable_count - - end associate - - stop "-----> WIP <-------" - - input_names: & - associate(input_names => training_configuration%input_variable_names()) - - input_file_name: & - associate(input_file_name => args%base_name // "_input.nc") - - print *,"Reading physics-based model inputs from " // input_file_name - - input_file: & - associate(input_file => netCDF_file_t(input_file_name)) - - do v=1, size(input_variable) - print *,"- reading ", input_names(v)%string() - !call input_variable(v)%input(input_names(v), input_file, rank=4) - end do - - do v = 2, size(input_variable) - !call_julienne_assert(input_variable(v)%conformable_with(input_variable(1))) - end do - - print *,"- reading time" - call input_time%input("time", input_file, rank=1) - - end associate input_file - end associate input_file_name - end associate input_names - - output_names: & - associate(output_names => training_configuration%output_variable_names()) - - 
!allocate(output_variable(size(output_names))) - - output_file_name: & - associate(output_file_name => args%base_name // "_output.nc") - - print *,"Reading physics-based model outputs from " // output_file_name - - output_file: & - associate(output_file => netCDF_file_t(output_file_name)) - - do v=1, size(output_variable) - print *,"- reading ", output_names(v)%string() - !call output_variable(v)%input(output_names(v), output_file, rank=4) - end do - - do v = 1, size(output_variable) - !call_julienne_assert(output_variable(v)%conformable_with(input_variable(1))) - end do - - print *,"- reading time" - call output_time%input("time", output_file, rank=1) - - call_julienne_assert(output_time%conformable_with(input_time)) - - end associate output_file - end associate output_file_name - - print *,"Calculating desired neural-network model outputs" - - !allocate(derivative(size(output_variable))) - - !print '(a)',"- reading time from JSON file" - !associate(time_data => time_data_t(file_t(training_data_files%fully_qualified_time_file()))) - ! do v = 1, size(derivative) - ! derivative_name: & - ! associate(derivative_name => "d" // output_names(v)%string() // "/dt") - ! print *,"- " // derivative_name - ! !derivative(v) = time_derivative_t(old = input_variable(v), new = output_variable(v), dt=time_data%dt()) - ! call_julienne_assert(.not. derivative(v)%any_nan()) - ! end associate derivative_name - ! 
end do - !end associate - end associate output_names + end associate output_variable_and_time_files if (allocated(args%end_step)) then end_step = args%end_step else - !end_step = input_variable(1)%end_step() + end_step = input_variable(1,1)%end_step() end if - print *,"Defining input tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride + associate(num_steps => sum( (input_variable(1,:)%end_step()+1) - input_variable(1,:)%start_step())) + print *,"Defining input tensors for ", num_steps, "time steps" + end associate + + stop "-----> WIP <-------" + !input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride From f1acd62e7ecf1d9f8d8e61b61d751263b461d1be Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 12:17:47 -0800 Subject: [PATCH 08/18] chore(train-cloud-micro): rm {start,end}_step Remove variables that are no longer used. --- demo/app/train-cloud-microphysics.F90 | 34 +++++---------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index 8b4f07881..0ec7a8225 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -35,8 +35,7 @@ program train_cloud_microphysics 'The presence of a file named "stop" halts execution gracefully.' 
// new_line('') type command_line_arguments_t - integer num_epochs, start_step, stride, num_bins, report_step - integer, allocatable :: end_step + integer num_epochs, stride, num_bins, report_step character(len=:), allocatable :: base_name real cost_tolerance end type @@ -112,8 +111,7 @@ function get_command_line_arguments() result(command_line_arguments) character(len=:), allocatable :: & base_name, epochs_string, start_string, end_string, stride_string, bins_string, report_string, tolerance_string real cost_tolerance - integer, allocatable :: end_step - integer num_epochs, num_bins, start_step, stride, report_step + integer num_epochs, num_bins, stride, report_step base_name = command_line%flag_value("--base") epochs_string = command_line%flag_value("--epochs") @@ -131,26 +129,12 @@ function get_command_line_arguments() result(command_line_arguments) read(epochs_string,*) num_epochs stride = default_or_internal_read(1, stride_string) - start_step = default_or_internal_read(1, start_string) report_step = default_or_internal_read(1, report_string) num_bins = default_or_internal_read(3, bins_string) cost_tolerance = default_or_internal_read(5E-8, tolerance_string) - if (len(end_string)/=0) then - allocate(end_step) - read(end_string,*) end_step - end if - - if (allocated(end_step)) then - command_line_arguments = command_line_arguments_t( & - num_epochs, start_step, stride, num_bins, report_step, end_step, base_name, cost_tolerance & - ) - else - command_line_arguments = command_line_arguments_t( & - num_epochs, start_step, stride, num_bins, report_step, null(), base_name, cost_tolerance & - ) - end if - + command_line_arguments = command_line_arguments_t(num_epochs, stride, num_bins, report_step, base_name, cost_tolerance) + end function get_command_line_arguments subroutine read_train_write(training_configuration, training_data_files, args, plot_file) @@ -170,7 +154,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p 
type(input_output_pair_t), allocatable :: input_output_pairs(:) type(tensor_t), allocatable, dimension(:) :: input_tensors, output_tensors real, allocatable :: cost(:) - integer f, v, network_unit, io_status, epoch, end_step, t, b, t_end + integer f, v, network_unit, io_status, epoch, t, b, t_end integer(int64) start_training, finish_training logical stop_requested @@ -248,12 +232,6 @@ subroutine read_train_write(training_configuration, training_data_files, args, p end associate output_file_and_variable_count end associate output_variable_and_time_files - if (allocated(args%end_step)) then - end_step = args%end_step - else - end_step = input_variable(1,1)%end_step() - end if - associate(num_steps => sum( (input_variable(1,:)%end_step()+1) - input_variable(1,:)%start_step())) print *,"Defining input tensors for ", num_steps, "time steps" end associate @@ -262,7 +240,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p !input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) - print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride + !print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride !output_tensors = tensors(derivative, step_start = args%start_step, step_end = end_step, step_stride = args%stride) !output_map_and_network_file: & From a02961231e1b4881b23379001e69628140eb15a9 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 14:00:13 -0800 Subject: [PATCH 09/18] feat(train-cloud): aggregate tensors across files --- demo/app/train-cloud-microphysics.F90 | 128 +++++++++++++------------- demo/src/NetCDF_variable_m.f90 | 5 +- demo/src/NetCDF_variable_s.F90 | 44 +++++---- 3 files changed, 92 insertions(+), 85 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index 
0ec7a8225..d02b60601 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -236,69 +236,73 @@ subroutine read_train_write(training_configuration, training_data_files, args, p print *,"Defining input tensors for ", num_steps, "time steps" end associate + input_tensors = tensors(input_variable) + + associate(num_steps => sum( (derivative(1,:)%end_step()+1) - derivative(1,:)%start_step())) + print *,"Defining output tensors for ", num_steps, "time steps" + end associate + + output_tensors = tensors(derivative) + + output_map_and_network_file: & + associate( & + output_map => tensor_map_t( & + layer = "outputs" & + ,minima = [( [( derivative(v,f)%minimum(), v=1, size(derivative,1) )], f = 1, size(derivative,2) )] & + ,maxima = [( [( derivative(v,f)%maximum(), v=1, size(derivative,1) )], f = 1, size(derivative,2) )] & + ), & + network_file => args%base_name // "_network.json" & + ) + stop "-----> WIP <-------" - !input_tensors = tensors(input_variable, step_start = args%start_step, step_end = end_step, step_stride = args%stride) - - !print *,"Defining output tensors for time step", args%start_step, "through", end_step, "with strides of", args%stride - !output_tensors = tensors(derivative, step_start = args%start_step, step_end = end_step, step_stride = args%stride) - - !output_map_and_network_file: & - !associate( & - ! output_map => tensor_map_t( & - ! layer = "outputs" & - ! ,minima = [( derivative(v)%minimum(), v=1, size(derivative) )] & - ! ,maxima = [( derivative(v)%maximum(), v=1, size(derivative) )] & - ! ), & - ! network_file => args%base_name // "_network.json" & - !) - ! check_for_network_file: & - ! block - ! logical preexisting_network_file - - ! inquire(file=network_file, exist=preexisting_network_file) - - ! read_or_initialize_network: & - ! if (preexisting_network_file) then - ! print *,"Reading network from file " // network_file - ! trainable_network = trainable_network_t(file_t(string_t(network_file))) - ! 
close(network_unit) - ! else - ! close(network_unit) - - ! initialize_network: & - ! block - ! character(len=len('YYYYMMDD')) date - - ! call date_and_time(date) - - ! print *,"Defining a new network from training_configuration_t and tensor_map_t objects" - - ! !activation: & - ! !associate(activation => training_configuration%activation()) - ! ! trainable_network = trainable_network_t( & - ! ! training_configuration & - ! ! ,perturbation_magnitude = 0.05 & - ! ! ,metadata = [ & - ! ! string_t("ICAR microphysics" ) & - ! ! ,string_t("max-entropy-filter") & - ! ! ,string_t(date ) & - ! ! ,activation%function_name( ) & - ! ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & - ! ! ] & - ! ! ,input_map = tensor_map_t( & - ! ! layer = "inputs" & - ! ! ,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & - ! ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & - ! ! ) & - ! ! ,output_map = output_map & - ! ! ) - ! !end associate activation - ! end block initialize_network - - ! end if read_or_initialize_network - - ! end block check_for_network_file + check_for_network_file: & + block + logical preexisting_network_file + + inquire(file=network_file, exist=preexisting_network_file) + + read_or_initialize_network: & + if (preexisting_network_file) then + print *,"Reading network from file " // network_file + trainable_network = trainable_network_t(file_t(string_t(network_file))) + close(network_unit) + else + close(network_unit) + + initialize_network: & + block + character(len=len('YYYYMMDD')) date + + call date_and_time(date) + + print *,"Defining a new network from training_configuration_t and tensor_map_t objects" + + activation: & + associate(activation => training_configuration%activation()) + ! trainable_network = trainable_network_t( & + ! training_configuration & + ! ,perturbation_magnitude = 0.05 & + ! ,metadata = [ & + ! string_t("ICAR microphysics" ) & + ! 
,string_t("max-entropy-filter") & + ! ,string_t(date ) & + ! ,activation%function_name( ) & + ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & + ! ] & + ! ,input_map = tensor_map_t( & + ! layer = "inputs" & + ! ,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & + ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & + ! ) & + ! ,output_map = output_map & + ! ) + end associate activation + end block initialize_network + + end if read_or_initialize_network + + end block check_for_network_file ! print *, "Conditionally sampling for a flat distribution of output values" @@ -418,7 +422,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p ! end block train_write_and_maybe_exit ! end associate training_parameters - !end associate output_map_and_network_file + end associate output_map_and_network_file call system_clock(finish_training) print *,"Training time: ", real(finish_training - start_training, real64)/real(clock_rate, real64),"for", & diff --git a/demo/src/NetCDF_variable_m.f90 b/demo/src/NetCDF_variable_m.f90 index 7ecedc525..79ef22003 100644 --- a/demo/src/NetCDF_variable_m.f90 +++ b/demo/src/NetCDF_variable_m.f90 @@ -219,11 +219,10 @@ elemental module function double_precision_maximum(self) result(maximum) real maximum end function - module function tensors(NetCDF_variables, step_start, step_end, step_stride) + module function tensors(NetCDF_variables) implicit none - class(NetCDF_variable_t), intent(in) :: NetCDF_variables(:) + class(NetCDF_variable_t), intent(in) :: NetCDF_variables(:,:) type(tensor_t), allocatable :: tensors(:) - integer, optional :: step_start, step_end, step_stride end function elemental module function default_real_end_time(self) result(end_time) diff --git a/demo/src/NetCDF_variable_s.F90 b/demo/src/NetCDF_variable_s.F90 index 2457f7aaa..71624ebfa 100644 --- a/demo/src/NetCDF_variable_s.F90 +++ 
b/demo/src/NetCDF_variable_s.F90 @@ -429,30 +429,34 @@ pure function double_precision_upper_bounds(NetCDF_variable) result(ubounds) module procedure tensors - integer t_start, t_end, t_stride + integer v, f, lon, lat, lev, time - select case(NetCDF_variables(1)%rank()) - case(4) - - t_start = default_or_present_value(1, step_start ) - t_stride = default_or_present_value(1, step_stride) - t_end = default_or_present_value(size(NetCDF_variables(1)%values_4D_,4), step_end) + associate(component_rank => NetCDF_variables(1,1)%rank()) - associate( longitudes => size(NetCDF_variables(1)%values_4D_,1) & - ,latitudes => size(NetCDF_variables(1)%values_4D_,2) & - ,levels => size(NetCDF_variables(1)%values_4D_,3) & - ) - block - integer v, lon, lat, lev, time + call_julienne_assert(.all. (NetCDF_variables(:,:)%rank() .equalsExpected. component_rank)) - tensors = [( [( [( [( tensor_t( [( NetCDF_variables(v)%values_4D_(lon,lat,lev,time), v=1,size(NetCDF_variables) )] ), & - lon = 1, longitudes)], lat = 1, latitudes)], lev = 1, levels)], time = t_start, t_end, t_stride)] - end block - end associate + select case(component_rank) + case(4) + associate( longitudes => size(NetCDF_variables(1,1)%values_4D_,1) & + ,latitudes => size(NetCDF_variables(1,1)%values_4D_,2) & + ,levels => size(NetCDF_variables(1,1)%values_4D_,3) & + ,t_end => size(NetCDF_variables(1,1)%values_4D_,4) & + ,variables => size(NetCDF_variables,1) & + ,files => size(NetCDF_variables,2) & + ) + call_julienne_assert(.all. ([( [( size(NetCDF_variables(v,f)%values_4D_,1), v = 1, variables)], f = 1, files)] .equalsExpected. longitudes)) + call_julienne_assert(.all. ([( [( size(NetCDF_variables(v,f)%values_4D_,2), v = 1, variables)], f = 1, files)] .equalsExpected. latitudes)) + call_julienne_assert(.all. ([( [( size(NetCDF_variables(v,f)%values_4D_,3), v = 1, variables)], f = 1, files)] .equalsExpected. levels)) + call_julienne_assert(.all. 
([( [( size(NetCDF_variables(v,f)%values_4D_,4), v = 1, variables)], f = 1, files)] .equalsExpected. t_end)) + + tensors = [( [( [( [( [( tensor_t( [( NetCDF_variables(v,f)%values_4D_(lon,lat,lev,time), v=1,size(NetCDF_variables,1) )] ), & + lon = 1, longitudes)], lat = 1, latitudes)], lev = 1, levels)], time = 1, t_end )], f = 1, files )] + end associate + case default + error stop "NetCDF_variable_s(tensors): unsupported rank)" + end select - case default - error stop "NetCDF_variable_s(tensors): unsupported rank)" - end select + end associate end procedure From f23cf200df2496b7d54068cc8367b4a71b1802ed Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 14:57:29 -0800 Subject: [PATCH 10/18] refact(train-cloud): define trainable_network_t --- demo/app/train-cloud-microphysics.F90 | 37 ++++++++++++--------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index d02b60601..0067a456c 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -254,8 +254,6 @@ subroutine read_train_write(training_configuration, training_data_files, args, p network_file => args%base_name // "_network.json" & ) - stop "-----> WIP <-------" - check_for_network_file: & block logical preexisting_network_file @@ -280,28 +278,27 @@ subroutine read_train_write(training_configuration, training_data_files, args, p activation: & associate(activation => training_configuration%activation()) - ! trainable_network = trainable_network_t( & - ! training_configuration & - ! ,perturbation_magnitude = 0.05 & - ! ,metadata = [ & - ! string_t("ICAR microphysics" ) & - ! ,string_t("max-entropy-filter") & - ! ,string_t(date ) & - ! ,activation%function_name( ) & - ! ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & - ! ] & - ! ,input_map = tensor_map_t( & - ! layer = "inputs" & - ! 
,minima = [( input_variable(v)%minimum(), v=1, size( input_variable) )] & - ! ,maxima = [( input_variable(v)%maximum(), v=1, size( input_variable) )] & - ! ) & - ! ,output_map = output_map & - ! ) + trainable_network = trainable_network_t( & + training_configuration & + ,perturbation_magnitude = 0.05 & + ,metadata = [ & + string_t("ICAR microphysics" ) & + ,string_t("max-entropy-filter") & + ,string_t(date ) & + ,activation%function_name( ) & + ,string_t(trim(merge("true ", "false", training_configuration%skip_connections()))) & + ] & + ,input_map = tensor_map_t( & + layer = "inputs" & + ,minima = [( [( input_variable(v,f)%minimum(), v = 1, size(input_variable,1) )], f = 1, size(input_variable,2) )] & + ,maxima = [( [( input_variable(v,f)%maximum(), v = 1, size(input_variable,1) )], f = 1, size(input_variable,2) )] & + ) & + ,output_map = output_map & + ) end associate activation end block initialize_network end if read_or_initialize_network - end block check_for_network_file ! print *, "Conditionally sampling for a flat distribution of output values" From dcd88fd4997e1454a1d01e4a0e69e98669d795b9 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 17:50:09 -0800 Subject: [PATCH 11/18] fix(train-cloud): work around flang issues --- demo/app/train-cloud-microphysics.F90 | 244 +++++++++--------- demo/training_data_files.json | 2 +- ...support.F90 => fiats-language-support.F90} | 5 + 3 files changed, 132 insertions(+), 119 deletions(-) rename include/{language-support.F90 => fiats-language-support.F90} (87%) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index 0067a456c..3cad5a8af 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -1,6 +1,7 @@ ! Copyright (c), The Regents of the University of California ! 
Terms of use are as specified in LICENSE.txt +#include "fiats-language-support.F90" #include "julienne-assert-macros.h" program train_cloud_microphysics @@ -48,16 +49,15 @@ program train_cloud_microphysics integer(int64) t_start, t_finish, clock_rate call system_clock(t_start, clock_rate) - associate( & training_configuration => training_configuration_t(file_t("training_configuration.json")) & ,training_data_files => training_data_files_t(file_t("training_data_files.json")) & ) -#if defined(MULTI_IMAGE_SUPPORT) +#if defined(FIATS_MULTI_IMAGE_SUPPORT) if (this_image()==1) then #endif call read_train_write(training_configuration, training_data_files, get_command_line_arguments(), create_or_append_to("cost.plt")) -#if defined(MULTI_IMAGE_SUPPORT) +#if defined(FIATS_MULTI_IMAGE_SUPPORT) else call read_train_write(training_configuration, training_data_files, get_command_line_arguments()) end if @@ -301,127 +301,135 @@ subroutine read_train_write(training_configuration, training_data_files, args, p end if read_or_initialize_network end block check_for_network_file - ! print *, "Conditionally sampling for a flat distribution of output values" - - ! flatten_histogram: & - ! block - ! integer i - ! logical occupied(args%num_bins, args%num_bins) - ! logical keepers(size(output_tensors)) - ! type(phase_space_bin_t), allocatable :: bin(:) - ! type(occupancy_t) occupancy - - ! ! Determine the phase-space bin that holds each output tensor - ! associate(output_minima => output_map%minima(), output_maxima => output_map%maxima()) - ! bin = [(phase_space_bin_t(output_tensors(i), output_minima, output_maxima, args%num_bins), i = 1, size(output_tensors))] - ! end associate - - ! call occupancy%vacate( dims = [( args%num_bins, i = 1, size(output_variable))] ) - - ! keepers = .false. - - ! do i = 1, size(output_tensors) - ! if (occupancy%occupied(bin(i)%loc)) cycle - ! call occupancy%occupy(bin(i)%loc) - ! keepers(i) = .true. - ! end do - - ! 
input_output_pairs = input_output_pair_t(pack(input_tensors, keepers), pack(output_tensors, keepers)) - - ! print '(*(a,i))' & - ! ," Keeping " , size(input_output_pairs, kind=int64) & - ! ," out of " , size(output_tensors, kind=int64) & - ! ," input/output pairs in ", occupancy%num_occupied() & - ! ," out of " , occupancy%num_bins() & - ! ," bins." - - ! end block flatten_histogram - - ! print *,"Normalizing the remaining input and output tensors" - ! input_output_pairs = trainable_network%map_to_training_ranges(input_output_pairs) - - ! training_parameters: & - ! associate( & - ! num_pairs => size(input_output_pairs), & - ! n_bins => training_configuration%mini_batches(), & - ! adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), & - ! learning_rate => training_configuration%learning_rate() & - ! ) - ! bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)] - - ! print *,"Training network" - ! print *, " Epoch Cost (avg)" - - ! call system_clock(start_training) - ! - ! train_write_and_maybe_exit: & - ! block - ! integer first_epoch - ! integer me -#if !defined(MULTI_IMAGE_SUPPORT) - ! me = this_image() + print *, "Conditionally sampling for a flat distribution of output values" + + flatten_histogram: & + block + integer i + logical occupied(args%num_bins, args%num_bins) + type(phase_space_bin_t), allocatable :: bin(:) + type(occupancy_t) occupancy +#if !defined(__flang__) + logical keepers(size(output_tensors)) + keepers = .false. +#else + logical, allocatable :: keepers(:) + allocate(keepers(size(output_tensors)), source = .false.) +#endif + + print *, "Determine the phase-space bin that holds each output tensor" + ! 
Determine the phase-space bin that holds each output tensor + associate(output_minima => output_map%minima(), output_maxima => output_map%maxima()) + bin = [(phase_space_bin_t(output_tensors(i), output_minima, output_maxima, args%num_bins), i = 1, size(output_tensors))] + end associate + + call occupancy%vacate( dims = [( args%num_bins, i = 1, size(derivative,1))] ) + + print *, "Populate bins" + do i = 1, size(output_tensors) + if (occupancy%occupied(bin(i)%loc)) cycle + call occupancy%occupy(bin(i)%loc) + keepers(i) = .true. + end do + + print *, "Pack remaining input/output tensor pairs" + input_output_pairs = input_output_pair_t(pack(input_tensors, keepers), pack(output_tensors, keepers)) + + print '(*(a,i))' & + ," Keeping " , size(input_output_pairs, kind=int64) & + ," out of " , size(output_tensors, kind=int64) & + ," input/output pairs in ", occupancy%num_occupied() & + ," out of " , occupancy%num_bins() & + ," bins." + + end block flatten_histogram + + print *,"Normalizing the remaining input and output tensors" + input_output_pairs = trainable_network%map_to_training_ranges(input_output_pairs) + + training_parameters: & + associate( & + num_pairs => size(input_output_pairs), & + n_bins => training_configuration%mini_batches(), & + adam => merge(.true., .false., training_configuration%optimizer_name() == "adam"), & + learning_rate => training_configuration%learning_rate() & + ) + bins = [(bin_t(num_items=num_pairs, num_bins=n_bins, bin_number=b), b = 1, n_bins)] + + print *,"Training network" + print *, " Epoch Cost (avg)" + + call system_clock(start_training) + + train_write_and_maybe_exit: & + block + integer first_epoch + integer me +#if defined(FIATS_MULTI_IMAGE_SUPPORT) + me = this_image() #else - ! me = 1 + me = 1 #endif - ! if (me==1) first_epoch = plot_file%previous_epoch + 1 -#if !defined(MULTI_IMAGE_SUPPORT) - ! 
call co_broadcast(first_epoch, source_image=1) + if (me==1) first_epoch = plot_file%previous_epoch + 1 +#if defined(FIATS_MULTI_IMAGE_SUPPORT) + call co_broadcast(first_epoch, source_image=1) #endif - ! last_epoch: & - ! associate(last_epoch => first_epoch + args%num_epochs - 1) - ! epochs: & - ! do epoch = first_epoch, last_epoch - - ! if (size(bins)>1) call shuffle(input_output_pairs) ! set up for stochastic gradient descent - ! mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] - - ! call trainable_network%train(mini_batches, cost, adam, learning_rate) - - ! average_cost: & - ! associate(average_cost => sum(cost)/size(cost)) - ! converged: & - ! associate(converged => average_cost <= args%cost_tolerance) - - ! image_1_maybe_writes: & - ! if (me==1 .and. any([converged, epoch==[first_epoch,last_epoch], mod(epoch,args%report_step)==0])) then - - ! !print '(*(g0,4x))', epoch, average_cost - ! write(plot_file%plot_unit,'(*(g0,4x))') epoch, average_cost - - ! associate(json_file => trainable_network%to_json()) - ! call json_file%write_lines(string_t(network_file)) - ! end associate - - ! end if image_1_maybe_writes - - ! signal_convergence: & - ! if (converged) then - ! block - ! integer unit - ! open(newunit=unit, file="converged", status="unknown") ! The train.sh script detects & removes this file. - ! close(unit) - ! exit epochs - ! end block - ! end if signal_convergence - ! end associate converged - ! end associate average_cost - - ! inquire(file="stop", exist=stop_requested) - - ! graceful_exit: & - ! if (stop_requested) then - ! print *,'Shutting down because a file named "stop" was found.' - ! return - ! end if graceful_exit - - ! end do epochs - ! end associate last_epoch - ! end block train_write_and_maybe_exit - - ! 
end associate training_parameters + last_epoch: & + associate(last_epoch => first_epoch + args%num_epochs - 1) + epochs: & + do epoch = first_epoch, last_epoch + + if (size(bins)>1) call shuffle(input_output_pairs) ! set up for stochastic gradient descent + mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))] + + call trainable_network%train(mini_batches, cost, adam, learning_rate) + + average_cost: & + associate(average_cost => sum(cost)/size(cost)) + converged: & + associate(converged => average_cost <= args%cost_tolerance) + + image_1_maybe_writes: & + if (me==1 .and. any([converged, epoch==[first_epoch,last_epoch], mod(epoch,args%report_step)==0])) then + + !print '(*(g0,4x))', epoch, average_cost + write(plot_file%plot_unit,'(*(g0,4x))') epoch, average_cost + + associate(json_file => trainable_network%to_json()) + call json_file%write_lines(string_t(network_file)) + end associate + + end if image_1_maybe_writes + + signal_convergence: & + if (converged) then + block + integer unit + open(newunit=unit, file="converged", status="unknown") ! The train.sh script detects & removes this file. + close(unit) + exit epochs + end block + end if signal_convergence + end associate converged + end associate average_cost + + inquire(file="stop", exist=stop_requested) + + graceful_exit: & + if (stop_requested) then + print *,'Shutting down because a file named "stop" was found.' 
+ return + end if graceful_exit + + end do epochs + end associate last_epoch + end block train_write_and_maybe_exit + + end associate training_parameters end associate output_map_and_network_file call system_clock(finish_training) + print *,"Training time: ", real(finish_training - start_training, real64)/real(clock_rate, real64),"for", & args%num_epochs,"epochs" close(plot_file%plot_unit) diff --git a/demo/training_data_files.json b/demo/training_data_files.json index baff97882..d88ffee0b 100644 --- a/demo/training_data_files.json +++ b/demo/training_data_files.json @@ -3,6 +3,6 @@ "path" : "fiats-training-data", "inputs prefix" : "training_input-image-_", "outputs prefix" : "training_output-image-_", - "infixes" : ["0", "50"] + "infixes" : ["450", "500"] } } diff --git a/include/language-support.F90 b/include/fiats-language-support.F90 similarity index 87% rename from include/language-support.F90 rename to include/fiats-language-support.F90 index d64f0096f..8aef911f7 100644 --- a/include/language-support.F90 +++ b/include/fiats-language-support.F90 @@ -1,6 +1,9 @@ ! Copyright (c), The Regents of the University of California ! 
Terms of use are as specified in LICENSE.txt +#ifndef FIATS_LANGUAGE_SUPPORT +#define FIATS_LANGUAGE_SUPPORT + #ifndef F2023_LOCALITY #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 202400) # define F2023_LOCALITY 1 @@ -18,3 +21,5 @@ # define MULTI_IMAGE_SUPPORT 1 #endif #endif + +#endif From 48dd99bdf5b770307849762308536d19a1eef49f Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 18:10:26 -0800 Subject: [PATCH 12/18] chore(demo/app/train-cloud-micro): rm unused vars --- demo/app/train-cloud-microphysics.F90 | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index 3cad5a8af..c824a867a 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -29,14 +29,13 @@ program train_cloud_microphysics character(len=*), parameter :: usage = new_line('') // new_line('') // & 'Usage: ' // new_line('') // new_line('') // & './build/run-fpm.sh run train-cloud-microphysics -- \' // new_line('') // & - ' --base --epochs \' // new_line('') // & - ' [--start ] [--end ] [--stride ] [--bins ] [--report ] [--tolerance ]'// & + ' --base --epochs [--bins ] [--report ] [--tolerance ] ' // new_line('') // & new_line('') // new_line('') // & 'where angular brackets denote user-provided values and square brackets denote optional arguments.' // new_line('') // & 'The presence of a file named "stop" halts execution gracefully.' 
// new_line('') type command_line_arguments_t - integer num_epochs, stride, num_bins, report_step + integer num_epochs, num_bins, report_step character(len=:), allocatable :: base_name real cost_tolerance end type @@ -109,15 +108,12 @@ function get_command_line_arguments() result(command_line_arguments) type(command_line_arguments_t) command_line_arguments type(command_line_t) command_line character(len=:), allocatable :: & - base_name, epochs_string, start_string, end_string, stride_string, bins_string, report_string, tolerance_string + base_name, epochs_string, bins_string, report_string, tolerance_string real cost_tolerance - integer num_epochs, num_bins, stride, report_step + integer num_epochs, num_bins, report_step base_name = command_line%flag_value("--base") epochs_string = command_line%flag_value("--epochs") - start_string = command_line%flag_value("--start") - end_string = command_line%flag_value("--end") - stride_string = command_line%flag_value("--stride") bins_string = command_line%flag_value("--bins") report_string = command_line%flag_value("--report") tolerance_string = command_line%flag_value("--tolerance") @@ -128,12 +124,11 @@ function get_command_line_arguments() result(command_line_arguments) read(epochs_string,*) num_epochs - stride = default_or_internal_read(1, stride_string) report_step = default_or_internal_read(1, report_string) num_bins = default_or_internal_read(3, bins_string) cost_tolerance = default_or_internal_read(5E-8, tolerance_string) - command_line_arguments = command_line_arguments_t(num_epochs, stride, num_bins, report_step, base_name, cost_tolerance) + command_line_arguments = command_line_arguments_t(num_epochs, num_bins, report_step, base_name, cost_tolerance) end function get_command_line_arguments From c93a52b73bccb2faf80c65782a8e141f8af873ea Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:03:45 -0800 Subject: [PATCH 13/18] chore(train.sh): error-checking, add defaults With this commit, the 
demo/train.sh training script 1. Exits if any simple command returns a non-zero exit code, 2. Uses default values for the starting and ending number of bins, 3. Makes the executable path/name a variable with a default, and 4. Removes unused arguments from the training program launch line. --- demo/train.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/demo/train.sh b/demo/train.sh index 3dffe24cf..8dffa9c22 100755 --- a/demo/train.sh +++ b/demo/train.sh @@ -1,6 +1,11 @@ #!/bin/bash -min_bins=$1 -max_bins=$2 + +set -e # exit if any simple command returns a non-zero exit code + +min_bins=${1:-3} +max_bins=${2:-4} +executable=${3:-"train-cloud-microphysics"} + let subfloor=$min_bins-1 j=subfloor while (( j++ < max_bins )); do @@ -19,7 +24,7 @@ while (( j++ < max_bins )); do echo "" echo "---------> Run $i <---------" - ./train-cloud-microphysics --base training --epochs 1000000 --bins $j --report 1000 --start 360 --stride 10 --tolerance "5.0E-08" + ./"$executable" --base fiats-training-data/training --epochs 10000 --bins $j --report 1000 --tolerance "5.0E-04" if [ -f converged ]; then echo "" From 510e9d7084e20239886c723eacd971d83cfe2bee Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:07:24 -0800 Subject: [PATCH 14/18] feat(train-cloud-micro): print epoch, cost func --- demo/app/train-cloud-microphysics.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/app/train-cloud-microphysics.F90 b/demo/app/train-cloud-microphysics.F90 index c824a867a..e4c026ea6 100644 --- a/demo/app/train-cloud-microphysics.F90 +++ b/demo/app/train-cloud-microphysics.F90 @@ -387,7 +387,7 @@ subroutine read_train_write(training_configuration, training_data_files, args, p image_1_maybe_writes: & if (me==1 .and. 
any([converged, epoch==[first_epoch,last_epoch], mod(epoch,args%report_step)==0])) then - !print '(*(g0,4x))', epoch, average_cost + print '(*(g0,4x))', epoch, average_cost write(plot_file%plot_unit,'(*(g0,4x))') epoch, average_cost associate(json_file => trainable_network%to_json()) From bbb270bc3644115aad27a16dbae16808fcceb3ce Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:11:36 -0800 Subject: [PATCH 15/18] build(fpm): update to julienne 3.6.1 This update fixes an issue with demo/app/train-cloud-microphysics that occurred when an attempt is made to open an already open file. --- demo/fpm.toml | 2 +- fpm.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/demo/fpm.toml b/demo/fpm.toml index f0a87445d..10a9fd64d 100644 --- a/demo/fpm.toml +++ b/demo/fpm.toml @@ -1,6 +1,6 @@ name = "Fiats-Demonstration-Applications" [dependencies] -julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.0"} +julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.1"} fiats = {path = "../"} netcdf-interfaces = {git = "https://github.com/berkeleylab/netcdf-interfaces.git", rev = "d2bbb71ac52b4e346b62572b1ca1620134481096"} diff --git a/fpm.toml b/fpm.toml index 4e01eb279..920a17431 100644 --- a/fpm.toml +++ b/fpm.toml @@ -1,3 +1,3 @@ name = "fiats" [dependencies] -julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.0"} +julienne = {git = "https://github.com/berkeleylab/julienne", tag = "3.6.1"} From 64909422afaf661fef0e9722f50ff0c7125a2ded Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:12:54 -0800 Subject: [PATCH 16/18] chore(example): use .F90 for preprocessing --- ...{saturated_mixing_ratio_m.f90 => saturated_mixing_ratio_m.F90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename example/supporting-modules/{saturated_mixing_ratio_m.f90 => saturated_mixing_ratio_m.F90} (100%) diff --git a/example/supporting-modules/saturated_mixing_ratio_m.f90 
b/example/supporting-modules/saturated_mixing_ratio_m.F90 similarity index 100% rename from example/supporting-modules/saturated_mixing_ratio_m.f90 rename to example/supporting-modules/saturated_mixing_ratio_m.F90 From 0735acea6b0733b654f53a2f352ebcbff1b1e107 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:17:02 -0800 Subject: [PATCH 17/18] fix(example): import required Julienne operators --- example/supporting-modules/saturated_mixing_ratio_m.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/supporting-modules/saturated_mixing_ratio_m.F90 b/example/supporting-modules/saturated_mixing_ratio_m.F90 index a0ac2e90a..3468ee8dd 100644 --- a/example/supporting-modules/saturated_mixing_ratio_m.F90 +++ b/example/supporting-modules/saturated_mixing_ratio_m.F90 @@ -31,7 +31,7 @@ module saturated_mixing_ratio_m !! in the Intermediate Complexity Atmospheric Research (ICAR) model file src/physics/mp_simple.f90. !! ICAR is distributed under the above MIT license. See https://github.com/ncar/icar. use fiats_m, only : tensor_t - use julienne_m, only : call_julienne_assert_, operator(.equalsExpected.) + use julienne_m, only : call_julienne_assert_, operator(.also.), operator(.equalsExpected.), operator(//) implicit none private @@ -75,7 +75,7 @@ elemental function y(x_in) result(a) type(tensor_t), intent(in) :: x_in type(tensor_t) a associate(x => x_in%values()) - call_julienne_assert((lbound(x,1) .equalsExpected. 1) .also. (ubound(x,1) .equalsExpected. 2,"y(x) :: sufficient input")) + call_julienne_assert((lbound(x,1) .equalsExpected. 1) .also. ((ubound(x,1) .equalsExpected. 
2) // "y(x) :: sufficient input")) a = tensor_t([saturated_mixing_ratio(x(1),x(2))]) end associate end function From 1d5f0d16aa7ae1cf4765f7c81c0db75b575e56a0 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 2 Feb 2026 21:26:58 -0800 Subject: [PATCH 18/18] fix(train.sh): reset parameters, check executable --- demo/train.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/demo/train.sh b/demo/train.sh index 8dffa9c22..c28df20cd 100755 --- a/demo/train.sh +++ b/demo/train.sh @@ -6,6 +6,11 @@ min_bins=${1:-3} max_bins=${2:-4} executable=${3:-"train-cloud-microphysics"} +if [ ! -x $executable ]; then + printf "\n $executable not found or not executable. Search ./build and, if you find $executable, create a soft link to it in this directory.\n\n" + exit 1 +fi + let subfloor=$min_bins-1 j=subfloor while (( j++ < max_bins )); do @@ -24,7 +29,7 @@ while (( j++ < max_bins )); do echo "" echo "---------> Run $i <---------" - ./"$executable" --base fiats-training-data/training --epochs 10000 --bins $j --report 1000 --tolerance "5.0E-04" + ./"$executable" --base fiats-training-data/training --epochs 1000000 --bins $j --report 1000 --tolerance "5.0E-08" if [ -f converged ]; then echo ""