Open
Description
I'm trying to run auto_ts on my multivariate time series data using LSTM; while fitting, it raises the following error.
Data is in this format
== Status ==
Memory usage on this node: 2.7/12.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/6.79 GiB heap, 0.0/2.34 GiB objects
Current best trial: 19925_00005 with mse=0.15181996493457617 and parameters={'hidden_dim': 64, 'layer_num': 2, 'lr': 0.0010343663029423226, 'dropout': 0.09671240437800133, 'input_feature_num': None, 'output_feature_num': 1, 'past_seq_len': 4, 'future_seq_len': 1, 'selected_features': ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'MINUTE', 'DAY', 'DAYOFYEAR', 'HOUR', 'WEEKDAY', 'WEEKOFYEAR', 'MONTH', 'YEAR', 'IS_AWAKE', 'IS_BUSY_HOURS', 'IS_WEEKEND'], 'batch_size': 32}
Result logdir: /tmp/autots_estimator/autots_estimator
Number of trials: 6/6 (1 ERROR, 5 TERMINATED)
Number of errored trials: 1
Memory usage on this node: 2.7/12.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/4 CPUs, 0/0 GPUs, 0.0/6.79 GiB heap, 0.0/2.34 GiB objects
Current best trial: 19925_00005 with mse=0.15181996493457617 and parameters={'hidden_dim': 64, 'layer_num': 2, 'lr': 0.0010343663029423226, 'dropout': 0.09671240437800133, 'input_feature_num': None, 'output_feature_num': 1, 'past_seq_len': 4, 'future_seq_len': 1, 'selected_features': ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'MINUTE', 'DAY', 'DAYOFYEAR', 'HOUR', 'WEEKDAY', 'WEEKOFYEAR', 'MONTH', 'YEAR', 'IS_AWAKE', 'IS_BUSY_HOURS', 'IS_WEEKEND'], 'batch_size': 32}
Result logdir: /tmp/autots_estimator/autots_estimator
Number of trials: 6/6 (1 ERROR, 5 TERMINATED)
Number of errored trials: 1
Trial name | # failures | error file |
---|---|---|
train_func_19925_00004 | 1 | /tmp/autots_estimator/autots_estimator/train_func_19925_00004/error.txt |
---------------------------------------------------------------------------
TuneError Traceback (most recent call last)
<ipython-input-17-c1017f49fcaa> in <module>()
2 ts_pipeline = auto_estimator.fit(data=tsdata_train, # train dataset
3 validation_data=tsdata_val, # validation dataset
----> 4 epochs=5) # number of epochs to train in each trial
4 frames
/usr/local/lib/python3.7/dist-packages/zoo/chronos/autots/autotsestimator.py in fit(self, data, epochs, batch_size, validation_data, metric_threshold, n_sampling, search_alg, search_alg_params, scheduler, scheduler_params)
246 search_alg_params=search_alg_params,
247 scheduler=scheduler,
--> 248 scheduler_params=scheduler_params
249 )
250
/usr/local/lib/python3.7/dist-packages/zoo/chronos/autots/model/base_automodel.py in fit(self, data, epochs, batch_size, validation_data, metric_threshold, n_sampling, search_alg, search_alg_params, scheduler, scheduler_params)
77 search_alg_params=search_alg_params,
78 scheduler=scheduler,
---> 79 scheduler_params=scheduler_params,
80 )
81 self.best_model = self.auto_est._get_best_automl_model()
/usr/local/lib/python3.7/dist-packages/zoo/orca/automl/auto_estimator.py in fit(self, data, epochs, validation_data, metric, metric_mode, metric_threshold, n_sampling, search_space, search_alg, search_alg_params, scheduler, scheduler_params)
193 scheduler=scheduler,
194 scheduler_params=scheduler_params)
--> 195 self.searcher.run()
196 self._fitted = True
197
/usr/local/lib/python3.7/dist-packages/zoo/orca/automl/search/ray_tune/ray_tune_search_engine.py in run(self)
181 resources_per_trial=self.resources_per_trial,
182 verbose=1,
--> 183 reuse_actors=True
184 )
185 self.trials = analysis.trials
/usr/local/lib/python3.7/dist-packages/ray/tune/tune.py in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, queue_trials, reuse_actors, trial_executor, raise_on_failed_trial, callbacks, loggers, ray_auto_init, run_errored_only, global_checkpoint_period, with_server, upload_dir, sync_to_cloud, sync_to_driver, sync_on_checkpoint)
442 if incomplete_trials:
443 if raise_on_failed_trial:
--> 444 raise TuneError("Trials did not complete", incomplete_trials)
445 else:
446 logger.error("Trials did not complete: %s", incomplete_trials)
TuneError: ('Trials did not complete', [train_func_19925_00004])
Metadata
Metadata
Assignees
Labels
No labels