1 TensorFlow #23 (Time-Series Prediction) Magnus Erik Hvass Pedersen ( / GitHub ( Labs/TensorFlow-Tutorials) / Videos on YouTube ( list=pl9hr9snujfsmeu1zniy0xphszl5uihcxz) /. RNN (Recurrent Neural Network). TensorFlow Keras, #01 #03-C, #20 RNN ~2018 Denmark ( : Aalborg ( The Hunter Corps (Jægerkorps) ( Aarhus ( C++ - ( Google V8 JavaScript Engine ( Esbjerg ( Odense ( _H. C. Andersen ( Roskilde (

2 :

3 : 5 24 "Odense" ( 2 ). RNN (Recurrent Neural Network). 5, (8 = 24 x 7 X 8)., 3.

4 Imports In [2]: %matplotlib inline import matplotlib.pyplot as plt import tensorflow as tf import numpy as np import pandas as pd import os from sklearn.preprocessing import MinMaxScaler Keras Import. In [3]: # from tf.keras.models import Sequential # This does not work! from tensorflow.python.keras.models import Sequential from tensorflow.python.keras.layers import Input, Dense, GRU, Embedding from tensorflow.python.keras.optimizers import RMSprop from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau

5 Python 3.6 (Anaconda) : In [4]: tf.__version__ Out[4]: '1.4.0' In [5]: tf.keras.__version__ Out[5]: '2.0.8-tf' In [6]: pd.__version__ Out[6]: '0.20.3'. [National Climatic Data Center (NCDC), USA] ( ( )..... In [9]: import weather.. 35MB. In [10]: weather.maybe_download_and_extract() - Download progress: 100.0% Download finished. Extracting files. Done.. In [11]: cities = weather.cities cities Out[11]: ['Aalborg', 'Aarhus', 'Esbjerg', 'Odense', 'Roskilde']

6 , In [13]: %%time df = weather.load_resampled_data() CPU times: user 16.3 ms, sys: 24.2 ms, total: 40.5 ms Wall time: 39.1 ms. In [14]: df.head() Out[14]: Aalborg Aarhus Temp Pressure WindSpeed WindDir Temp Pressure WindSp DateTime :00: :00: :00: :00: :00:

7 Esbjerg Roskilde.,...,.,,.. In [15]: df['Esbjerg']['Pressure'].plot() Out[15]: <matplotlib.axes._subplots.AxesSubplot at 0x1a37a60080>

8 In [16]: df['Roskilde']['Pressure'].plot() Out[16]: <matplotlib.axes._subplots.AxesSubplot at 0x1a406b2240> 20. In [17]: df.values.shape Out[17]: (333109, 20). In [18]: df.drop(('Esbjerg', 'Pressure'), axis=1, inplace=True) df.drop(('Roskilde', 'Pressure'), axis=1, inplace=True) 18. In [19]: df.values.shape Out[19]: (333109, 18).

9 In [20]: df.head(1) Out[20]: Aalborg Temp Pressure DateTime Aarhus WindSpeed WindDir Temp Pressure WindSpeed Wind :00: Odense 50., ,. In [21]: df['Odense']['Temp'][' ':' '].plot() Out[21]: <matplotlib.axes._subplots.AxesSubplot at 0x1a47c09438> , 50.

10 In [22]: df['Aarhus']['Temp'][' ':' '].plot() Out[22]: <matplotlib.axes._subplots.AxesSubplot at 0x1a44e41240> In [23]: df['Roskilde']['Temp'][' ':' '].plot() Out[23]: <matplotlib.axes._subplots.AxesSubplot at 0x1a44e31b38>

11 . 10.,,... (1 ~ 366) (0 ~ 23). In [23]: df['various', 'Day'] = df.index.dayofyear df['various', 'Hour'] = df.index.hour. In [24]: target_city = 'Odense'. In [26]: target_names = ['Temp', 'WindSpeed', 'Pressure']. 24. In [27]: shift_days = 1 shift_steps = shift_days * 24 # Number of hours..! In [28]: df_targets = df[target_city][target_names].shift(-shift_steps)

12 !!!. Pandas.. shift_steps + 5. In [29]: df[target_city][target_names].head(shift_steps + 5) Out[29]: Temp WindSpeed Pressure DateTime :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00: :00:

13 :00: :00: :00: :00: :00: :00: :00: In [30]: Out[30]: df_targets.head(5) DateTime Temp WindSpeed Pressure :00: :00: :00: :00: :00: , NaN( ).. In [31]: df_targets.tail() Out[31]: Temp WindSpeed Pressure DateTime :00:00 NaN NaN NaN :00:00 NaN NaN NaN :00:00 NaN NaN NaN :00:00 NaN NaN NaN :00:00 NaN NaN NaN

14 NumPy Pandas NumPy. NaN numpy. : In [32]: x_data = df.values[0:-shift_steps] In [33]: print(type(x_data)) print("shape:", x_data.shape) <class 'numpy.ndarray'> Shape: (333085, 18) ( ) : In [34]: y_data = df_targets.values[:-shift_steps] In [35]: print(type(y_data)) print("shape:", y_data.shape) <class 'numpy.ndarray'> Shape: (333085, 3) ( ) : In [38]: num_data = len(x_data) num_data Out[38]: : In [39]: train_split = 0.9 :

15 In [40]: num_train = int(train_split * num_data) num_train Out[40]: : In [41]: num_test = num_data - num_train num_test Out[41]: : In [42]: x_train = x_data[0:num_train] x_test = x_data[num_train:] len(x_train) + len(x_test) Out[42]: : In [43]: y_train = y_data[0:num_train] y_test = y_data[num_train:] len(y_train) + len(y_test) Out[43]: : In [44]: num_x_signals = x_data.shape[1] num_x_signals Out[44]: 18 : In [45]: num_y_signals = y_data.shape[1] num_y_signals Out[45]: 3

16 : In [46]: print("min:", np.min(x_train)) print("max:", np.max(x_train)) Min: Max: scikit-learn.. In [47]: x_scaler = MinMaxScaler(). In [48]: x_train_scaled = x_scaler.fit_transform(x_train), 0 1. In [49]: print("min:", np.min(x_train_scaled)) print("max:", np.max(x_train_scaled)) Min: 0.0 Max: 1.0. In [50]: x_test_scaled = x_scaler.transform(x_test)..,. In [51]: y_scaler = MinMaxScaler() y_train_scaled = y_scaler.fit_transform(y_train) y_test_scaled = y_scaler.transform(y_test)

17 2 numpy ,000. : In [52]: print(x_train_scaled.shape) print(y_train_scaled.shape) (299776, 18) (299776, 3) 300k RNN. In [54]: def batch_generator(batch_size, sequence_length): """ Generator function for creating random batches of training-data. """ # Infinite loop. while True: # Allocate a new array for the batch of input-signals. x_shape = (batch_size, sequence_length, num_x_signals) x_batch = np.zeros(shape=x_shape, dtype=np.float16) # Allocate a new array for the batch of output-signals. y_shape = (batch_size, sequence_length, num_y_signals) y_batch = np.zeros(shape=y_shape, dtype=np.float16) # Fill the batch with random sequences of data. for i in range(batch_size): # Get a random start-index. # This points somewhere into the training-data. idx = np.random.randint(num_train - sequence_length) # Copy the sequences of data starting at this index. x_batch[i] = x_train_scaled[idx:idx+sequence_length] y_batch[i] = y_train_scaled[idx:idx+sequence_length] yield (x_batch, y_batch) GPU 100%. GPU, RAM 'sequence_length'.

18 In [55]: batch_size = 256 Sequence-length: 24 x 7 time-steps is one week, so 24 x 7 x 8 time-steps is 8 weeks. In [56]: sequence_length = 24 * 7 * 8 sequence_length Out[56]: 1344 In [57]: generator = batch_generator(batch_size=batch_size, sequence_length=sequence_length). In [58]: x_batch, y_batch = next(generator) In [52]: print(x_batch.shape) print(y_batch.shape) (256, 1344, 20) (256, 1344, 3) 20.

19 In [59]: batch = 0 # First sequence in the batch. signal = 0 # First signal from the 20 input-signals. seq = x_batch[batch, :, signal] plt.plot(seq) Out[59]: [<matplotlib.lines.line2d at 0x1a3b942470>]. 20. In [60]: Out[60]: seq = y_batch[batch, :, signal] plt.plot(seq) [<matplotlib.lines.line2d at 0x1a3c2f9978>]

20 .,.... In [61]: validation_data = (np.expand_dims(x_test_scaled, axis=0), np.expand_dims(y_test_scaled, axis=0)) (RNN) RNN (Recurrent Neural Network). Keras API Keras. Keras #03-C #20. In [62]: model = Sequential() Gated Recurrent Unit (GRU) Keras (None ).(num_x_signals). In [63]: model.add(GRU(units=512, return_sequences=True, input_shape=(None, num_x_signals,))) GRU ( dense) Sigmoid In [64]: model.add(Dense(num_y_signals, activation='sigmoid'))

21 ., , NaN.. In [65]: if False: from tensorflow.python.keras.initializers import RandomUniform # Maybe use lower init-ranges. init = RandomUniform(minval=-0.05, maxval=0.05) model.add(Dense(num_y_signals, activation='linear', kernel_initializer=init)) (Loss Function) (MSE) " ",. In [66]: warmup_steps = 50

22 In [67]: def loss_mse_warmup(y_true, y_pred): """ Calculate the Mean Squared Error between y_true and y_pred, but ignore the beginning "warmup" part of the sequences. y_true is the desired output. y_pred is the model's output. """ # The shape of both input tensors are: # [batch_size, sequence_length, num_y_signals]. # Ignore the "warmup" parts of the sequences # by taking slices of the tensors. y_true_slice = y_true[:, warmup_steps:, :] y_pred_slice = y_pred[:, warmup_steps:, :] # These sliced tensors both have this shape: # [batch_size, sequence_length - warmup_steps, num_y_signals] # Calculate the MSE loss for each value in these tensors. # This outputs a 3-rank tensor of the same shape. loss = tf.losses.mean_squared_error(labels=y_true_slice, predictions=y_pred_slice) # Keras may reduce this across the first axis (the batch) # but the semantics are unclear, so to be sure we use # the loss across the entire tensor, we reduce it to a # single scalar with the mean function. loss_mean = tf.reduce_mean(loss) return loss_mean. In [68]: optimizer = RMSprop(lr=1e-3) Keras. In [69]: model.compile(loss=loss_mse_warmup, optimizer=optimizer). (None, None, 3), 3. 3.

23 In [70]: model.summary() Layer (type) Output Shape Param # ================================================================= gru_1 (GRU) (None, None, 512) 815616 dense_1 (Dense) (None, None, 3) 1539 ================================================================= Total params: 817,155 Trainable params: 817,155 Non-trainable params: 0 Callback Functions TensorBoard Keras.. In [71]: path_checkpoint = '23_checkpoint.keras' callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint, monitor='val_loss', verbose=1, save_weights_only=True, save_best_only=True). In [72]: callback_early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1) TensorBoard. In [73]: callback_tensorboard = TensorBoard(log_dir='./23_logs/', histogram_freq=0, write_graph=False) (patience = 0 ). factor. 1e-3 x 0.1 = 1e-4..

24 In [74]: callback_reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_lr=1e-4, patience=0, verbose=1) In [75]: callbacks = [callback_early_stopping, callback_checkpoint, callback_tensorboard, callback_reduce_lr]. " ". steps_per_epoch "epoch". GTX 1070 " " NaN..,,,.. In [ ]: %%time model.fit_generator(generator=generator, epochs=20, steps_per_epoch=100, validation_data=validation_data, callbacks=callbacks) Epoch 1/20 22/100 [=====>...] - ETA: 2722s - loss: (early-stopping)...

25 In [70]: try: model.load_weights(path_checkpoint) except Exception as error: print("error trying to load checkpoint.") print(error). (batch),. In [71]: result = model.evaluate(x=np.expand_dims(x_test_scaled, axis=0), y=np.expand_dims(y_test_scaled, axis=0)) 1/1 [==============================]1/1 [=========================== ===] - 4s 4s/step In [72]: print("loss (test-set):", result) loss (test-set): In [1]: #. if False: for res, metric in zip(result, model.metrics_names): print("{0}: {1:.3e}".format(metric, res)). In [74]: def plot_comparison(start_idx, length=100, train=True): """ Plot the predicted and true output-signals. :param start_idx: Start-index for the time-series. :param length: Sequence-length to process and plot. :param train: Boolean whether to use training- or test-set. """ if train: # Use training-data. x = x_train_scaled y_true = y_train else: # Use test-data. x = x_test_scaled

26 y_true = y_test # End-index for the sequences. end_idx = start_idx + length # Select the sequences from the given start-index and # of the given length. x = x[start_idx:end_idx] y_true = y_true[start_idx:end_idx] # Input-signals for the model. x = np.expand_dims(x, axis=0) # Use the model to predict the output-signals. y_pred = model.predict(x) # The output of the model is between 0 and 1. # Do an inverse map to get it back to the scale # of the original data-set. y_pred_rescaled = y_scaler.inverse_transform(y_pred[0]) # For each output-signal. for signal in range(len(target_names)): # Get the output-signal predicted by the model. signal_pred = y_pred_rescaled[:, signal] # Get the true output-signal from the data-set. signal_true = y_true[:, signal] # Make the plotting-canvas bigger. plt.figure(figsize=(15,5)) # Plot and compare the two signals. plt.plot(signal_true, label='true') plt.plot(signal_pred, label='pred') # Plot grey box for warmup-period. p = plt.axvspan(0, warmup_steps, facecolor='black', alpha=0.15) # Plot labels etc. plt.ylabel(target_names[signal]) plt.legend() plt.show()

27 , shift_steps. x , " ". 50 " ". " "... In [75]: plot_comparison(start_idx=100000, length=1000, train=True)

28 ..... ( 42 )....

29 In [76]: plot_comparison(start_idx=200000, length=1000, train=True).

30 In [77]: df['Odense']['Temp'][200000: ].plot() Out[77]: <matplotlib.axes._subplots.AxesSubplot at 0x7f69f54d37f0>... In [78]: df_org = weather.load_original_data() df_org.xs('Odense')['Temp'][' ':' '].plot() Out[78]: <matplotlib.axes._subplots.AxesSubplot at 0x7f69db165860>

31 ..,..... In [79]: plot_comparison(start_idx=200, length=1000, train=False)

32 RNN(Recurrent Neural Network) ,.... TensorFlow. TensorFlow...?.?., GRU,,. #19.. "Odense".. 3 7??, 1, 3?.

33 (MIT) (c) 2018 [Magnus Erik Hvass Pedersen] ( ( ( " ").,,.., (, ) " ".,.


