Data Driven Modeling

(Theme of this semester: CODING AS LITERACY)


PhD seminar series at Chair for Computer Aided Architectural Design (CAAD), ETH Zurich

Vahid Moosavi


17th Session


02 May 2017

Recurrent Neural Nets and Dynamical Systems

To be discussed

  • Dynamical Systems
    • Weather
    • Traffic
    • Energy Networks
    • Water Flow
    • Wind
    • Stock Market
    • Atmospheric Science (e.g. teleconnections)
  • Some Experiments


Unrolled RNN

  • Unrolled in time: multiple copies of the same cell, one per timestep (a minimal sketch follows below)

source: http://colah.github.io/posts/2015-08-Understanding-LSTMs/
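To make the unrolling concrete, here is a minimal NumPy sketch (illustrative only; the names rnn_step, Wx, Wh, b are not part of the models in this notebook): the same cell, i.e. the same weights, is applied at every timestep, and only the hidden state changes as it is passed forward.

import numpy as np

def rnn_step(x_t, h_prev, Wx, Wh, b):
    # one vanilla RNN cell: new hidden state from the current input and the previous state
    return np.tanh(x_t.dot(Wx) + h_prev.dot(Wh) + b)

T, input_dim, hidden_dim = 5, 1, 4
Wx = 0.1 * np.random.randn(input_dim, hidden_dim)
Wh = 0.1 * np.random.randn(hidden_dim, hidden_dim)
b = np.zeros(hidden_dim)

xs = np.random.randn(T, input_dim)     # a toy input sequence
h = np.zeros(hidden_dim)
states = []
for t in range(T):                     # the T "copies" of the cell in the diagram
    h = rnn_step(xs[t], h, Wx, Wh, b)  # same weights at every timestep
    states.append(h)
print(np.asarray(states).shape)        # (5, 4): one hidden state per timestep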

In [1]:
import warnings
warnings.filterwarnings("ignore")
import datetime
import pandas as pd
# import pandas.io.data
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import sys
import sompylib.sompy as SOM  # from pandas import Series, DataFrame

from ipywidgets import interact, HTML, FloatSlider
import tensorflow as tf
%matplotlib inline

Sine Waves

In [14]:
# A sine wave with a random frequency (extra harmonics are left commented out)
N = 500
t = np.arange(N)
a = np.random.rand(4)*.61

x = np.sin(a[0]*(t)) #+ np.cos(a[1]*(t)) #+ np.cos(a[2]*(t))#+np.cos(a[3]*(t))+ .1*np.random.rand(N)
plt.plot(x)
Out[14]:
[<matplotlib.lines.Line2D at 0x11d104c50>]
In [15]:
N = 1000
t = np.arange(N)
Waves = []
for i in range(2000):
    
    a = np.random.rand(4)*.6
    x = np.sin(a[0]*(t)) #+ np.cos(a[1]*(t)) #+ np.cos(a[2]*(t))#+np.cos(a[3]*(t))+ .1*np.random.rand(N)
    Waves.append(x[:,np.newaxis])
Waves = np.asarray(Waves)
Waves.shape
Out[15]:
(2000, 1000, 1)
In [16]:
import random
time_lag = 20
train_test_row=1000
train_data = []
test_data= []

for r in range(train_test_row):
    for t in range(0,Waves.shape[1]-time_lag-1):
        train_data.append(Waves[r,range(t,t+time_lag+1),:])
        
train_data = np.asarray(train_data)    
    
random.shuffle(train_data)
# train_data = np.transpose(train_data,[1,0,2]) #time,batch,inputdim

for r in range(train_test_row,train_test_row+1000):
    for t in range(0,Waves.shape[1]-time_lag-1):
        test_data.append(Waves[r,range(t,t+time_lag+1),:])
        
test_data = np.asarray(test_data)    

# random.shuffle(test_data)
# test_data = np.transpose(test_data,[1,0,2]) #time,batch,inputdim
In [17]:
print train_data.shape
print test_data.shape
(979000, 21, 1)
(979000, 21, 1)
In [18]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
from random import shuffle
import tensorflow as tf

NUM_EXAMPLES = 20000
INPUT_SIZE    = 1       # 1 value per timestep
RNN_HIDDEN    = 20
OUTPUT_SIZE   = 1       # 1 value per timestep
TINY          = 1e-6    # to avoid NaNs in logs
LEARNING_RATE = 0.01

tf.reset_default_graph()

test_input = test_data[:NUM_EXAMPLES,:time_lag,:]
test_output = test_data[:NUM_EXAMPLES,time_lag,:]
train_input = train_data[:NUM_EXAMPLES,:time_lag,:]
train_output = train_data[:NUM_EXAMPLES,time_lag,:]

print "test and training data loaded"



data = tf.placeholder(tf.float32, [None, time_lag,INPUT_SIZE]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, OUTPUT_SIZE])

num_hidden = 24
num_layers=1
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True)
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])  # to (time, batch, hidden)
last = tf.gather(val, int(val.get_shape()[0]) - 1)  # output at the last timestep

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

# prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

prediction = tf.matmul(last, weight) + bias

error = tf.reduce_sum(tf.pow(target-prediction, 2))
# error = tf.reduce_mean(error)

# cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(error)

# mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
# error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

accuracy = tf.abs(1-(target -prediction)/target)*100  # equals |prediction/target|*100
test and training data loaded
In [19]:
print test_input.shape,test_output.shape, train_input.shape,train_output.shape
(20000, 20, 1) (20000, 1) (20000, 20, 1) (20000, 1)
In [20]:
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

batch_size = 100
no_of_batches = int(len(train_input)) / batch_size
epoch = 50
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr+=batch_size
        sess.run(minimize,{data: inp, target: out})
    if i%2 ==0:
        incorrect = sess.run(error,{data: inp, target: out})
        print "Epoch {} error: {}".format(i,incorrect*100)
Epoch 0 error: 151.137661934
Epoch 2 error: 8.03158283234
Epoch 4 error: 5.75895793736
Epoch 6 error: 3.59865501523
Epoch 8 error: 3.99956032634
Epoch 10 error: 1.65121518075
Epoch 12 error: 0.780969532207
Epoch 14 error: 0.483778258786
Epoch 16 error: 0.301916198805
Epoch 18 error: 0.314855552278
Epoch 20 error: 0.149866065476
Epoch 22 error: 0.188259780407
Epoch 24 error: 0.22558174096
Epoch 26 error: 0.343631673604
Epoch 28 error: 0.317805842496
Epoch 30 error: 0.23353160359
Epoch 32 error: 0.146339193452
Epoch 34 error: 0.135138735641
Epoch 36 error: 0.0324470922351
Epoch 38 error: 0.0912318646442
Epoch 40 error: 0.0711301923729
Epoch 42 error: 0.179471354932
Epoch 44 error: 0.362430373207
Epoch 46 error: 0.256106816232
Epoch 48 error: 0.164400832728
In [21]:
test_preds = sess.run(prediction,{data: test_input, target: test_output})
In [22]:
# for i in range(1):
plt.subplot(111)
plt.plot(test_preds[:,0],test_output[:,0],'.')
Out[22]:
[<matplotlib.lines.Line2D at 0x11b810190>]

One Step Ahead Prediction

In [23]:
fig = plt.figure(figsize=(15,5))
plt.plot(test_output[:150,0],'.-')
plt.plot(test_preds[:150,0],'or')
Out[23]:
[<matplotlib.lines.Line2D at 0x11da5d650>]

Several Steps Ahead Prediction

Run sequentially and replace the input vectors with the model's own predictions

In [28]:
N = 1000
t = np.arange(N)
a = np.random.rand(4)*.6
test_wave = np.sin(a[0]*(t)) #+ np.cos(a[1]*(t)) #+ np.cos(a[2]*(t))#+np.cos(a[3]*(t))+ .1*np.random.rand(N)
test_wave_rnn = []
for t in range(0,test_wave.shape[0]-time_lag-1):
    test_wave_rnn.append(test_wave[range(t,t+time_lag+1)])
        
test_wave_rnn = np.asarray(test_wave_rnn)    
test_wave_rnn.shape
Out[28]:
(979, 21)
In [29]:
inp = test_wave_rnn[:1,:time_lag][:,:,np.newaxis]  # seed with the first observed window
preds = []
for step in range(1,500):

    pred = sess.run(prediction,{data:inp})
    preds.append(pred[0])
    pred_len = len(preds)
    if pred_len<time_lag:
        # not enough predictions yet: take the observed window and overwrite
        # its tail with whatever has been predicted so far
        x = test_wave_rnn[step:step+1,:time_lag]
        x[0,-pred_len:] = preds[-pred_len:]
        inp = np.asarray(x)[:,:,np.newaxis]
    else:
        # from here on, the input window consists purely of past predictions
        x = np.asarray(preds[-time_lag:])
        inp = np.asarray(x)[np.newaxis,:,:]
preds = np.asarray(preds)
In [30]:
fig = plt.figure(figsize=(10,10))
plt.subplot(2,2,1)

plt.plot(preds[:50,0],'.-')
plt.plot(test_wave_rnn[:50,time_lag],'.-r')

# plt.plot(Waves[r:r+1,range(t+time_lag,t+time_lag+3),0].T,'.-r')
plt.grid()

Chaotic Systems

Deterministic but Unpredictable!

e.g. the Lorenz attractor
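Before fitting anything, it helps to see numerically what "deterministic but unpredictable" means. A small self-contained sketch (the function name lorenz_deriv_demo is ours, not from the notebook): two Lorenz trajectories that start 1e-8 apart separate exponentially fast, which is what bounds any long-horizon forecast.

import numpy as np
from scipy import integrate
from matplotlib import pyplot as plt

def lorenz_deriv_demo(state, t0, sigma=10., beta=8./3, rho=28.0):
    # the same Lorenz equations as in the cell below
    x, y, z = state
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]

t = np.linspace(0, 40, 4000)
x_a = integrate.odeint(lorenz_deriv_demo, [1., 1., 1.], t)
x_b = integrate.odeint(lorenz_deriv_demo, [1. + 1e-8, 1., 1.], t)
dist = np.sqrt(((x_a - x_b) ** 2).sum(axis=1))
plt.semilogy(t, dist)   # roughly a straight line on the log scale: exponential divergence
plt.xlabel('time')
plt.ylabel('|x_a - x_b|')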

In [6]:
from IPython.display import HTML
HTML("""
<video width="600" height="400" controls>
  <source src="files/Images/lorentz_attractor.mp4" type="video/mp4">
</video>
""")
Out[6]:

Now let's generate some trajectories

In [3]:
#Code from: https://jakevdp.github.io/blog/2013/02/16/animating-the-lorentz-system-in-3d/

import numpy as np
from scipy import integrate

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import cnames
from matplotlib import animation
%matplotlib inline
N_trajectories = 2000


#dx/dt = sigma(y-x)
#dy/dt = x(rho-z)-y
#dz/dt = xy-beta*z

def lorentz_deriv((x, y, z), t0, sigma=10., beta=8./3, rho=28.0):
    """Compute the time-derivative of a Lorentz system."""
    return [sigma * (y - x), x * (rho - z) - y, x * y - beta * z]


# Choose random starting points, uniformly distributed from -15 to 15
np.random.seed(1)
x0 = -15 + 30 * np.random.random((N_trajectories, 3))

# Solve for the trajectories
t = np.linspace(0, 20, 2000)
x_t = np.asarray([integrate.odeint(lorentz_deriv, x0i, t)
                  for x0i in x0])
In [4]:
x_t.shape
Out[4]:
(2000, 2000, 3)
In [5]:
Data = x_t[:,range(0,2000,6)]
Data.shape
Out[5]:
(2000, 334, 3)
In [6]:
# No regularity in the behavior 
# The effect of initial value
figure =plt.figure(figsize=(10,10))
for i in range(20):
    plt.subplot(5,4,i+1);
    plt.plot(Data[i,:,1]);
    plt.xlabel('time')
    plt.ylabel('y')
plt.tight_layout();
In [7]:
import random
time_lag = 1
train_test_row=1000
train_data = []
test_data= []

for r in range(train_test_row):
    for t in range(0,Data.shape[1]-time_lag-1):
        train_data.append(Data[r,range(t,t+time_lag+1),:])
        
train_data = np.asarray(train_data)    
    
random.shuffle(train_data)
# train_data = np.transpose(train_data,[1,0,2]) #time,batch,inputdim

for r in range(train_test_row,train_test_row+1000):
    for t in range(0,Data.shape[1]-time_lag-1):
        test_data.append(Data[r,range(t,t+time_lag+1),:])
        

# No need to shuffle the test set; keeping it ordered makes sequential prediction easier later
test_data = np.asarray(test_data)    
In [8]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
from random import shuffle
import tensorflow as tf
tf.reset_default_graph()

NUM_EXAMPLES = 20000
INPUT_SIZE    = 3       # 3 values (x, y, z) per timestep
RNN_HIDDEN    = 20
OUTPUT_SIZE   = 3       # 3 values (x, y, z) per timestep
TINY          = 1e-6    # to avoid NaNs in logs
LEARNING_RATE = 0.01



test_input = test_data[:NUM_EXAMPLES,:time_lag,:]
test_output = test_data[:NUM_EXAMPLES,time_lag,:]
train_input = train_data[:NUM_EXAMPLES,:time_lag,:]
train_output = train_data[:NUM_EXAMPLES,time_lag,:]

print "test and training data loaded"



data = tf.placeholder(tf.float32, [None, time_lag,INPUT_SIZE]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, OUTPUT_SIZE])

num_hidden = 24
num_layers=2
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True)
# note: newer TensorFlow versions require a separate LSTMCell instance per layer
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

# prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

prediction = tf.matmul(last, weight) + bias

error = tf.reduce_sum(tf.pow(target-prediction, 2))
# error = tf.reduce_mean(error)

# cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(error)

# mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
# error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

accuracy = tf.abs(1-(target -prediction)/target)*100
test and training data loaded
In [9]:
print test_input.shape,test_output.shape, train_input.shape,train_output.shape
(20000, 1, 3) (20000, 3) (20000, 1, 3) (20000, 3)
In [10]:
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

batch_size = 200
no_of_batches = int(len(train_input)) / batch_size
epoch = 100
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr+=batch_size
        sess.run(minimize,{data: inp, target: out})
    if i%3 ==0:
        incorrect = sess.run(error,{data: inp, target: out})
        print "Epoch {} error: {}".format(i,incorrect*100)
        
Epoch 0 error: 11489012.5
Epoch 3 error: 2854307.03125
Epoch 6 error: 1628328.22266
Epoch 9 error: 959440.625
Epoch 12 error: 607786.425781
Epoch 15 error: 404043.62793
Epoch 18 error: 279811.938477
Epoch 21 error: 201083.483887
Epoch 24 error: 148937.82959
Epoch 27 error: 111654.541016
Epoch 30 error: 83870.2148438
Epoch 33 error: 63246.661377
Epoch 36 error: 48217.4102783
Epoch 39 error: 37168.8842773
Epoch 42 error: 29102.8045654
Epoch 45 error: 23247.1557617
Epoch 48 error: 18867.8787231
Epoch 51 error: 15487.5152588
Epoch 54 error: 12921.7697144
Epoch 57 error: 11046.9238281
Epoch 60 error: 9603.9276123
Epoch 63 error: 8395.11413574
Epoch 66 error: 7303.97186279
Epoch 69 error: 6400.340271
Epoch 72 error: 5572.66235352
Epoch 75 error: 4741.63131714
Epoch 78 error: 3892.06161499
Epoch 81 error: 3123.98872375
Epoch 84 error: 2584.26246643
Epoch 87 error: 2203.18202972
Epoch 90 error: 1926.78127289
Epoch 93 error: 1687.9196167
Epoch 96 error: 1515.82260132
Epoch 99 error: 1395.79143524
In [11]:
test_preds = sess.run(prediction,{data: test_input, target: test_output})
In [12]:
ac = sess.run(accuracy,{data:test_input, target: test_output})
In [13]:
for i in range(3):
    plt.subplot(3,3,i+1)
    plt.plot(test_preds[:,i],test_output[:,i],'.')

One Step Ahead Prediction

In [14]:
fig = plt.figure(figsize=(15,5))
plt.plot(test_output[:150,0],'.-')
plt.plot(test_preds[:150,0],'or')
Out[14]:
[<matplotlib.lines.Line2D at 0x120c79710>]

Run sequentially

In [72]:
r = train_test_row+900
t = 80
inp = Data[r:r+1,range(t,t+time_lag),:]
preds = []
for step in range(1800):
    
    pred = sess.run(prediction,{data: inp})
    preds.append(pred[0])
    pred_len = len(preds)
    pred_len = np.minimum(pred_len,time_lag)
    x = list(inp[0,:,:])
    x = x[step+1:]+preds[-pred_len:]   # slide the window: drop old values, append predictions
    inp = np.asarray(x)[np.newaxis,:,:]
    
    
preds = np.asarray(preds)
In [71]:
# preds1 = preds.copy()
# preds2 = preds.copy()
# preds3 = preds.copy()
In [73]:
fig = plt.figure(figsize=(10,10))
plt.subplot(2,2,1)
plt.plot(preds[:100,0],'.-r')
plt.plot(Data[r:r+1,range(t+time_lag,t+time_lag+100),0].T)

plt.subplot(2,2,2)
plt.plot(preds[:100,1],'.-r')
plt.plot(Data[r:r+1,range(t+time_lag,t+time_lag+100),1].T)

plt.subplot(2,2,3)
plt.plot(preds[:100,2],'.-r')
plt.plot(Data[r:r+1,range(t+time_lag,t+time_lag+100),2].T)
Out[73]:
[<matplotlib.lines.Line2D at 0x128e60f90>]
In [77]:
pred_trajs = preds[np.newaxis,:,:]
# pred_trajs = np.concatenate((preds3[np.newaxis,:Data.shape[1]-(t+time_lag),:],preds2[np.newaxis,:Data.shape[1]-(t+time_lag),:],preds1[np.newaxis,:Data.shape[1]-(t+time_lag),:],preds[np.newaxis,:Data.shape[1]-(t+time_lag),:]),axis=0)

# pred_trajs = np.concatenate((Data[r:r+1,t+time_lag:,:],preds[np.newaxis,:Data.shape[1]-(t+time_lag),:]),axis=0)
pred_trajs.shape
Out[77]:
(1, 1800, 3)
In [78]:
# Set up figure & 3D axis for animation
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1], projection='3d')
ax.axis('off')
plt.set_cmap(plt.cm.YlOrRd_r)
# plt.set_cmap(plt.cm.hot)
# choose a different color for each trajectory
N_trajectories = pred_trajs.shape[0]
colors = plt.cm.jet_r(np.linspace(0, 1, N_trajectories))


# set up lines and points
lines = sum([ax.plot([], [], [], '-', c=c)
             for c in colors], [])
pts = sum([ax.plot([], [], [], 'o', c=c)
           for c in colors], [])

# prepare the axes limits
ax.set_xlim((-25, 25))
ax.set_ylim((-35, 35))
ax.set_zlim((5, 55))

# set point-of-view: specified by (altitude degrees, azimuth degrees)
ax.view_init(30, 0)

# initialization function: plot the background of each frame
def init():
    for line, pt in zip(lines, pts):
        line.set_data([], [])
        line.set_3d_properties([])

        pt.set_data([], [])
        pt.set_3d_properties([])
    return lines + pts

# animation function.  This will be called sequentially with the frame number
def animate(i):
    # we'll step two time-steps per frame.  This leads to nice results.
    i = (2 * i) % pred_trajs.shape[1]

    # animate the predicted trajectories rather than the original data x_t
    for line, pt, xi in zip(lines, pts, pred_trajs):
        x, y, z = xi[:i].T
        line.set_data(x, y)
        line.set_3d_properties(z)

        pt.set_data(x[-1:], y[-1:])
        pt.set_3d_properties(z[-1:])

    ax.view_init(30, 0.3 * i)
    fig.canvas.draw()
    return lines + pts

# instantiate the animator.
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=500, interval=10, blit=True)

# Save as mp4. This requires mplayer or ffmpeg to be installed
anim.save('./Images/lorenz_preds.mp4', fps=15, extra_args=['-vcodec', 'libx264'],dpi=200)

plt.close()

It can't predict the whole trajectory (chaos forbids that), but ...

Seemingly it has learned the underlying dynamics and inter-dependencies

(i.e. an approximation of the governing differential equations)
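One rough way to probe this claim: with time_lag = 1 the network was trained on (state, next state) pairs, so its one-step map should approximate the flow of the ODE over one sampling interval, roughly an Euler step x + dt*f(x). A sketch, assuming the Lorenz sess, prediction and data tensors from above are still live; the Euler comparison is only indicative:

dt = 6 * (20.0 / 2000)                   # approx. sampling interval of Data (every 6th of 2000 steps over t in [0, 20])
state = Data[1500, 100, :]               # an arbitrary state from the test rows
rnn_next = sess.run(prediction, {data: state[np.newaxis, np.newaxis, :]})[0]
euler_next = state + dt * np.asarray(lorentz_deriv(state, 0))
print(rnn_next)
print(euler_next)                        # should be in the same ballpark, not identical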

In [5]:
from IPython.display import HTML
HTML("""
<video width="600" height="400" controls>
  <source src="files/Images/lorenz_preds.mp4" type="video/mp4">
</video>
""")
Out[5]:

One possible application of the trained RNN:

For resampling of dependent observations (i.e. generating new sample paths of a stochastic process)!
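As a sketch of this idea (assuming the Lorenz sess, prediction and data tensors above are still live and time_lag = 1; the noise scale 0.1 is an arbitrary choice for illustration): seed the network with randomly chosen observed states and roll it forward, jittering each step to obtain new, dependent sample paths.

def resample_trajectory(n_steps=300, noise=0.1):
    # start from a randomly picked observed state and iterate the one-step model
    r = np.random.randint(Data.shape[0])
    t0 = np.random.randint(Data.shape[1])
    state = Data[r, t0, :]
    traj = []
    for _ in range(n_steps):
        state = sess.run(prediction, {data: state[np.newaxis, np.newaxis, :]})[0]
        state = state + noise * np.random.randn(3)   # mimic observation noise
        traj.append(state)
    return np.asarray(traj)

samples = [resample_trajectory() for _ in range(5)]   # five new "bootstrap-like" paths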


Financial Time Series

Forex

Can we learn it as well? Here, unlike the Lorenz system, we don't know whether there is an underlying deterministic system at all.

In [368]:
import glob
import os
# path = './Data/Forex_10m/'   # 10-minute data, not used here
path =  './Data/Forex_hourly/'
all_pairs = []
for filename in glob.glob(os.path.join(path, '*.csv')):
    all_pairs.append(filename)

print len(all_pairs)
14
In [369]:
all_opens = pd.DataFrame()
for pair in all_pairs[:]:
    DF = pd.read_csv(pair,index_col=0)
    Ticker = pair.replace(path,'').replace('.csv','')
    DF[Ticker] = DF[Ticker].fillna(method='backfill',limit=1,axis=0)
    all_opens[Ticker] = DF[Ticker]
    print DF.index[0],DF.index[-1], Ticker, DF.shape
2001-01-02 23:00:00 2017-03-31 20:00:00 AUDJPY (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 AUDUSD (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 CHFJPY (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 EURCAD (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 EURCHF (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 EURGBP (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 EURJPY (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 EURUSD (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 GBPCHF (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 GBPJPY (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 GBPUSD (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 USDCAD (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 USDCHF (142366, 1)
2001-01-02 23:00:00 2017-03-31 20:00:00 USDJPY (142366, 1)
In [370]:
all_opens.head()
Out[370]:
AUDJPY AUDUSD CHFJPY EURCAD EURCHF EURGBP EURJPY EURUSD GBPCHF GBPJPY GBPUSD USDCAD USDCHF USDJPY
Date
2001-01-02 23:00:00 64.285435 0.561779 71.512308 1.419355 1.520868 0.632968 108.794898 0.950700 2.401669 171.806923 1.501474 1.492754 1.599382 114.405116
2001-01-03 00:00:00 64.429483 0.562908 71.499831 1.418677 1.521016 0.632986 108.781897 0.950402 2.401876 171.778475 1.501043 1.492696 1.600027 114.428036
2001-01-03 01:00:00 64.508947 0.563557 71.505614 1.419098 1.520830 0.632505 108.779167 0.950341 2.403367 171.897818 1.502067 1.493226 1.599927 114.434074
2001-01-03 02:00:00 64.510862 0.563386 71.510351 1.418898 1.521014 0.632103 108.800667 0.950155 2.405223 172.048276 1.502704 1.493200 1.600433 114.477679
2001-01-03 03:00:00 64.664310 0.564420 71.561000 1.418656 1.520996 0.632241 108.873273 0.950166 2.404674 172.126333 1.502437 1.492967 1.600336 114.553750
In [390]:
DF = all_opens.iloc[:50]  # positional slice of the first 50 rows (.ix is deprecated)
print DF.shape
DF = DF.fillna(method='backfill',limit=1,axis=0)
DF[DF.columns[1:2]].plot(logy=False,legend=False,rot=45,style='.-',grid=True)
(50, 14)
Out[390]:
<matplotlib.axes._subplots.AxesSubplot at 0x21480d490>
In [372]:
import random
time_lag = 20
train_data = []
maxdlen = min(all_opens.shape[0],250000)
for t in range(0,maxdlen-time_lag-1):
        train_data.append(all_opens.values[t:t+time_lag+1,:])
train_data = np.asarray(train_data)
In [373]:
indnan = np.isnan(train_data).sum(axis=2).sum(axis=1)
ind = indnan==0
train_data = train_data[ind]
train_data.shape
print train_data.shape
NUM_EXAMPLES = 60000
test_data = train_data[NUM_EXAMPLES:]
train_data = train_data[:NUM_EXAMPLES]
print train_data.shape
print test_data.shape
random.shuffle(train_data)
(83041, 21, 14)
(60000, 21, 14)
(23041, 21, 14)
In [374]:
pd.DataFrame(data=all_opens.columns).T
Out[374]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13
0 AUDJPY AUDUSD CHFJPY EURCAD EURCHF EURGBP EURJPY EURUSD GBPCHF GBPJPY GBPUSD USDCAD USDCHF USDJPY

Predicting the Real Values of One Ticker

In [379]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import random
from random import shuffle
import tensorflow as tf
tf.reset_default_graph()



# Target = sel_cols
Target = [12]                   # USDCHF (column indices per the table above)
sel_cols = [7,10,11,13,12]      # EURUSD, GBPUSD, USDCAD, USDJPY, USDCHF


NUM_EXAMPLES = 20000
INPUT_SIZE    = len(sel_cols)           
RNN_HIDDEN    = 20
OUTPUT_SIZE   = len(Target)      
TINY          = 1e-6    # to avoid NaNs in logs
LEARNING_RATE = 0.01



test_input = test_data[:NUM_EXAMPLES,:time_lag,sel_cols]   # held-out test windows
if len(Target)>1:
    test_output = test_data[:NUM_EXAMPLES,time_lag,Target]
    train_output = train_data[:NUM_EXAMPLES,time_lag,Target]
else:
    test_output = test_data[:NUM_EXAMPLES,time_lag,Target[0]:Target[0]+1]
    train_output = train_data[:NUM_EXAMPLES,time_lag,Target[0]:Target[0]+1]
train_input = train_data[:NUM_EXAMPLES,:time_lag,sel_cols]
print "test and training data loaded"



data = tf.placeholder(tf.float32, [None, time_lag,INPUT_SIZE]) #Number of examples, number of input, dimension of each input
target = tf.placeholder(tf.float32, [None, OUTPUT_SIZE])

num_hidden = 24
num_layers=2
cell = tf.nn.rnn_cell.LSTMCell(num_hidden,state_is_tuple=True)
cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
val, _ = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1)

weight = tf.Variable(tf.truncated_normal([num_hidden, int(target.get_shape()[1])]))
bias = tf.Variable(tf.constant(0.1, shape=[target.get_shape()[1]]))

# prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

prediction = tf.matmul(last, weight) + bias

error = tf.reduce_sum(tf.pow(target-prediction, 2))
# error = tf.reduce_mean(error)

# cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(error)

# mistakes = tf.not_equal(tf.argmax(target, 1), tf.argmax(prediction, 1))
# error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

accuracy = tf.abs(1-(target -prediction)/target)*100
test and training data loaded
In [380]:
print test_input.shape,test_output.shape, train_input.shape,train_output.shape
(20000, 20, 5) (20000, 1) (20000, 20, 5) (20000, 1)
In [381]:
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

batch_size = 200
no_of_batches = int(len(train_input)) / batch_size
epoch = 200
for i in range(epoch):
    ptr = 0
    for j in range(no_of_batches):
        inp, out = train_input[ptr:ptr+batch_size], train_output[ptr:ptr+batch_size]
        ptr+=batch_size
        sess.run(minimize,{data: inp, target: out})
    if i%3 ==0:
        SE = sess.run(error,{data: inp, target: out})
        print "Epoch {} error: {}".format(i,SE)
        
Epoch 0 error: 6.55176973343
Epoch 3 error: 3.89536499977
Epoch 6 error: 1.88389110565
Epoch 9 error: 1.46300196648
Epoch 12 error: 0.986657738686
Epoch 15 error: 0.20409283042
Epoch 18 error: 0.040967117995
Epoch 21 error: 0.0389034375548
Epoch 24 error: 0.0364233143628
Epoch 27 error: 0.0300719439983
Epoch 30 error: 0.0283087622374
Epoch 33 error: 0.0246626548469
Epoch 36 error: 0.0229454580694
Epoch 39 error: 0.0461691729724
Epoch 42 error: 0.0551976561546
Epoch 45 error: 0.0351951792836
Epoch 48 error: 0.0201646499336
Epoch 51 error: 0.032220967114
Epoch 54 error: 0.0309986621141
Epoch 57 error: 0.0301187001169
Epoch 60 error: 0.0380215346813
Epoch 63 error: 0.0487026534975
Epoch 66 error: 0.0469816327095
Epoch 69 error: 0.0302092656493
Epoch 72 error: 0.0180076546967
Epoch 75 error: 0.0161619912833
Epoch 78 error: 0.0271464847028
Epoch 81 error: 0.0429531447589
Epoch 84 error: 0.0427145287395
Epoch 87 error: 0.0419983938336
Epoch 90 error: 0.0414374172688
Epoch 93 error: 0.0416199155152
Epoch 96 error: 0.0402094312012
Epoch 99 error: 0.0399771854281
Epoch 102 error: 0.0385561212897
Epoch 105 error: 0.0380765460432
Epoch 108 error: 0.0365743748844
Epoch 111 error: 0.0331384390593
Epoch 114 error: 0.0340393036604
Epoch 117 error: 0.0267145484686
Epoch 120 error: 0.0306864418089
Epoch 123 error: 0.0129253240302
Epoch 126 error: 0.0118629792705
Epoch 129 error: 0.00793247669935
Epoch 132 error: 0.00597347784787
Epoch 135 error: 0.00955007597804
Epoch 138 error: 0.0230343081057
Epoch 141 error: 0.0292865559459
Epoch 144 error: 0.00565265119076
Epoch 147 error: 0.0144629999995
Epoch 150 error: 0.0204198695719
Epoch 153 error: 0.0174022875726
Epoch 156 error: 0.0112942028791
Epoch 159 error: 0.00883933342993
Epoch 162 error: 0.0083374120295
Epoch 165 error: 0.00591534096748
Epoch 168 error: 0.0073094656691
Epoch 171 error: 0.00984713435173
Epoch 174 error: 0.010978018865
Epoch 177 error: 0.00654088519514
Epoch 180 error: 0.00899933837354
Epoch 183 error: 0.00457765953615
Epoch 186 error: 0.00881194416434
Epoch 189 error: 0.00408240221441
Epoch 192 error: 0.00615442870185
Epoch 195 error: 0.00865379162133
Epoch 198 error: 0.0110528310761
In [382]:
test_preds = sess.run(prediction,{data: test_input, target: test_output})
In [383]:
for i in range(len(Target)):
    plt.subplot(3,3,i+1)
    plt.plot(test_preds[:,i],test_output[:,i],'.')
In [393]:
fig = plt.figure(figsize=(15,5))
plt.plot(test_output[:50,0],'.-')
plt.plot(test_preds[:50,0],'.-')
Out[393]:
[<matplotlib.lines.Line2D at 0x12441c310>]