GTZAN Music and Speech - [kaggle]

Dataset collected for the purposes of music/speech discrimination. The dataset consists of 120 tracks, each 30 seconds long. Each class (music/speech) has 60 examples. The tracks are all 22050Hz Mono 16-bit audio files in .wav format.

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage import data
from scipy.misc import imresize
import IPython.display as ipyd

import tensorflow as tf
from libs import utils, gif, datasets, dataset_utils, vae, dft

%matplotlib inline
plt.style.use('ggplot')
c:\python36\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
In [2]:
dataset = 'gtzan_music_speech'

music_dir = os.path.join(os.path.join(dataset, 'music_speech'), 'music_wav')

#music folder
music = [os.path.join(music_dir, file_i)
         for file_i in os.listdir(music_dir)
         if file_i.endswith('.wav')]


speech_dir = os.path.join(os.path.join(dataset, 'music_speech'), 'speech_wav')
# speech folder
speech = [os.path.join(speech_dir, file_i)
          for file_i in os.listdir(speech_dir)
          if file_i.endswith('.wav')]

print(music[:10], speech[:10])
['gtzan_music_speech\\music_speech\\music_wav\\bagpipe.wav', 'gtzan_music_speech\\music_speech\\music_wav\\ballad.wav', 'gtzan_music_speech\\music_speech\\music_wav\\bartok.wav', 'gtzan_music_speech\\music_speech\\music_wav\\beat.wav', 'gtzan_music_speech\\music_speech\\music_wav\\beatles.wav', 'gtzan_music_speech\\music_speech\\music_wav\\bigband.wav', 'gtzan_music_speech\\music_speech\\music_wav\\birdland.wav', 'gtzan_music_speech\\music_speech\\music_wav\\blues.wav', 'gtzan_music_speech\\music_speech\\music_wav\\bmarsalis.wav', 'gtzan_music_speech\\music_speech\\music_wav\\brahms.wav'] ['gtzan_music_speech\\music_speech\\speech_wav\\acomic.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\acomic2.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\allison.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\amal.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\austria.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\bathroom1.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\chant.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\charles.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\china.wav', 'gtzan_music_speech\\music_speech\\speech_wav\\comedy.wav']
In [3]:
file_i = music[1]
s = utils.load_audio(file_i)
plt.plot(s)
Out[3]:
[<matplotlib.lines.Line2D at 0x192ca3ce898>]

Now instead of feed the raw audio signal, I'll use Discrete Fourier Transform to represent the audio. The Discrete Fourier Trasnform will return a complex value which should be converted to polar coordinates. The polar coordinates will represent the magnitudes and phases of the signal.

This whole process is done for a single audio clip as follows:

In [4]:
fft_size = 512
hop_size = 256
print("s:",s)
print("\ns shape:", s.shape)

re, img = dft.dft_np(s, hop_size=256, fft_size=512)
print("\nreal:",re[:5])
print("\nimaginary:",img[:5])
print("\nreal shape, imaginary shape:", re.shape, img.shape)

mag, phs = dft.ztoc(re, img)
print("\nMagnitude:",mag[:5])
print("\nPhase:",phs[:5])

print("\nMagnitude Shape:",mag.shape)
print("\nPhase Shape:",phs.shape)
s: [-0.0541641  -0.0493915  -0.04054815 ... -0.02805517 -0.02440554
 -0.01850997]

s shape: (661500,)

real: [[-2.43734226e-03  1.50501157e-03 -2.16234027e-03 ... -7.11791082e-06
   1.13323011e-05 -9.85735569e-06]
 [ 3.87822581e-03 -1.99547326e-03  2.29259967e-03 ... -1.04653749e-06
  -6.90795857e-06  7.24296738e-06]
 [ 6.51898904e-03 -4.57777551e-03  4.46612909e-04 ... -1.56317754e-06
   1.54735799e-06 -4.83641929e-06]
 [-6.70414100e-03  4.60728440e-03 -2.78200057e-03 ...  4.42342004e-06
   3.65455230e-06 -2.42312723e-06]
 [-2.32538015e-03  1.95579526e-03  7.88075784e-04 ...  9.24375313e-06
  -1.18336678e-05  1.51645005e-05]]

imaginary: [[ 0.00000000e+00  6.90764493e-05 -1.00431860e-03 ...  5.04322695e-06
  -5.45663690e-06 -9.95676584e-17]
 [ 0.00000000e+00 -1.69763287e-03 -1.88708446e-03 ...  4.48075013e-06
  -5.16887345e-07 -8.31267265e-18]
 [ 0.00000000e+00  3.17304086e-03 -7.77054956e-04 ... -1.45157001e-05
   1.94935830e-06  3.66072816e-17]
 [ 0.00000000e+00  1.86941203e-03 -1.66583074e-03 ... -1.31659963e-05
   1.48707781e-05  6.39057545e-17]
 [ 0.00000000e+00 -3.04407681e-03  4.11733676e-04 ...  1.24993673e-05
  -1.23817484e-05 -2.10321110e-17]]

real shape, imaginary shape: (2583, 256) (2583, 256)

Magnitude: [[2.43734226e-03 1.50659596e-03 2.38419196e-03 ... 8.72346219e-06
  1.25775965e-05 9.85735569e-06]
 [3.87822581e-03 2.61989902e-03 2.96936038e-03 ... 4.60134355e-06
  6.92726961e-06 7.24296738e-06]
 [6.51898904e-03 5.56993868e-03 8.96257494e-04 ... 1.45996258e-05
  2.48883799e-06 4.83641929e-06]
 [6.70414100e-03 4.97209925e-03 3.24260994e-03 ... 1.38892081e-05
  1.53132555e-05 2.42312723e-06]
 [2.32538015e-03 3.61822314e-03 8.89150190e-04 ... 1.55460977e-05
  1.71272703e-05 1.51645005e-05]]

Phase: [[ 3.14159265e+00  4.58654316e-02 -2.70677982e+00 ...  2.52516724e+00
  -4.48747972e-01 -3.14159265e+00]
 [ 0.00000000e+00 -2.43666799e+00 -6.88680334e-01 ...  1.80024607e+00
  -3.06690692e+00 -1.14768881e-12]
 [ 0.00000000e+00  2.53548534e+00 -1.04914940e+00 ... -1.67807166e+00
   8.99860738e-01  3.14159265e+00]
 [ 3.14159265e+00  3.85454615e-01 -2.60206423e+00 ... -1.24667190e+00
   1.32981793e+00  3.14159265e+00]
 [ 3.14159265e+00 -9.99717197e-01  4.81449356e-01 ...  9.34024795e-01
  -2.33356481e+00 -1.38693068e-12]]

Magnitude Shape: (2583, 256)

Phase Shape: (2583, 256)
In [5]:
plt.figure(figsize=(4,10))
plt.imshow(mag)
Out[5]:
<matplotlib.image.AxesImage at 0x192ca4990b8>

First transpose to visualize it better. Then converted it to pseudo decibel scale by taking log.

In [6]:
plt.figure(figsize=(20, 8))
plt.imshow(np.log(mag.T))
plt.xlabel('Time')
plt.ylabel('Frequency Bin')
Out[6]:
Text(0,0.5,'Frequency Bin')

Each column in the above graph can be fed to the neural network. But it does not represent the entire audio. For that we need to slide a 2D window from left to right and collect the samples

In [7]:
# sample rate 22050 hz for this dataset
sr = 22050

# Calculated how many hops there are in a second which will tell us how many frames of magnitudes we have per second
n_frames_per_second = sr // hop_size
print("n_frames_per_second:",n_frames_per_second)

# We want 500 milliseconds of audio in our window
n_frames = n_frames_per_second // 2
print("n_frames:",n_frames)

# And we'll move our window by 250 ms at a time
frame_hops = n_frames_per_second // 4
print("frame_hops:",frame_hops)

# We'll therefore have this many sliding windows:
n_hops = (len(mag) - n_frames) // frame_hops
print("n_hops:",n_hops)
n_frames_per_second: 86
n_frames: 43
frame_hops: 21
n_hops: 120

Now collect all the sliding windows into a list of Xs and label them based on being music as 0 or speech as 1 into a collection of ys.

In [8]:
Xs = []
ys = []
for hop_i in range(n_hops):
    # Creating our sliding window
    frames = mag[(hop_i * frame_hops):(hop_i * frame_hops + n_frames)]
    
    # Store them with a new 3rd axis and as a logarithmic scale
    # Add a small value to avoid log of 0
    Xs.append(np.log(np.abs(frames[..., np.newaxis]) + 1e-10))
    ys.append(0)
print("Xs:",Xs[:1])
print("\nys:",ys[:1])

print("\nXs shape:",np.shape(Xs))
print("\nys shape:",np.shape(ys))
Xs: [array([[[ -6.01684703],
        [ -6.49790244],
        [ -6.03889497],
        ...,
        [-11.6494829 ],
        [-11.28358543],
        [-11.52728247]],

       [[ -5.55237747],
        [ -5.94461946],
        [ -5.81940868],
        ...,
        [-12.28914049],
        [-11.88003038],
        [-11.83546577]],

       [[ -5.03303595],
        [ -5.19037122],
        [ -7.01728269],
        ...,
        [-11.13450781],
        [-12.90365445],
        [-12.23931525]],

       ...,

       [[ -5.4001229 ],
        [ -5.23312267],
        [ -5.80510439],
        ...,
        [-13.46533933],
        [-12.3297935 ],
        [-11.50671813]],

       [[ -6.59450988],
        [ -7.64329047],
        [ -4.8658321 ],
        ...,
        [-11.00513756],
        [-10.98309757],
        [-11.00530609]],

       [[ -5.7029856 ],
        [ -5.64284087],
        [ -5.57798482],
        ...,
        [-12.04586625],
        [-11.37758064],
        [-10.52689075]]])]

ys: [0]

Xs shape: (120, 43, 256, 1)

ys shape: (120,)
In [9]:
plt.figure(figsize=(20,8))
plt.imshow(Xs[0][..., 0])
plt.title('label:{}'.format(ys[1]))
Out[9]:
Text(0.5,1,'label:0')
In [10]:
plt.figure(figsize=(20,8))
plt.imshow(Xs[1][..., 0])
plt.title('label:{}'.format(ys[1]))
Out[10]:
Text(0.5,1,'label:0')

The window is sliding down 250 milliseconds at a time, and since our window is 500 ms long, or half a second, it has 50% new content at the bottom. Now doing this for every audio file now:

In [11]:
# Store every magnitude frame and its label of being music: 0 or speech: 1
Xs, ys = [], []

#for music
for i in music:
    s = utils.load_audio(i)

    # taking DSP
    re, im = dft.dft_np(s, fft_size=fft_size, hop_size=hop_size)
    # converting complex to polar
    mag, phs = dft.ztoc(re, im)
    
    # no. of sliding windows:
    n_hops = (len(mag) - n_frames) // frame_hops
    
    # extract them all:
    for hop_i in range(n_hops):
        
        # Get the current sliding window
        frames = mag[(hop_i * frame_hops):(hop_i * frame_hops + n_frames)]
        
        # take the log magnitudes:
        this_X = np.log(np.abs(frames[..., np.newaxis]) + 1e-10)
        
        # And store it:
        Xs.append(this_X)
        
        # store correct labels:
        ys.append(0)
        
#for speech
for i in speech:
    
    s = utils.load_audio(i)
    re, im = dft.dft_np(s, fft_size=fft_size, hop_size=hop_size)
    mag, phs = dft.ztoc(re, im)

    n_hops = (len(mag) - n_frames) // frame_hops

    for hop_i in range(n_hops):
        frames = mag[(hop_i * frame_hops):(hop_i * frame_hops + n_frames)]
        this_X = np.log(np.abs(frames[..., np.newaxis]) + 1e-10)
        Xs.append(this_X)
        ys.append(0)
        

Xs = np.array(Xs)
ys = np.array(ys)

print(Xs.shape, ys.shape)
(15360, 43, 256, 1) (15360,)
In [12]:
# Describe the input shape of the network
n_observations, n_height, n_width, n_channels = Xs.shape

Using the helper class in libs\datasets_utils.py, we will split the Xs and ys into training, testing and validation. Also specifying a parameter one_hot stating whether we want our ys to be converted to a one hot vector or not. Each mini batch can be fetched using next_batch() function of this object. This will also perform the shuffling of the dataset.

In [13]:
ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], one_hot=True)
In [14]:
Xs_i, ys_i = next(ds.train.next_batch())
print(Xs_i.shape, ys_i.shape)
(100, 43, 256, 1) (100, 2)
In [15]:
# visualize first element 
plt.figure(figsize=(20,8))
plt.imshow(Xs_i[0, :, :, 0])
plt.title('label:{}'.format(ys_i[0]))
Out[15]:
Text(0.5,1,'label:[1 0]')

Build CNN

Construction Phase:

In [16]:
tf.reset_default_graph()
In [17]:
# create placeholders
# Xs is 4D array (n_obs, n_height, n_width, n_channels)
X = tf.placeholder(name='X', shape=[None, 43, 256, 1], dtype=tf.float32)
y = tf.placeholder(name='y', shape=[None, 2], dtype=tf.float32)
In [18]:
n_filters = [32,32,32,32]
H = X
for layer_i, n_filters_i in enumerate(n_filters):
    
    H, W = utils.conv2d(
        #H, n_filters_i, k_h=3, k_w=3, d_h=2, d_w=2,
        H, n_filters_i, k_h=10, k_w=10, d_h=4, d_w=4,
        name=str(layer_i))

    H = tf.nn.relu(H)
    print(H.get_shape().as_list())
[None, 11, 64, 32]
[None, 3, 16, 32]
[None, 1, 4, 32]
[None, 1, 1, 32]
In [19]:
# connect the last convolutional layer to a fully connected network 
fc, W = utils.linear(H, 100, str(len(n_filters)))
Y_pred, W = utils.linear(fc, 2, str(len(n_filters) + 1), tf.nn.softmax)
In [20]:
# defining loss function and calucating cost of entire batch
loss = utils.binary_cross_entropy(Y_pred, y)
cost = tf.reduce_mean(tf.reduce_sum(loss, 1))
In [21]:
# measure of accuracy 
predicted_y = tf.argmax(Y_pred, 1)
actual_y = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_y, actual_y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [22]:
# defining optimizer
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# logging the entire process in tensorboard
'''
from datetime import datetime
now  = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir="tf_logs"
logdir = "{}/run-{}/".format(root_logdir, now)

bce_summary = tf.summary.scalar('COST', cost)
file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
'''
Out[22]:
'\nfrom datetime import datetime\nnow  = datetime.utcnow().strftime("%Y%m%d%H%M%S")\nroot_logdir="tf_logs"\nlogdir = "{}/run-{}/".format(root_logdir, now)\n\nbce_summary = tf.summary.scalar(\'COST\', cost)\nfile_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())\n'

Execution Phase:

In [23]:
n_epochs = 3
batch_size = 120
n_batch = 15360 // batch_size

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for epoch_i in range(n_epochs):
    print('Epoch: ', epoch_i)
    
    # training set
    this_accuracy = 0
    its = 0
    
    for Xs_i, ys_i in ds.train.next_batch(batch_size):
        
        this_accuracy += sess.run([accuracy, optimizer], feed_dict={
                X:Xs_i, y:ys_i})[0]
        its += 1
        print(this_accuracy / its)
        #summary_str = bce_summary.eval(feed_dict={X: Xs_i, y: ys_i}, session=sess)
        #step = epoch_i * n_batch + its
        #file_writer.add_summary(summary_str, step)
    print('Training accuracy: ', this_accuracy / its)
    
    # validation set
    this_accuracy = 0
    its = 0
   
    for Xs_i, ys_i in ds.valid.next_batch(batch_size):
        this_accuracy += sess.run(accuracy, feed_dict={
                X:Xs_i, y:ys_i})
        its += 1
    print('Validation accuracy: ', this_accuracy / its)
Epoch:  0
0.0
0.5
0.6666666666666666
0.75
0.8
0.8333333333333334
0.8571428571428571
0.875
0.8888888888888888
0.9
0.9090909090909091
0.9166666666666666
0.9230769230769231
0.9285714285714286
0.9333333333333333
0.9375
0.9411764705882353
0.9444444444444444
0.9473684210526315
0.95
0.9523809523809523
0.9545454545454546
0.9565217391304348
0.9583333333333334
0.96
0.9615384615384616
0.9629629629629629
0.9642857142857143
0.9655172413793104
0.9666666666666667
0.967741935483871
0.96875
0.9696969696969697
0.9705882352941176
0.9714285714285714
0.9722222222222222
0.972972972972973
0.9736842105263158
0.9743589743589743
0.975
0.975609756097561
0.9761904761904762
0.9767441860465116
0.9772727272727273
0.9777777777777777
0.9782608695652174
0.9787234042553191
0.9791666666666666
0.9795918367346939
0.98
0.9803921568627451
0.9807692307692307
0.9811320754716981
0.9814814814814815
0.9818181818181818
0.9821428571428571
0.9824561403508771
0.9827586206896551
0.9830508474576272
0.9833333333333333
0.9836065573770492
0.9838709677419355
0.9841269841269841
0.984375
0.9846153846153847
0.9848484848484849
0.9850746268656716
0.9852941176470589
0.9855072463768116
0.9857142857142858
0.9859154929577465
0.9861111111111112
0.9863013698630136
0.9864864864864865
0.9866666666666667
0.9868421052631579
0.987012987012987
0.9871794871794872
0.9873417721518988
0.9875
0.9876543209876543
0.9878048780487805
0.9879518072289156
0.9880952380952381
0.9882352941176471
0.9883720930232558
0.9885057471264368
0.9886363636363636
0.9887640449438202
0.9888888888888889
0.989010989010989
0.9891304347826086
0.989247311827957
0.9893617021276596
0.9894736842105263
0.9895833333333334
0.9896907216494846
0.9897959183673469
0.98989898989899
0.99
0.9900990099009901
0.9901960784313726
0.9902912621359223
Training accuracy:  0.9902912621359223
Validation accuracy:  1.0
Epoch:  1
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  2
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  3
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  4
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  5
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  6
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  7
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  8
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
Epoch:  9
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
Training accuracy:  1.0
Validation accuracy:  1.0
In [25]:
#file_writer.close()