from numpy import zeros
from numpy import ones
from numpy.random import randn
from numpy.random import randint
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Flatten
from keras.layers import Conv3D
from keras.layers import Conv3DTranspose
from keras.layers import LeakyReLU
from keras.layers import Dropout
from matplotlib import pyplot
import imageio
import numpy as np
import cv2
import glob
import skvideo.io
import tensorflow as tf
import keras

train_videos = "D:/Projects/training_datasets/self-made/*.mp4"
width = 128
frames = 8
save_epochs = 5
counter = 0
channels = 3

# define the standalone discriminator model
def define_discriminator(in_shape=(frames, width, width, channels)):
    model = Sequential()
    # normal
    model.add(Conv3D(filters=64, kernel_size=(4,4,4), strides=(2,2,2), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    # downsample
    model.add(Conv3D(filters=128, kernel_size=(4,4,4), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # downsample
    model.add(Conv3D(filters=256, kernel_size=(4,4,4), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # downsample
    model.add(Conv3D(filters=512, kernel_size=(4,4,4), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # classifier
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    # compile model
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.summary()
    return model

# define the standalone generator model
def define_generator(latent_dim):
    model = Sequential()
    # project the latent vector and reshape to a 1x8x8 volume with 128 channels
    n_nodes = 128 * 8 * 8 * 1
    model.add(Dense(n_nodes, input_dim=latent_dim))
    #model.add(LeakyReLU(alpha=0.2))
    model.add(Reshape((1, 8, 8, 128)))
    # upsample to 2x16x16
    model.add(Conv3DTranspose(256, (3,3,3), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # upsample to 4x32x32
    model.add(Conv3DTranspose(128, (3,4,4), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # upsample to 8x64x64
    model.add(Conv3DTranspose(64, (3,3,3), strides=(2,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # upsample spatially only, to 8x128x128
    model.add(Conv3DTranspose(32, (3,4,4), strides=(1,2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # output layer
    model.add(Conv3D(channels, (3,3,3), activation='tanh', padding='same'))
    model.summary()
    #model.load_weights("3dgan_models/generator_model_wights_005.h5", by_name=True)
    #print("Load model generator_model_wights_005...")
    return model

# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    # make weights in the discriminator not trainable (only for the combined model)
    d_model.trainable = False
    # connect them
    model = Sequential()
    # add generator
    model.add(g_model)
    # add the discriminator
    model.add(d_model)
    # compile model
    opt = Adam(lr=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

def get_num_frames(video_name):
    cap = cv2.VideoCapture(video_name)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    return length
# select real samples
def generate_real_samples(dataset, n_samples):
    # choose random instances
    ix = randint(0, dataset.shape[0], n_samples)
    # retrieve selected clips
    X = dataset[ix]
    # generate 'real' class labels (1)
    y = ones((n_samples, 1))
    return X, y

# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
    # generate points in the latent space
    x_input = randn(latent_dim * n_samples)
    # reshape into a batch of inputs for the network
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input

# use the generator to generate n fake examples, with class labels
def generate_fake_samples(g_model, latent_dim, n_samples):
    # generate points in latent space
    x_input = generate_latent_points(latent_dim, n_samples)
    # predict outputs
    X = g_model.predict(x_input)
    # create 'fake' class labels (0)
    y = zeros((n_samples, 1))
    return X, y

# create and save a plot of generated images
def save_plot(examples, epoch, n=3):
    # scale from [-1,1] to [0,1]
    examples = (examples + 1) / 2.0
    # plot the first frame of each generated clip
    for i in range(n * n):
        # define subplot
        pyplot.subplot(n, n, 1 + i)
        # turn off axis
        pyplot.axis('off')
        # plot raw pixel data
        pyplot.imshow(examples[i, 0])
        img = cv2.resize(examples[i, 0], (256, 256), interpolation=cv2.INTER_CUBIC)
        imageio.imwrite("results/frames/num_{}_{}.png".format(epoch, i), img)
    # save plot to file
    filename = '3dgan_results/generated_plot_e%03d.png' % (epoch+1)
    pyplot.savefig(filename)
    pyplot.close()

# evaluate the discriminator, save a generated video, save the generator model
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=1):
    # prepare real samples
    X_real, y_real = generate_real_samples(dataset, n_samples)
    # evaluate discriminator on real examples
    _, acc_real = d_model.evaluate(X_real, y_real, verbose=0)
    # prepare fake examples
    x_fake, y_fake = generate_fake_samples(g_model, latent_dim, n_samples)
    # evaluate discriminator on fake examples
    _, acc_fake = d_model.evaluate(x_fake, y_fake, verbose=0)
    # summarize discriminator performance
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
    global counter
    # write the fake samples to a video file
    process_and_write_video(x_fake, counter)
    #save_plot(x_fake, epoch)
    # save the generator model to file
    filename = '3dgan_models/generator_model_%03d.h5' % (epoch+1)
    g_model.save_weights('3dgan_models/generator_model_wights_%03d.h5' % (counter))
    g_model.save(filename)
    counter += 1

def process_and_write_video(videos, name):
    videos = np.array(videos)
    print("SHAPE OF videos:", videos.shape)
    videos = np.reshape(videos, [-1, frames, width, width, channels])
    vidwrite = np.zeros((frames, width, width, channels), dtype=np.uint8)
    for i in range(videos.shape[0]):
        vid = videos[i, :, :, :, :]
        # scale from [-1,1] back to [0,255]
        vid = (vid + 1) * 127.5
        for j in range(vid.shape[0]):
            frame = vid[j, :, :, :]
            vidwrite[j, :, :, :] = frame
    skvideo.io.vwrite("./3dgan_output/" + str(name) + ".mp4", vidwrite)
    return vidwrite

def read_and_process_video(files, size):
    counter = 0
    videos = np.zeros((size, frames, width, width, channels))
    for file in files:
        print("file ", file)
        vid = imageio.get_reader(file, 'ffmpeg')
        curr_frames = []
        # read the first `frames` frames and resize each to width x width
        for i in range(0, frames):
            frame = cv2.resize(vid.get_data(i), (width, width), interpolation=cv2.INTER_CUBIC)
            curr_frames.append(frame)
        curr_frames = np.array(curr_frames)
        # scale pixel values from [0,255] to [-1,1]
        curr_frames = curr_frames / 127.5 - 1
        videos[counter, :, :, :, :] = curr_frames[:, :, :, :]
        counter = counter + 1
    return videos
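
# Optional sanity check (an illustrative addition, not part of the original training
# flow): confirm that the generator's output volume matches the discriminator's
# expected input shape of (frames, width, width, channels) before starting a long run,
# e.g. by calling check_generator_output_shape(g_model, latent_dim) after the models
# are built.
def check_generator_output_shape(g_model, latent_dim):
    noise = generate_latent_points(latent_dim, 1)
    sample = g_model.predict(noise)
    assert sample.shape[1:] == (frames, width, width, channels), sample.shape
    return sample.shape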
# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=660000, n_batch=1):
    bat_per_epo = int(len(dataset) / n_batch)
    for i in range(n_epochs):
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            batch_files = dataset[j*n_batch:(j+1)*n_batch]
            cur_videos = read_and_process_video(batch_files, n_batch)
            # get randomly selected 'real' samples
            X_real, y_real = generate_real_samples(cur_videos, n_batch)
            # update discriminator model weights
            d_loss1, _ = d_model.train_on_batch(X_real, y_real)
            # generate 'fake' examples
            X_fake, y_fake = generate_fake_samples(g_model, latent_dim, n_batch)
            # update discriminator model weights
            d_loss2, _ = d_model.train_on_batch(X_fake, y_fake)
            # prepare points in latent space as input for the generator
            X_gan = generate_latent_points(latent_dim, n_batch)
            # create inverted labels for the fake samples
            y_gan = ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # summarize loss on this batch
            print('>%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f' % (i+1, j+1, bat_per_epo, d_loss1, d_loss2, g_loss))
        # evaluate the model performance, sometimes
        if (i+1) % save_epochs == 0:
            summarize_performance(i, g_model, d_model, cur_videos, latent_dim)

# use the GPU and cap its memory usage (TensorFlow 1.x session API)
tf.test.is_gpu_available()
run_config = tf.ConfigProto(device_count={'GPU': 1, 'CPU': 6})
run_config.gpu_options.per_process_gpu_memory_fraction = 0.8
sess = tf.Session(config=run_config)
keras.backend.set_session(sess)
# size of the latent space
latent_dim = 100
# create the discriminator
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan
gan_model = define_gan(g_model, d_model)
# load video data
dataset = glob.glob(train_videos)
# train model
train(g_model, d_model, gan_model, dataset, latent_dim)
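
# Minimal inference sketch (not part of the original script): after training, a saved
# checkpoint can be reloaded to synthesize a new clip. The exact checkpoint filename is
# an assumption and depends on how many times summarize_performance() has run.
#
# g_model = define_generator(latent_dim)
# g_model.load_weights('3dgan_models/generator_model_wights_000.h5')  # assumed checkpoint name
# noise = generate_latent_points(latent_dim, 1)
# process_and_write_video(g_model.predict(noise), 'sample')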