import mxnet as mx
import numpy as np
import cv2
import matplotlib.pyplot as plt
import logging

# MXNet's training callbacks (see the Speedometer callback used further down)
# report progress through the logging module. The root logger defaults to
# WARNING, so make sure a handler exists and lower the threshold to INFO;
# otherwise no training progress is ever displayed.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.INFO)


# Variables are placeholders for input arrays. We give each variable a unique name.
data = mx.symbol.Variable('data')

# The input is fed to a fully connected layer that computes Y=WX+b.
# This is the main computation module in the network.
# Each layer also needs a unique name (passed as a string).
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
# Activation layers apply a non-linear function on the previous layer's output.
# Here we use Rectified Linear Unit (ReLU) that computes Y = max(X, 0).
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")

fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")

fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# Finally we have a loss layer that compares the network's output with the
# label and generates gradient signals.
mlp = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')








# NOTE(review): fetch_mldata is not used by the code visible in this file.
from sklearn.datasets import fetch_mldata
import os,sys
curr_path = sys.path[0]
# NOTE(review): machine-specific path to the MXNet autoencoder example, which
# provides the `data` helper module imported below.
sys.path = [os.path.join("/home/hu/mxnet-master/example/autoencoder")] + sys.path
import data

# Load the dataset before it is used; the same helper is used later in this
# file (`X, Y = data.get_mnist()`).
# NOTE(review): confirm whether get_mnist() already rescales the pixels; if it
# does, the division by 255 below scales the data twice.
X, Y = data.get_mnist()

# Preview the first ten digits side by side. Each image gets its own subplot so
# that later images do not simply draw over earlier ones.
for i in range(10):
    plt.subplot(1, 10, i + 1)
    plt.imshow(X[i].reshape((28,28)), cmap='Greys_r')

# Convert to float32 and split into the first 60000 samples (training) and the
# remainder (testing).
X = X.astype(np.float32)/255
X_train = X[:60000]
X_test = X[60000:]
Y_train = Y[:60000]
Y_test = Y[60000:]



# Wrap the NumPy arrays in iterators that yield mini-batches of `batch_size`.
batch_size = 100
train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size)
test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size)


# Describe the model and its optimisation hyper-parameters.
model = mx.model.FeedForward(
    ctx = mx.gpu(0),      # Run on GPU 0
    symbol = mlp,         # Use the network we just defined
    num_epoch = 10,       # Train for 10 epochs
    learning_rate = 0.1,  # Learning rate
    momentum = 0.9,       # Momentum for SGD with momentum
    wd = 0.00001)         # Weight decay for regularization
# Train. (The `model.fit(` call line was missing, which left the arguments
# below dangling after the constructor's closing parenthesis.)
model.fit(
    X=train_iter,  # Training data set
    eval_data=test_iter,  # Testing data set. MXNet computes scores on test set every epoch
    batch_end_callback = mx.callback.Speedometer(batch_size, 200))  # Logging module to print out progress



# Construct a simple MLP (same architecture as above, positional-data form).
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10)
out = mx.symbol.SoftmaxOutput(fc3, name='softmax')
# Construct the module and train it. The optimizer parameters are passed as a
# dict keyed by parameter *name*, so the keys must be strings.
mod = mx.mod.Module(out)
mod.fit(train_iter, eval_data=test_iter,
        optimizer_params={'learning_rate': 0.01, 'momentum': 0.9},
        num_epoch=10)


# Show the first test image and the class the trained model predicts for it.
plt.imshow((X_test[0].reshape((28,28))*255).astype(np.uint8), cmap='Greys_r')
# A single %-formatted argument prints identically whether `print` is the
# Python 2 statement or the print function.
print('Result: %s' % (model.predict(X_test[0:1])[0].argmax(),))


# Score on the test iterator, reported as a percentage.
print('Accuracy: %s %%' % (model.score(test_iter)*100,))


# run hand drawing test
from IPython.display import HTML

def classify(img):
    """Classify a digit drawn on the notebook canvas.

    Args:
        img: PNG image as a base64 data URL string
            (``data:image/png;base64,...``), as sent by the JavaScript
            ``classify()`` helper defined in ``script``.

    Returns:
        The argmax of the first row returned by ``model.predict`` for the
        28x28 rescaled image.
    """
    # Local import so the function does not depend on a file-level import.
    import base64

    prefix = 'data:image/png;base64,'
    # Only strip the data-URL header when it is actually present.
    if img.startswith(prefix):
        img = img[len(prefix):]
    # base64.b64decode works on both Python 2 and 3 (the 'base64' str codec
    # is Python 2 only).
    raw = base64.b64decode(img)
    # Flag -1 decodes the PNG unchanged, keeping all of its channels.
    img = cv2.imdecode(np.frombuffer(raw, np.uint8), -1)
    # Use channel index 3 only (presumably the alpha channel, where the
    # strokes drawn on a transparent canvas live - TODO confirm).
    img = cv2.resize(img[:,:,3], (28,28))
    # Flatten to one row of 784 values scaled to [0, 1], matching the
    # training data layout.
    img = img.astype(np.float32).reshape((1, 784))/255.0
    return model.predict(img)[0].argmax()

# Markup for the drawing canvas and the JavaScript that sends the drawing to the
# Python `classify` function through the notebook kernel. The typographic quote
# characters that had replaced the apostrophes are restored to ASCII quotes:
# they broke the JavaScript string literals and, being non-ASCII bytes in a
# file without a leading coding declaration, are rejected by Python 2.
# In a notebook, render the widget with `HTML(html + script)`.
html = """<style type="text/css">canvas { border: 1px solid black; }</style><div id="board"><canvas id="myCanvas" width="100px" height="100px">Sorry, your browser doesn't support canvas technology.</canvas><p><button id="classify" onclick="classify()">Classify</button><button id="clear" onclick="myClear()">Clear</button>Result: <input type="text" id="result_output" size="5" value=""></p></div>"""
script = """<script type="text/JavaScript" src="https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js?ver=1.4.2"></script><script type="text/javascript">function init() {var myCanvas = document.getElementById("myCanvas");var curColor = $('#selectColor option:selected').val();if(myCanvas){var isDown = false;var ctx = myCanvas.getContext("2d");var canvasX, canvasY;ctx.lineWidth = 5;$(myCanvas).mousedown(function(e){isDown = true;ctx.beginPath();var parentOffset = $(this).parent().offset(); canvasX = e.pageX - parentOffset.left;canvasY = e.pageY - parentOffset.top;ctx.moveTo(canvasX, canvasY);}).mousemove(function(e){if(isDown != false) {var parentOffset = $(this).parent().offset(); canvasX = e.pageX - parentOffset.left;canvasY = e.pageY - parentOffset.top;ctx.lineTo(canvasX, canvasY);ctx.strokeStyle = curColor;ctx.stroke();}}).mouseup(function(e){isDown = false;ctx.closePath();});}$('#selectColor').change(function () {curColor = $('#selectColor option:selected').val();});}init();function handle_output(out) {document.getElementById("result_output").value = out.content.data["text/plain"];}function classify() {var kernel = IPython.notebook.kernel;var myCanvas = document.getElementById("myCanvas");data = myCanvas.toDataURL('image/png');document.getElementById("result_output").value = "";kernel.execute("classify('" + data +"')",  { 'iopub' : {'output' : handle_output}}, {silent:false});}function myClear() {var myCanvas = document.getElementById("myCanvas");myCanvas.getContext("2d").clearRect(0, 0, myCanvas.width, myCanvas.height);}</script>"""


def norm_stat(d):
    """Statistic reported by the monitor for one array.

    Computes the L2 norm of ``d`` (via ``mx.nd.norm``) divided by the square
    root of its element count. Replace the body with any other statistic you
    would rather track.
    """
    l2_norm = mx.nd.norm(d)
    root_count = np.sqrt(d.size)
    return l2_norm / root_count
mon = mx.mon.Monitor(
    100,                 # Print every 100 batches
    norm_stat,           # The statistics function defined above
    pattern='.*weight',  # A regular expression. Only arrays with name matching this pattern will be included.
    sort=True)           # Sort output by name
# Train for a single epoch with the monitor attached.
model = mx.model.FeedForward(ctx = mx.gpu(0), symbol = mlp, num_epoch = 1,
                             learning_rate = 0.1, momentum = 0.9, wd = 0.00001)
model.fit(X=train_iter, eval_data=test_iter, monitor=mon,  # Set the monitor here
          batch_end_callback = mx.callback.Speedometer(100, 100))




# ==================Binding=====================
# The symbol we created is only a graph description.
# To run it, we first need to allocate memory and create an executor by 'binding' it.
# In order to bind a symbol, we need at least two pieces of information: context and input shapes.
# Context specifies which device the executor runs on, e.g. cpu, GPU0, GPU1, etc.
# Input shapes define the executor's input array dimensions.
# MXNet then runs automatic shape inference to determine the dimensions of intermediate and output arrays.

# Data iterators define the shapes of their outputs with the provide_data and provide_label properties.
input_shapes = dict(train_iter.provide_data+train_iter.provide_label)
# A single %-formatted argument prints identically whether `print` is the
# Python 2 statement or the print function.
print('input_shapes %s' % (input_shapes,))
# We use simple_bind to let MXNet allocate memory for us.
# You can also allocate memory yourself and use bind to pass it to MXNet.
exe = mlp.simple_bind(ctx=mx.gpu(0), **input_shapes)

# ===============Initialization=================
# First we get handles to the input arrays.
arg_arrays = dict(zip(mlp.list_arguments(), exe.arg_arrays))
data = arg_arrays[train_iter.provide_data[0][0]]
label = arg_arrays[train_iter.provide_label[0][0]]

# We initialize the weights with uniform distribution on (-0.01, 0.01).
# Arrays named in input_shapes are the data/label inputs, not parameters, so
# they are skipped.
init = mx.init.Uniform(scale=0.01)
for name, arr in arg_arrays.items():
    if name not in input_shapes:
        init(name, arr)
# We also need to create an optimizer for updating weights.
# The constructor call was truncated (unclosed parenthesis). The learning rate,
# momentum and weight decay mirror the FeedForward settings used elsewhere in
# this file; rescale_grad averages the gradient over the mini-batch.
# NOTE(review): confirm these hyper-parameters are the intended ones.
opt = mx.optimizer.SGD(
    learning_rate=0.1,
    momentum=0.9,
    wd=0.00001,
    rescale_grad=1.0/train_iter.batch_size)
updater = mx.optimizer.get_updater(opt)

# Finally we need a metric to print out training progress
metric = mx.metric.Accuracy()

# Training loop begins
for epoch in range(10):
    # Rewind the iterator (it is exhausted after one full pass) and clear the
    # metric so each epoch reports its own accuracy.
    train_iter.reset()
    metric.reset()
    t = 0
    for batch in train_iter:
        # Copy data to executor input. Note the [:].
        data[:] = batch.data[0]
        label[:] = batch.label[0]
        # Forward
        exe.forward(is_train=True)
        # You perform operations on exe.outputs here if you need to.
        # For example, you can stack a CRF on top of a neural network.
        # Backward
        exe.backward()
        # Update
        for i, pair in enumerate(zip(exe.arg_arrays, exe.grad_arrays)):
            weight, grad = pair
            updater(i, grad, weight)
        metric.update(batch.label, exe.outputs)
        t += 1
        if t % 100 == 0:
            # A single %-formatted argument prints identically whether `print`
            # is the Python 2 statement or the print function.
            print('epoch: %s iter: %s metric: %s' % (epoch, t, metric.get()))



# Define custom softmax operator
class NumpySoftmax(mx.operator.NumpyOp):
    """Softmax loss layer written in NumPy as a custom MXNet operator.

    Takes the inputs 'data' and 'label' and produces one output, 'output',
    holding the row-wise softmax of 'data'.
    """

    def __init__(self):
        # Call the parent class constructor.
        # Because NumpySoftmax is a loss layer, it doesn't need gradient input from layers above.
        super(NumpySoftmax, self).__init__(need_top_grad=False)

    def list_arguments(self):
        """Return the names of the operator's inputs."""
        return ['data', 'label']

    def list_outputs(self):
        """Return the names of the operator's outputs."""
        return ['output']

    def infer_shape(self, in_shape):
        """Derive the label and output shapes from the shape of 'data'.

        Returns:
            A pair ``(input_shapes, output_shapes)``, each a list of shapes.
        """
        data_shape = in_shape[0]  # shape of first argument 'data'
        label_shape = (in_shape[0][0],)  # 'label' should be one dimensional and has batch_size instances.
        output_shape = in_shape[0]  # 'output' dimension is the same as the input.
        return [data_shape, label_shape], [output_shape]

    def forward(self, in_data, out_data):
        """Write the row-wise softmax of 'data' into 'output' in place."""
        x = in_data[0]  # 'data'
        y = out_data[0]  # 'output'
        # Subtract the per-row maximum before exponentiating so the largest
        # exponent is exp(0).
        y[:] = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))

    def backward(self, out_grad, in_data, out_data, in_grad):
        """Write the gradient with respect to 'data' into ``in_grad[0]``."""
        l = in_data[1]  # 'label'
        # Cast to integers so the labels can be used as column indices
        # (`np.int` no longer exists in current NumPy; the builtin is equivalent).
        l = l.reshape((l.size,)).astype(int)
        y = out_data[0]  # 'output'
        dx = in_grad[0]  # gradient for 'data'
        # Gradient is the softmax output with 1 subtracted at each row's label column.
        dx[:] = y
        dx[np.arange(l.shape[0]), l] -= 1.0

numpy_softmax = NumpySoftmax()

# Same MLP as before; only the final loss layer differs.
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# Use the new operator we just defined instead of the standard softmax operator.
mlp = numpy_softmax(data=fc3, name='softmax')

model = mx.model.FeedForward(ctx = mx.gpu(0), symbol = mlp, num_epoch = 2,
                             learning_rate = 0.1, momentum = 0.9, wd = 0.00001)
model.fit(X=train_iter, eval_data=test_iter,
          batch_end_callback = mx.callback.Speedometer(100, 100))



#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Created on Thu Mar 30 15:35:02 2017
#
# @author: root
from __future__ import print_function
import sys
import os
# code to automatically download dataset
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path = [os.path.join(curr_path, "../autoencoder")] + sys.path
import mxnet as mx
import numpy as np
import data
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
import model
from autoencoder import AutoEncoderModel
from solver import Solver, Monitor
import logging
import time
global YT
import scipy.io as sio  
import matplotlib.pyplot as plt 
# ==================start setting My-layer=====================
class NumpySoftmax(mx.operator.NumpyOp):
    """Custom NumPy loss layer producing soft cluster assignments.

    Each batch is clustered with k-means (10 clusters); every row is then
    scored against each cluster centre with the kernel
    ``(1 / (1 + dist**2 / alpha)) ** ((alpha + 1) / 2)`` and the scores are
    normalised so each row sums to one.

    NOTE(review): despite the class name, the forward pass is not a softmax.
    """

    def __init__(self, alpha=1.0):
        """Create the operator.

        Args:
            alpha: Kernel parameter used in forward/backward. The original
                code read an undefined global ``alpha``; the default of 1.0
                is an assumption - TODO confirm.
        """
        # Call the parent class constructor.
        # Because NumpySoftmax is a loss layer, it doesn't need gradient input from layers above.
        super(NumpySoftmax, self).__init__(need_top_grad=False)
        self.alpha = alpha

    def _cluster_centers(self, points):
        """Fit k-means on ``points`` and return the fitted cluster centres."""
        # Fixed random_state keeps the forward and backward fits identical for
        # identical input. 10 clusters matches the width of the layer feeding
        # this operator (infer_shape declares output shape == input shape).
        return KMeans(n_clusters=10, random_state=170).fit(points).cluster_centers_

    def list_arguments(self):
        """Return the names of the operator's inputs."""
        return ['data', 'label']

    def list_outputs(self):
        """Return the names of the operator's outputs."""
        return ['output']

    def infer_shape(self, in_shape):
        """Derive the label and output shapes from the shape of 'data'.

        Returns:
            A pair ``(input_shapes, output_shapes)``, each a list of shapes.
        """
        data_shape = in_shape[0]  # shape of first argument 'data'
        label_shape = (in_shape[0][0],)  # 'label' should be one dimensional and has batch_size instances.
        output_shape = in_shape[0]  # 'output' dimension is the same as the input.
        return [data_shape, label_shape], [output_shape]

    def forward(self, in_data, out_data):
        """Write the normalised kernel scores of 'data' into 'output'."""
        alpha = self.alpha
        z = in_data[0]  # 'data'
        q = out_data[0]  # 'output'
        # The original referenced an undefined `mu` and discarded the k-means
        # fit; the fitted centres are used as `mu` here (assumed intent).
        mu = self._cluster_centers(z)
        mask = 1.0/(1.0+cdist(z, mu)**2/alpha)
        q[:] = mask**((alpha+1.0)/2.0)
        # Normalise each row to sum to one.
        q[:] = (q.T/q.sum(axis=1)).T

    def backward(self, out_grad, in_data, out_data, in_grad):
        """Write the gradient with respect to 'data' into ``in_grad[0]``."""
        alpha = self.alpha
        x = in_data[0]  # 'data'
        y = out_data[0]  # 'output'
        dx = in_grad[0]  # gradient for 'data'
        mu = self._cluster_centers(x)
        mask = 1.0/(1.0+cdist(x, mu)**2/alpha)
        # NOTE(review): `p` is the un-normalised kernel score while `y` is the
        # normalised forward output; confirm this is the intended target.
        p = mask**((alpha+1.0)/2.0)
        mask *= (alpha+1.0)/alpha*(p-y)
        dx[:] = (x.T*mask.sum(axis=1)).T - mask.dot(mu)
#======================end setting==========================
# ==================start of the process of data=====================
# Load the dataset through the `data` helper module, then split samples and
# labels at index 60000 into training and test portions.
X, Y = data.get_mnist()
X_train, X_test = X[:60000], X[60000:]
Y_train, Y_test = Y[:60000], Y[60000:]
# Instantiate the custom operator defined above.
numpy_softmax = NumpySoftmax()
batch_size = 100
# Build the mini-batch iterators the standard way.
train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size)
test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size)
# Map each input name to its shape, as advertised by the training iterator.
input_shapes = dict(train_iter.provide_data + train_iter.provide_label)
# ==================end of the process=====================
# ==================start of setting the net=====================
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# The custom operator replaces the standard softmax output layer.
mlp = numpy_softmax(data=fc3, name='softmax')
# ==================end of setting the net=====================
# Bind the symbol on GPU 0, letting MXNet allocate memory from the input shapes.
exe = mlp.simple_bind(ctx=mx.gpu(0), **input_shapes)
# ===============Initialization=================
# First we get handles to the input arrays.
arg_arrays = dict(zip(mlp.list_arguments(), exe.arg_arrays))
data = arg_arrays[train_iter.provide_data[0][0]]
label = arg_arrays[train_iter.provide_label[0][0]]

# We initialize the weights with uniform distribution on (-0.01, 0.01).
# Arrays named in input_shapes are the data/label inputs, not parameters, so
# they are skipped.
init = mx.init.Uniform(scale=0.01)
for name, arr in arg_arrays.items():
    if name not in input_shapes:
        init(name, arr)
# We also need to create an optimizer for updating weights.
# The constructor call was truncated (unclosed parenthesis). rescale_grad
# averages the gradient over the mini-batch.
# NOTE(review): learning rate, momentum and weight decay are the values from
# the standard MXNet tutorial; confirm they are the intended ones.
opt = mx.optimizer.SGD(
    learning_rate=0.1,
    momentum=0.9,
    wd=0.00001,
    rescale_grad=1.0/train_iter.batch_size)
updater = mx.optimizer.get_updater(opt)

# Finally we need a metric to print out training progress
metric = mx.metric.Accuracy()

# Training loop begins
for epoch in range(10):
    # Rewind the iterator (it is exhausted after one full pass) and clear the
    # metric so each epoch reports its own accuracy.
    train_iter.reset()
    metric.reset()
    t = 0
    for batch in train_iter:
        # Copy data to executor input. Note the [:].
        data[:] = batch.data[0]
        label[:] = batch.label[0]
        # Forward
        exe.forward(is_train=True)
        # You perform operations on exe.outputs here if you need to.
        # For example, you can stack a CRF on top of a neural network.
        # Backward
        exe.backward()
        # Update
        for i, pair in enumerate(zip(exe.arg_arrays, exe.grad_arrays)):
            weight, grad = pair
            updater(i, grad, weight)
        metric.update(batch.label, exe.outputs)
        t += 1
        if t % 100 == 0:
            # A single %-formatted argument prints identically whether `print`
            # is the Python 2 statement or the print function.
            print('epoch: %s iter: %s metric: %s' % (epoch, t, metric.get()))



