51. Deep Learning Self-Study Path: Building a Deep Learning Framework Yourself - 12. Rewriting the RNN Prediction Network with Our Own Framework
Summary: although the predicted word sometimes differs from the true word, the overall sentence pattern is correct, and the word the model fills in is a real, sensible word rather than the jumbled output we saw before.
import numpy as np
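# Tensor: a minimal autograd node. Each Tensor records which tensors created it
# (creators), which operation produced it (creation_op), and how many gradients
# it still expects from its children, so backward() can apply the chain rule
# through the recorded computation graph.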
class Tensor(object):
def __init__(self, data,
autograd=False,
creators=None,
creation_op=None,
id=None):
self.data = np.array(data)
self.autograd = autograd
self.grad = None
if (id is None):
self.id = np.random.randint(0, 100000)
else:
self.id = id
self.creators = creators
self.creation_op = creation_op
self.children = {}
if (creators is not None):
for c in creators:
if (self.id not in c.children):
c.children[self.id] = 1
else:
c.children[self.id] += 1
def all_children_grads_accounted_for(self):
for id, cnt in self.children.items():
if (cnt != 0):
return False
return True
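    # backward(): accumulate the incoming gradient; once every child has sent its
    # gradient (or backward() was called on this node directly), route the
    # gradient to the creators according to creation_op.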
def backward(self, grad=None, grad_origin=None):
if (self.autograd):
if (grad is None):
grad = Tensor(np.ones_like(self.data))
if (grad_origin is not None):
if (self.children[grad_origin.id] == 0):
raise Exception("cannot backprop more than once")
else:
self.children[grad_origin.id] -= 1
if (self.grad is None):
self.grad = grad
else:
self.grad += grad
# grads must not have grads of their own
assert grad.autograd == False
# only continue backpropping if there's something to
# backprop into and if all gradients (from children)
            # are accounted for; override waiting for children if
# "backprop" was called on this variable directly
if (self.creators is not None and
(self.all_children_grads_accounted_for() or
grad_origin is None)):
if (self.creation_op == "add"):
self.creators[0].backward(self.grad, self)
self.creators[1].backward(self.grad, self)
if (self.creation_op == "sub"):
self.creators[0].backward(Tensor(self.grad.data), self)
self.creators[1].backward(Tensor(self.grad.__neg__().data), self)
if (self.creation_op == "mul"):
new = self.grad * self.creators[1]
self.creators[0].backward(new, self)
new = self.grad * self.creators[0]
self.creators[1].backward(new, self)
if (self.creation_op == "mm"):
c0 = self.creators[0]
c1 = self.creators[1]
new = self.grad.mm(c1.transpose())
c0.backward(new)
new = self.grad.transpose().mm(c0).transpose()
c1.backward(new)
if (self.creation_op == "transpose"):
self.creators[0].backward(self.grad.transpose())
if ("sum" in self.creation_op):
dim = int(self.creation_op.split("_")[1])
self.creators[0].backward(self.grad.expand(dim,
self.creators[0].data.shape[dim]))
if ("expand" in self.creation_op):
dim = int(self.creation_op.split("_")[1])
self.creators[0].backward(self.grad.sum(dim))
if (self.creation_op == "neg"):
self.creators[0].backward(self.grad.__neg__())
if (self.creation_op == "sigmoid"):
ones = Tensor(np.ones_like(self.grad.data))
self.creators[0].backward(self.grad * (self * (ones - self)))
if (self.creation_op == "tanh"):
ones = Tensor(np.ones_like(self.grad.data))
self.creators[0].backward(self.grad * (ones - (self * self)))
if (self.creation_op == "index_select"):
new_grad = np.zeros_like(self.creators[0].data)
indices_ = self.index_select_indices.data.flatten()
grad_ = grad.data.reshape(len(indices_), -1)
for i in range(len(indices_)):
new_grad[indices_[i]] += grad_[i]
self.creators[0].backward(Tensor(new_grad))
if (self.creation_op == "cross_entropy"):
dx = self.softmax_output - self.target_dist
self.creators[0].backward(Tensor(dx))
def __add__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data + other.data,
autograd=True,
creators=[self, other],
creation_op="add")
return Tensor(self.data + other.data)
def __neg__(self):
if (self.autograd):
return Tensor(self.data * -1,
autograd=True,
creators=[self],
creation_op="neg")
return Tensor(self.data * -1)
def __sub__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data - other.data,
autograd=True,
creators=[self, other],
creation_op="sub")
return Tensor(self.data - other.data)
def __mul__(self, other):
if (self.autograd and other.autograd):
return Tensor(self.data * other.data,
autograd=True,
creators=[self, other],
creation_op="mul")
return Tensor(self.data * other.data)
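    # sum(dim) collapses one dimension; expand(dim, copies) is its counterpart
    # and is what Linear.forward uses to broadcast the bias across the batch.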
def sum(self, dim):
if (self.autograd):
return Tensor(self.data.sum(dim),
autograd=True,
creators=[self],
creation_op="sum_" + str(dim))
return Tensor(self.data.sum(dim))
def expand(self, dim, copies):
trans_cmd = list(range(0, len(self.data.shape)))
trans_cmd.insert(dim, len(self.data.shape))
new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)
if (self.autograd):
return Tensor(new_data,
autograd=True,
creators=[self],
creation_op="expand_" + str(dim))
return Tensor(new_data)
def transpose(self):
if (self.autograd):
return Tensor(self.data.transpose(),
autograd=True,
creators=[self],
creation_op="transpose")
return Tensor(self.data.transpose())
def mm(self, x):
if (self.autograd):
return Tensor(self.data.dot(x.data),
autograd=True,
creators=[self, x],
creation_op="mm")
return Tensor(self.data.dot(x.data))
def sigmoid(self):
if (self.autograd):
return Tensor(1 / (1 + np.exp(-self.data)),
autograd=True,
creators=[self],
creation_op="sigmoid")
return Tensor(1 / (1 + np.exp(-self.data)))
def tanh(self):
if (self.autograd):
return Tensor(np.tanh(self.data),
autograd=True,
creators=[self],
creation_op="tanh")
return Tensor(np.tanh(self.data))
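    # index_select: row lookup into this tensor (the embedding lookup); the chosen
    # indices are cached so backward() can scatter the gradient back to those rows.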
def index_select(self, indices):
if (self.autograd):
new = Tensor(self.data[indices.data],
autograd=True,
creators=[self],
creation_op="index_select")
new.index_select_indices = indices
return new
return Tensor(self.data[indices.data])
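    # cross_entropy: softmax over the last axis followed by the negative
    # log-likelihood of the target indices; softmax_output and target_dist are
    # cached because the gradient is simply (softmax - one_hot(target)).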
def cross_entropy(self, target_indices):
temp = np.exp(self.data)
softmax_output = temp / np.sum(temp,
axis=len(self.data.shape) - 1,
keepdims=True)
t = target_indices.data.flatten()
p = softmax_output.reshape(len(t), -1)
target_dist = np.eye(p.shape[1])[t]
loss = -(np.log(p) * (target_dist)).sum(1).mean()
if (self.autograd):
out = Tensor(loss,
autograd=True,
creators=[self],
creation_op="cross_entropy")
out.softmax_output = softmax_output
out.target_dist = target_dist
return out
return Tensor(loss)
def __repr__(self):
return str(self.data.__repr__())
def __str__(self):
return str(self.data.__str__())
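# Layer library built on top of Tensor: every layer exposes forward() and
# collects its trainable Tensors in self.parameters so an optimizer can find them.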
class Layer(object):
def __init__(self):
self.parameters = list()
def get_parameters(self):
return self.parameters
class Tanh(Layer):
def __init__(self):
super().__init__()
def forward(self, input):
return input.tanh()
class Sigmoid(Layer):
def __init__(self):
super().__init__()
def forward(self, input):
return input.sigmoid()
class CrossEntropyLoss(object):
def __init__(self):
super().__init__()
def forward(self, input, target):
return input.cross_entropy(target)
class Sequential(Layer):
    def __init__(self, layers=None):
        super().__init__()
        # avoid sharing one mutable default list between Sequential instances
        self.layers = layers if layers is not None else list()
def add(self, layer):
self.layers.append(layer)
def forward(self, input):
for layer in self.layers:
input = layer.forward(input)
return input
def get_parameters(self):
params = list()
for l in self.layers:
params += l.get_parameters()
return params
class Embedding(Layer):
def __init__(self, vocab_size, dim):
super().__init__()
self.vocab_size = vocab_size
self.dim = dim
        # this random initialization style is just a convention from word2vec
self.weight = Tensor((np.random.rand(vocab_size, dim) - 0.5) / dim, autograd=True)
self.parameters.append(self.weight)
def forward(self, input):
return self.weight.index_select(input)
class Linear(Layer):
def __init__(self, n_inputs, n_outputs):
super().__init__()
W = np.random.randn(n_inputs, n_outputs) * np.sqrt(2.0 / (n_inputs))
self.weight = Tensor(W, autograd=True)
self.bias = Tensor(np.zeros(n_outputs), autograd=True)
self.parameters.append(self.weight)
self.parameters.append(self.bias)
def forward(self, input):
return input.mm(self.weight) + self.bias.expand(0, len(input.data))
class MSELoss(Layer):
def __init__(self):
super().__init__()
def forward(self, pred, target):
return ((pred - target) * (pred - target)).sum(0)
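# SGD: plain stochastic gradient descent, p.data -= alpha * p.grad.data, with
# the option to zero every gradient right after the step.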
class SGD(object):
def __init__(self, parameters, alpha=0.1):
self.parameters = parameters
self.alpha = alpha
def zero(self):
for p in self.parameters:
p.grad.data *= 0
def step(self, zero=True):
for p in self.parameters:
p.data -= p.grad.data * self.alpha
if (zero):
p.grad.data *= 0
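# RNNCell: a vanilla recurrent cell. At each time step,
#   new_hidden = activation(w_ih(input) + w_hh(hidden))
#   output     = w_ho(new_hidden)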
class RNNCell(Layer):
def __init__(self, n_inputs, n_hidden, n_output, activation='sigmoid'):
super().__init__()
self.n_inputs = n_inputs
self.n_hidden = n_hidden
self.n_output = n_output
if (activation == 'sigmoid'):
self.activation = Sigmoid()
elif (activation == 'tanh'):
            self.activation = Tanh()
else:
raise Exception("Non-linearity not found")
self.w_ih = Linear(n_inputs, n_hidden)
self.w_hh = Linear(n_hidden, n_hidden)
self.w_ho = Linear(n_hidden, n_output)
self.parameters += self.w_ih.get_parameters()
self.parameters += self.w_hh.get_parameters()
self.parameters += self.w_ho.get_parameters()
def forward(self, input, hidden):
from_prev_hidden = self.w_hh.forward(hidden)
combined = self.w_ih.forward(input) + from_prev_hidden
new_hidden = self.activation.forward(combined)
output = self.w_ho.forward(new_hidden)
return output, new_hidden
def init_hidden(self, batch_size=1):
return Tensor(np.zeros((batch_size, self.n_hidden)), autograd=True)
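# Data preparation: read the bAbI qa1 training file, drop the leading line
# numbers, left-pad every sentence to 6 tokens with '-', build the vocabulary
# and word2index map, and turn every sentence into a row of word indices.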
import sys, random, math
from collections import Counter
import numpy as np
f = open('qa1_single-supporting-fact_train.txt', 'r')
raw = f.readlines()
f.close()
tokens = list()
for line in raw[0:1000]:
tokens.append(line.lower().replace("\n", "").split(" ")[1:])
new_tokens = list()
for line in tokens:
new_tokens.append(['-'] * (6 - len(line)) + line)
tokens = new_tokens
vocab = set()
for sent in tokens:
for word in sent:
vocab.add(word)
vocab = list(vocab)
word2index = {}
for i, word in enumerate(vocab):
word2index[word] = i
def words2indices(sentence):
idx = list()
for word in sentence:
idx.append(word2index[word])
return idx
indices = list()
for line in tokens:
idx = list()
for w in line:
idx.append(word2index[w])
indices.append(idx)
data = np.array(indices)
embed = Embedding(vocab_size=len(vocab),dim=16)
model = RNNCell(n_inputs=16, n_hidden=16, n_output=len(vocab))
criterion = CrossEntropyLoss()
optim = SGD(parameters=model.get_parameters() + embed.get_parameters(), alpha=0.05)
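# Training: feed the first 5 words of the first 100 sentences through the
# embedding and the RNN, predict the next word, back-propagate the
# cross-entropy loss, and update the embedding and RNN parameters with SGD.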
for iter in range(1000):
batch_size = 100
total_loss = 0
hidden = model.init_hidden(batch_size=batch_size)
for t in range(5):
input = Tensor(data[0:batch_size, t], autograd=True)
rnn_input = embed.forward(input=input)
output, hidden = model.forward(input=rnn_input, hidden=hidden)
target = Tensor(data[0:batch_size, t + 1], autograd=True)
loss = criterion.forward(output, target)
loss.backward()
optim.step()
total_loss += loss.data
if (iter % 200 == 0):
p_correct = (target.data == np.argmax(output.data, axis=1)).mean()
print("Loss:", total_loss / (len(data) / batch_size), "% Correct:", p_correct)
batch_size = 1
hidden = model.init_hidden(batch_size=batch_size)
for t in range(5):
input = Tensor(data[0:batch_size,t], autograd=True)
rnn_input = embed.forward(input=input)
output, hidden = model.forward(input=rnn_input, hidden=hidden)
target = Tensor(data[0:batch_size,t+1], autograd=True)
loss = criterion.forward(output, target)
ctx = ""
for idx in data[0:batch_size][0][0:-1]:
ctx += vocab[idx] + " "
print("Context:",ctx)
print("True:",vocab[target.data[0]])
print("Pred:", vocab[output.data.argmax()])
''' Run 1
Loss: 0.4680828278085011 % Correct: 0.0
Loss: 0.17895626941023882 % Correct: 0.23
Loss: 0.1606657974044729 % Correct: 0.3
Loss: 0.1481854218501178 % Correct: 0.32
Loss: 0.13960603129533444 % Correct: 0.35
Context: - mary moved to the
True: bathroom.
Pred: bathroom.
'''
''' Run 2
Loss: 0.4554923906553056 % Correct: 0.01
Loss: 0.17450458457970364 % Correct: 0.23
Loss: 0.1537305632182028 % Correct: 0.33
Loss: 0.13882016326307411 % Correct: 0.36
Loss: 0.13465901151417053 % Correct: 0.37
Context: - mary moved to the
True: bathroom.
Pred: office.
'''
''' Run 3
Loss: 0.45696131353100666 % Correct: 0.12
Loss: 0.17446651127257118 % Correct: 0.27
Loss: 0.16225291144270232 % Correct: 0.28
Loss: 0.1417173151945064 % Correct: 0.34
Loss: 0.13637942677769582 % Correct: 0.37
Context: - mary moved to the
True: bathroom.
Pred: hallway.
'''
''' Run 4
Loss: 0.4449260906841651 % Correct: 0.0
Loss: 0.1782109486619849 % Correct: 0.23
Loss: 0.1496331404381601 % Correct: 0.35
Loss: 0.14350842163988237 % Correct: 0.34
Loss: 0.13665930525935824 % Correct: 0.37
Context: - mary moved to the
True: bathroom.
Pred: hallway.
'''
''' Run 5
Loss: 0.45827573579339315 % Correct: 0.0
Loss: 0.1756007557865982 % Correct: 0.23
Loss: 0.15933848432214442 % Correct: 0.31
Loss: 0.142949504390499 % Correct: 0.34
Loss: 0.13783751879604417 % Correct: 0.35
Context: - mary moved to the
True: bathroom.
Pred: office.
'''
'''
Summary: although the predicted word can still differ from the true word, the
overall sentence pattern is correct; the word the model fills in is a real,
sensible word rather than the jumbled output we saw before.
'''
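# Optional check (a minimal sketch, not part of the original run): repeat the
# same context/true/pred comparison for the first few training sentences instead
# of only sentence 0, reusing the variables defined above (data, embed, model,
# vocab). The loop bound of 3 is arbitrary.
for row in range(3):
    hidden = model.init_hidden(batch_size=1)
    for t in range(5):
        inp = Tensor(data[row:row + 1, t], autograd=True)
        output, hidden = model.forward(input=embed.forward(input=inp), hidden=hidden)
    ctx = " ".join(vocab[idx] for idx in data[row][0:-1])
    print("Context:", ctx)
    print("True:", vocab[data[row][-1]], "| Pred:", vocab[output.data.argmax()])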