3D点云深度学习PointNet源码解析——pointnet_cls.py

参考博客：这个文件实现了网络的分类结构。输出为 B*40，是每个样本对于每个类别的得分（logits，尚未经过 softmax）。网络结构在 get_model() 中定义，loss 则在 get_loss() 中定义。代码开头：import tensorflow as tf; import numpy as np; import math; import sys; import os; BASE_DIR = os.path.dirname(os.pa...

夜晓岚渺渺

1159人浏览 · 2019-10-25 10:22:31

夜晓岚渺渺 · 2019-10-25 10:22:31 发布

参考博客：

#这个文件实现了网络的分类结构。输出为B*40，是每个样本对于每个类别的得分（logits，最后一层没有激活函数，softmax在get_loss的交叉熵中完成）。网络结构在get_model()中定义，loss则在get_loss()中定义
import tensorflow as tf
import numpy as np
import math
import sys
import os
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
import tf_util
from transform_nets import input_transform_net, feature_transform_net

# tf.placeholder(dtype, shape=None, name=None)
# placeholder，占位符，在tensorflow中类似于函数参数，运行时必须传入值。
# placeholder_inputs 函数返回点云及其标签的 placeholder，即返回两个 tensor 变量。
# pointclouds_pl 标记每个 batch中，batch的大小(点云数目),每个点云点的数目，每个点的维度
# labels_pl ：标记 batch的大小
def placeholder_inputs(batch_size, num_point):
    """Create the input placeholders for a batch of point clouds and labels.

    Args:
        batch_size: number of point clouds per batch; used as the first
            dimension of both placeholders.
        num_point: number of points in each cloud.

    Returns:
        A ``(pointclouds_pl, labels_pl)`` pair: a float32 placeholder of
        shape ``(batch_size, num_point, 3)`` and an int32 placeholder of
        shape ``(batch_size,)``.
    """
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    # A real 1-tuple is required here: ``(batch_size)`` is just ``batch_size``.
    # A bare int happens to be accepted as a single dimension, but
    # ``batch_size=None`` would silently turn into ``shape=None`` (unknown
    # rank) instead of a rank-1 placeholder with an unknown batch dimension.
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    return pointclouds_pl, labels_pl


def get_model(point_cloud, is_training, bn_decay=None, num_classes=40):
    """Build the PointNet classification network.

    Args:
        point_cloud: input tensor of shape BxNx3. B and N are read from the
            static shape, so both must be known at graph-construction time.
        is_training: training-mode flag forwarded to the batch-norm and
            dropout layers built through ``tf_util``.
        bn_decay: batch-norm decay forwarded to every batch-normalised layer.
        num_classes: width of the final fully connected layer. Defaults to
            40, the value that was previously hard-coded.

    Returns:
        A ``(net, end_points)`` pair. ``net`` is the B x ``num_classes``
        output of the last fully connected layer (no activation is applied,
        i.e. these are logits). ``end_points`` is a dict whose
        ``'transform'`` entry holds the feature transform returned by
        ``feature_transform_net``.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}

    # Predict the input transform inside its own variable scope so that its
    # variables are namespaced under 'transform_net1'.
    with tf.variable_scope('transform_net1'):
        transform = input_transform_net(point_cloud, is_training, bn_decay, K=3)

    # tf.matmul on 3-D tensors treats the leading dimension as a batch and
    # multiplies the last two dimensions as ordinary matrices.
    point_cloud_transformed = tf.matmul(point_cloud, transform)

    # Append a trailing dimension to get a 4-D tensor for the conv2d layers.
    input_image = tf.expand_dims(point_cloud_transformed, -1)

    net = tf_util.conv2d(input_image, 64, [1,3],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)

    with tf.variable_scope('transform_net2'):
        transform = feature_transform_net(net, is_training, bn_decay, K=64)
    # Expose the feature transform so get_loss can regularise it.
    end_points['transform'] = transform

    # Drop the size-1 dimension at index 2 so the per-point features can be
    # multiplied by the feature transform with a batched matmul.
    net_transformed = tf.matmul(tf.squeeze(net, axis=[2]), transform)

    # Re-insert the dimension at index 2 to return to a 4-D tensor. A scalar
    # axis is the documented form; the original passed a one-element list.
    net_transformed = tf.expand_dims(net_transformed, 2)

    net = tf_util.conv2d(net_transformed, 64, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)

    # Symmetric function: max pooling over all points of each cloud.
    net = tf_util.max_pool2d(net, [num_point,1],
                             padding='VALID', scope='maxpool')

    # Flatten the pooled features to one row per point cloud.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='fc2', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp2')
    # No activation on the last layer: softmax is applied inside the loss.
    net = tf_util.fully_connected(net, num_classes, activation_fn=None, scope='fc3')

    return net, end_points


def get_loss(pred, label, end_points, reg_weight=0.001):
    """Return the classification loss plus the transform regulariser.

    Args:
        pred: logits of shape B x NUM_CLASSES.
        label: integer class ids of shape B.
        end_points: dict whose ``'transform'`` entry is the BxKxK feature
            transform.
        reg_weight: weight applied to the orthogonality penalty.

    Returns:
        Scalar tensor ``classify_loss + mat_diff_loss * reg_weight``.
    """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label)
    classify_loss = tf.reduce_mean(loss)
    # Summary names must not contain spaces. TensorFlow rewrites
    # 'classify loss' to 'classify_loss' and logs an "illegal name" message,
    # so use the sanitised name directly; the resulting tag is unchanged.
    tf.summary.scalar('classify_loss', classify_loss)

    # Enforce the transformation as orthogonal matrix: penalise the
    # difference between T * T^T and the identity.
    transform = end_points['transform'] # BxKxK
    K = transform.get_shape()[1].value
    mat_diff = tf.matmul(transform, tf.transpose(transform, perm=[0,2,1]))
    mat_diff -= tf.constant(np.eye(K), dtype=tf.float32)
    mat_diff_loss = tf.nn.l2_loss(mat_diff)
    # Same reasoning as above: 'mat loss' is sanitised to 'mat_loss'.
    tf.summary.scalar('mat_loss', mat_diff_loss)

    return classify_loss + mat_diff_loss * reg_weight


if __name__=='__main__':
    # Smoke test: build the classification graph once on an all-zero batch
    # of 32 clouds with 1024 points each, then print what get_model returns.
    graph = tf.Graph()
    with graph.as_default():
        dummy_clouds = tf.zeros((32,1024,3))
        training_flag = tf.constant(True)
        model_out = get_model(dummy_clouds, training_flag)
        print(model_out)