Posted by herbiezhao (account deleted) on 2019-3-20 09:24:35
I am using TensorRT to accelerate a resnet_v1_50 classification network, with TensorFlow doing the image preprocessing. The CUDA error occurs inside common.do_inference.
Code of the do_inference function; the failing line is context.execute_async:
def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
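
For reference, do_inference consumes the inputs/outputs/bindings/stream produced by common.allocate_buffers. In NVIDIA's TensorRT Python samples that helper looks roughly like the sketch below; I assume the same layout here (adjust if your common.py differs):

import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

class HostDeviceMem(object):
    """Pairs a page-locked host buffer with its device allocation."""
    def __init__(self, host_mem, device_mem):
        self.host = host_mem      # numpy array in page-locked memory
        self.device = device_mem  # pycuda DeviceAllocation

def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        # Number of elements for this binding at max batch size.
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate page-locked host memory and matching device memory.
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # The bindings list passed to execute_async holds device pointers.
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream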


Main program code:
from random import randint
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import sys, os
import common
from preprocessing import preprocessing_factory
from nets import nets_factory
import tensorflow as tf
import time

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class ModelData(object):
    MODEL_FILE = os.path.join(os.path.dirname(__file__), "model.uff")
    INPUT_NAME ="input"
    INPUT_SHAPE = (3, 224, 224)
    OUTPUT_NAME = "resnet_v1_50/SpatialSqueeze"

def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    print("build engine begin\n")
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_batch_size = 1
        #builder.int8_mode = True
        #builder.fp16_mode = True
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE, trt.UffInputOrder.NHWC)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

def main():
    #data_path = common.find_sample_data(description="Runs an MNIST network using a UFF model file", subfolder="mnist")
    model_file = ModelData.MODEL_FILE

    g = tf.Graph()
    sess = tf.Session(graph=g)
    with g.as_default():
        image = tf.placeholder(tf.uint8, shape=[None, None, 3])
        model_name = "resnet_v1_50"
        network_fn = nets_factory.get_network_fn(model_name, 8, is_training=False)
        image_size = network_fn.default_image_size
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(model_name, is_training=False)
        arg_scope = nets_factory.arg_scopes_map[model_name]()
        pre_image = image_preprocessing_fn(image, image_size, image_size)
        input = tf.expand_dims(pre_image, 0)

        with build_engine(model_file) as engine:
            # Build an engine, allocate buffers and create a stream.
            # For more information on buffer allocation, refer to the introductory samples.
            inputs, outputs, bindings, stream = common.allocate_buffers(engine)
            pagelocked_buffer = inputs[0].host

            with engine.create_execution_context() as context:
                image_path = os.path.join("test.bmp")
                img = Image.open(image_path)
                img = np.array(img)

                input_ = sess.run(pre_image, feed_dict={image: img})
                input_ = np.array(input_.ravel())
                np.copyto(pagelocked_buffer, input_)
                [output] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
if __name__ == '__main__':
    main()
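
In case it helps narrow things down, here is a minimal sanity check that can be dropped in just before the np.copyto call, using the standard TensorRT binding queries (get_binding_shape / get_binding_dtype). This is only a sketch, not part of the failing run:

# Sketch of a pre-copy sanity check (assumes the standard TensorRT binding API).
# A size or dtype mismatch between the preprocessed NHWC tensor and the input
# binding registered with the UFF parser would show up here instead of as a
# CUDA error inside context.execute_async.
expected_size = trt.volume(engine.get_binding_shape(0)) * engine.max_batch_size
expected_dtype = trt.nptype(engine.get_binding_dtype(0))
assert input_.size == expected_size, \
    "got {} elements, input binding expects {}".format(input_.size, expected_size)
assert input_.dtype == expected_dtype, \
    "got dtype {}, input binding expects {}".format(input_.dtype, expected_dtype)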

