Face embeddings with DeepStream PeopleNet

Please provide complete information as applicable to your setup.

• Hardware Platform (Jetson / GPU)
RTX 4060
• DeepStream Version
DeepStream 7.1
• JetPack Version (valid for Jetson only)
• TensorRT Version
default
• NVIDIA GPU Driver Version (valid for GPU only)
default
• Issue Type( questions, new requirements, bugs)
Unable to print the embedding from the SGIE; the PGIE model is PeopleNet.
• How to reproduce the issue ? (This is for bugs. Including which sample app is using, the configuration files content, the command line used and other details for reproducing)
• Requirement details( This is for new requirement. Including the module name-for which plugin or for which sample application, the function description)

I would like a guide on implementing a ResNet model in DeepStream. I have successfully loaded the face-embedding model as the SGIE. The code is provided below.

import sys
sys.path.append('../')
import os
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
from common.platform_info import PlatformInfo
from common.bus_call import bus_call

import ctypes
import numpy as np
import pyds

PGIE_CLASS_ID_PERSON = 0
PGIE_CLASS_ID_BAG = 1
PGIE_CLASS_ID_FACE = 2
MUXER_BATCH_TIMEOUT_USEC = 33000

def osd_sink_pad_buffer_probe(pad,info,u_data):
    frame_number=0
    num_rects=0

    gst_buffer = info.get_buffer()
    if not gst_buffer:
        print("Unable to get GstBuffer")
        return Gst.PadProbeReturn.OK

    # Retrieve batch metadata from the gst_buffer
    # Note that pyds.gst_buffer_get_nvds_batch_meta() expects the
    # C address of gst_buffer as input, which is obtained with hash(gst_buffer)
    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            # Note that l_frame.data needs a cast to pyds.NvDsFrameMeta
            # The casting is done by pyds.NvDsFrameMeta.cast()
            # The casting also keeps ownership of the underlying memory
            # in the C code, so the Python garbage collector will leave
            # it alone.
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        # Initialize object counters to 0.
        obj_counter = {
            PGIE_CLASS_ID_PERSON:0,
            PGIE_CLASS_ID_BAG:0,
            PGIE_CLASS_ID_FACE:0,
        }
        frame_number=frame_meta.frame_num
        num_rects = frame_meta.num_obj_meta
        l_obj=frame_meta.obj_meta_list
        while l_obj is not None:
            try:
                # Casting l_obj.data to pyds.NvDsObjectMeta
                obj_meta=pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break
            obj_counter[obj_meta.class_id] += 1
            obj_meta.rect_params.border_color.set(0.0, 0.0, 1.0, 0.8) #0.8 is alpha (opacity)
            try: 
                l_obj=l_obj.next
            except StopIteration:
                break

        # Acquiring a display meta object. The memory ownership remains in
        # the C code so downstream plugins can still access it. Otherwise
        # the garbage collector will claim it when this probe function exits.
        display_meta=pyds.nvds_acquire_display_meta_from_pool(batch_meta)
        display_meta.num_labels = 1
        py_nvosd_text_params = display_meta.text_params[0]
        # Setting display text to be shown on screen
        # Note that the pyds module allocates a buffer for the string, and the
        # memory will not be claimed by the garbage collector.
        # Reading the display_text field here will return the C address of the
        # allocated string. Use pyds.get_string() to get the string content.
        py_nvosd_text_params.display_text = "Frame Number={} Number of Objects={} Bag_count={} Face_count={}".format(frame_number, num_rects, obj_counter[PGIE_CLASS_ID_BAG], obj_counter[PGIE_CLASS_ID_FACE])

        # Now set the offsets where the string should appear
        py_nvosd_text_params.x_offset = 10
        py_nvosd_text_params.y_offset = 12

        # Font , font-color and font-size
        py_nvosd_text_params.font_params.font_name = "Serif"
        py_nvosd_text_params.font_params.font_size = 10
        # set(red, green, blue, alpha); set to White
        py_nvosd_text_params.font_params.font_color.set(1.0, 1.0, 1.0, 1.0)

        # Text background color
        py_nvosd_text_params.set_bg_clr = 1
        # set(red, green, blue, alpha); set to Black
        py_nvosd_text_params.text_bg_clr.set(0.0, 0.0, 0.0, 1.0)
        # Using pyds.get_string() to get display_text as string
        # print(pyds.get_string(py_nvosd_text_params.display_text))
        pyds.nvds_add_display_meta_to_frame(frame_meta, display_meta)
        try:
            l_frame=l_frame.next
        except StopIteration:
            break
			
    return Gst.PadProbeReturn.OK	

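# NOTE: This probe is defined for experimentation (e.g. gating the SGIE on face
# detections via the commented-out block) but is never attached to the pgie src
# pad in main() below.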
def pgie_src_pad_buffer_probe(pad, info, u_data):
    gst_buffer = info.get_buffer()
    if not gst_buffer:
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        l_obj = frame_meta.obj_meta_list
        face_class_detected = False
        while l_obj is not None:
            try:
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break

            if obj_meta.class_id == PGIE_CLASS_ID_FACE:
                face_class_detected = True
                break

            try:
                l_obj = l_obj.next
            except StopIteration:
                break

        # if not face_class_detected:
        #     # No class 1 detected, clear objects so sgie does nothing
        #     frame_meta.obj_meta_list = None
        #     frame_meta.num_obj_meta = 0

        try:
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK

def sgie_src_pad_buffer_probe(pad, info, u_data):
    # Get buffer and batch metadata
    buffer = info.get_buffer()
    if not buffer:
        print("Unable to get buffer")
        return Gst.PadProbeReturn.OK

    batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(buffer))
    if not batch_meta:
        print("No batch metadata available")
        return Gst.PadProbeReturn.OK

    # Iterate through frames in the batch
    l_frame = batch_meta.frame_meta_list
    while l_frame is not None:
        try:
            frame_meta = pyds.NvDsFrameMeta.cast(l_frame.data)
        except StopIteration:
            break

        # Iterate through objects in the frame
        l_obj = frame_meta.obj_meta_list
        while l_obj is not None:
            try:
                obj_meta = pyds.NvDsObjectMeta.cast(l_obj.data)
            except StopIteration:
                break

            # Useful NvDsObjectMeta fields for debugging:
            # unique_component_id, class_id, object_id, confidence,
            # detector_bbox_info, tracker_bbox_info, tracker_confidence,
            # rect_params, mask_params, text_params, obj_label,
            # classifier_meta_list, obj_user_meta_list, misc_obj_info
            
            # Filter for class ID 2 (faces) and read the SGIE's tensor output
            if obj_meta.class_id == PGIE_CLASS_ID_FACE:
                l_user = obj_meta.obj_user_meta_list
                while l_user is not None:
                    try:
                        user_meta = pyds.NvDsUserMeta.cast(l_user.data)
                    except StopIteration:
                        break
                    # nvinfer attaches NVDSINFER_TENSOR_OUTPUT_META when
                    # output-tensor-meta=1 is set in its config file
                    if user_meta.base_meta.meta_type == pyds.NvDsMetaType.NVDSINFER_TENSOR_OUTPUT_META:
                        tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
                        # Only read tensors attached by the SGIE (gie-unique-id=2)
                        if tensor_meta.unique_id == 2:
                            for i in range(tensor_meta.num_output_layers):
                                layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
                                if layer.dataType == 0:  # 0 == FLOAT
                                    ptr = ctypes.cast(pyds.get_ptr(layer.buffer),
                                                      ctypes.POINTER(ctypes.c_float))
                                    embedding = np.ctypeslib.as_array(ptr, shape=(layer.dims.numElements,))
                                    print(f"Frame {frame_meta.frame_num}, Object {obj_meta.object_id}, "
                                          f"Face Embedding: {embedding.tolist()}")
                    try:
                        l_user = l_user.next
                    except StopIteration:
                        break
            try:
                l_obj = l_obj.next
            except StopIteration:
                break
        try:
            l_frame = l_frame.next
        except StopIteration:
            break

    return Gst.PadProbeReturn.OK

def main(args):
    # Check input arguments
    if len(args) != 2:
        sys.stderr.write("usage: %s <media file or uri>\n" % args[0])
        sys.exit(1)

    platform_info = PlatformInfo()
    # Standard GStreamer initialization
    Gst.init(None)

    # Create gstreamer elements
    # Create Pipeline element that will form a connection of other elements
    print("Creating Pipeline \n ")
    pipeline = Gst.Pipeline()

    if not pipeline:
        sys.stderr.write(" Unable to create Pipeline \n")

    # Source element for reading from the file
    print("Creating Source \n ")
    source = Gst.ElementFactory.make("filesrc", "file-source")
    if not source:
        sys.stderr.write(" Unable to create Source \n")

    # Since the data format in the input file is an elementary h264 stream,
    # we need an h264parser
    print("Creating H264Parser \n")
    h264parser = Gst.ElementFactory.make("h264parse", "h264-parser")
    if not h264parser:
        sys.stderr.write(" Unable to create h264 parser \n")

    # Use nvdec_h264 for hardware accelerated decode on GPU
    print("Creating Decoder \n")
    decoder = Gst.ElementFactory.make("nvv4l2decoder", "nvv4l2-decoder")
    if not decoder:
        sys.stderr.write(" Unable to create Nvv4l2 Decoder \n")

    # Create nvstreammux instance to form batches from one or more sources.
    streammux = Gst.ElementFactory.make("nvstreammux", "Stream-muxer")
    if not streammux:
        sys.stderr.write(" Unable to create NvStreamMux \n")

    # Use nvinfer to run inferencing on decoder's output,
    # behaviour of inferencing is set through config file
    pgie = Gst.ElementFactory.make("nvinfer", "primary-inference")
    if not pgie:
        sys.stderr.write(" Unable to create pgie \n")

    sgie = Gst.ElementFactory.make("nvinfer", "secondary-inference")
    if not sgie:
        sys.stderr.write(" Unable to create sgie \n")

    # Use convertor to convert from NV12 to RGBA as required by nvosd
    nvvidconv = Gst.ElementFactory.make("nvvideoconvert", "convertor")
    if not nvvidconv:
        sys.stderr.write(" Unable to create nvvidconv \n")

    # Create OSD to draw on the converted RGBA buffer
    nvosd = Gst.ElementFactory.make("nvdsosd", "onscreendisplay")

    if not nvosd:
        sys.stderr.write(" Unable to create nvosd \n")

    # Finally render the osd output
    if platform_info.is_integrated_gpu():
        print("Creating nv3dsink \n")
        sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        if not sink:
            sys.stderr.write(" Unable to create nv3dsink \n")
    else:
        if platform_info.is_platform_aarch64():
            print("Creating nv3dsink \n")
            sink = Gst.ElementFactory.make("nv3dsink", "nv3d-sink")
        else:
            print("Creating EGLSink \n")
            sink = Gst.ElementFactory.make("nveglglessink", "nvvideo-renderer")
        if not sink:
            sys.stderr.write(" Unable to create egl sink \n")

    print("Playing file %s " %args[1])
    source.set_property('location', args[1])
    if os.environ.get('USE_NEW_NVSTREAMMUX') != 'yes': # Only set these properties if not using new gst-nvstreammux
        streammux.set_property('width', 1920)
        streammux.set_property('height', 1080)
        streammux.set_property('batched-push-timeout', MUXER_BATCH_TIMEOUT_USEC)
    
    streammux.set_property('batch-size', 1)
    pgie.set_property('config-file-path', "dstest1_pgie_config.txt")
    sgie.set_property('config-file-path', "dstest1_sgie_config.txt")

    print("Adding elements to Pipeline \n")
    pipeline.add(source)
    pipeline.add(h264parser)
    pipeline.add(decoder)
    pipeline.add(streammux)
    pipeline.add(pgie)
    pipeline.add(sgie)
    pipeline.add(nvvidconv)
    pipeline.add(nvosd)
    pipeline.add(sink)

    # We link the elements together:
    # file-source -> h264-parser -> nvv4l2-decoder -> streammux ->
    # pgie -> sgie -> nvvidconv -> nvosd -> video-renderer
    print("Linking elements in the Pipeline \n")
    source.link(h264parser)
    h264parser.link(decoder)

    # Get the source pad of decoder and link it to streammux's sink pad
    sinkpad = streammux.get_request_pad("sink_0")
    if not sinkpad:
        sys.stderr.write(" Unable to get the sink pad of streammux\n")

    srcpad = decoder.get_static_pad("src")
    if not srcpad:
        sys.stderr.write(" Unable to get source pad of decoder\n")

    # Link the decoder's src pad to streammux's sink pad
    if srcpad.link(sinkpad) != Gst.PadLinkReturn.OK:
        sys.stderr.write(" Failed to link decoder to streammux\n")

    # Link the remaining elements in the pipeline
    if not streammux.link(pgie):
        sys.stderr.write(" Failed to link streammux to pgie\n")
    if not pgie.link(sgie):
        sys.stderr.write(" Failed to link pgie to sgie\n")
    if not sgie.link(nvvidconv):
        sys.stderr.write(" Failed to link sgie to nvvidconv\n")
    if not nvvidconv.link(nvosd):
        sys.stderr.write(" Failed to link nvvidconv to nvosd\n")
    if not nvosd.link(sink):
        sys.stderr.write(" Failed to link nvosd to sink\n")

    sgie_srcpad = sgie.get_static_pad("src")
    if not sgie_srcpad:
        sys.stderr.write(" Unable to get src pad of sgie\n")
    else:
        sgie_srcpad.add_probe(Gst.PadProbeType.BUFFER, sgie_src_pad_buffer_probe, 0)

    # create an event loop and feed gstreamer bus messages to it
    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect ("message", bus_call, loop)

    # Let's add a probe to get informed of the generated metadata. We add the
    # probe to the sink pad of the osd element, since by that time the buffer
    # will have all the metadata.
    osdsinkpad = nvosd.get_static_pad("sink")
    if not osdsinkpad:
        sys.stderr.write(" Unable to get sink pad of nvosd \n")

    osdsinkpad.add_probe(Gst.PadProbeType.BUFFER, osd_sink_pad_buffer_probe, 0)

    # start play back and listen to events
    print("Starting pipeline \n")
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except:
        pass
    # cleanup
    pipeline.set_state(Gst.State.NULL)

if __name__ == '__main__':
    sys.exit(main(sys.argv))
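
The script is launched like the stock deepstream-test1 sample, with an elementary H.264 stream as its only argument. For example (the stream path is the one shipped with a default DeepStream install; the script file name is whatever you saved it as):

python3 deepstream_test_1_face.py /opt/nvidia/deepstream/deepstream/samples/streams/sample_720p.h264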

This is my SGIE config file:

[property]
gpu-id=0
onnx-file=./recognition_resnet27.onnx
model-engine-file=/home/wesenkhoo2/Documents/deepstream_python_apps/apps/deepstream-test1/recognition_resnet27.onnx_b1_gpu0_fp16.engine
batch-size=1
network-mode=3
gie-unique-id=2
interval=0
net-scale-factor=0.00392156862745098
operate-on-class-ids=2

# Feature extraction mode (no bbox or class parsing)
network-type=100
output-tensor-meta=1

You need to parse the tensor output data after inference according to your model. You can refer to our sample deepstream_ssd_parser.py for how to access the tensor output data.
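
For reference, here is a minimal sketch of the access pattern that sample uses, assuming a single FP32 output layer; the helper name extract_embedding is illustrative, and the layer count and size depend on your model:

import pyds

def extract_embedding(user_meta):
    # Cast the PyCapsule to NvDsInferTensorMeta, then walk the output layers
    tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)
    for i in range(tensor_meta.num_output_layers):
        layer = pyds.get_nvds_LayerInfo(tensor_meta, i)
        if layer.dataType == 0:  # 0 == FLOAT
            # get_detections() reads one float at a time from the layer buffer
            return [pyds.get_detections(layer.buffer, j)
                    for j in range(layer.dims.numElements)]
    return None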

<capsule object NULL at 0x7f50266cd200>

I get something like this when I print the metadata. Does that mean there is nothing in the inference output?
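
A <capsule object NULL at ...> printout does not necessarily mean the inference produced nothing: user_meta.user_meta_data is a PyCapsule wrapping a C pointer, and the NULL is the capsule's (absent) name, not an empty result. The capsule has to be cast before its fields can be read, e.g.:

tensor_meta = pyds.NvDsInferTensorMeta.cast(user_meta.user_meta_data)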

import cv2
import numpy as np

def preprocess_image_for_onnx(image: np.ndarray, size=(128, 128)):
    """
    Preprocess an OpenCV image for ONNX model inference.

    Args:
        image (np.ndarray): Input image as an HWC uint8 array.
        size (tuple): Target size for the image (width, height).

    Returns:
        np.ndarray: Preprocessed image batch of shape (1, Channels, Height, Width).
    """
    # Resize the image
    resized_image = cv2.resize(image, size)

    # Convert to float32 for normalization
    image_array = resized_image.astype(np.float32)

    # Normalize pixel values to [0, 1] or other normalization (e.g., mean/std scaling)
    image_array /= 255.0  # Scale to [0, 1]

    # Rearrange dimensions to (Channels, Height, Width) from (Height, Width, Channels)
    image_array = np.transpose(image_array, (2, 0, 1))

    # Add batch dimension to make it (1, Channels, Height, Width)
    image_array = np.expand_dims(image_array, axis=0)

    return image_array

Before that, I used onnxruntime to run the inference. The code above is how I converted the OpenCV image into the model's input format. Do I need to do this in DeepStream?

Yes. We suggest that you first run our sample attached to get familiar with the process.

No. DeepStream has its own decoding part.
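
For context, nvinfer performs the crop, resize, and normalization itself, driven by the [property] keys in the SGIE config, so the onnxruntime-style preprocessing above is not needed in DeepStream. Below is a sketch of how those steps map onto config keys; infer-dims and model-color-format here are assumptions that must match your ONNX input:

[property]
# image_array /= 255.0  ->  net-scale-factor = 1/255 (already set above)
net-scale-factor=0.00392156862745098
# cv2.resize(image, (128, 128))  ->  nvinfer resizes each cropped object to the
# model input shape, taken from the ONNX model itself or forced via infer-dims
infer-dims=3;128;128
# The HWC -> CHW transpose and batching are handled by nvinfer automatically.
# OpenCV frames are BGR; model-color-format: 0=RGB, 1=BGR, 2=GRAY
model-color-format=1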

There has been no update from you for a while, so we assume this is no longer an issue and are closing this topic. If you need further support, please open a new one. Thanks.
