From f41e8ae7f513ae1088802dd3d90657dc0f98b7a2 Mon Sep 17 00:00:00 2001
From: Roman Arzumanyan <rarzumanyan@nvidia.com>
Date: Tue, 4 Dec 2018 09:26:37 +0300
Subject: [PATCH] Updating headers for Video Codec SDK 9 release

---
 ffnvcodec.pc.in                      |  2 +-
 include/ffnvcodec/dynlink_cuviddec.h | 42 ++++++++++++++++---
 include/ffnvcodec/nvEncodeAPI.h      | 81 +++++++++++++++++++++++++++++++-----
 3 files changed, 107 insertions(+), 18 deletions(-)

diff --git a/ffnvcodec.pc.in b/ffnvcodec.pc.in
index edae59c..80fcfef 100644
--- a/ffnvcodec.pc.in
+++ b/ffnvcodec.pc.in
@@ -3,5 +3,5 @@ includedir=${prefix}/include
 
 Name: ffnvcodec
 Description: FFmpeg version of Nvidia Codec SDK headers
-Version: 8.2.15.7
+Version: 9.0.0.0
 Cflags: -I${includedir}
diff --git a/include/ffnvcodec/dynlink_cuviddec.h b/include/ffnvcodec/dynlink_cuviddec.h
index 431c776..9aeb05d 100644
--- a/include/ffnvcodec/dynlink_cuviddec.h
+++ b/include/ffnvcodec/dynlink_cuviddec.h
@@ -41,8 +41,11 @@
 #endif
 #endif
 
-#define NVDECAPI_MAJOR_VERSION 8
-#define NVDECAPI_MINOR_VERSION 1
+#define NVDECAPI_MAJOR_VERSION 9
+#define NVDECAPI_MINOR_VERSION 0
+
+#define NVDECAPI_CHECK_VERSION(major, minor) \
+    ((major) < NVDECAPI_MAJOR_VERSION || ((major) == NVDECAPI_MAJOR_VERSION && (minor) <= NVDECAPI_MINOR_VERSION))
 
 #define NVDECAPI_VERSION (NVDECAPI_MAJOR_VERSION | (NVDECAPI_MINOR_VERSION << 24))
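
The new NVDECAPI_CHECK_VERSION macro gives downstream code a single compile-time test for "these headers are at least version X.Y". A minimal sketch, assuming the headers are installed under the usual ffnvcodec/ include prefix (the guarded feature is the 4:4:4 surface support added further down in this patch):

    #include "ffnvcodec/dynlink_cuviddec.h"

    /* 1 when these headers are SDK 9.0 or newer, so the planar 4:4:4
     * output surface formats are declared. */
    #if NVDECAPI_CHECK_VERSION(9, 0)
    #define HAVE_CUVID_444_SURFACES 1
    #else
    #define HAVE_CUVID_444_SURFACES 0
    #endif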
 
@@ -91,9 +94,12 @@ typedef enum cudaVideoCodec_enum {
 //! These enums are used in CUVIDDECODECREATEINFO structure
 /*********************************************************************************/
 typedef enum cudaVideoSurfaceFormat_enum {
-    cudaVideoSurfaceFormat_NV12=0,       /**< Semi-Planar YUV [Y plane followed by interleaved UV plane]     */
-    cudaVideoSurfaceFormat_P016=1        /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane].
-                                              Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0)      */
+    cudaVideoSurfaceFormat_NV12=0,          /**< Semi-Planar YUV [Y plane followed by interleaved UV plane]     */
+    cudaVideoSurfaceFormat_P016=1,          /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane].
+                                                 Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0)      */
+    cudaVideoSurfaceFormat_YUV444=2,        /**< Planar YUV [Y plane followed by U and V planes]                */
+    cudaVideoSurfaceFormat_YUV444_16Bit=3,  /**< 16 bit Planar YUV [Y plane followed by U and V planes]. 
+                                                 Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0)      */
 } cudaVideoSurfaceFormat;
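
With planar 4:4:4 output now possible, a decoder typically derives the output surface from the stream's chroma format and bit depth. A sketch, assuming the cudaVideoChromaFormat enumerators defined elsewhere in this header and a caller that already knows the luma bit depth; the result would go into CUVIDDECODECREATEINFO::OutputFormat:

    #include "ffnvcodec/dynlink_cuviddec.h"

    /* Pick an output surface format for CUVIDDECODECREATEINFO::OutputFormat. */
    static cudaVideoSurfaceFormat choose_surface_format(cudaVideoChromaFormat chroma,
                                                        int bit_depth_luma)
    {
        int high_depth = bit_depth_luma > 8;

        if (chroma == cudaVideoChromaFormat_444)
            return high_depth ? cudaVideoSurfaceFormat_YUV444_16Bit
                              : cudaVideoSurfaceFormat_YUV444;

        /* 4:2:0 (and anything else) keeps using the semi-planar formats. */
        return high_depth ? cudaVideoSurfaceFormat_P016
                          : cudaVideoSurfaceFormat_NV12;
    }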
 
 /******************************************************************************************************************/
@@ -567,7 +573,31 @@ typedef struct _CUVIDHEVCPICPARAMS
 
     unsigned short column_width_minus1[21];
     unsigned short row_height_minus1[21];
-    unsigned int   reserved3[15];
+
+    // sps and pps extension HEVC-main 444
+    unsigned char sps_range_extension_flag;
+    unsigned char transform_skip_rotation_enabled_flag;
+    unsigned char transform_skip_context_enabled_flag;
+    unsigned char implicit_rdpcm_enabled_flag;
+
+    unsigned char explicit_rdpcm_enabled_flag;
+    unsigned char extended_precision_processing_flag;
+    unsigned char intra_smoothing_disabled_flag;
+    unsigned char persistent_rice_adaptation_enabled_flag;
+
+    unsigned char cabac_bypass_alignment_enabled_flag;
+    unsigned char pps_range_extension_flag;
+    unsigned char cross_component_prediction_enabled_flag;
+    unsigned char chroma_qp_offset_list_enabled_flag;
+
+    unsigned char diff_cu_chroma_qp_offset_depth;
+    unsigned char chroma_qp_offset_list_len_minus1;
+    signed char cb_qp_offset_list[6];
+
+    signed char cr_qp_offset_list[6];
+    unsigned char reserved2[2];
+
+    unsigned int   reserved3[8];
 
     // RefPicSets
     int NumBitsForShortTermRPSInSlice;
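
These new members mirror the HEVC range-extension syntax elements one to one, so a decoder front end copies them straight from its parsed SPS/PPS when filling CUVIDPICPARAMS. A sketch, with hypothetical ParsedSPS/ParsedPPS structs standing in for whatever parser the caller uses (only a couple of fields shown; the names follow the spec):

    #include "ffnvcodec/dynlink_cuviddec.h"

    /* Hypothetical parser output. */
    typedef struct ParsedSPS {
        int sps_range_extension_flag;
        int transform_skip_rotation_enabled_flag;
    } ParsedSPS;

    typedef struct ParsedPPS {
        int pps_range_extension_flag;
        int cross_component_prediction_enabled_flag;
    } ParsedPPS;

    static void fill_hevc_range_extension(CUVIDPICPARAMS *pp,
                                          const ParsedSPS *sps, const ParsedPPS *pps)
    {
        CUVIDHEVCPICPARAMS *hevc = &pp->CodecSpecific.hevc;

        hevc->sps_range_extension_flag                = sps->sps_range_extension_flag;
        hevc->transform_skip_rotation_enabled_flag    = sps->transform_skip_rotation_enabled_flag;
        hevc->pps_range_extension_flag                = pps->pps_range_extension_flag;
        hevc->cross_component_prediction_enabled_flag = pps->cross_component_prediction_enabled_flag;
        /* The remaining flags and the cb/cr QP offset lists follow the same pattern. */
    }
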
diff --git a/include/ffnvcodec/nvEncodeAPI.h b/include/ffnvcodec/nvEncodeAPI.h
index 3ba9368..4a6ec14 100644
--- a/include/ffnvcodec/nvEncodeAPI.h
+++ b/include/ffnvcodec/nvEncodeAPI.h
@@ -113,8 +113,8 @@ typedef void* NV_ENC_INPUT_PTR;             /**< NVENCODE API input buffer
 typedef void* NV_ENC_OUTPUT_PTR;            /**< NVENCODE API output buffer*/
 typedef void* NV_ENC_REGISTERED_PTR;        /**< A Resource that has been registered with NVENCODE API*/
 
-#define NVENCAPI_MAJOR_VERSION 8
-#define NVENCAPI_MINOR_VERSION 1
+#define NVENCAPI_MAJOR_VERSION 9
+#define NVENCAPI_MINOR_VERSION 0
 
 #define NVENCAPI_VERSION (NVENCAPI_MAJOR_VERSION | (NVENCAPI_MINOR_VERSION << 24))
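
Because these headers can be newer than the installed driver, NVENC clients are expected to compare NVENCAPI_MAJOR_VERSION/NVENCAPI_MINOR_VERSION against what NvEncodeAPIGetMaxSupportedVersion reports before creating a session. A sketch, assuming that entry point has already been resolved from libnvidia-encode / nvEncodeAPI(64).dll and that the driver reports the version as (major << 4) | minor, the convention used by the SDK samples:

    #include <stdint.h>
    #include "ffnvcodec/nvEncodeAPI.h"

    /* Returns 1 if the loaded driver supports the API version these headers declare. */
    static int nvenc_driver_supports_headers(NVENCSTATUS (NVENCAPI *get_max_ver)(uint32_t *))
    {
        uint32_t driver_ver = 0;

        if (get_max_ver(&driver_ver) != NV_ENC_SUCCESS)
            return 0;

        return ((NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION) <= driver_ver;
    }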
 
@@ -314,6 +314,7 @@ typedef enum _NV_ENC_PIC_TYPE
     NV_ENC_PIC_TYPE_BI              = 0x04,    /**< Bi-directionally predicted with only Intra MBs */
     NV_ENC_PIC_TYPE_SKIPPED         = 0x05,    /**< Picture is skipped */
     NV_ENC_PIC_TYPE_INTRA_REFRESH   = 0x06,    /**< First picture in intra refresh cycle */
+    NV_ENC_PIC_TYPE_NONREF_P        = 0x07,    /**< Non reference P picture */            
     NV_ENC_PIC_TYPE_UNKNOWN         = 0xFF     /**< Picture type unknown */
 } NV_ENC_PIC_TYPE;
 
@@ -362,6 +363,10 @@ typedef enum _NV_ENC_BUFFER_FORMAT
                                                                              where a pixel is represented by a 32-bit word with R
                                                                              in the lowest 10 bits, G in the next 10 bits, B in the
                                                                              10 bits after that and A in the highest 2 bits. */
+    NV_ENC_BUFFER_FORMAT_U8                              = 0x40000000,  /**< Buffer format representing one-dimensional buffer. 
+                                                                             This format should be used only when registering the 
+                                                                             resource as output buffer, which will be used to write
+                                                                             the encoded bit stream or H.264 ME only mode output */
 } NV_ENC_BUFFER_FORMAT;
 
 #define NV_ENC_BUFFER_FORMAT_NV12_PL NV_ENC_BUFFER_FORMAT_NV12
@@ -681,10 +686,23 @@ typedef enum _NV_ENC_INPUT_RESOURCE_TYPE
 {
     NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX          = 0x0,   /**< input resource type is a directx9 surface*/
     NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR    = 0x1,   /**< input resource type is a cuda device pointer surface*/
-    NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY        = 0x2,   /**< input resource type is a cuda array surface */
+    NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY        = 0x2,   /**< input resource type is a cuda array surface.
+                                                              This array must be a 2D array and the CUDA_ARRAY3D_SURFACE_LDST
+                                                              flag must have been specified when creating it. */
     NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX       = 0x3    /**< input resource type is an OpenGL texture */
 } NV_ENC_INPUT_RESOURCE_TYPE;
 
+/**
+ *  Buffer usage
+ */
+typedef enum _NV_ENC_BUFFER_USAGE
+{
+    NV_ENC_INPUT_IMAGE              = 0x0,          /**< Registered surface will be used for input image */
+    NV_ENC_OUTPUT_MOTION_VECTOR     = 0x1,          /**< Registered surface will be used for output of H.264 MEOnly mode. 
+                                                         This buffer usage type is not supported for HEVC MEonly mode. */
+    NV_ENC_OUTPUT_BITSTREAM         = 0x2           /**< Registered surface will be used for output bitstream in encoding */
+} NV_ENC_BUFFER_USAGE;
+
 /**
  *  Encoder Device type
  */
@@ -1047,6 +1065,19 @@ typedef struct _NV_ENC_CAPS_PARAM
 #define NV_ENC_CAPS_PARAM_VER NVENCAPI_STRUCT_VERSION(1)
 
 
+/**
+ * Encoder Output parameters
+ */
+typedef struct _NV_ENC_ENCODE_OUT_PARAMS
+{
+    uint32_t                  version;                 /**< [out]: Struct version. */
+    uint32_t                  bitstreamSizeInBytes;    /**< [out]: Encoded bitstream size in bytes */
+    uint32_t                  reserved[62];            /**< [out]: Reserved and must be set to 0 */
+} NV_ENC_ENCODE_OUT_PARAMS;
+
+/** NV_ENC_ENCODE_OUT_PARAMS struct version. */
+#define NV_ENC_ENCODE_OUT_PARAMS_VER NVENCAPI_STRUCT_VERSION(1) 
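+
+When NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set (added further down in this patch), the registered output buffer starts with this struct and the encoded bitstream follows immediately after it. A sketch of reading the result back with the CUDA driver API, assuming the encode that wrote the buffer has already completed:
+
+    #include <cuda.h>
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include "ffnvcodec/nvEncodeAPI.h"
+
+    /* Copy the encoded bitstream out of a video-memory output buffer. */
+    static unsigned char *read_vidmem_output(CUdeviceptr out_buf, uint32_t *size)
+    {
+        NV_ENC_ENCODE_OUT_PARAMS params;   /* 256 bytes, at the start of the buffer */
+        unsigned char *bitstream;
+
+        cuMemcpyDtoH(&params, out_buf, sizeof(params));
+
+        *size = params.bitstreamSizeInBytes;
+        bitstream = malloc(*size);
+        if (bitstream)
+            cuMemcpyDtoH(bitstream, out_buf + sizeof(params), *size);
+        return bitstream;
+    }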
+
 /**
  * Creation parameters for input buffer.
  */
@@ -1390,7 +1421,8 @@ typedef struct _NV_ENC_CONFIG_HEVC
                                                                                Set to 1 to use "LTR Trust" mode of LTR operation. Clients are discouraged to use "LTR Trust" mode as this mode may
                                                                                be deprecated in future releases.
                                                                                Set to 0 when using "LTR Per Picture" mode of LTR operation. */
-    uint32_t                            reserved1[217];             /**< [in]: Reserved and must be set to 0.*/
+    NV_ENC_BFRAME_REF_MODE              useBFramesAsRef;            /**< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using  ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.*/
+    uint32_t                            reserved1[216];             /**< [in]: Reserved and must be set to 0.*/
     void*    reserved2[64];                                         /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CONFIG_HEVC;
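
Support for B frames as reference is hardware dependent, so the capability should be queried before setting the new field. A sketch against the NVENCAPI function list, assuming an open encode session and an NV_ENC_CONFIG about to be passed to initialization; the cap value is treated as a bit mask of supported NV_ENC_BFRAME_REF_MODE values, which is how existing clients read it:

    #include "ffnvcodec/nvEncodeAPI.h"

    /* Enable B-frames-as-reference for HEVC when the hardware reports support. */
    static void hevc_enable_bframe_refs(NV_ENCODE_API_FUNCTION_LIST *fn, void *encoder,
                                        NV_ENC_CONFIG *cfg)
    {
        NV_ENC_CAPS_PARAM caps = { 0 };
        int supported = 0;

        caps.version     = NV_ENC_CAPS_PARAM_VER;
        caps.capsToQuery = NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE;

        if (fn->nvEncGetEncodeCaps(encoder, NV_ENC_CODEC_HEVC_GUID, &caps, &supported) == NV_ENC_SUCCESS &&
            (supported & NV_ENC_BFRAME_REF_MODE_MIDDLE))
            cfg->encodeCodecConfig.hevcConfig.useBFramesAsRef = NV_ENC_BFRAME_REF_MODE_MIDDLE;
    }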
 
@@ -1486,7 +1518,8 @@ typedef struct _NV_ENC_INITIALIZE_PARAMS
                                                                                            NV_ENC_PIC_PARAMS::meHintRefPicDist should preferably be set with enablePTD=1. */
     uint32_t                                   enableMEOnlyMode          :1;    /**< [in]: Set to 1 to enable ME Only Mode .*/
     uint32_t                                   enableWeightedPrediction  :1;    /**< [in]: Set this to 1 to enable weighted prediction. Not supported if encode session is configured for B-Frames( 'frameIntervalP' in NV_ENC_CONFIG is greater than 1).*/
-    uint32_t                                   reservedBitFields         :27;   /**< [in]: Reserved bitfields and must be set to 0 */
+    uint32_t                                   enableOutputInVidmem      :1;    /**< [in]: Set this to 1 to enable output of NVENC in video memory created by application. This feature is not supported for HEVC MEonly mode. */
+    uint32_t                                   reservedBitFields         :26;   /**< [in]: Reserved bitfields and must be set to 0 */
     uint32_t                                   privDataSize;                    /**< [in]: Reserved private data buffer size and must be set to 0 */
     void*                                      privData;                        /**< [in]: Reserved private data buffer and must be set to NULL */
     NV_ENC_CONFIG*                             encodeConfig;                    /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters.
@@ -1677,7 +1710,14 @@ typedef struct _NV_ENC_PIC_PARAMS
     uint64_t                                    inputTimeStamp;                 /**< [in]: Specifies presentation timestamp associated with the input picture. */
     uint64_t                                    inputDuration;                  /**< [in]: Specifies duration of the input picture */
     NV_ENC_INPUT_PTR                            inputBuffer;                    /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/
-    NV_ENC_OUTPUT_PTR                           outputBitstream;                /**< [in]: Specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. */
+    NV_ENC_OUTPUT_PTR                           outputBitstream;                /**< [in]: Specifies the output buffer pointer. 
+                                                                                           If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. 
+                                                                                           If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for NV_ENC_ENCODE_OUT_PARAMS struct and encoded bitstream data. Client 
+                                                                                           should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this output buffer and assign it to NV_ENC_PIC_PARAMS::outputBitstream. 
+                                                                                           First 256 bytes of this buffer should be interpreted as NV_ENC_ENCODE_OUT_PARAMS struct followed by encoded bitstream data. Recommended size for output buffer is sum of size of 
+                                                                                           NV_ENC_ENCODE_OUT_PARAMS struct and twice the input frame size for lower resolution eg. CIF and 1.5 times the input frame size for higher resolutions. If encoded bitstream size is 
+                                                                                           greater than the allocated buffer size for encoded bitstream, then the output buffer will have encoded bitstream data equal to buffer size. All CUDA operations on this buffer must use 
+                                                                                           the default stream. */
     void*                                       completionEvent;                /**< [in]: Specifies an event to be signalled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer. */
     NV_ENC_BUFFER_FORMAT                        bufferFmt;                      /**< [in]: Specifies the input buffer format. */
     NV_ENC_PIC_STRUCT                           pictureStruct;                  /**< [in]: Specifies structure of the input picture. */
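
The sizing advice in the comment above turns into a small helper. A sketch: frame_size is the size in bytes of one uncompressed input frame, and treating anything at or below CIF as "lower resolution" is an assumption, since the comment only gives CIF as an example:

    #include <stddef.h>
    #include <stdint.h>
    #include "ffnvcodec/nvEncodeAPI.h"

    /* Recommended allocation for a video-memory output bitstream buffer. */
    static size_t vidmem_bitstream_size(uint32_t width, uint32_t height, size_t frame_size)
    {
        int low_res = (uint64_t)width * height <= 352 * 288;   /* CIF or smaller: assumption */

        return sizeof(NV_ENC_ENCODE_OUT_PARAMS) +
               (low_res ? 2 * frame_size : frame_size + frame_size / 2);
    }
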
@@ -1719,7 +1759,12 @@ typedef struct _NV_ENC_MEONLY_PARAMS
     uint32_t                inputHeight;                        /**< [in]: Specifies the input buffer height */
     NV_ENC_INPUT_PTR        inputBuffer;                        /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs. */
     NV_ENC_INPUT_PTR        referenceFrame;                     /**< [in]: Specifies the reference frame pointer */
-    NV_ENC_OUTPUT_PTR       mvBuffer;                           /**< [in]: Specifies the pointer to motion vector data buffer allocated by NvEncCreateMVBuffer. Client must lock mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. */
+    NV_ENC_OUTPUT_PTR       mvBuffer;                           /**< [in]: Specifies the output buffer pointer.
+                                                                           If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to motion vector data buffer allocated by NvEncCreateMVBuffer. 
+                                                                           Client must lock mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. 
+                                                                           If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for storing the motion vector data. The size of this buffer must 
+                                                                           be equal to total number of macroblocks multiplied by size of NV_ENC_H264_MV_DATA struct. Client should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this 
+                                                                           output buffer and assign it to NV_ENC_MEONLY_PARAMS::mvBuffer. All CUDA operations on this buffer must use the default stream. */
     NV_ENC_BUFFER_FORMAT    bufferFmt;                          /**< [in]: Specifies the input buffer format. */
     void*                   completionEvent;                    /**< [in]: Specifies an event to be signalled on completion of motion estimation
                                                                            of this Frame [only if operating in Asynchronous mode].
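
For the video-memory ME-only path the buffer holds one NV_ENC_H264_MV_DATA record per macroblock, so the allocation is the rounded-up macroblock count times that struct size. A sketch:

    #include <stddef.h>
    #include <stdint.h>
    #include "ffnvcodec/nvEncodeAPI.h"

    /* Size of the video-memory motion-vector buffer for H.264 ME-only mode. */
    static size_t meonly_mv_buffer_size(uint32_t width, uint32_t height)
    {
        size_t mb_cols = (width  + 15) / 16;
        size_t mb_rows = (height + 15) / 16;

        return mb_cols * mb_rows * sizeof(NV_ENC_H264_MV_DATA);
    }
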
@@ -1839,12 +1884,24 @@ typedef struct _NV_ENC_REGISTER_RESOURCE
                                                                            ::NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX */
     uint32_t                    width;                          /**< [in]: Input buffer Width. */
     uint32_t                    height;                         /**< [in]: Input buffer Height. */
-    uint32_t                    pitch;                          /**< [in]: Input buffer Pitch.  */
+    uint32_t                    pitch;                          /**< [in]: Input buffer Pitch.
+                                                                           For ::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX resources, set this to 0.
+                                                                           For ::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR resources, set this to
+                                                                             the pitch as obtained from cuMemAllocPitch(), or to the width in
+                                                                             bytes (if this resource was created by using cuMemAlloc()). This
+                                                                             value must be a multiple of 4.
+                                                                           For ::NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY resources, set this to the
+                                                                             width of the allocation in bytes (i.e.
+                                                                             CUDA_ARRAY3D_DESCRIPTOR::Width * CUDA_ARRAY3D_DESCRIPTOR::NumChannels).
+                                                                           For ::NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX resources, set this to the
+                                                                             texture width multiplied by the number of components in the texture
+                                                                             format. */
     uint32_t                    subResourceIndex;               /**< [in]: Subresource Index of the DirectX resource to be registered. Should be set to 0 for other interfaces. */
     void*                       resourceToRegister;             /**< [in]: Handle to the resource that is being registered. */
     NV_ENC_REGISTERED_PTR       registeredResource;             /**< [out]: Registered resource handle. This should be used in future interactions with the Nvidia Video Encoder Interface. */
     NV_ENC_BUFFER_FORMAT        bufferFormat;                   /**< [in]: Buffer format of resource to be registered. */
-    uint32_t                    reserved1[248];                 /**< [in]: Reserved and must be set to 0. */
+    NV_ENC_BUFFER_USAGE         bufferUsage;                    /**< [in]: Usage of resource to be registered. */
+    uint32_t                    reserved1[247];                 /**< [in]: Reserved and must be set to 0. */
     void*                       reserved2[62];                  /**< [in]: Reserved and must be set to NULL. */
 } NV_ENC_REGISTER_RESOURCE;
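
Tying the pitch rules and the new bufferUsage field together, registering a pitched CUDA allocation as an NV12 input frame looks roughly like the sketch below; the allocation is assumed to come from cuMemAllocPitch(), and error handling is reduced to returning NULL:

    #include <cuda.h>
    #include <stdint.h>
    #include <string.h>
    #include "ffnvcodec/nvEncodeAPI.h"

    static NV_ENC_REGISTERED_PTR register_cuda_frame(NV_ENCODE_API_FUNCTION_LIST *fn,
                                                     void *encoder, CUdeviceptr frame,
                                                     uint32_t width, uint32_t height,
                                                     size_t pitch)
    {
        NV_ENC_REGISTER_RESOURCE reg;

        memset(&reg, 0, sizeof(reg));
        reg.version            = NV_ENC_REGISTER_RESOURCE_VER;
        reg.resourceType       = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR;
        reg.width              = width;
        reg.height             = height;
        reg.pitch              = (uint32_t)pitch;        /* from cuMemAllocPitch(); multiple of 4 */
        reg.resourceToRegister = (void *)(uintptr_t)frame;
        reg.bufferFormat       = NV_ENC_BUFFER_FORMAT_NV12;
        reg.bufferUsage        = NV_ENC_INPUT_IMAGE;

        if (fn->nvEncRegisterResource(encoder, &reg) != NV_ENC_SUCCESS)
            return NULL;
        return reg.registeredResource;
    }

For the video-memory output path described earlier, the same call would instead use NV_ENC_BUFFER_FORMAT_U8 with bufferUsage set to NV_ENC_OUTPUT_BITSTREAM.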
 
@@ -2936,8 +2993,10 @@ NVENCSTATUS NVENCAPI NvEncUnregisterAsyncEvent                  (void* encoder,
  * mapped resource is returned in the field NV_ENC_MAP_INPUT_RESOURCE::outputResourcePtr.
  * The NvEncodeAPI interface also returns the buffer format of the mapped resource
  * in the field NV_ENC_MAP_INPUT_RESOURCE::outbufferFmt.
- * This function provides synchronization guarantee that any graphics or compute
- * work submitted on the input buffer is completed before the buffer is used for encoding.
+ * This function provides synchronization guarantee that any graphics work submitted
+ * on the input buffer is completed before the buffer is used for encoding. This is
+ * also true for compute (i.e. CUDA) work, provided that the previous workload using
+ * the input resource was submitted to the default stream.
  * The client should not access any input buffer while they are mapped by the encoder.
  *
  * \param [in] encoder
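
In practice the relaxed CUDA guarantee means upload or pre-processing work either goes to the default stream or must be synchronized explicitly before the buffer is mapped. A minimal sketch, assuming map->version and map->registeredResource were filled in by the caller:

    #include <cuda.h>
    #include <stddef.h>
    #include "ffnvcodec/nvEncodeAPI.h"

    /* Upload on the default stream (0), then map: NvEncMapInputResource's
     * synchronization guarantee covers this copy. */
    static NVENCSTATUS upload_and_map(NV_ENCODE_API_FUNCTION_LIST *fn, void *encoder,
                                      CUdeviceptr dst, const void *src, size_t bytes,
                                      NV_ENC_MAP_INPUT_RESOURCE *map)
    {
        cuMemcpyHtoDAsync(dst, src, bytes, 0 /* default stream */);
        return fn->nvEncMapInputResource(encoder, map);
    }
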
-- 
2.15.1.windows.2

