This commit is contained in:
Sorgelig
2021-11-07 20:12:39 +08:00
parent b6f2ca1c4d
commit aba1ef4c11
10610 changed files with 600603 additions and 270114 deletions

View File

@@ -276,7 +276,17 @@ enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION,
HL_DEVICE_STATUS_NEEDS_RESET
HL_DEVICE_STATUS_NEEDS_RESET,
HL_DEVICE_STATUS_IN_DEVICE_CREATION,
HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
};
enum hl_server_type {
HL_SERVER_TYPE_UNKNOWN = 0,
HL_SERVER_GAUDI_HLS1 = 1,
HL_SERVER_GAUDI_HLS1H = 2,
HL_SERVER_GAUDI_TYPE1 = 3,
HL_SERVER_GAUDI_TYPE2 = 4
};
/* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
/**
* struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
* @sram_base_address: The first SRAM physical base address that is free to be
* used by the user.
* @dram_base_address: The first DRAM virtual or physical base address that is
* free to be used by the user.
* @dram_size: The DRAM size that is available to the user.
* @sram_size: The SRAM size that is available to the user.
* @num_of_events: The number of events that can be received from the f/w. This
* is needed so the user can what is the size of the h/w events
* array he needs to pass to the kernel when he wants to fetch
* the event counters.
* @device_id: PCI device ID of the ASIC.
* @module_id: Module ID of the ASIC for mezzanine cards in servers
* (From OCP spec).
* @first_available_interrupt_id: The first available interrupt ID for the user
* to be used when it works with user interrupts.
* @server_type: Server type that the Gaudi ASIC is currently installed in.
* The value is according to enum hl_server_type
* @cpld_version: CPLD version on the board.
* @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
* in some ASICs.
* @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
* for Goya/Gaudi only.
* @dram_enabled: Whether the DRAM is enabled.
* @cpucp_version: The CPUCP f/w version.
* @card_name: The card name as passed by the f/w.
* @dram_page_size: The DRAM physical page size.
*/
struct hl_info_hw_ip_info {
__u64 sram_base_address;
__u64 dram_base_address;
__u64 dram_size;
__u32 sram_size;
__u32 num_of_events;
__u32 device_id; /* PCI Device ID */
__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
__u32 device_id;
__u32 module_id;
__u32 reserved;
__u16 first_available_interrupt_id;
__u16 reserved2;
__u16 server_type;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
__u64 reserved3;
__u64 reserved2;
__u64 dram_page_size;
};
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
__u64 cb_handle;
/* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set.
* HL_CS_FLAGS_COLLECTIVE_WAIT is set
* This holds address of array of u64 values that contain
* signal CS sequence numbers. The wait described by this job
* will listen on all those signals (wait event per signal)
* signal CS sequence numbers. The wait described by
* this job will listen on all those signals
* (wait event per signal)
*/
__u64 signal_seq_arr;
/*
* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set
* along with HL_CS_FLAGS_ENCAP_SIGNALS.
* This is the CS sequence which has the encapsulated signals.
*/
__u64 encaps_signal_seq;
};
/* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
* Number of entries in signal_seq_arr
*/
__u32 num_signal_seq_arr;
/* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set along
* with HL_CS_FLAGS_ENCAP_SIGNALS
* This set the signals range that the user want to wait for
* out of the whole reserved signals range.
* e.g if the signals range is 20, and user don't want
* to wait for signal 8, so he set this offset to 7, then
* he call the API again with 9 and so on till 20.
*/
__u32 encaps_signal_offset;
};
/* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
/*
* The encapsulated signals CS is merged into the existing CS ioctls.
* In order to use this feature need to follow the below procedure:
* 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
* the output of this API will be the SOB offset from CFG_BASE.
* this address will be used to patch CB cmds to do the signaling for this
* SOB by incrementing it's value.
* for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
* CS type, note that this might fail if out-of-sync happened to the SOB
* value, in case other signaling request to the same SOB occurred between
* reserve-unreserve calls.
* 2. Use the staged CS to do the encapsulated signaling jobs.
* use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
* along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
* field. This offset allows app to wait on part of the reserved signals.
* 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
* to wait for the encapsulated signals.
*/
#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800
#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
@@ -690,10 +774,35 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
/* Sequence number of a staged submission CS
* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
*/
__u64 seq;
union {
/*
* Sequence number of a staged submission CS
* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
* HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
*/
__u64 seq;
/*
* Encapsulated signals handle id
* Valid for two flows:
* 1. CS with encapsulated signals:
* when HL_CS_FLAGS_STAGED_SUBMISSION and
* HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
* and HL_CS_FLAGS_ENCAP_SIGNALS are set.
* 2. unreserve signals:
* valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
*/
__u32 encaps_sig_handle_id;
/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
struct {
/* Encapsulated signals number */
__u32 encaps_signals_count;
/* Encapsulated signals queue index (stream) */
__u32 encaps_signals_q_idx;
};
};
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
};
struct hl_cs_out {
/*
* seq holds the sequence number of the CS to pass to wait ioctl. All
* values are valid except for 0 and ULLONG_MAX
*/
__u64 seq;
/* HL_CS_STATUS_* */
union {
/*
* seq holds the sequence number of the CS to pass to wait
* ioctl. All values are valid except for 0 and ULLONG_MAX
*/
__u64 seq;
/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
struct {
/* This is the resereved signal handle id */
__u32 handle_id;
/* This is the signals count */
__u32 count;
};
};
/* HL_CS_STATUS */
__u32 status;
__u32 pad;
/*
* SOB base address offset
* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
*/
__u32 sob_base_addr_offset;
};
union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
struct hl_wait_cs_in {
union {
struct {
/* Command submission sequence number */
/*
* In case of wait_cs holds the CS sequence number.
* In case of wait for multi CS hold a user pointer to
* an array of CS sequence numbers
*/
__u64 seq;
/* Absolute timeout to wait for command submission
* in microseconds
@@ -767,19 +900,23 @@ struct hl_wait_cs_in {
/* Context ID - Currently not in use */
__u32 ctx_id;
/* HL_WAIT_CS_FLAGS_*
* If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
* interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
* not to specify an interrupt id ,set mask to all 1s.
*/
__u32 flags;
/* Multi CS API info- valid entries in multi-CS array */
__u8 seq_arr_len;
__u8 pad[7];
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
#define HL_WAIT_CS_STATUS_BUSY 1
#define HL_WAIT_CS_STATUS_TIMEDOUT 2
#define HL_WAIT_CS_STATUS_ABORTED 3
#define HL_WAIT_CS_STATUS_INTERRUPTED 4
#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1
#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2
@@ -789,8 +926,15 @@ struct hl_wait_cs_out {
__u32 status;
/* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
/*
* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
* for wait_cs: timestamp of CS completion
* for wait_multi_cs: timestamp of FIRST CS completion
*/
__s64 timestamp_nsec;
/* multi CS completion bitmap */
__u32 cs_completion_map;
__u32 pad;
};
union hl_wait_cs_args {
@@ -813,6 +957,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
#define HL_MEM_FORCE_HINT 0x8
struct hl_mem_in {
union {
@@ -1140,7 +1285,8 @@ struct hl_debug_args {
* EIO - The CS was aborted (usually because the device was reset)
* ENODEV - The device wants to do hard-reset (so user need to close FD)
*
* The driver also returns a custom define inside the IOCTL which can be:
* The driver also returns a custom define in case the IOCTL call returned 0.
* The define can be one of the following:
*
* HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0)
* HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0)
@@ -1148,8 +1294,6 @@ struct hl_debug_args {
* (ETIMEDOUT)
* HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the
* device was reset (EIO)
* HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
*
*/
#define HL_IOCTL_WAIT_CS \