o
    ؾg                  
   @   sz  d dl mZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ d dlmZ d dlmZmZmZ d dlm Z m!Z! e
 Z"e"j#Z#e"j$Z%e"j&Z&dd Z'e%e(eddd Z)e%e(eddd Z*e%e(eddd Z+e%e(eddd Z,e%e(eddd Z-e%e d d!d" Z.e%e d#d$d% Z/e%e d&d'd( Z0e#ej1j2ej3d)d* Z4d a5d+d, Z6e#ej7j8ej9ej:d-d. Z;e#ej7j8ej<ej:e#ej7j8ej=ej:d/d0 Z>e#ej?j8ej9ej:d1d2 Z@e#ej?j8ej<ej:e#ej?j8ej=ej:d3d4 ZAe#ejBd5d6 ZCe#ejDd7d8 ZEe#ejFd9d: ZGe#ejHd;d< ZIe#ejHejJd=d> ZKe#ejLejJejJejJejJejJe#ejLejJejJejMejJejJe#ejLejJejJejNejJejJe#ejLejJejJejOejJejJd?d@ ZPe#ejQejJejJejRdAdB ZSe#ejTejJejJe#ejTejJejMe#ejTejJejNe#ejTejJejOdCdD ZUe#ejVejJejJe#ejVejJejMe#ejVejJejNe#ejVejJejOdEdF ZWe#ejXdGdH ZYe#ejZdIdJ Z[e#ej\ej:dKdL Z]e#ej^ej:ej:ej:dMdN Z_dOdP Z`eejaejbdQdR ZceejbejadSdT ZddUdV ZeeejaejfdWdX Zgeejfejaeej9ejadYdZ Zhd[d\ Zieiejjjkd] eiejld] eiejmd] eiejjjnd^ eiejod^ eiejpd^ eiejjjqd_ eiejrd_ eiejsd_ e#ejjjtejad`da Zue#ejvejadbdc Zwe#ejjjxejaddde Zye#ezejadfdg Z{e#ejjj|ejaejaejadhdi Z}e#ej~ejaejae#ejejaejadjdk ZdlZdmdn Ze#ejjjejaejaedo e#ejejaejaedo e#ejjjejaejaedp e#ejejaejaedp e#ejjjejaejaedq e#ejejaejaedq e#ejjjejaejaedr e#ejejaejaedr e#ejjjejaejaeds e#ejejaejaeds e#ejjjejaejaedt e#ejejaejaedt dudv Zeejjjdwdr eejjjdxdt ejdyejdziZe#ejeje#ejejd{d| Ze#ejejd}d~ Ze#ejejdd Ze#ejej:dd Ze#ejejJe#ejejdd Ze#ejejMe#ejejdd Ze#ejej:ej:ej:dd Ze#eejNejNdd Ze#eejOejNe#eejNejOe#eejOejOdd Ze#eejNejNdd Ze#eejOejNe#eejNejOe#eejOejOdd Ze#eejNe#eejOdd Ze#eejNejfe#eejOejfdd Zdd Zejd Zdej Ze#ejejNee e#ejejOee e#ejejNee e#ejejOee dd Zdd Ze#ejjlej3ejej:e#ejjlej3ej=ej:e#ejjlej3ej<ej:edd Ze#ejjoej3ejej:e#ejjoej3ej=ej:e#ejjoej3ej<ej:edd Ze#ejjej3ejej:e#ejjej3ej=ej:e#ejjej3ej<ej:edd Ze#ejjej3ejej:e#ejjej3ej=ej:e#ejjej3ej<ej:edd Zdd Zeejjd eejjd eejjd e#ejjej3ejej:e#ejjej3ej=ej:e#ejjej3ej<ej:edd Ze#ejjej3ejej:e#ejjej3ej<ej:e#ejjej3ej=ej:edd Ze#ejjej3ejej:e#ejjej3ej<ej:e#ejjej3ej=ej:edd Ze#ejjej3ejej:e#ejjej3ej<ej:e#ejjej3ej=ej:edd Ze#ejjej3ejej:e#ejjej3ej<ej:e#ejjej3ej=ej:edd Ze#ejjej3ej:ej:dd Ze#ejjej3ejej:ej:e#ejjej3ej<ej:ej:e#ejjej3ej=ej:ej:dd Ze#ejej̓dd Z	dddZe&e!dd Zeeѡ e# dS )    )reduceN)ir)Registry
lower_cast)parse_dtype)models)typescgutils)ufunc_db)register_ufuncs   )nvvm)cuda)	nvvmutilsstubserrors)dim3CUDADispatcherc                 C   sB   t | d| }t | d| }t | d| }t| |||fS )Nz%s.xz%s.yz%s.z)r   	call_sregr	   pack_struct)builderprefixxyz r   ]/var/www/html/backend_erp/backend_erp_env/lib/python3.10/site-packages/numba/cuda/cudaimpl.pyinitialize_dim3   s   r   	threadIdxc                 C   
   t |dS )Ntidr   contextr   sigargsr   r   r   cuda_threadIdx       
r&   blockDimc                 C   r   )Nntidr!   r"   r   r   r   cuda_blockDim%   r'   r*   blockIdxc                 C   r   )Nctaidr!   r"   r   r   r   cuda_blockIdx*   r'   r-   gridDimc                 C   r   )Nnctaidr!   r"   r   r   r   cuda_gridDim/   r'   r0   laneidc                 C   s   t |dS )Nr1   )r   r   r"   r   r   r   cuda_laneid4      r2   r   c                 C      | |dS Nr   extract_valuer"   r   r   r   dim3_x9   r3   r8   r   c                 C   r4   )Nr   r6   r"   r   r   r   dim3_y>   r3   r9   r   c                 C   r4   )N   r6   r"   r   r   r   dim3_zC   r3   r;   c                 C   s   |d S r5   r   r"   r   r   r   cuda_const_array_likeJ   s   r<   c                 C   s   t d7 a d| t S )zDue to bug with NVVM invalid internalizing of shared memory in the
    PTX output.  We can't mark shared memory to be internal. We have to
    ensure unique name is generated for shared memory symbol.
    r   z{0}_{1})_unique_smem_idformatnamer   r   r   _get_unique_smem_idT   s   rA   c              	   C   s8   |j d j}t|j d }t| ||f|tdtjddS )Nr   r   _cudapy_smemTshapedtypesymbol_name	addrspacecan_dynsized)r%   literal_valuer   _generic_arrayrA   r   ADDRSPACE_SHAREDr#   r   r$   r%   lengthrE   r   r   r   cuda_shared_array_integer^   s   rN   c              	   C   s>   dd |j d D }t|j d }t| |||tdtjddS )Nc                 S      g | ]}|j qS r   rI   .0sr   r   r   
<listcomp>k       z+cuda_shared_array_tuple.<locals>.<listcomp>r   r   rB   TrC   )r%   r   rJ   rA   r   rK   r#   r   r$   r%   rD   rE   r   r   r   cuda_shared_array_tupleh   s   
rW   c              	   C   s4   |j d j}t|j d }t| ||f|dtjddS )Nr   r   _cudapy_lmemFrC   )r%   rI   r   rJ   r   ADDRSPACE_LOCALrL   r   r   r   cuda_local_array_integers   s   rZ   c              	   C   s:   dd |j d D }t|j d }t| |||dtjddS )Nc                 S   rO   r   rP   rQ   r   r   r   rT      rU   z(ptx_lmem_alloc_array.<locals>.<listcomp>r   r   rX   FrC   )r%   r   rJ   r   rY   rV   r   r   r   ptx_lmem_alloc_array}   s   
r[   c                 C   D   |rJ d}|j }tt d}t|||}||d |  S )Nzllvm.nvvm.membar.ctar   moduler   FunctionTypeVoidTyper	   get_or_insert_functioncallget_dummy_valuer#   r   r$   r%   fnamelmodfntysyncr   r   r   ptx_threadfence_block      ri   c                 C   r\   )Nzllvm.nvvm.membar.sysr   r]   rd   r   r   r   ptx_threadfence_system   rj   rk   c                 C   r\   )Nzllvm.nvvm.membar.glr   r]   rd   r   r   r   ptx_threadfence_device   rj   rl   c                 C   s*   |  tjd}ttj}t| |||gS )Nl    )get_constantr   int32noneptx_syncwarp_mask)r#   r   r$   r%   maskmask_sigr   r   r   ptx_syncwarp   s   rs   c                 C   sD   d}|j }tt tdf}t|||}||| |  S )Nzllvm.nvvm.bar.warp.sync    )	r^   r   r_   r`   IntTyper	   ra   rb   rc   rd   r   r   r   rp      s   rp   c              
   C   s  |\}}}}}|j d }	|	tjv r||t|	j}d}
|j}tt	tdtdftdtdtdtdtdf}t
|||
}|	jdkr|||||||f}|	tjkr}||d}||d}||t }t
|||f}|S ||td}||| tjd}||td}|||||||f}|||||||f}||d}||d}||d}||td}||td}||| tjd}|||}|	tjkr||t }t
|||f}|S )a  
    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
    function supports both 32 and 64 bit ints and floats, so for feature parity,
    i64, f32, and f64 are implemented. Floats by way of bitcasting the float to
    an int, then shuffling, then bitcasting back. And 64-bit values by packing
    them into 2 32bit values, shuffling thoose, and then packing back together.
    r:   zllvm.nvvm.shfl.sync.i32rt   r   r   @   )r%   r   real_domainbitcastr   ru   bitwidthr^   r_   LiteralStructTyper	   ra   rb   float32r7   	FloatTypemake_anonymous_structtrunclshrrm   i8zextshlor_float64
DoubleType)r#   r   r$   r%   rq   modevalueindexclamp
value_typere   rf   rg   funcretrvpredfvvalue1
value_lshrvalue2ret1ret2rv1rv2rv1_64rv2_64rv_shlr   r   r   ptx_shfl_sync_i32   sJ   




r   c                 C   s^   d}|j }tttdtdftdtdtdf}t|||}|||S )Nzllvm.nvvm.vote.syncrt   r   )r^   r   r_   rz   ru   r	   ra   rb   )r#   r   r$   r%   re   rf   rg   r   r   r   r   ptx_vote_sync   s   r   c                 C   s   |\}}|j d j}|j d tjv r||t|}d|}|j}t	tdtdt|f}	t
||	|}
||
||fS )Nr   zllvm.nvvm.match.any.sync.i{}rt   )r%   ry   r   rw   rx   r   ru   r>   r^   r_   r	   ra   rb   r#   r   r$   r%   rq   r   widthre   rf   rg   r   r   r   r   ptx_match_any_sync   s   
"r   c                 C   s   |\}}|j d j}|j d tjv r||t|}d|}|j}t	t
tdtdftdt|f}	t||	|}
||
||fS )Nr   zllvm.nvvm.match.all.sync.i{}rt   )r%   ry   r   rw   rx   r   ru   r>   r^   r_   rz   r	   ra   rb   r   r   r   r   ptx_match_all_sync  s   
r   c                 C   ,   t jt t dg dddd}||g S )Nrt   zactivemask.b32 $0;=rTside_effectr   	InlineAsmr_   ru   rb   r#   r   r$   r%   
activemaskr   r   r   ptx_activemask  s   r   c                 C   r   )Nrt   zmov.u32 $0, %lanemask_lt;r   Tr   r   r   r   r   r   ptx_lanemask_lt$  s
   r   c                 C   s   | |d S r5   )ctpopr"   r   r   r   ptx_popc,     r   c                 C   s
   |j | S N)fmar"   r   r   r   ptx_fma1  r'   r   c                 C   s:   ddd}z||  W S  t y   d|  d}t|w )N)f32f)f64d)rt   rv   z$Conversion between float16 and float unsupportedKeyErrorr   CudaLoweringErrorry   typemapmsgr   r   r   float16_float_ty_constraint6  s   


r   c           	      C   sd   |j |j kr|S t|j \}}t| |tdg}t|d| dd| d}|||gS )N   zcvt..f16 $0, $1;=,h)ry   r   r   r_   get_value_typeru   r   rb   	r#   r   fromtytotyvalty
constraintrg   asmr   r   r   float16_to_float_cast@  s   r   c           	      C   sb   |j |j kr|S t|j \}}ttd| |g}t|d| dd| }|||gS )Nr   cvt.rn.f16. $0, $1;=h,)ry   r   r   r_   ru   r   r   rb   r   r   r   r   float_to_float16_castL  s   r   c                 C   s>   ddddd}z||  W S  t y   d|  d}t|w )Nchrl)   r   rt   rv   z"Conversion between float16 and intr   r   r   r   r   r   float16_int_constraintX  s   

r   c           
      C   sf   |j }t|}|jrdnd}t| |tdg}t|d| | dd| d}	||	|gS )NrS   ur   zcvt.rni.r   r   r   )	ry   r   signedr   r_   r   ru   r   rb   
r#   r   r   r   r   ry   r   
signednessrg   r   r   r   r   float16_to_integer_castb  s   
r   c           
      C   sd   |j }t|}|jrdnd}ttd| |g}t|d| | dd| }	||	|gS )NrS   r   r   r   r   r   )	ry   r   r   r   r_   ru   r   r   rb   r   r   r   r   integer_to_float16_casto  s   
r   c                    s    t | tjtj fdd}d S )Nc                    sB   t t dt dt dg}t |  dd}|||S )Nr   z.f16 $0,$1,$2;=h,h,hr   r_   ru   r   rb   r#   r   r$   r%   rg   r   opr   r   ptx_fp16_binary  s
   z*lower_fp16_binary.<locals>.ptx_fp16_binarylowerr   float16)fnr   r   r   r   r   lower_fp16_binary~     r   addsubmulc                 C   4   t t dt dg}t |dd}|||S )Nr   zneg.f16 $0, $1;=h,hr   r   r   r   r   ptx_fp16_hneg     r   c                 C      t | |||S r   )r   r"   r   r   r   operator_hneg  r   r   c                 C   r   )Nr   zabs.f16 $0, $1;r   r   r   r   r   r   ptx_fp16_habs  r   r   c                 C   r   r   )r   r"   r   r   r   operator_habs  r   r   c                 C   sH   t dt dt dg}t t d|}t |dd}|||S )Nr   zfma.rn.f16 $0,$1,$2,$3;z=h,h,h,h)r   ru   r_   r   rb   )r#   r   r$   r%   argtysrg   r   r   r   r   ptx_hfma  s   r   c                 C      dd }|  ||||S )Nc                 S   s   t j| |S r   )r   fp16hdiv)r   r   r   r   r   fp16_div  s   zfp16_div_impl.<locals>.fp16_divcompile_internal)r#   r   r$   r%   r   r   r   r   fp16_div_impl  s   r   z{{
          .reg .pred __$$f16_cmp_tmp;
          setp.{op}.f16 __$$f16_cmp_tmp, $1, $2;
          selp.u16 $0, 1, 0, __$$f16_cmp_tmp;
        }}c                        fdd}|S )Nc           	         sr   t t dt dt dg}t |tj dd}|||}| tj	d}|
|t d}|d||S )Nr   r   r   r   z!=)r   r_   ru   r   	_fp16_cmpr>   rb   rm   r   int16rx   icmp_unsigned)	r#   r   r$   r%   rg   r   resultzero
int_resultr   r   r   ptx_fp16_comparison  s   "z*_gen_fp16_cmp.<locals>.ptx_fp16_comparisonr   )r   r   r   r   r   _gen_fp16_cmp  s   r  eqnegegtleltc                    s    t | tjtj fdd}d S )Nc                    s(   t  | |||}|||d |d S )Nr   r   )r  select)r#   r   r$   r%   choicer   r   r   ptx_fp16_minmax  s   z*lower_fp16_minmax.<locals>.ptx_fp16_minmaxr   )r   re   r   r
  r   r   r   lower_fp16_minmax  r   r  maxmin
__nv_cbrtf	__nv_cbrtc           
      C   sF   |j }t| }| |}|j}t||g}t|||}	||	|S r   )	return_type
cbrt_funcsr   r^   r   r_   r	   ra   rb   )
r#   r   r$   r%   r   re   ftyrf   rg   r   r   r   r   ptx_cbrt  s   
r  c              	   C   2   t |jttdtdfd}|||S )Nrt   	__nv_brevr	   ra   r^   r   r_   ru   rb   r#   r   r$   r%   r   r   r   r   ptx_brev_u4     r  c              	   C   r  )Nrv   __nv_brevllr  r  r   r   r   ptx_brev_u8	  r  r  c                 C   s   | |d | tjdS r5   )ctlzrm   r   booleanr"   r   r   r   ptx_clz  s   r  c              	   C   r  )Nrt   __nv_ffsr  r  r   r   r   
ptx_ffs_32     r   c              	   C   s2   t |jttdtdfd}|||S )Nrt   rv   
__nv_ffsllr  r  r   r   r   
ptx_ffs_64&  r!  r#  c                 C   s   |\}}}| |||S r   )r  )r#   r   r$   r%   testabr   r   r   ptx_selp0  s   
r'  c              	   C   4   t |jtt t t fd}|||S )N
__nv_fmaxfr	   ra   r^   r   r_   r|   rb   r  r   r   r   
ptx_max_f46     r+  c              
   C   h   t |jtt t t fd}||| ||d |jd t	j
| ||d |jd t	j
gS )N	__nv_fmaxr   r   r	   ra   r^   r   r_   r   rb   castr%   r   doubler  r   r   r   
ptx_max_f8A     r2  c              	   C   r(  )N
__nv_fminfr*  r  r   r   r   
ptx_min_f4R  r,  r5  c              
   C   r-  )N	__nv_fminr   r   r/  r  r   r   r   
ptx_min_f8]  r3  r7  c              	   C   sJ   t |jttdt fd}||| ||d |j	d t
jgS )Nrv   __nv_llrintr   )r	   ra   r^   r   r_   ru   r   rb   r0  r%   r   r1  r  r   r   r   	ptx_roundn  s   r9  c                 C   r   )Nc                 S   s   t | s
t | r| S |dkr1|dkrd|d  }d}nd| }d}| | | }t |r0| S n	d|  }| | }t|}t || dkrOdt|d  }|dkr[|| | }|S ||9 }|S )Nr      g      $@gMDg      ?g      ?g       @)mathisinfisnanroundfabs)r   ndigitspow1pow2r   r   r   r   r   round_ndigits  s,   

z$round_to_impl.<locals>.round_ndigitsr   )r#   r   r$   r%   rC  r   r   r   round_to_impl  s   !rD  c                    r   )Nc                    s$   |j \}| | }|||d S r5   )r%   rm   fmul)r#   r   r$   r%   argtyfactorconstr   r   impl  s   zgen_deg_rad.<locals>.implr   )rI  rJ  r   rH  r   gen_deg_rad  s   rK  g     f@c                    s   |t jv rt j|dd}|g}n
tj |t|d} fddt||D }|j}||kr6td||f |j	t|krHtd|j	t|f ||fS )z4
    Convert integer indices into tuple of intp
    r   )rE   count)rL  c                    s"   g | ]\}}  ||tjqS r   )r0  r   intp)rR   tir   r#   r   r   rT     s    z&_normalize_indices.<locals>.<listcomp>zexpect %s but got %sz#indexing %d-D array with %d-D index)
r   integer_domainUniTupler	   unpack_tuplelenziprE   	TypeErrorndim)r#   r   indtyindsarytyvaltyindicesrE   r   rP  r   _normalize_indices  s   
r]  c                    r   )Nc                    sj   |j \}}}|\}}}	|j}
t| |||||\}}| || ||}tj| ||||dd} | ||
||	S )NT
wraparound)r%   rE   r]  
make_arrayr	   get_item_pointer)r#   r   r$   r%   rZ  rX  r[  aryrY  r   rE   r\  laryptrdispatch_fnr   r   imp  s   

z_atomic_dispatcher.<locals>.impr   )rf  rg  r   re  r   _atomic_dispatcher  s   rh  c                 C   \   |t jkr|j}|t|||fS |t jkr&|j}|t|||fS |d||dS )Nr   	monotonic)	r   r{   r^   rb   r   declare_atomic_add_float32r   declare_atomic_add_float64
atomic_rmwr#   r   rE   rd  r   rf   r   r   r   ptx_atomic_add_tuple     

ro  c                 C   ri  )Nr   rj  )	r   r{   r^   rb   r   declare_atomic_sub_float32r   declare_atomic_sub_float64rm  rn  r   r   r   ptx_atomic_sub  rp  rs  c                 C   L   |t jjv r|j}|j}ttd| }|||||fS td| d)Ndeclare_atomic_inc_intzUnimplemented atomic inc with  array	r   cudadeclunsigned_int_numba_typesry   r^   getattrr   rb   rV  r#   r   rE   rd  r   bwrf   r   r   r   r   ptx_atomic_inc     r}  c                 C   rt  )Ndeclare_atomic_dec_intzUnimplemented atomic dec with rv  rw  r{  r   r   r   ptx_atomic_dec  r~  r  c                    s@   t  fdd}tjtjtjfD ]}t| tj|tj| qd S )Nc                    s2   |t jjv r| ||dS td  d| d)Nrj  zUnimplemented atomic z with rv  r   rx  integer_numba_typesrm  rV  r#   r   rE   rd  r   r   r   r   impl_ptx_atomic  s   z+ptx_atomic_bitwise.<locals>.impl_ptx_atomic)rh  r   rM  rR  Tupler   ArrayAny)stubr   r  r   r   r   r   ptx_atomic_bitwise  s
   r  andorxorc                 C   s,   |t jjv r|d||dS td| d)Nxchgrj  zUnimplemented atomic exch with rv  r  r  r   r   r   ptx_atomic_exch/  s   r  c                 C      |j }|tjkr|t|||fS |tjkr#|t|||fS |tjtj	fv r4|j
d||ddS |tjtjfv rE|j
d||ddS td| Nr  rj  orderingumaxz&Unimplemented atomic max with %s array)r^   r   r   rb   r   declare_atomic_max_float64r{   declare_atomic_max_float32rn   int64rm  uint32uint64rV  rn  r   r   r   ptx_atomic_max:     

r  c                 C   r  Nr  rj  r  uminz&Unimplemented atomic min with %s array)r^   r   r   rb   r   declare_atomic_min_float64r{   declare_atomic_min_float32rn   r  rm  r  r  rV  rn  r   r   r   ptx_atomic_minN  r  r  c                 C   r  r  )r^   r   r   rb   r   declare_atomic_nanmax_float64r{   declare_atomic_nanmax_float32rn   r  rm  r  r  rV  rn  r   r   r   ptx_atomic_nanmaxb  r  r  c                 C   r  r  )r^   r   r   rb   r   declare_atomic_nanmin_float64r{   declare_atomic_nanmin_float32rn   r  rm  r  r  rV  rn  r   r   r   ptx_atomic_nanminv  r  r  c                 C   sT   | |jd tj|jd |jd }|d | tjd|d |d f}t| |||S )Nr   r   r:   )r  r%   r   rM  rm   ptx_atomic_casr"   r   r   r   ptx_atomic_compare_and_swap  s   $"r  c                 C   s   |j \}}}}|\}}	}
}t| |||	||\}}| || ||}tj| ||||dd}|jtjjv rD|j	}|jj
}t|||||
|S td|j )NTr^  z&Unimplemented atomic cas with %s array)r%   r]  r`  r	   ra  rE   r   rx  r  r^   ry   r   atomic_cmpxchgrV  )r#   r   r$   r%   rZ  rX  oldtyr[  rb  rY  oldr   r\  rc  rd  rf   ry   r   r   r   r    s   r  c                 C   s@   t jt t  t dgdddd}|d }|||g d S )Nrt   znanosleep.u32 $0;r   Tr   r   )r   r   r_   r`   ru   rb   )r#   r   r$   r%   	nanosleepnsr   r   r   ptx_nanosleep  s
   r  Fc               	      sb  t tj|d}|dko|ot|dk}|dkr|std j| }	t|tjtj	fp5t|	t
jp5|tjk}
|tjvrC|
sCtd|  |}t||}|tjkr\tj|||d}n4|j}t||||} |}d|d  > |_|r{d|_nt|tj|_||t t!dd}t"#t$ j%} |}|&|}|}g }t't(|D ]\}}|)| ||9 }qd	d
 t(|D } fdd
|D }|rtj*t+t!dg dddd}|,|-|g t!d} .tj/|}|0||g}n	 fdd
|D }t|}tj1||dd} 2| |} j3||4||j5j6|| .tj/|d d |7 S )Nr   r   zarray length <= 0zunsupported type: %sr?   externalr   genericc                 S   s   g | ]}|qS r   r   rQ   r   r   r   rT     s    z"_generic_array.<locals>.<listcomp>c                       g | ]	}  tj|qS r   rm   r   rM  rQ   r#   r   r   rT         rt   zmov.u32 $0, %dynamic_smem_size;r   Tr   rv   c                    r  r   r  rQ   r  r   r   rT     r  C)rE   rW  layout)datarD   stridesitemsizememinfo)8r   operatorr   rT  
ValueErrordata_model_manager
isinstancer   RecordBooleanr   StructModelr   number_domainrV  get_data_typer   	ArrayTyper   rY   r	   alloca_oncer^   add_global_variableget_abi_sizeof
bit_lengthalignlinkageConstant	UndefinedinitializeraddrspacecastPointerTyperu   llcreate_target_dataNVVMdata_layoutget_abi_size	enumeratereversedappendr   r_   r   rb   rm   rM  udivr  r`  populate_arrayrx   r  type	_getvalue) r#   r   rD   rE   rF   rG   rH   	elemcountdynamic_smem
data_modelother_supported_typelldtypelarytydataptrrf   gvmemr  
targetdatar  
laststriderstridesrO  lastsizer  kstridesget_dynshared_sizedynsmem_size	kitemsizekshaperW  rZ  rb  r   r  r   rJ     sx   









rJ   c                 C   s   |   S r   )rc   )r#   r   r   pyvalr   r   r   cuda_dispatcher_const  s   r  )F)	functoolsr   r  r;  llvmliter   llvmlite.bindingbindingr  numba.core.imputilsr   r   numba.core.typing.npydeclr   numba.core.datamodelr   
numba.corer   r	   numba.npr
   numba.np.npyimplr   cudadrvr   numbar   
numba.cudar   r   r   numba.cuda.typesr   r   registryr   lower_getattr
lower_attrlower_constantr   Moduler&   r*   r-   r0   r2   r8   r9   r;   rI  
array_liker  r<   r=   rA   sharedarrayIntegerLiteralr  rN   r  rR  rW   localrZ   r[   threadfence_blockri   threadfence_systemrk   threadfencerl   syncwarprs   i4rp   shfl_sync_intrinsicr   f4f8r   vote_sync_intrinsicr  r   match_any_syncr   match_all_syncr   r   r   lanemask_ltr   popcr   r   r   r   r   Floatr   r   r   Integerr   r   r   r   haddr   iaddhsubr   isubhmulr   imulhnegr   negr   habsr   absr   hfmar   truedivitruedivr   r   r  heqr  hner  hger  hgtr  hler  hltr  r  hmaxhminr{   r   r  cbrtr  brevu4r  u8r  clzr  ffsr   r#  selpr'  r  r+  r2  r  r5  r7  r>  r9  rD  rK  pi_deg2rad_rad2degradiansdegreesr]  rh  atomicrM  ro  rs  incr}  decr  r  and_r   r  exchr  r  r  nanmaxr  nanminr  compare_and_swapr  casr  r  r  r  rJ   r  
get_ufuncsr   r   r   r   <module>   s   










		
		







	.










	
















%






d
