
    i?1i	>                     ~   d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddl m!Z! ddl m"Z"  ejF                  e$      Z% G d de&      Z' G d d      Z( G d de(      Z) G d de(      Z* G d de*      Z+ G d de(      Z, G d de,      Z- G d de)      Z. G d d e,      Z/ G d! d"e)      Z0 G d# d$e,      Z1 G d% d&      Z2 G d' d(ee         Z3d)ee4   d*dfd+Z5e$d,k(  r e5ejl                         yy)-a   Adobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on the Adobe website:

  http://opensource.adobe.com/wiki/display/cmap/CMap+Resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalTextIOTupleUnioncastSet   )name2unicode)KWD)PSEOF)	PSKeyword)	PSLiteral)PSStackParser)PSSyntaxError)literal_name)choplist)nunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     Y/home/www/therecruiter.miabetepe.com/venv/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   1   s    r"   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eeee
f   ddfdZddZd
edee
   fdZy)CMapBaser   kwargsreturnNc                 .    |j                         | _        y N)copyattrsselfr&   s     r#   __init__zCMapBase.__init__9   s    28++-
r"   c                 @    | j                   j                  dd      dk7  S )NWModer   r+   getr-   s    r#   is_verticalzCMapBase.is_vertical<   s    zz~~gq)Q..r"   kvc                 "    || j                   |<   y r)   )r+   )r-   r5   r6   s      r#   set_attrzCMapBase.set_attr?   s    

1r"   codecidc                      y r)   r!   )r-   r9   r:   s      r#   add_code2cidzCMapBase.add_code2cidB       r"   c                      y r)   r!   r-   r:   r9   s      r#   add_cid2unichrzCMapBase.add_cid2unichrE   r=   r"   c                      y r)   r!   )r-   cmaps     r#   use_cmapzCMapBase.use_cmapH   r=   r"   c                     t         r)   )NotImplementedError)r-   r9   s     r#   decodezCMapBase.decodeK   s    !!r"   )rB   r%   r'   N)r   r   r    debugobjectr.   boolr4   strr8   intr<   r   r   bytesr@   rC   r   rF   r!   r"   r#   r%   r%   5   s    E@ @D @/T /# & T  3 4 # U9eS3H-I d "5 "Xc] "r"   r%   c            	           e Zd Zdeeef   ddfdZdefdZdeddfdZ	de
dee   fd	Zej                  dd
fdedeeeef      deedf   ddfdZy)CMapr&   r'   Nc                 >    t        j                  | fi | i | _        y r)   )r%   r.   code2cidr,   s     r#   r.   zCMap.__init__P   s    $)&)+-r"   c                 >    d| j                   j                  d      z  S )Nz
<CMap: %s>CMapNamer1   r3   s    r#   __repr__zCMap.__repr__T   s    djjnnZ888r"   rB   c                     t        |t              sJ t        t        |                   dt        t
        t        f   dt        t
        t        f   dd ffd | j                  |j                         y )Ndstsrcr'   c                     |j                         D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r)   )items
isinstancedict)rU   rV   r5   r6   dr*   s        r#   r*   zCMap.use_cmap.<locals>.copyZ   sF    ))+ Aa&+-ACFAJCFr"   )rY   rN   rJ   typer   rK   rH   rP   )r-   rB   r*   s     @r#   rC   zCMap.use_cmapW   s`    $%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r"   r9   c              #     K   t         j                  d| |       | j                  }t        |      D ]V  }||v rD||   }t	        |t
              r| | j                  }-t        t        t
        t        f   |      }K| j                  }X y w)Nzdecode: %r, %r)	logrG   rP   iterrY   rK   r   r   rH   )r-   r9   r[   ixs        r#   rF   zCMap.decodee   sy     		"D$/MMd 		"AAvaDa%GAT#v+.2AMM		"s   B	Br!   outrP   .c           	         || j                   }d}t        |j                               D ]a  \  }}||fz   }t        |t              r|j                  d||fz         3| j                  |t        t        t        t        f   |      |       c y )Nr!   zcode %r = cid %d
)rb   rP   r9   )
rP   sortedrX   rY   rK   writedumpr   r   rH   )r-   rb   rP   r9   r5   r6   cs          r#   rf   z	CMap.dumps   s     }}HDX^^-. 	PFQtA!S!		.!Q78		cDc6k1BA,FQ	O	Pr"   )r   r   r    r   rJ   rK   r.   rS   r%   rC   rL   r   rF   sysstdoutr   r
   r   rH   r   rf   r!   r"   r#   rN   rN   O   s    .sCx .T .9# 9+X +$ +"5 "Xc] "  jj04 "	PP 4V,-P CHo	P
 
Pr"   rN   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapr9   r'   .c                 V    t        |      dz  }|rt        j                  d|z  |      S y)N   z>%dHr!   lenstructunpackr-   r9   ns      r#   rF   zIdentityCMap.decode   s*    IN==!T22r"   Nr   r   r    rL   r   rK   rF   r!   r"   r#   rk   rk          5 U38_ r"   rk   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapByter9   r'   .c                 P    t        |      }|rt        j                  d|z  |      S y)Nz>%dBr!   rn   rr   s      r#   rF   zIdentityCMapByte.decode   s&    I==!T22r"   Nrt   r!   r"   r#   rw   rw      ru   r"   rw   c                   j    e Zd Zdeeef   ddfdZdefdZdedefdZe	j                  fdeddfd	Zy)

UnicodeMapr&   r'   Nc                 >    t        j                  | fi | i | _        y r)   )r%   r.   
cid2unichrr,   s     r#   r.   zUnicodeMap.__init__   s    $)&)*,r"   c                 >    d| j                   j                  d      z  S )Nz<UnicodeMap: %s>rR   r1   r3   s    r#   rS   zUnicodeMap.__repr__   s    !DJJNN:$>>>r"   r:   c                 N    t         j                  d| |       | j                  |   S )Nget_unichr: %r, %r)r^   rG   r|   r-   r:   s     r#   
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r"   rb   c                     t        | j                  j                               D ]  \  }}|j                  d||fz          y )Nzcid %d = unicode %r
)rd   r|   rX   re   )r-   rb   r5   r6   s       r#   rf   zUnicodeMap.dump   s=    T__2245 	8FQII-A67	8r"   )r   r   r    r   rJ   rK   r.   rS   r   rh   ri   r   rf   r!   r"   r#   rz   rz      sX    -sCx -T -?# ?$c $c $ "% 8 8 8r"   rz   c                       e Zd ZdedefdZy)IdentityUnicodeMapr:   r'   c                 F    t         j                  d| |       t        |      S )z+Interpret character id as unicode codepointr   )r^   rG   chrr   s     r#   r   zIdentityUnicodeMap.get_unichr   s    		&c23xr"   N)r   r   r    rK   rJ   r   r!   r"   r#   r   r      s    c c r"   r   c                        e Zd ZdededdfdZy)FileCMapr9   r:   r'   Nc                 P   t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ];  }t        |      }||v r!t        t        t        t        f   ||         }3i }|||<   |}= t        |d         }|||<   y )N)	rY   rJ   rK   r\   rP   ordr   r   rH   )r-   r9   r:   r[   rg   cits          r#   r<   zFileCMap.add_code2cid   s    $$C)= 	
s$Zc#@
 	
= MMcr 	AQBQwc6k*AbE2')"	 b]"r"   )r   r   r    rJ   rK   r<   r!   r"   r#   r   r      s     3 4 r"   r   c                   ,    e Zd Zdedeeeef   ddfdZy)FileUnicodeMapr:   r9   r'   Nc                    t        |t              sJ t        t        |                   t        |t              r?t        |j
                  t              sJ t        |j
                        | j                  |<   y t        |t              r |j                  dd      | j                  |<   y t        |t              rt        |      | j                  |<   y t        |      )NzUTF-16BEignore)rY   rK   rJ   r\   r   namer   r|   rL   rF   r   	TypeErrorr?   s      r#   r@   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3#dI&dii---#/		#:DOOC e$#';;z8#DDOOC c"#&t9DOOC D/!r"   )r   r   r    rK   r   r   rL   r@   r!   r"   r#   r   r      s(    "# "U9eS3H-I "d "r"   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler'   Nc                     t         |   |       |j                  | _        |j                  rd| j
                  d<   y y N)rR   r   r0   )superr.   CODE2CIDrP   IS_VERTICALr+   )r-   r   r   	__class__s      r#   r.   zPyCMap.__init__   s:    $'"#DJJw r"   )r   r   r    rJ   r   r.   __classcell__r   s   @r#   r   r      s"    $S $# $$ $ $r"   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr'   Nc                     t         |   |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r   )r   r.   CID2UNICHR_Vr|   r+   CID2UNICHR_H)r-   r   r   r   r   s       r#   r.   zPyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr"   )r   r   r    rJ   r   rI   r.   r   r   s   @r#   r   r      s)    2S 2# 2 2$ 2 2r"   r   c                       e Zd ZU i Zeeef   ed<   i Zeee	e
   f   ed<    G d de      Zededefd       Zededefd       Zedded	edefd
       Zy)CMapDB_cmap_cache_umap_cachec                       e Zd Zy)CMapDB.CMapNotFoundNr   r!   r"   r#   CMapNotFoundr      s    r"   r   r   r'   c           	         |j                  dd      }d|z  }t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }t        j                  j                  ||      }t        j                  j                  |      sCt        j                  |      }	 t        t        |      dt        j                   |j#                                     |j%                          c S  t&        j)                  |      # |j%                          w xY w)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/rB   r!   )replacer^   rG   osenvironr2   pathjoindirname__file__existsgzipopenr\   rJ   pickleloadsreadcloser   r   )clsr   filename
cmap_paths	directoryr   gzfiles          r#   
_load_datazCMapDB._load_data   s    ||D"%!D(		-&JJNN;(>?GGLL2F;

 $ 		,I77<<	84Dww~~d#4#D	2v||FKKM/JKLLN		, %%d++ LLNs   *6E		Ec                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r0   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rk   rw   r   KeyErrorr   r   )r   r   datarB   s       r#   get_cmapzCMapDB.get_cmap   s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-dD'99s   A 	A! A!r   c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|z        }dD cg c]  }t        |||       nc c}w c}| j                   |<   | j                   |   |   S )Nzto-unicode-%s)FT)r   r   r   r   )r   r   r   r   r6   s        r#   get_unicode_mapzCMapDB.get_unicode_map  sy    	??4(22 		~~o45FS TdD!!< T Tt$X..s    	  AN)F)r   r   r    r   r   rJ   r   __annotations__r   r   r   r   r   classmethodr   r   r%   r   rI   rz   r   r!   r"   r#   r   r      s    %'Kc6k"'13Kc4--.3y  ,c ,c , ,& C H  " /3 /$ /: / /r"   r   c                   L   e Zd ZdededdfdZddZ ed      Z ed      Z	 ed	      Z
 ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      ZdededdfdZdeddfdZy)
CMapParserrB   fpr'   Nc                 j    t        j                  | |       || _        d| _        t	               | _        y )NT)r   r.   rB   _in_cmapset	_warnings)r-   rB   r   s      r#   r.   zCMapParser.__init__  s,    tR(	#&5r"   c                 D    	 | j                          y # t        $ r Y y w xY wr)   )
nextobjectr   r3   s    r#   runzCMapParser.run$  s-    	OO 	  		s    	s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 4   || j                   u rd| _        | j                          y|| j                  u rd| _        y| j                  sy|| j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y|| j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y|| j                  u r| j                          y|| j                   u r| j                          y|| j"                  u r| j                          y|| j$                  u rj| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]7  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt)        |t.              s| j-                  d	       nt1        |
      t1        |      k7  r| j-                  d
       |
dd }|dd }||k7  r| j-                  d       |
dd }|dd }t3        |      }t3        |      }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C : y|| j<                  u r| j                          y|| j>                  u ru| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]C  \  }}t)        |t*              st)        |t.              s(| j                  j;                  ||       E y|| j@                  u r| j                          y|| jB                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt1        |
      t1        |      k7  r| j-                  d       ut3        |
      }t3        |      }t)        |tD              rct1        |      ||z
  dz   k7  r| j-                  d       tG        t5        ||dz         |      D ]!  \  }}| j                  j;                  ||       # t)        |t*              sJ |dd }t3        |      }|dd }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C  y|| jH                  u r| j                          y|| jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j;                  t3        |      |       N y|| jL                  u r| j                          y|| jN                  u r| j                          y| jQ                  ||f       y# t        $ r Y yw xY w# t        $ r Y yt        j                  $ r Y yw xY wc c}}w c c}}w c c}}w c c}}w )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrm   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpoprB   r8   r   r   KEYWORD_USECMAPrC   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rY   rL   
_warn_oncerK   ro   r   rangerp   packr@   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r-   r   r   _r5   r6   cmapname__objobjs
start_byteend_byter:   start_prefix
end_prefixsvarevarstartendvlenr`   ra   r9   unicode_valuevarbaseprefixs                              r#   
do_keywordzCMapParser.do_keyword<  s0   
 D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7/74/@ 9+Xs!*e4OO$VW!(E2OO$TU!#s+OO$TUz?c(m3OO- )#2%cr]
:-OO: !"#}dm4ysU{Q/ 9A$v{{4'CTEF'KKAII,,S1Wa89;9@ D---KKMD+++)-7IRC7D7'40 8ddE*z#s/CII,,S$78 D---KKMD+++)-7IRC7D708D0A ?,Xt!*e4OO$EF!(E2OO$CDz?c(m3OO$TU
+h'dD)4yC%K!O3F /2%sQw2G.N E*]		00mDE &dE222rs)C"3<D!#2YFs8D"3;?3 ?"V[[tax%@$%HH		00A>?5?: D,,,KKMD***)-7IRC7D7'40 Adc5)ju.EII,,WS\4@A D111KKMD///KKM		3,A !  !   &&   8P 8 8J 8sC   ?Y (AY 5ZZ/ZZ	YY	Y?*Y?>Y?msgc                     || j                   vr6| j                   j                  |       d}t        j                  ||z          yy)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr^   warning)r-   r  base_msgs      r#   r   zCMapParser._warn_once  sA    dnn$NNs#/ 
 KK3' %r"   )r'   N)r   r   r    r%   r   r.   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r  rJ   r   r!   r"   r#   r   r     s   X 8   L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23U c U ) U  U n	(c 	(d 	(r"   r   argvr'   c                     ddl m}  |dt               | dd  }|D ]R  }t        |d      }t	               }t        ||      j                          |j                          |j                          T y )Nr   )warnzThe function main() from cmapdb.py will be removed in 2023. It was probably introduced for testing purposes a long time ago, and no longer relevant. Feel free to create a GitHub issue if you disagree.r   rb)	warningsr  DeprecationWarningr   r   r   r   r   rf   )r  r  argsfnamer   rB   s         r#   mainr    sq    	> 		 8D %4  "

		 r"   __main__)7__doc__r   loggingr   os.pathr   rp   rh   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   
encodingdbr   psparserr   r   r   r   r   r   r   utilsr   r   	getLoggerr   r^   	Exceptionr   r%   rN   rk   rw   rz   r   r   r   r   r   r   r   rJ   r  r  r!   r"   r#   <module>r      s<  	   	    
     %     # # "  g!		 	" "42P8 2Pj8 | 8 8" t $"Z " $T $2: 26/ 6/rA(y) A(HtCy T ( zN r"   