
    i?1i                        d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddlm!Z! ddlm"Z" ddlm#Z# ddlm$Z$ ddlm%Z% ddlm&Z& ddlm'Z' ddlm(Z( ddl)m*Z* ddl+m,Z, ddl+m-Z- ddl.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddlm5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddlm;Z; dd lm<Z< dd!lm=Z= dd"lm>Z>  ej~                  e@      ZA G d# d$e*      ZB G d% d&eB      ZC ed'e
ee5      ZD G d( d)eBeeD         ZE G d* d+eEe5         ZF G d, d-eEe5         ZG G d. d/eEe5         ZH G d0 d1eEe5         ZIy)2    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)PDFColorSpace   )utils)ImageWriter)LAParamsLTComponentTextGroupElement)LTAnno)LTChar)LTContainer)LTCurve)LTFigure)LTImageLTItem)LTLayoutContainer)LTLine)LTPage)LTRect)LTText)	LTTextBox)LTTextBoxVertical)LTTextGroup)
LTTextLine)PDFTextDevice)PDFFont)PDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)AnyIOPointMatrixRectPathSegmentmake_compat_str)apply_matrix_pt)bbox2str)enc)mult_matrixc                      e Zd ZU eed<   eed<   	 	 d%dededee	   ddfdZ
d	ededdfd
Zd	eddfdZdedededdfdZdeddfdZdededdfdZdededededee   ddfdZdededededededed edefd!Zdededefd"Zd#eddfd$Zy)&PDFLayoutAnalyzercur_itemctmNrsrcmgrpagenolaparamsreturnc                 Z    t        j                  | |       || _        || _        g | _        y N)r&   __init__r<   r=   _stackselfr;   r<   r=   s       \/home/www/therecruiter.miabetepe.com/venv/lib/python3.12/site-packages/pdfminer/converter.pyrA   zPDFLayoutAnalyzer.__init__9   s)     	tW- /1    pagec                     |j                   \  }}}}t        |||f      \  }}t        |||f      \  }}ddt        ||z
        t        ||z
        f}t        | j                  |      | _        y )Nr   )mediaboxr3   absr   r<   r9   )rD   rG   r:   x0y0x1y1rI   s           rE   
begin_pagezPDFLayoutAnalyzer.begin_pageD   sl    ==RR"3R1R"3R1Rq#b2g,BG5t{{H5rF   c                    | j                   r#J t        t        | j                                      t        | j                  t
              s#J t        t        | j                                     | j                  %| j                  j                  | j                         | xj                  dz  c_	        | j                  | j                         y )Nr   )rB   strlen
isinstancer9   r   typer=   analyzer<   receive_layout)rD   rG   s     rE   end_pagezPDFLayoutAnalyzer.end_pageK   s    ;;5C$4 55$--0J#d4==6I2JJ0==$MM!!$--0qDMM*rF   namebboxmatrixc                     | j                   j                  | j                         t        ||t	        || j
                              | _        y r@   )rB   appendr9   r   r6   r:   )rD   rX   rY   rZ   s       rE   begin_figurezPDFLayoutAnalyzer.begin_figureS   s3    4==) t[-JKrF   _c                 
   | j                   }t        | j                   t              s#J t        t	        | j                                      | j
                  j                         | _         | j                   j                  |       y r@   )r9   rS   r   rQ   rT   rB   popadd)rD   r^   figs      rE   
end_figurezPDFLayoutAnalyzer.end_figureW   sV    mm$--2LCT]]8K4LL2)#rF   streamc                 v   t        | j                  t              s#J t        t	        | j                                     t        ||| j                  j                  | j                  j                  | j                  j                  | j                  j                  f      }| j                  j                  |       y r@   )rS   r9   r   rQ   rT   r   rK   rL   rM   rN   ra   )rD   rX   rd   items       rE   render_imagezPDFLayoutAnalyzer.render_image]   s    $--2LCT]]8K4LL2]]t}}//1A1A4==CSCST

 	$rF   gstatestrokefillevenoddpathc           
         dj                  d |D              }|dd dk7  ry|j                  d      dkD  rTt        j                  d|      D ]:  }||j	                  d      |j                  d       }| j                  |||||       < y|D 	cg c]%  }	t        t        |	d   dk7  r|	d	d n|d   d	d       ' }
}	|
D cg c]  }t        | j                  |       }}|d
v rRt        |j                  |d   |d   ||||j                  |j                        }| j                  j!                  |       y|dv r|\  \  }}\  }}\  }}\  }}}|d   |d   k(  }||k(  xr ||k(  xr ||k(  xr ||k(  xs ||k(  xr ||k(  xr ||k(  xr ||k(  }|rX|rVt#        |j                  g |d   |d   ||||j                  |j                        }| j                  j!                  |       yt%        |j                  |||||j                  |j                        }| j                  j!                  |       yt%        |j                  |||||j                  |j                        }| j                  j!                  |       yc c}	w c c}w )z@Paint paths described in section 4.4 of the PDF reference manual c              3   &   K   | ]	  }|d      yw)r   N ).0xs     rE   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>o   s     +!+s   Nr   mzm[^m]+r   h>   mlmlh>   mlllhmllll      )joincountrefinditerstartend
paint_pathr   r.   r3   r:   r   	linewidthscolorncolorr9   ra   r    r   )rD   rh   ri   rj   rk   rl   shapert   subpathpraw_ptsptptslinerK   rL   rM   rN   x2y2x3y3r^   is_closed_loophas_square_coordinatesrectcurves                              rE   r   zPDFLayoutAnalyzer.paint_pathf   s    +d++!9 [[![[E2 HqwwqzAEE!H5gwGH OSIJUadckAbcFtAwrs|DG  <CCR?488R0CCC%
 $$FFMMMM	 !!$',,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' "&<!((*#a&*3q6*D MM%%d+#((E MM%%e,  $$MMMM !!%(} Ds   *I<>Jfontfontsizescalingrisecidncsgraphicstatec	                 v   	 |j                  |      }	t        |	t              sJ t        t        |	                   	 |j                  |      }
|j                  |      }t        ||||||	|
|||
      }| j                  j                  |       |j                  S # t        $ r | j                  ||      }	Y zw xY wr@   )	to_unichrrS   rQ   rT   r(   handle_undefined_char
char_width	char_dispr   r9   ra   adv)rD   rZ   r   r   r   r   r   r   r   text	textwidthtextdisprf   s                rE   render_charzPDFLayoutAnalyzer.render_char   s    	9>>#&DdC(9#d4j/9( OOC(	>>#&
 	$xx# $ 	9--dC8D	9s   :B B87B8c                 :    t         j                  d||       d|z  S )Nzundefined: %r, %rz(cid:%d))logdebug)rD   r   r   s      rE   r   z'PDFLayoutAnalyzer.handle_undefined_char   s    		%tS1CrF   ltpagec                      y r@   rp   rD   r   s     rE   rV   z PDFLayoutAnalyzer.receive_layout   s    rF   r   N) __name__
__module____qualname__r   __annotations__r/   r*   intr   r   rA   r+   rO   rW   rQ   r0   r]   rc   r,   rg   r)   boolr   r1   r   r'   floatr   r   r   r   rV   rp   rF   rE   r8   r8   5   s   	K
 '+		2#	2 	2 8$		2
 
	26w 6V 6 6+W + +L LD L& LT LC D    i  D  `)`) `) 	`)
 `) {#`) 
`)D  	
     & 
B '      V  rF   r8   c            	       L    e Zd Z	 	 d
dededee   ddfdZdeddfdZ	defd	Z
y)PDFPageAggregatorNr;   r<   r=   r>   c                 D    t         j                  | |||       d | _        y N)r<   r=   )r8   rA   resultrC   s       rE   rA   zPDFPageAggregator.__init__   s"     	""4("S(,rF   r   c                     || _         y r@   r   r   s     rE   rV   z PDFPageAggregator.receive_layout   s	    rF   c                 6    | j                   J | j                   S r@   r   rD   s    rE   
get_resultzPDFPageAggregator.get_result   s    {{&&&{{rF   r   )r   r   r   r*   r   r   r   rA   r   rV   r   rp   rF   rE   r   r      sY     '+	-#- - 8$	-
 
-V  F rF   r   IOTypec                   T    e Zd Z	 	 	 d
dededededee   ddfdZ	e
dedefd	       Zy)PDFConverterNr;   outfpcodecr<   r=   r>   c                     t         j                  | |||       || _        || _        | j	                  | j                        | _        y r   )r8   rA   r   r   _is_binary_streamoutfp_binary)rD   r;   r   r   r<   r=   s         rE   rA   zPDFConverter.__init__  s@     	""4("S"

 224::>rF   c                     dt        | dd      v ryt        | d      ryt        | t        j                        ryt        | t        j
                        ryt        | t        j                        ryy)z"Test if an stream is binary or notbmodern   TF)getattrhasattrrS   ioBytesIOStringIO
TextIOBase)r   s    rE   r   zPDFConverter._is_binary_stream  s[     '%,,UF#rzz*r{{+r}}-rF   )utf-8r   N)r   r   r   r*   r   rQ   r   r   r   rA   staticmethodr-   r   r   rp   rF   rE   r   r     ss    
 '+?#? ? 	?
 ? 8$? 
?  4  rF   r   c                        e Zd Z	 	 	 	 	 ddededededee   de	dee
   d	df fd
Zded	dfdZded	dfdZdeded	dfdZdede	de	de	dee   d	dfdZ xZS )TextConverterNr;   r   r   r<   r=   
showpagenoimagewriterr>   c                 J    t         |   |||||       || _        || _        y )Nr   r<   r=   )superrA   r   r   )	rD   r;   r   r   r<   r=   r   r   	__class__s	           rE   rA   zTextConverter.__init__(  s,     	%uVhW$&rF   r   c                     t        j                  || j                  d      }| j                  r8t	        t
        | j                        j                  |j                                y t	        t        | j                        j                  |       y )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rD   r   s     rE   
write_textzTextConverter.write_text6  s[    --dDJJI4::&,,T[[];$**40rF   r   c                      dt         dd f fd j                  r j                  d|j                  z          |        j                  d       y )Nrf   r>   c                 Z   t        | t              r| D ]
  } |        n/t        | t              rj                  | j	                                t        | t
              rj                  d       y t        | t              r)j                  j                  j                  |        y y y )N
)	rS   r   r!   r   get_textr"   r   r   export_image)rf   childrenderrD   s     rE   r   z,TextConverter.receive_layout.<locals>.render>  s    $,! "E5M"D&)0$	*%D'*##/$$11$7 0 +rF   zPage %s
)r   r   r   pageidrD   r   r   s   ` @rE   rV   zTextConverter.receive_layout=  sG    
	8 
	8D 
	8 ??OOK&--78vrF   rX   rd   c                 L    | j                   y t        j                  | ||       y r@   )r   r   rg   )rD   rX   rd   s      rE   rg   zTextConverter.render_imageR  s&    #!!$f5rF   rh   ri   rj   rk   rl   c                      y r@   rp   )rD   rh   ri   rj   rk   rl   s         rE   r   zTextConverter.paint_pathX  s     	rF   )r   r   NFN)r   r   r   r*   r-   rQ   r   r   r   r   r   rA   r   r   rV   r,   rg   r)   r   r1   r   __classcell__)r   s   @rE   r   r   '  s    
 '+ -1'#' ' 	'
 ' 8$' ' k*' 
'1s 1t 1V  * i D   	
  {# 
rF   r   c                       e Zd ZdddddddZddd	Z	 	 	 	 	 	 	 	 	 	 	 	 d6dedededede	e
   dededededede	e   dede	eeef      de	eeef      dd
fdZdedd
fdZd7dZd7dZdedd
fdZd ed!ed"ed#ed$ed%edd
fd&Zd ed!ed'edd
fd(Zd'ed!ed"ed#ed$ed%edd
fd)Zd eded"ed#ed*edd
fd+Z	 d8d ed!ed"ed#ed$ed%ed,edd
fd-Zd edd
fd.Zded/ed0edd
fd1Zd7d2Zd3edd
fd4Zd7d5Z y
)9HTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rG   blue)r   charNr;   r   r   r<   r=   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr>   c                    t         j                  | |||||       | j                  | j                   k(  rt	        d      |ddi}|ddd}|| _        || _        || _        |	| _        |
| _	        || _
        || _        || _        |rJ| j                  j                  | j                         | j                  j                  | j                         | j                  | _        d | _        g | _        | j'                          y )Nr   )Codec is required for a binary I/O outputr   r   r   )r   rG   )r   rA   r   r   
ValueErrorr   r   r   r   r   r   r   r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rD   r;   r   r   r<   r=   r   r   r   r   r   r   r   r   r  s                  rE   rA   zHTMLConverter.__init__r  s    " 	'5fx 	 	

 TZZ0HII!7+K$+V<K
"$$$&&&##D$4$45##D$4$45#26
=?rF   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y r@   r   r   r   r   r   r   r	   r   s     rE   r   zHTMLConverter.write  R    ::4::&,,T[[-DE 	 $**40rF   c                     | j                  d       | j                  rd| j                  z  }nd}| j                  |       | j                  d       y )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rD   ss     rE   r  zHTMLConverter.write_header  sO    

#$::!#'::. 
 IA

1

$%rF   c                     t        d| j                        D cg c]  }dj                  ||       }}ddj                  |      z  }| j	                  |       | j	                  d       y c c}w )Nr   z<a href="#{}">{}</a>z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger<   formatr}   r   )rD   i
page_linksr  s       rE   write_footerzHTMLConverter.write_footer  ss    9>q$++9N
45"))!Q/

 
 H$))K
 
 	

1

%&
s   A+c                 8    | j                  t        |             y r@   )r   r5   r   s     rE   r   zHTMLConverter.write_text  s    

3t9rF   colorborderwidthrr   ywru   c                     | j                   j                  |      }|]d|||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j	                  |       y )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r   getr   r  r   )	rD   r  r  rr   r  r  ru   color2r  s	            rE   
place_rectzHTMLConverter.place_rect  s     !!%%e,K 

N]]Q&$**4

N

N	  JJqMrF   rf   c                     | j                  |||j                  |j                  |j                  |j                         y r@   )r  rK   rN   widthheight)rD   r  r  rf   s       rE   place_borderzHTMLConverter.place_border  s+    {DGGTWWdjj$++VrF   c                    | j                   | j                   j                  |      }dt        |      ||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r5   r   r  r   )	rD   rf   r  rr   r  r  ru   rX   r  s	            rE   place_imagezHTMLConverter.place_image  s     '##006DD I

N]]Q&$**4

N

N	  JJqMrF   sizec                 8   | j                   j                  |      }|}d||| j                  z  | j                  |z
  | j                  z  || j                  z  | j                  z  fz  }| j                  |       | j                  |       | j                  d       y )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r  r   r  r   r   r   )rD   r  r   rr   r  r&  r  r  s           rE   
place_textzHTMLConverter.place_text  s     !!%%e,. 

N]]Q&$**44::%6	  JJqMOOD!JJ{#rF   writing_modec           	         | j                   j                  | j                         d | _        d||||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r
  r\   r	  r   r  r   )	rD   r  r  rr   r  r  ru   r*  r  s	            rE   	begin_divzHTMLConverter.begin_div  s     	tzz*
 DJJ"djj0DJJDJJ 	
 	

1rF   c                     | j                   | j                  d       | j                  j                         | _         | j                  d       y )N</span>z</div>)r	  r   r
  r`   )rD   r  s     rE   end_divzHTMLConverter.end_div!  s;    ::!JJy!__((*


8rF   fontnamer   c                    ||f}|| j                   k7  rh| j                   | j                  d       |j                  d      d   }| j                  d||| j                  z  | j                  z  fz         || _         | j                  |       y )Nr.  +z.<span style="font-family: %s; font-size:%dpx">)r	  r   splitr   r   r   )rD   r   r0  r   r   fontname_without_subset_tags         rE   put_textzHTMLConverter.put_text(  s    (#4::zz%

9%*2..*=b*A'JJ@.4::0E0VWX DJrF   c                 &    | j                  d       y )Nz<br>r   r   s    rE   put_newlinezHTMLConverter.put_newline7  s    

6rF   r   c                      dt         t        t        f   dd f fddt        dd f fd |        xj                   j
                  z  c_        y )Nrf   r>   c                 j    t        | t              r"j                  dd|        | D ]
  } |        y )Nr   r   )rS   r$   r#  rf   r   rD   
show_groups     rE   r=  z0HTMLConverter.receive_layout.<locals>.show_group<  s9    $,!!+q$7! &Eu%&rF   c           
      X   t        | t              rxj                  | j                  z  c_        j	                  dd|        j
                  rmj                  dj                  | j                  z
  j                  z  z         j                  dj                  | j                  | j                               | D ]
  } |        | j                  | j                  D ]
  } |        y t        | t              rj	                  dd|        y t        | t              r_j                  dd| j                  | j                  | j                  | j                          | D ]
  } |        j#                  d       y t        | t$              r?j'                  | d| j                  | j                  | j                  | j                          y j(                  dk(  r
t        | t*              r#j	                  dd|        | D ]
  } |        y t        | t,              rbj	                  d	d|        j/                  d	t1        | j2                  dz         | j                  | j                  d
       | D ]
  } |        y t        | t4              rTj	                  dd|        j/                  d| j7                         | j                  | j                  | j8                         y t        | t*              r/| D ]
  } |        j(                  dk7  rj;                          y t        | t,              rnj                  d	d| j                  | j                  | j                  | j                   | j=                                | D ]
  } |        j#                  d	       y t        | t4              rAt?        | j@                        }jC                  | j7                         || j8                         y t        | tD              rjG                  | j7                                y )NrG   r   z*<div style="position:absolute; top:%dpx;">z<a name="{}">Page {}</a></div>
r   r   exactr   r      r   loose)$rS   r   r  rN   r#  r   r   r   r  r   groupsr   r   r,  rK   r!  r"  r/  r   r%  r   r%   r"   r)  rQ   indexr   r   r&  r9  get_writing_moder2   r0  r6  r!   r   )rf   r   groupr0  r   rD   r=  s       rE   r   z,HTMLConverter.receive_layout.<locals>.renderC  s   $'(!!&!T2??JJD MMDGG3tzzAC JJ:AA KK
 " "E5M";;*!% *"5)*j g D'*!!'1d3d c D(+xDGGTWWdjj$++V! "E5M"X&Z Y D'*  q$''477DJJTV S ??g-!$
3))*a>%) *E"5M*L I $D)4)))Q=%s4::>':DGGTWWb &* *E"5M*> ; $D&1))&!T:"DMMOTWWdggtyy6 / "$
3%) *E"5M*??g5 ,,.& % $D)4% GG GG JJ KK 113 &* *E"5M*Y/  $D&1#24==#AdmmoxK  $D&18rF   )r   r$   r   r   r  r   rD   r   r   r=  s   ` @@rE   rV   zHTMLConverter.receive_layout;  sV    	U;0@#@A 	d 	G	 G	D G	R 	v(rF   c                 $    | j                          y r@   r  r   s    rE   closezHTMLConverter.close      rF   )r   r   Nr   g      ?normalT2   Nr   NNr>   N)False)!r   r   r   r  r  r*   r-   rQ   r   r   r   r   r   r   r   rA   r   r  r  r   r  r   r#  r   r%  r)  r,  r/  r6  r9  r   rV   rI  rp   rF   rE   r   r   c  s   K K '+"-10404-#- - 	-
 - 8$- - - - - - k*- - d38n-- d38n--  
!-^# $ 	s t '*/49>CHMR	(# C { t *-27<AFKPU	( #(-27?D	8 $  	
     
:S T S C 5 T SV S SjrF   r   c                       e Zd Z ej                  d      Z	 	 	 	 	 ddededede	de
e   de
e   d	ed
dfdZded
dfdZddZddZded
dfdZded
dfdZddZy)XMLConverterz[ ---]Nr;   r   r   r<   r=   r   stripcontrolr>   c                     t         j                  | |||||       | j                  | j                   k(  rt	        d      || _        || _        | j                          y )Nr   r  )r   rA   r   r   r  r   rQ  r  )rD   r;   r   r   r<   r=   r   rQ  s           rE   rA   zXMLConverter.__init__  se     	'5fx 	 	

 TZZ0HII&(rF   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y r@   r  r   s     rE   r   zXMLConverter.write  r  rF   c                     | j                   r| j                  d| j                   z         n| j                  d       | j                  d       y )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rE   r  zXMLConverter.write_header  s;    ::JJ?$**LMJJ12

;rF   c                 &    | j                  d       y )Nz	</pages>
r8  r   s    rE   r  zXMLConverter.write_footer  s    

< rF   c                     | j                   r| j                  j                  d|      }| j                  t	        |             y Nrn   )rQ  CONTROLsubr   r5   r   s     rE   r   zXMLConverter.write_text  s4    <<##B-D

3t9rF   r   c                 X     dt         dd f fddt         dd f fd |       y )Nrf   r>   c                 <   t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              rGj                  dt	        | j
                        z         | D ]
  } |        j                  d       y )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rS   r"   r   rC  r4   rY   r$   r<  s     rE   r=  z/XMLConverter.receive_layout.<locals>.show_group  s    $	*

5zz8DII#678  D+.

4x		7JJK! &Eu%&

+,rF   c                    t        | t              rd| j                  t        | j                        | j
                  fz  }j                  |       | D ]
  } |        | j                  ;j                  d       | j                  D ]
  } |        j                  d       j                  d       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              rEd| j                  t        | j                        | j                         fz  }j                  |       y t        | t              rXd| j                  d	t        | j                        d
}j                  |       | D ]
  } |        j                  d       y t        | t              rHj                  dt        | j                        z         | D ]
  } |        j                  d       y t        | t               rkd}t        | t"              rd}d| j$                  t        | j                        |fz  }j                  |       | D ]
  } |        j                  d       y t        | t&              rdt)        | j*                        t        | j                        | j,                  j                  | j.                  j0                  | j2                  fz  }j                  |       j5                  | j7                                j                  d       y t        | t8              r#j                  d| j7                         z         y t        | t:              rj<                  Pj<                  j?                  |       }j                  dt)        |      | j@                  | jB                  fz         y j                  d| j@                  | jB                  fz         y J tE        d| f             )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
rn   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
	Unhandled)#rS   r   r   r4   rY   rotater   rB  r   r   r    r   get_ptsr   rX   r%   r"   r#   rC  r   r5   r0  r   r   r   r&  r   r   r!   r   r   r   r!  r"  rQ   )	rf   r  r   rE  wmoderX   r   rD   r=  s	         rE   r   z+XMLConverter.receive_layout.<locals>.render  s   $'<KKTYY'KK@ 
 

1! "E5M";;*JJ|,!% *"5)*JJ}-

;'T S D&)9NNTYY'=  

1H G D&)9NNTYY'=  

1| { D'*BNNTYY'LLNF 
 

1n m D(+8<		8DIICVW

1! "E5M"

=)b a D*-

3htyy6IIJ! "E5M"

?+X W D),d$56/E5JJTYY'9 
 

1! "E5M"

>*> = D&)0 DMM* +))00		  

10

;'   D&)

.@A  D'*##/++88>DJJEt9djj$++>?  JJ<

DKK?XX
  7c;"566urF   r   rF  s   ` @@rE   rV   zXMLConverter.receive_layout  s9    	V 	 	Z	 Z	D Z	x 	vrF   c                 $    | j                          y r@   rH  r   s    rE   rI  zXMLConverter.close6  rJ  rF   )r   r   NNFrM  )r   r   r   r   compilerY  r*   r-   rQ   r   r   r   r   r   rA   r   r  r  r   r   rV   rI  rp   rF   rE   rP  rP    s    bjj89G '+-1"#  	
  8$ k*  
.# $ s t kV k kZrF   rP  c                       e Zd ZdZ ej
                  d      Z	 	 	 	 ddedede	de
dee   d	efd
Zdede	fdZde	ddfdZddZddZde	ddfdZddZdeddfdZddZy)HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]Nr;   r   r   r<   r=   rQ  c                 v    t         j                  | |||||       || _        d| _        | j	                          y )Nr   F)r   rA   rQ  within_charsr  )rD   r;   r   r   r<   r=   rQ  s          rE   rA   zHOCRConverter.__init__N  sB     	'5fx 	 	
 )!rF   rY   r>   c                     |\  }}}}t        |      }t        | j                  d   |z
        }t        |      }t        | j                  d   |z
        }	d| d| d| d|	 S )N   zbbox  )r   	page_bbox)
rD   rY   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rE   	bbox_reprzHOCRConverter.bbox_repr^  sq    '+$ueUT^^A&./UT^^A&./vhaxq&::rF   r   c                     | j                   rE|j                  | j                         }t        t        | j                        j                  |       y t        t        | j                        j                  |       y r@   )r   r   r   r   r   r   r	   )rD   r   encoded_texts      rE   r   zHOCRConverter.writeg  sM    ::;;tzz2L4::&,,\:$**40rF   c                 j   | j                   r| j                  d| j                   z         n| j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d	       y )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
rU  r   s    rE   r  zHOCRConverter.write_headern  s    ::JJ:<@JJG
 JJ- 	

:

&'

W	
 	

S	
 	

C	
 	

;

:rF   c                 H    | j                  d       | j                  d       y )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
r8  r   s    rE   r  zHOCRConverter.write_footer  s    

FG

V	
rF   c                 v    | j                   r| j                  j                  d|      }| j                  |       y rX  )rQ  rY  rZ  r   r   s     rE   r   zHOCRConverter.write_text  s-    <<##B-D

4rF   c                 t   t        | j                        dkD  rd}d| j                  v rd}d| j                  v r|dz  }| j                  d| j                  | j                  || j                  | j                        | j                  | j                  | j                  j                         fz         d| _        y )	Nr   rn   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rR   working_textworking_fontr   working_sizert  working_bboxstriprg  )rD   bold_and_italic_styless     rE   
write_wordzHOCRConverter.write_word  s    t  !A%%'"4,,,)?&***&*??&JJ(
 )))).t'8'89))))))//1	  "rF   r   c                 6     dt         dd f fd |       y )Nrf   r>   c                 d   j                   r t        | t              rj                          t        | t              rm| j
                  _        j                  d| j                  dj                  | j
                        d       | D ]
  } |        j                  d       y t        | t              rNj                  dj                  | j
                        z         | D ]
  } |        j                  d       y t        | t              rZj                  d| j                  j                  | j
                        fz         | D ]
  } |        j                  d       y t        | t              rj                   sPd_         | j                         _        | j
                  _        | j"                  _        | j&                  _        y t+        | j                         j-                               d	k(  r0j                          j                  | j                                y j                   d
   | j
                  d
   k7  s2j$                  | j"                  k7  sj(                  | j&                  k7  rCj                          | j
                  _        | j"                  _        | j&                  _        xj                  | j                         z  c_        j                   d	   j                   d
   | j
                  d   j                   d   f_        y y )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>r(  z+<div class='ocr_block' id='%d' title='%s'>
Tr   r   r|   ri  )rg  rS   r   r  r   rY   rk  r   r   rt  r%   r"   rC  r   r   r}  r  r0  r~  r&  r  rR   r  )rf   r   
child_liner   rD   s      rE   r   z,HOCRConverter.receive_layout.<locals>.render  sy     Zf%=!$'!%

{{DNN499$=? " "E5M"

:&D*-

8T^^DII=VX #' 'J:&'

;'D),

Bzz4>>$))#<=> " "E5M"

:&D&)(((,D%(,D%(,		D%(,D%(,		D%4==?0023q8)

4==?3 !--a0DIIaL@#00DMMA#00DII= OO-04		D-04D-04		D-))T]]_<) --a0 --a0 IIaL --a0	-)- *rF   r   r   s   ` @rE   rV   zHOCRConverter.receive_layout  s     6	 6	D 6	p 	vrF   c                 $    | j                          y r@   rH  r   s    rE   rI  zHOCRConverter.close  s    rF   )utf8r   NFrM  )r   r   r   __doc__r   rc  rY  r*   r-   rQ   r   r   r   r   rA   r0   rt  r   r  r  r   r  r   rV   rI  rp   rF   rE   re  re  ;  s    U  bjj9:G '+"#  	
  8$  ;d ;s ;1# 1$ 14
s t 
"29V 9 9vrF   re  )Jr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfcolorr   rn   r   imager   layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   	pdfdevicer&   pdffontr'   r(   	pdfinterpr)   r*   pdfpager+   pdftypesr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   	getLoggerr   r   r8   r   r   r   r   r   rP  re  rp   rF   rE   <module>r     s(   	  	    ,   ; ;        %      %   $  ) :   K K "   g!y yx) & 
68U	3$gfo @9L' 9xoL' od	c<& cLmL' mrF   