U
    '1eG                  
   @   s  d Z dZzBdZddlZdd ZejddddedddZdddi fdd	ZW n ek
rb   dZY nX G d
d dZ	e	 Z	dZ
dZdZdZeeZdZdddgZdd ZddefddZereZdd ZdZdd ZddefddZdd Zd+ddZd,d d!Zed"kredd# ddlZddlZdd$lmZ ddlZe Z dZ!ej"dd D ]fZ#ej$%e#sje&d% nHe'e#Z(e() Z*W 5 Q R X e&d&e#d'ee*  ee*dd# e!d7 Z!qNe!re&d(e e  d)d* dS )-at  Very simple and fast XML parser, used for intra-paragraph text.

Devised by Aaron Watters in the bad old days before Python had fast
parsers available.  Constructs the lightest possible in-memory
representation; parses most files we have seen in pure python very
quickly.

The output structure is the same as the one produced by pyRXP,
our validating C-based parser, which was written later.  It will
use pyRXP if available.

This is used to parse intra-paragraph markup.

Example parse::

    <this type="xml">text <b>in</b> xml</this>

    ( "this",
      {"type": "xml"},
      [ "text ",
        ("b", None, ["in"], None),
        " xml"
        ]
       None )

    { 0: "this"
      "type": "xml"
      1: ["text ",
          {0: "b", 1:["in"]},
          " xml"]
    }

Ie, xml tag translates to a tuple:
 (name, dictofattributes, contentlist, miscellaneousinfo)

where miscellaneousinfo can be anything, (but defaults to None)
(with the intention of adding, eg, line number information)

special cases: name of "" means "top level, no containing tag".
Top level parse always looks like this::

    ("", list, None, None)

 contained text of None means <simple_tag/>

In order to support stuff like::

    <this></this><one></one>

AT THE MOMENT &amp; ETCETERA ARE IGNORED. THEY MUST BE PROCESSED
IN A POST-PROCESSING STEP.

PROLOGUES ARE NOT UNDERSTOOD.  OTHER STUFF IS PROBABLY MISSING.
    Nc                 C   s   t |  d S )N)print)s r   ;/tmp/pip-unpacked-wheel-109iniqw/reportlab/lib/rparsexml.pywarnCB>   s    r      zstring input)ZErrorOnValidityErrorsZNoNoDTDWarningZExpandCharacterEntitiesZExpandGeneralEntitiesr   ZsrcNameZ
ReturnUTF8c                 C   s*   |t _t j| f|}|r|p(dd |gd fS )N )pyRXP_parsereoCBparse)ZxmlTextoneOutermostTagr
   entityReplacerZ	parseOptspr   r   r   parsexmlI   s    r   c                   @   s   e Zd Zedd ZdS )smartDecodec                    s0   t d dd l  fdd}t|tj_|| S )Ninitialr   c                    s.   t | tr| S  | }td | |d S )Nfinalencoding)
isinstancestrdetectr   decode)r   Zcddchardetr   r   __call__U   s
    
 
z&smartDecode.__call__.<locals>.__call__)r   r   staticmethodr   	__class__r   )r   r   r   r   r   r   Q   s
    zsmartDecode.__call__N)__name__
__module____qualname__r   r   r   r   r   r   r   P   s   r   r   	<![CDATA[z]]>)z&lt;<)z&gt;>)z&amp;&c                 C   s>   g }| D ]0}d|kr.t D ]\}}|||}q|| q|S )Nr#   )replacelistreplaceappend)ZcontentListresulteoldnewr   r   r   unEscapeContentListf   s    r+   c                 C   s4   t rtdt| |d\}}|r,|d d S |S dS )z.official interface: discard unused cursor infoz)pyRXP not found, fallback parser disabled)r      r   N)RequirePyRXPImportError	parsexml0)xmltextr   r
   r   r'   cursorr   r   r   parsexmlSimpleo   s    r2   c                 C   s   t | d }t|S )Nr)openreadr   )filenamerawr   r   r   	parseFile|   s    r8   c           	      C   s   d}d}|dkr|  d|}|dk r&q|d }d}|D ]L}t|}| |||  |kr6d}|  d|}|dk rztd| |d }q6|dkrd}q|S )zBskip any prologue found after cursor, return index of rest of text)z!DOCTYPEz?xmlz!--Nr!   r   r   r"   zcan't close prologue %r)findlen
ValueError)	textr1   Zprologue_elementsdoneZopenbracketZpastfoundr(   ler   r   r   skip_prologue   s&     
r@   c           1      C   s  t | } t}d } }}|dk	r(|  } |}| d|}	| |	d |	d  }
d}|	dk r|dk	r| |d g}|rz||}||||ft| fS tdt| ||d   g }|dk	rt }}t| |}n|	dk rtdt| dd	  n|
d
krp| |	|	d  dkrp|	d }| t|}|dk rJtdt| ||d   t	}| || g}|tt }d}nz|
dkr| |	|	d  dkr| d|	d }||	k rtdt| ||d   |d }| | dkrtdt| ||d   d|d fS | d|	}|dk }|d }|	d }| || }d|krp|d dkr\|dd }d}| }|}|}nzd|krd}|st|d 
dd rd}|dkr| d|}|d }|dk }| || }|st|d 
dd rd}q|rtdt| |	|	d   |}| |d  dkrH|d }|dd }d}| }|
d}|d }|
 }|d }|}|d }|d d |d< i  }}d}t|}||k r|| }|d }| }|d dkrtdt| d|dd kr.||krtdt| || } |d }d|| f }q| }|
 }!|!d }"|dt|"  }#|# }#z|#d }$|#d }%W n"   td t|#||!f Y nX |$|%  krdksn |$|%  krd!krn n|#dd }#|#||< |"}q|dk	r|}|dk	r| d|}&|&|k rp|tkrLd}| |d }'t| }|'rl||' n td"|t| ||d  f q| |&d  dkrj| d|&}(|(|&k rtd#t| |&|&d   | |&d |( })|)
 }*|*d }+||+kr@| d| },t|,
d$}-| d| },t|,
d$}.td%|.|-t|t|+t| ||d&  f | ||& }'|'r\||' |(d }d}n@| ||& }'|'r||' t| |&d|d'\}/}|/r||/ q|r|r||}||||f}0|0|fS )(zsimple recursive descent xml parser...
       return (dictionary, endcharacter)
       special case: comment returns (None, endcharacter)Nr!   r      r   zno tags at non-toplevel %s   z.non top level entry should be at start tag: %s
   z![	   r    zunclosed CDATA %sz!-   z<!--z--zunterminated comment %sr,   r"   z*invalid comment: contains double dashes %s=/".zunclosed start tag %sz fz-attribute value must start with double quoteszunclosed value %s=%szattvalue,attentry,attlist='z&no close bracket for %s found after %szunclosed close tag %s
z8at lines %s...%s close tag name doesn't match %s...%s %sd   )
startingattoplevelr   )r   NONAMEstripr9   r:   r;   reprr@   CDATAENDMARKERCDATAMARKERsplitr&   r/   )1r0   rO   rP   r   Z
NameStringZContentListZAttDictZ
ExtraStuffr1   ZfirstbracketZafterbracket2charZ
docontentsLnameZ
startcdataZendcdataZendcommentdashesZ
endcommentZclosebracketZnocloseZstartsearchZpastfirstbracketZ
tagcontentstopZtaglistZtaglist0Ztaglist0listattributenameDZtaglistindexZlasttaglistindexZattentryZnextattentryattlistZnextattnameZattvaluefirstlastZnextopenbracket	remainderZnextclosebracketZclosetagcontentsZclosetaglistZ	closenameprefixZ
endlinenumlinenumZ	parsetreetr   r   r   r/      s2   	  
 






 6



$

    

 r/   c                 C   s   t | ttfr| S | \}}}}|s&i }g }| D ]"}|| }|d|t|f  q2d|}|sp|rptd|dk	rtt	t
|}	d|	}
|s|
S |
d}dd| }
d|||
|f S d	||f S )
z!pretty printer mainly for testingrK    zname missing with attributes???NrM   z   z
   z<%s %s>
%s
</%s>z<%s %s/>)r   r   byteskeysr&   rS   joinr;   listmappprettyprintrV   )Z	parsedxmlrX   ZattdictZtextlistextrar\   kv
attributesZtextlistpprintZ
textpprintZnllistr   r   r   ri     s*     


ri   c                 C   st   ddl m } ddlm} | }t  t| dd}td| |  |d@ rP|| |d@ rptd t|}t| d S )	Nr   time)pprintr   )r   ZDONErE   z============== reformatting)ro   rp   
breakpointr2   r   ri   )r   dumpro   rp   nowr[   r   r   r   r   	testparse  s    rt   c                 C   s   t d| d d S )Na-  <this type="xml">text &lt;&gt;<b>in</b> <funnytag foo="bar"/> xml</this>
                 <!-- comment -->
                 <![CDATA[
                 <this type="xml">text <b>in</b> xml</this> ]]>
                 <tag with="<brackets in values>">just testing brackets feature</tag>
                 rr   )rt   ru   r   r   r   test  s    rv   __main__ru   rn   z!!!!! no file at {f!r}zparsing z |t|=z	timed at z.2fz secs.)r   )r   )+__doc__r-   ZsimpleparseZpyRXPUr   Parserr	   r   r.   r   rQ   ZNAMEKEYZCONTENTSKEYrU   r:   ZLENCDATAMARKERrT   r$   r+   r2   r8   verboser@   r/   ri   rt   rv   r   sysosro   Z	reportlabrs   seenargvfpathisfiler   r4   _fr5   rb   r   r   r   r   <module>   sr   7	

	

 i





