a  zeC@sddlZddlZddlmZdgZedZedZedZedZ edZ ed Z ed Z ed Z ed Zed ejZed ZedZGdddejZdS)N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) \s* # possibly followed by a space )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#c@seZdZdZddddZddZdd Zd d Zd Zd dZ ddZ ddZ ddZ ddZ d6ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd S)7r)ZscriptstyleT)convert_charrefscCs||_|dSN)rreset)selfrr ?/opt/bitninja-python-dojo/embedded/lib/python3.9/html/parser.py__init__VszHTMLParser.__init__cCs(d|_d|_t|_d|_tj|dS)Nz???)rawdatalasttaginteresting_normal interesting cdata_elem _markupbase ParserBaserr r r r r_s zHTMLParser.resetcCs|j||_|ddS)Nr)rgoaheadr datar r r feedgs zHTMLParser.feedcCs|ddS)N)rrr r r closepszHTMLParser.closeNcCs|jSr)_HTMLParser__starttag_textrr r r get_starttag_textvszHTMLParser.get_starttag_textcCs$||_td|jtj|_dS)Nz )lowerrrecompileIr)r elemr r r set_cdata_modezs zHTMLParser.set_cdata_modecCst|_d|_dSr)rrrrr r r clear_cdata_mode~szHTMLParser.clear_cdata_modec CsJ|j}d}t|}||kr|jrv|jsv|d|}|dkr|dt||d}|dkrptd ||spq|}n*|j ||}|r| }n|jrq|}||kr|jr|js| t |||n| ||||||}||krq|j}|d|rJt||r"||} n|d|r:||} nn|d|rR||} nV|d|rj||} n>|d |r||} n&|d |kr| d|d } nq| dkr<|sq|d |d } | dkr|d|d } | dkr|d } n| d 7} |jr*|js*| t ||| n| ||| ||| }q|d |rt||}|r|d d} || |} |d| d s| d } ||| }qnrinjZampposr5r3knamer r r rs                                    zHTMLParser.goaheadcCs|j}|||ddkr$||S|||ddkrB||S|||ddkr|d|d}|dkrvdS|||d ||d S||SdS) Nr(z)r rBrr5rDr r r r9 szHTMLParser.parse_picCsd|_||}|dkr|S|j}||||_g}t||d}|}|d|_}||kr t ||}|s~q |ddd\} } } | sd} nZ| dddkr| ddksn| dddkr| ddkrnn | dd} | rt | } | | | f|}q`||| } | dvr| \} }d |jvrz| |jd } t|j|jd }n|t|j}|||||S| d r|||n"|||||jvr|||S) Nrrr)rH'r*")r/> rT)rcheck_for_whole_start_tagrtagfind_tolerantr5r>r<rrattrfind_tolerantrappendstripZgetposcountr,r.r2endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr#)r rBendposrattrsr5rEtagmattrnamerestZ attrvaluer>linenooffsetr r r r6,sX   &            zHTMLParser.parse_starttagcCs|j}t||}|r|}|||d}|dkr>|dS|dkr~|d|rZ|dS|d|rjdS||krv|S|dS|dkrdS|dvrdS||kr|S|dStd dS) Nrr/rTr)r*r z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)rlocatestarttagend_tolerantr5r>r3AssertionError)r rBrrcrDnextr r r rV_s.   z$HTMLParser.check_for_whole_start_tagcCs|j}t||d}|sdS|}t||}|s|jdurV|||||St||d}|s|||ddkr|dS| |S| d }| d|}| ||dS| d }|jdur||jkr|||||S| |||S)Nrr*r)rHzr)r endendtagr0r> endtagfindr5rr2rWrKr<rr- handle_endtagr$)r rBrr5rLZ namematchZtagnamer"r r r r7s6       zHTMLParser.parse_endtagcCs|||||dSr)r^rnr rbrar r r r]s zHTMLParser.handle_startendtagcCsdSrr ror r r r^szHTMLParser.handle_starttagcCsdSrr )r rbr r r rnszHTMLParser.handle_endtagcCsdSrr r rFr r r r=szHTMLParser.handle_charrefcCsdSrr rpr r r r@szHTMLParser.handle_entityrefcCsdSrr rr r r r2szHTMLParser.handle_datacCsdSrr rr r r rMszHTMLParser.handle_commentcCsdSrr )r Zdeclr r r rJszHTMLParser.handle_declcCsdSrr rr r r rQszHTMLParser.handle_picCsdSrr rr r r unknown_declszHTMLParser.unknown_decl)r)__name__ __module__ __qualname__r_r rrrrrr#r$rr:rKr9r6rVr7r]r^rnr=r@r2rMrJrQrqr r r r r>s4  z  3"()rrZhtmlr__all__r rrAr?r;r4rPZ commentcloserWrXVERBOSErirlrmrrr r r r  s(