a  ze,@sddlmZmZmZddlmZddlmZm Z ddl m Z ddl m Z ddl mZddl mZmZdd l mZmZmZdd l mZmZdd l mZdd lmZdd lmZeeZe dkreZne ZGdddeZdS))absolute_importdivisionunicode_literals)unichr)deque OrderedDict) version_info)spaceCharacters)entities) asciiLettersasciiUpper2Lower)digits hexDigitsEOF) tokenTypes tagTokenTypes)replacementCharacters)HTMLInputStream)Trie)csdeZdZdZdfdd ZddZddZdd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zdd?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZdzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS) HTMLTokenizera  This class takes care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. Nc sJt|fi||_||_d|_g|_|j|_d|_d|_t t | dS)NF) rstreamparser escapeFlag lastFourChars dataStatestateescape currentTokensuperr__init__)selfrrkwargs __class__a/opt/bitninja-python-dojo/embedded/lib/python3.9/site-packages/pip/_vendor/html5lib/_tokenizer.pyr"(szHTMLTokenizer.__init__ccsPtg|_|rL|jjr6td|jjddVq|jr |jVq6q dS)z This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. ParseErrorrtypedataN)r tokenQueuerrerrorsrpoppopleftr#r'r'r(__iter__7s  zHTMLTokenizer.__iter__c Cst}d}|rt}d}g}|j}||vrH|turH|||j}q"td||}|tvrt|}|j t ddd|idnd|krd ksn|d krd }|j t ddd|idnd |krd ks>nd|krdks>nd|krdks>nd|kr*dks>n|t gdvrZ|j t ddd|idz t |}Wn<t y|d}t d|d?Bt d|d@B}Yn0|dkr|j t ddd|j||S)zThis function returns either U+FFFD or the character based on the decimal or hexadecimal representation. It also discards ";" if present. If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. r)z$illegal-codepoint-for-numeric-entity charAsIntr+r,datavarsii�r ii)# iiiiiiiiiiiiiiiiiii i i i i i i i i i iiiiir9iii;z numeric-entity-without-semicolonr*)rrrcharrappendintjoinrr-r frozensetchr ValueErrorunget) r#isHexallowedradix charStackcr6rBvr'r'r(consumeNumberEntityGsn              &   z!HTMLTokenizer.consumeNumberEntityFc Csd}|jg}|dtvsB|dtddfvsB|durV||dkrV|j|dn|ddkr d}||j|ddvrd}||j|r|dtvs|s|dtvr|j|d||}n4|j t d d d |j| dd |}nf|dturDt d |s0qD||jq z$t d |dd}t|}Wntyd}Yn0|dur>|dd kr|j t d dd |dd kr|r||tvs||tvs||dkr|j| dd |}n.t|}|j| |d ||d7}n4|j t d dd |j| dd |}|r|jddd|7<n*|tvrd}nd}|j t ||d dS)N&r<#F)xXTr)zexpected-numeric-entityr*r5rAznamed-entity-without-semicolon=zexpected-named-entityr,r SpaceCharacters Characters)rrBr rrIrCrrrPr-rr/rE entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr r r ) r# allowedChar fromAttributeoutputrMhex entityName entityLength tokenTyper'r'r( consumeEntitys~               zHTMLTokenizer.consumeEntitycCs|j|dddS)zIThis method replaces the need for "entityInAttributeValueState". T)r_r`N)rf)r#r_r'r'r(processEntityInAttributesz&HTMLTokenizer.processEntityInAttributecCs|j}|dtvr|dt|d<|dtdkrp|d}t|}t|t|krh||ddd||d<|dtdkr|dr|j tdd d |d r|j tdd d |j ||j |_ dS) zThis method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. r+nameStartTagr,NrTEndTagr)zattributes-in-end-tagr* selfClosingzself-closing-flag-on-end-tag) r r translater r attributeMapr]updater-rCrr)r#tokenrawr,r'r'r(emitCurrentTokens(    zHTMLTokenizer.emitCurrentTokencCs|j}|dkr|j|_n|dkr.|j|_n|dkrd|jtddd|jtdddn`|turpdS|t vr|jtd ||j t d dn&|j d }|jtd||dd S) NrQrRr)invalid-codepointr*rYFrXTrQrRrr) rrBentityDataStater tagOpenStater-rCrrr charsUntilr#r,charsr'r'r(rs.          zHTMLTokenizer.dataStatecCs||j|_dSNT)rfrrr1r'r'r(ruszHTMLTokenizer.entityDataStatecCs|j}|dkr|j|_n|dkr.|j|_n|tkr:dS|dkrp|jtddd|jtdd dnT|t vr|jtd ||j t d dn&|j d }|jtd||dd S) NrQrRFrrr)rsr*rYr:rXTrt) rrBcharacterReferenceInRcdatarrcdataLessThanSignStaterr-rCrr rwrxr'r'r( rcdataState"s.          zHTMLTokenizer.rcdataStatecCs||j|_dSrz)rfr}rr1r'r'r(r{?sz(HTMLTokenizer.characterReferenceInRcdatacCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd S NrRrrr)rsr*rYr:F)rRrrT) rrBrawtextLessThanSignStaterr-rCrrrwrxr'r'r( rawtextStateDs"       zHTMLTokenizer.rawtextStatecCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd Sr~) rrBscriptDataLessThanSignStaterr-rCrrrwrxr'r'r(scriptDataStateVs"       zHTMLTokenizer.scriptDataStatecCsr|j}|tkrdS|dkrL|jtddd|jtdddn"|jtd||jdddS) NFrrr)rsr*rYr:T)rrBrr-rCrrwr#r,r'r'r(plaintextStatehs     zHTMLTokenizer.plaintextStatecCs |j}|dkr|j|_n|dkr.|j|_n|tvrVtd|gddd|_|j|_n|dkr|j tddd |j td d d |j |_nt|d kr|j tdd d |j ||j |_n@|j tddd |j td dd |j ||j |_dS)N!/riF)r+rhr,rkselfClosingAcknowledged>r)z'expected-tag-name-but-got-right-bracketr*rYz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerRT)rrBmarkupDeclarationOpenStatercloseTagOpenStater rr tagNameStater-rCrrIbogusCommentStaterr'r'r(rvws>           zHTMLTokenizer.tagOpenStatecCs|j}|tvr0td|gdd|_|j|_n|dkrX|jtddd|j |_nn|t ur|jtddd|jtd d d|j |_n0|jtdd d |id |j ||j |_dS)NrjFr+rhr,rkrr)z*expected-closing-tag-but-got-right-bracketr*z expected-closing-tag-but-got-eofrY|tkr|jtdd d|j |_n|jtd|dd S NrrYr*rRrrr)rsr:eof-in-script-in-scriptT) rrBr-rCr scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterrrr'r'r(rs*        z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_n|dkr|jtddd|jtddd|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d Sr) rrBr-rCr$scriptDataDoubleEscapedDashDashStaterrrrrrr'r'r(rs.        z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|j}|dkr*|jtdddn|dkrR|jtddd|j|_n|dkrz|jtddd|j|_n|dkr|jtddd|jtdd d|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d S) NrrYr*rRrrrr)rsr:rT) rrBr-rCrrrrrrrrr'r'r(r%s2        z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|j}|dkr8|jtdddd|_|j|_n|j||j |_dS)NrrYr*r5T) rrBr-rCrrscriptDataDoubleEscapeEndStaterrIrrr'r'r(r>s   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|j}|ttdBvrR|jtd|d|jdkrH|j |_ q|j |_ nB|t vr|jtd|d|j|7_n|j ||j |_ dSr)rrBr rFr-rCrrrrrrr rIrr'r'r(rIs    z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|j}|tvr$|jtdn|tvrJ|jd|dg|j|_n|dkr\| n|dkrn|j |_n|dvr|j t ddd |jd|dg|j|_n|d kr|j t dd d |jdd dg|j|_nF|t ur|j t dd d |j|_n|jd|dg|j|_dS)NTr,r5rr)'"rWrRr)#invalid-character-in-attribute-namer*rrrsr:z#expected-attribute-name-but-got-eof)rrBr rwr r rCattributeNameStaterrqrr-rrrrr'r'r(rYs<           z&HTMLTokenizer.beforeAttributeNameStatecCs|j}d}d}|dkr&|j|_n.|tvr\|jddd||jtd7<d}n|dkrjd}n|tvr||j|_n|dkr|j |_n|d kr|j t d d d |jdddd 7<d}n|dvr |j t d dd |jddd|7<d}nH|t ur6|j t d dd |j|_n|jddd|7<d}|r|jdddt|jddd<|jdddD]>\}}|jddd|kr|j t d dd qҐq|r|dS)NTFrWr,rTrrrrrr)rsr*r:rrrRrzeof-in-attribute-namezduplicate-attribute)rrBbeforeAttributeValueStaterr r rwr afterAttributeNameStaterr-rCrrrrlr rq)r#r,leavingThisState emitTokenrh_r'r'r(rws^             z HTMLTokenizer.attributeNameStatecCsD|j}|tvr$|jtdn|dkr8|j|_n|dkrJ|n|tvrp|jd |dg|j |_n|dkr|j |_n|dkr|j t dd d |jd d dg|j |_n|d vr|j t dd d |jd |dg|j |_nF|tur$|j t ddd |j|_n|jd |dg|j |_dS)NTrWrr,r5rrrr)rsr*r:rz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rrBr rwrrrqr r rCrrr-rrrrr'r'r(rs@            z%HTMLTokenizer.afterAttributeNameStatecCsh|j}|tvr$|jtdn@|dkr8|j|_n,|dkrX|j|_|j|n |dkrj|j|_n|dkr|j t ddd| n|d kr|j t dd d|j d d d d7<|j|_n|dvr|j t ddd|j d d d |7<|j|_nL|turB|j t ddd|j|_n"|j d d d |7<|j|_dS)NTrrQrrr)z.expected-attribute-value-but-got-right-bracketr*rrrsr,rTr r:)rWrR`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)rrBr rwattributeValueDoubleQuotedStaterattributeValueUnQuotedStaterIattributeValueSingleQuotedStater-rCrrqr rrrr'r'r(rsF             z'HTMLTokenizer.beforeAttributeValueStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t ur|jtdd d|j |_n&|jddd ||j d 7<d S)NrrQrrr)rsr*r,rTr r:z#eof-in-attribute-value-double-quote)rrQrrT rrBafterAttributeValueStaterrgr-rCrr rrrwrr'r'r(rs&       z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t ur|jtdd d|j |_n&|jddd ||j d 7<d S)NrrQrrr)rsr*r,rTr r:z#eof-in-attribute-value-single-quote)rrQrrTrrr'r'r(rs&       z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|j}|tvr|j|_n|dkr0|dn|dkrB|n|dvr||jt ddd|j ddd |7<n|d kr|jt dd d|j ddd d 7<nV|t ur|jt dd d|j |_n.|j ddd ||j tdtB7<dS)NrQr)rrrWrRrr)z0unexpected-character-in-unquoted-attribute-valuer*r,rTr rrrsr:z eof-in-attribute-value-no-quotes)rQrrrrWrRrrrT)rrBr rrrgrqr-rCrr rrrwrFrr'r'r(rs4         z)HTMLTokenizer.attributeValueUnQuotedStatecCs|j}|tvr|j|_n|dkr.|np|dkr@|j|_n^|turt|j t ddd|j ||j |_n*|j t ddd|j ||j|_dS)Nrrr)z$unexpected-EOF-after-attribute-valuer*z*unexpected-character-after-attribute-valueT) rrBr rrrqrrr-rCrrIrrr'r'r(r.s&         z&HTMLTokenizer.afterAttributeValueStatecCs|j}|dkr&d|jd<|n^|turZ|jtddd|j||j |_ n*|jtddd|j||j |_ dS)NrTrkr)z#unexpected-EOF-after-solidus-in-tagr*z)unexpected-character-after-solidus-in-tag) rrBr rqrr-rCrrIrrrrr'r'r(rBs         z&HTMLTokenizer.selfClosingStartTagStatecCsD|jd}|dd}|jtd|d|j|j|_dS)Nrrrr:Commentr*T) rrwreplacer-rCrrBrrrr'r'r(rTs    zHTMLTokenizer.bogusCommentStatecCs|jg}|ddkrR||j|ddkrPtddd|_|j|_dSn|ddvrd}dD](}||j|d|vrfd }qqf|rtd ddddd |_|j|_dSn|dd krD|jdurD|jj j rD|jj j dj |jj j krDd}d D].}||j|d|krd }q2q|rD|j |_dS|jtddd|rt|j|qZ|j|_dS)NrTrrr5r*T)dD))oOrNCtTyYpPeEFDoctype)r+rhpublicIdsystemIdcorrect[)rrArrrr)zexpected-dashes-or-doctype)rrBrCrr commentStartStater doctypeStatertree openElements namespacedefaultNamespacecdataSectionStater-rIr/r)r#rMmatchedexpectedr'r'r(rcsZ       z(HTMLTokenizer.markupDeclarationOpenStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nP|t ur|jtdd d|j|j|j|_n|jd|7<|j |_d S) Nrrrr)rsr*r,r:rincorrect-commenteof-in-commentT) rrBcommentStartDashStaterr-rCrr rr commentStaterr'r'r(rs.       zHTMLTokenizer.commentStartStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nT|t ur|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrrr)rsr*r,-�rrrT) rrBcommentEndStaterr-rCrr rrrrr'r'r(rs.       z#HTMLTokenizer.commentStartDashStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<nT|tur|jtddd|j|j|j |_n|jd||j d 7<d S) Nrrrr)rsr*r,r:r)rrrT) rrBcommentEndDashStaterr-rCrr rrrwrr'r'r(rs$       zHTMLTokenizer.commentStatecCs|j}|dkr|j|_n|dkrV|jtddd|jdd7<|j|_nT|t ur|jtddd|j|j|j |_n|jdd|7<|j|_d S) Nrrrr)rsr*r,rzeof-in-comment-end-dashT) rrBrrr-rCrr rrrrr'r'r(rs$      z!HTMLTokenizer.commentEndDashStatecCs,|j}|dkr*|j|j|j|_n|dkrd|jtddd|jdd7<|j|_n|dkr|jtdd d|j |_n|d kr|jtdd d|jd|7<nj|t ur|jtdd d|j|j|j|_n4|jtdd d|jdd|7<|j|_dS)Nrrrr)rsr*r,u--�rz,unexpected-bang-after-double-dash-in-commentrz,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T) rrBr-rCr rrrrcommentEndBangStaterrr'r'r(rs@          zHTMLTokenizer.commentEndStatecCs|j}|dkr*|j|j|j|_n|dkrN|jdd7<|j|_n|dkr|jtddd|jdd 7<|j |_nT|t ur|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrr,z--!rrr)rsr*u--!�zeof-in-comment-end-bang-stateT) rrBr-rCr rrrrrrrr'r'r(rs,       z!HTMLTokenizer.commentEndBangStatecCs|j}|tvr|j|_nj|tur\|jtdddd|j d<|j|j |j |_n*|jtddd|j ||j|_dS)Nr)!expected-doctype-name-but-got-eofr*Frzneed-space-after-doctypeT) rrBr beforeDoctypeNameStaterrr-rCrr rrIrr'r'r(rs        zHTMLTokenizer.doctypeStatecCs|j}|tvrn|dkrT|jtdddd|jd<|j|j|j|_n|dkr|jtdddd |jd <|j |_nR|t ur|jtdd dd|jd<|j|j|j|_n||jd <|j |_d S) Nrr)z+expected-doctype-name-but-got-right-bracketr*Frrrrsr:rhrT) rrBr r-rCrr rrdoctypeNameStaterrr'r'r(r*s4           z$HTMLTokenizer.beforeDoctypeNameStatecCs|j}|tvr2|jdt|jd<|j|_n|dkrh|jdt|jd<|j |j|j |_n|dkr|j t ddd|jdd7<|j |_nh|t ur|j t dddd |jd <|jdt|jd<|j |j|j |_n|jd|7<d S) Nrhrrrr)rsr*r:zeof-in-doctype-nameFrT)rrBr r rlr afterDoctypeNameStaterr-rCrrrrrr'r'r(rDs0        zHTMLTokenizer.doctypeNameStatecCsH|j}|tvrn.|dkr8|j|j|j|_n |turd|jd<|j ||jt ddd|j|j|j|_n|dvrd}d D]}|j}||vrd}qq|r|j |_dSnD|d vr d}d D]}|j}||vrd}qq|r |j |_dS|j ||jt dd d |idd|jd<|j |_dS)NrFrr)eof-in-doctyper*rT))uU)bB)lL)iIrsS)rrrr)mMz*expected-space-or-right-bracket-in-doctyper,r7)rrBr r-rCr rrrrIrafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r#r,rrr'r'r(r]sT            z#HTMLTokenizer.afterDoctypeNameStatecCs|j}|tvr|j|_n|dvrP|jtddd|j||j|_nT|t ur|jtdddd|j d<|j|j |j |_n|j||j|_dS N)rrr)unexpected-char-in-doctyper*rFrT) rrBr "beforeDoctypePublicIdentifierStaterr-rCrrIrr rrr'r'r(rs&         z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|j}|tvrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t ur|jt dd dd |jd <|j|j|j |_n(|jt dd dd |jd <|j |_d S)Nrr5rrrr)unexpected-end-of-doctyper*FrrrT) rrBr r (doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater-rCrrrrrr'r'r(rs:             z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrT rrB!afterDoctypePublicIdentifierStaterr-rCrr rrrr'r'r(rs0         z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrTr rr'r'r(rs0         z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs |j}|tvr|j|_n|dkr<|j|j|j|_n|dkrn|jt dddd|jd<|j |_n|dkr|jt dddd|jd<|j |_nh|t ur|jt dd dd |jd <|j|j|j|_n(|jt dddd |jd <|j |_d S) Nrrr)rr*r5rrrFrT)rrBr -betweenDoctypePublicAndSystemIdentifiersStaterr-rCr rr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterrrr'r'r(r s>              z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|j}|tvrn|dkr4|j|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j |_nh|t kr|jt dddd |jd <|j|j|j|_n(|jt dd dd |jd <|j |_d S) Nrrr5rrr)rr*FrrT) rrBr r-rCr rrr r rrrrr'r'r(r s2           z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|j}|tvr|j|_n|dvrP|jtddd|j||j|_nT|t ur|jtdddd|j d<|j|j |j |_n|j||j|_dSr) rrBr "beforeDoctypeSystemIdentifierStaterr-rCrrIrr rrr'r'r(r)s&         z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|j}|tvrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t ur|jt dd dd |jd <|j|j|j |_n(|jt dddd |jd <|j |_d S) Nrr5rrrr)rr*FrrT) rrBr r r rr r-rCrrrrrr'r'r(r=s:             z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrT rrB!afterDoctypeSystemIdentifierStaterr-rCrr rrrr'r'r(r Zs0         z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrTrrr'r'r(r rs0         z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|j}|tvrn~|dkr4|j|j|j|_n^|turt|jt dddd|jd<|j|j|j|_n|jt ddd|j |_dS) Nrr)rr*FrrT) rrBr r-rCr rrrrrrr'r'r(rs$      z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|j}|dkr*|j|j|j|_n,|turV|j||j|j|j|_ndS)NrT) rrBr-rCr rrrrIrr'r'r(rs    zHTMLTokenizer.bogusDoctypeStatecCsg}||jd||jd|j}|tkr>qq|dksJJ|ddddkrv|ddd|d<qq||qd|}|d}|dkrt|D]}|jt d d d q| dd }|r|jt d |d |j |_ dS)N]rrTz]]r5rrrr)rsr*r:rYT) rCrrwrBrrEcountranger-rrrr)r#r,rB nullCountrr'r'r(rs2          zHTMLTokenizer.cdataSectionState)N)NF)N__name__ __module__ __qualname____doc__r"r2rPrfrgrqrrur}r{rrrrvrrr|rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr r rrr r rrr __classcell__r'r'r%r(rs H P#         6 "-3rN) __future__rrrZpip._vendor.sixrrG collectionsrrsysr constantsr r r r rrrrrr _inputstreamr_trierrZdictrmobjectrr'r'r'r(s