t_a~ @sddlmZmZmZddlmZddlmZmZddl Z ddl Z ddl m Z m Z ddlmZddlmZmZmZmZdd lmZdd lmZed d eDZed d eDZedd eDZeeddgBZdZejredEdkrWejddks]t e j!eddFe"ddZ#ne j!eZ#ddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3h Z$e j!d4Z%iZ&Gd5d6d6e'Z(d7d8Z)Gd9d:d:e'Z*Gd;d<d<e*Z+Gd=d>d>e,Z-Gd?d@d@e'Z.GdAdBdBe'Z/dCdDZ0dS)G)absolute_importdivisionunicode_literals) text_type) http_clienturllibN)BytesIOStringIO) webencodings)EOFspaceCharacters asciiLettersasciiUppercase)_ReparseException)_utilscCsg|]}|jdqS)ascii)encode).0itemr/builddir/build/BUILDROOT/alt-python35-pip-20.2.4-1.el7.x86_64/opt/alt/python35/lib/python3.5/site-packages/pip/_vendor/html5lib/_inputstream.py s rcCsg|]}|jdqS)r)r)rrrrrrs cCsg|]}|jdqS)r)r)rrrrrrs >t|j||krd|t|j|8}|d7}q'W||g|_dS)Nrr )_bufferedBytesAssertionErrorr#rr )r!r$offsetirrrseekFszBufferedStream.seekcCsp|js|j|S|jdt|jkr_|jdt|jdkr_|j|S|j|SdS)Nrr r)r _readStreamr r#_readFromBuffer)r!bytesrrrreadOs     zBufferedStream.readcCstdd|jDS)NcSsg|]}t|qSr)r#)rrrrrrYs z1BufferedStream._bufferedBytes..)sumr)r!rrrr'XszBufferedStream._bufferedBytescCsJ|jj|}|jj||jdd7.z3Cannot set an encoding with a unicode input, set %r) isinstancer HTTPResponserresponseaddbasefphasattrr/r TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargs isUnicode encodingsrrrHTMLInputStream}s  rOc@seZdZdZdZddZddZddZd d Zd d Z d dZ dddZ ddZ ddZ dddZddZdS)rIzProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. i(cCstjsd|_n-tddkr6|j|_n |j|_dg|_tddf|_|j ||_ |j dS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) Nu􏿿r rzutf-8certain) rsupports_lone_surrogatesreportCharacterErrorsr#characterErrorsUCS4characterErrorsUCS2newLineslookupEncoding charEncoding openStream dataStreamreset)r!rKrrrr"s    zHTMLUnicodeInputStream.__init__cCsCd|_d|_d|_g|_d|_d|_d|_dS)Nr)r% chunkSize chunkOffseterrors prevNumLines prevNumCols_bufferedCharacter)r!rrrrZs      zHTMLUnicodeInputStream.resetcCs(t|dr|}n t|}|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r/)rGr )r!rKrrrrrXs  z!HTMLUnicodeInputStream.openStreamcCst|j}|jdd|}|j|}|jdd|}|dkr\|j|}n||d}||fS)N rr r)r%countr_rfindr`)r!r)r%nLines positionLine lastLinePospositionColumnrrr _positions   z HTMLUnicodeInputStream._positioncCs&|j|j\}}|d|fS)z:Returns (line, col) of the current position in the stream.r )rir])r!linecolrrrr szHTMLUnicodeInputStream.positioncCsI|j|jkr"|js"tS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return EOF when EOF is reached. r )r]r\ readChunkr r%)r!r]charrrrrms    zHTMLUnicodeInputStream.charNcCsI|dkr|j}|j|j\|_|_d|_d|_d|_|jj|}|j r|j |}d|_ n |sdSt |dkrt |d }|dksd|kodknr|d |_ |dd}|j r |j ||j dd }|j d d }||_t ||_d S)Nr[rFr iiz rb Trrr)_defaultChunkSizerir\r_r`r%r]rYr/rar#ordrRreplace)r!r\r2lastvrrrrls0        (    z HTMLUnicodeInputStream.readChunkcCs:x3tttj|D]}|jjdqWdS)Nzinvalid-codepoint)ranger#invalid_unicode_refindallr^r1)r!r2_rrrrSs"z*HTMLUnicodeInputStream.characterErrorsUCS4cCsd}xtj|D]}|r%qt|j}|j}tj|||drtj|||d}|tkr|j j dd}q|dkr|dkr|t |dkr|j j dqd}|j j dqWdS)NFzinvalid-codepointTiir ) rufinditerrqgroupstartrisSurrogatePairsurrogatePairToCodepointnon_bmp_invalid_codepointsr^r1r#)r!r2skipmatch codepointr$char_valrrrrT#s    z*HTMLUnicodeInputStream.characterErrorsUCS2Fc Cseyt||f}Wntk rx&|D]}t|dks+tq+Wdjdd|D}|syd|}tjd|}t||f.z^%sz[%s]+N)charsUntilRegExKeyErrorrqr(r4recompilerr%r]r\endr1rl) r! charactersoppositecharsrregexr6mrrrrr charsUntil:s2   &    z!HTMLUnicodeInputStream.charsUntilcCsl|tk rh|jdkr=||j|_|jd7_n+|jd8_|j|j|kshtdS)Nrr )r r]r%r\r()r!rmrrrungetis  zHTMLUnicodeInputStream.unget)r;r<r=r>rpr"rZrXrir rmrlrSrTrrrrrrrIs     &  /rIc@seZdZdZddddddddZddZd d Zdd d Zd dZddZ ddZ dS)rJzProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. Nz windows-1252TcCs|j||_tj||jd|_d|_||_||_||_||_ ||_ |j ||_ |j ddk st |jdS)aInitialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) idrN)rX rawStreamrIr" numBytesMetanumBytesChardetoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermineEncodingrWr(rZ)r!rKrrrrr useChardetrrrr"s       zHTMLBinaryInputStream.__init__cCs3|jdjj|jd|_tj|dS)Nrrr)rW codec_info streamreaderrrYrIrZ)r!rrrrZs"zHTMLBinaryInputStream.resetc Cs`t|dr|}n t|}y|j|jWntk r[t|}YnX|S)zvProduces a file object from source. source can be either a file object, local filename or a string. r/)rGrr+r& Exceptionr)r!rKrrrrrXs   z HTMLBinaryInputStream.openStreamc Cs|jdf}|ddk r&|St|jdf}|ddk rO|St|jdf}|ddk rx|S|jdf}|ddk r|St|jdf}|ddk r|djjd r|St|jdf}|ddk r|S|ryddl m }Wnt k r5YnXg}|}xX|j s|j j|j}t|ts{t|sP|j||j|qHW|jt|jd}|j jd|dk r|dfSt|jdf}|ddk r |StddfS)NrPr tentativezutf-16)UniversalDetectorencodingz windows-1252) detectBOMrVrrdetectEncodingMetarname startswithr%pip._vendor.chardet.universaldetectorr ImportErrordonerr/rrBr.r(r1feedcloseresultr+r)r!chardetrWrbuffersdetectorrrrrrrsR'       z'HTMLBinaryInputStream.determineEncodingcCs|jddkstt|}|dkr5dS|jdkretd}|dk stnr||jdkr|jddf|_nF|jjd|df|_|jtd|jd|fdS) Nr rPutf-16beutf-16lezutf-8rzEncoding changed from %s to %s)rr)rWr(rVrrr+rZr)r! newEncodingrrrchangeEncodings    z$HTMLBinaryInputStream.changeEncodingc Cstjdtjdtjdtjdtjdi}|jjd}t|t sZt |j |dd}d}|s|j |}d}|s|j |dd }d }|r|jj |t |S|jj d dSdS) zAttempts to detect at BOM at the start of the stream. If an encoding can be determined from the BOM return the name of the encoding otherwise return Nonezutf-8zutf-16lezutf-16bezutf-32lezutf-32beNrxr)codecsBOM_UTF8 BOM_UTF16_LE BOM_UTF16_BE BOM_UTF32_LE BOM_UTF32_BErr/rBr.r(getr+rV)r!bomDictstringrr+rrrrs$  zHTMLBinaryInputStream.detectBOMcCs}|jj|j}t|ts*tt|}|jjd|j}|dk ry|j dkryt d}|S)z9Report the encoding declared by the meta element rNutf-16beutf-16lezutf-8)rr) rr/rrBr.r(EncodingParserr+ getEncodingrrV)r!rparserrrrrr3s   z(HTMLBinaryInputStream.detectEncodingMeta) r;r<r=r>r"rZrXrrrrrrrrrJzs (  >  "rJc@seZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ e e e Z ddZe eZeddZddZddZddZdS) EncodingByteszString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcCs+t|tsttj||jS)N)rBr.r(__new__lower)r!valuerrrrFszEncodingBytes.__new__cCs d|_dS)Nr r)ri)r!rrrrr"JszEncodingBytes.__init__cCs|S)Nr)r!rrr__iter__NszEncodingBytes.__iter__cCsS|jd}|_|t|kr/tn|dkrAt|||dS)Nr r)rir# StopIterationrH)r!prrr__next__Qs   zEncodingBytes.__next__cCs |jS)N)r)r!rrrnextYszEncodingBytes.nextcCsY|j}|t|kr$tn|dkr6t|d|_}|||dS)Nrr )rir#rrH)r!rrrrprevious]s   zEncodingBytes.previouscCs(|jt|krt||_dS)N)rir#r)r!r rrr setPositionfszEncodingBytes.setPositioncCs9|jt|krt|jdkr1|jSdSdS)Nr)rir#r)r!rrr getPositionks zEncodingBytes.getPositioncCs||j|jdS)Nr )r )r!rrrgetCurrentByteuszEncodingBytes.getCurrentBytecCsf|j}xM|t|krX|||d}||krK||_|S|d7}q W||_dS)zSkip past a list of charactersr N)r r#ri)r!rrrrrrrzs    zEncodingBytes.skipcCsf|j}xM|t|krX|||d}||krK||_|S|d7}q W||_dS)Nr )r r#ri)r!rrrrrr skipUntils    zEncodingBytes.skipUntilcCs4|j||j}|r0|jt|7_|S)zLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone)rr r#)r!r.r6rrr matchBytesszEncodingBytes.matchBytesc CsIy*|j||jt|d|_Wntk rDtYnXdS)zLook for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the matchr T)indexr r#ri ValueErrorr)r!r.rrrjumpTos *  zEncodingBytes.jumpToN)r;r<r=r>rr"rrrrrrpropertyr r currentBytespaceCharactersBytesrrrrrrrrrBs           rc@seZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS)rz?Mini parser for detecting character encoding from meta elementscCst||_d|_dS)z3string - the data to work on for encoding detectionN)rr2r)r!r2rrrr"szEncodingParser.__init__cCsd|jkrdSd|jfd|jfd|jfd|jfd|jfd|jff}x|jD]}d}y|jjdWntk rPYnXxP|D]H\}}|jj|ry|}PWqtk rd}PYqXqW|skPqkW|j S) Ns)r2r)r!rrrrszEncodingParser.handleCommentcCs<|jjtkrdSd}d}x|j}|dkrAdS|ddkr|ddk}|r4|dk r4||_dSq%|ddkr|d}t|}|dk r4||_dSq%|ddkr%tt|d}|j}|dk r%t|}|dk r%|r.||_dS|}q%WdS) NTFrs http-equivr s content-typescharsetscontent) r2rr getAttributerrVContentAttrParserrparse)r! hasPragmapendingEncodingattrtentativeEncodingcodec contentParserrrrrs:            zEncodingParser.handleMetacCs |jdS)NF)handlePossibleTag)r!rrrrsz%EncodingParser.handlePossibleStartTagcCst|j|jdS)NT)rr2r)r!rrrrs z#EncodingParser.handlePossibleEndTagcCs|j}|jtkr6|r2|j|jdS|jt}|dkr^|jn+|j}x|dk r|j}qmWdS)NTr)r2rasciiLettersBytesrrrspacesAngleBracketsr)r!endTagr2rrrrrrs      z EncodingParser.handlePossibleTagcCs|jjdS)Nr)r2r)r!rrrrszEncodingParser.handleOthercCs|j}|jttdgB}|dksIt|dksIt|d krYdSg}g}x|dkr~|r~Pnz|tkr|j}Pn^|d krdj|dfS|tkr|j|j n|dkrdS|j|t |}qhW|dkr1|j dj|dfSt ||j}|d kr|}xt |}||krt |dj|dj|fS|tkr|j|j q\|j|q\Wn^|dkrdj|dfS|tkr|j|j n|dkr!dS|j|xwt |}|t kredj|dj|fS|tkr|j|j q1|dkrdS|j|q1WdS) z_Return a name,value pair for the next attribute in the stream, if one is found, or None/Nr r=r3'")rN)rr)rr) r2rr frozensetr#r(r4asciiUppercaseBytesr1rrrr)r!r2rattrName attrValue quoteCharrrrrsh $                        zEncodingParser.getAttributeN) r;r<r=r>r"rrrrrrrrrrrrrs     $    rc@s(eZdZddZddZdS)rcCs"t|tst||_dS)N)rBr.r(r2)r!r2rrrr"aszContentAttrParser.__init__cCsNy1|jjd|jjd7_|jj|jjdksHdS|jjd7_|jj|jjdkr|jj}|jjd7_|jj}|jj|r|j||jjSdSn]|jj}y+|jjt|j||jjSWn#tk r/|j|dSYnXWntk rIdSYnXdS)Nscharsetr rrr)rr)r2rr rrrrr)r! quoteMark oldPositionrrrres.       zContentAttrParser.parseN)r;r<r=r"rrrrrr`s  rcCs|t|tr;y|jd}Wntk r:dSYnX|dk rtytj|SWqxtk rpdSYqxXndSdS)z{Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.rN)rBr.decodeUnicodeDecodeErrorr lookupAttributeError)rrrrrVs     rVrr)1 __future__rrrZpip._vendor.sixrpip._vendor.six.movesrrrriorr pip._vendorr constantsr r rrrr[rrrrrrinvalid_unicode_no_surrogaterQrcr(revalrur~ascii_punctuation_rerobjectrrOrIrJr.rrrrVrrrrsJ  " +  J b'