B 'Y'@sTdZdddgZddlZddlmZmZy0ddlmZmZm Z m Z m Z m Z m Z e e fZWn8ek rddlmZmZm Z m Z m Z m Z e ZYnXdd dZdd dZdd dZd d ZedejjZGdddZddZddZyddlmZWn"ek rddlmZYnXedjZye Wne!k rFe"Z YnXddZ#dS)z5External interface to the BeautifulSoup HTML parser. fromstringparse convert_treeN)etreehtml) BeautifulSoupTagCommentProcessingInstructionNavigableString DeclarationDoctype)rrr r r r cKst|||f|S)aParse a string of HTML data into an Element tree using the BeautifulSoup parser. Returns the root ```` Element of the tree. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. )_parse)data beautifulsoup makeelementbsargsrG/opt/alt/python37/lib64/python3.7/site-packages/lxml/html/soupparser.pyrs cKs,t|dst|}t|||f|}t|S)aYParse a file into an ElemenTree using the BeautifulSoup parser. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. read)hasattropenrrZ ElementTree)filerrrrootrrrr$s cCs.t||}|}x|D]}||qW|S)aConvert a BeautifulSoup tree to a list of Element trees. Returns a list instead of a single root Element to support HTML-like soup with more than one root element. You can pass a different Element factory through the `makeelement` keyword. ) _convert_treeZ getchildrenremove)beautiful_soup_treerrZchildrenchildrrrr3s  cKs|dkr t}t|dr&d|kr&d|d<t|dr@d|kr@d|d<||f|}t||}t|dkrx|djdkrx|dSd|_|S) NZ HTML_ENTITIESZconvertEntitiesrZDEFAULT_BUILDER_FEATURESZfeaturesz html.parserr)rrrlentag)sourcerrrZtreerrrrrEs    rz`(?:\s|[|\}}| j}|o|dd|_|o|dd|_| S)Nrr)rZ html_parserr enumerate isinstancerr#lower_DECLARATION_OR_DOCTYPEr%indexr"_init_node_convertersreversedZ addpreviousZaddnextZ output_readyAttributeErrorstring_parse_doctype_declarationgroupsZ getroottreedocinfoZ public_idZ system_url)rrZfirst_element_idxZlast_element_idxZ html_rootZ declarationieZpre_rootZ post_rootroots convert_nodeZres_rootprevZ convertedZdoctype_stringmatchZ external_idZsys_urir8rrrrisd   &            rcsigfdd}fdddfdd ddd d |ttfd d }|td d}|tdd}|tfdd}S)Ncsfdd}|S)Ncs$xD]}||<|qW|S)N)append)handlert) convertersordered_node_typestypesrradds z5_init_node_converters..converter..addr)rDrE)rBrC)rDr convertersz(_init_node_converters..convertercs$xD]}t||r|SqWdS)N)r.)ZnoderA)rBrCrrfind_best_converters   z2_init_node_converters..find_best_convertercsPyt|}Wn(tk r8|}t|<YnX|dkrFdS|||S)N)typeKeyError)bs_nodeparentr@)rBrGrrr<sz+_init_node_converters..convert_nodecSs\t|trFi}xH|D](\}}t|tr4d|}t|||<qWntdd|D}|S)N css|]\}}|t|fVqdS)N)unescape).0kvrrr sz;_init_node_converters..map_attrs..)r.dictitemslistjoinrM)Zbs_attrsattribsrOrPrrr map_attrss   z(_init_node_converters..map_attrscSs:t|dkr|jpd||_n|djp*d||d_dS)Nrr,)rtexttail)rKrYrrr append_texts z*_init_node_converters..append_textc s|j}|dk r2|r|nd}tj||j|d}n|r>|ni}|j|d}xP|D]H}yt|}Wntk r~YnX|dk rV|||qV||qVW|S)N)Zattrib)r$rZ SubElementr#rHrI)rJrKr$rVresrr@)r<rBrrWrr convert_tags   z*_init_node_converters..convert_tagcSs t|}|dk r|||S)N)rZ HtmlCommentr?)rJrKr\rrrconvert_comments  z._init_node_converters..convert_commentcSs>|dr|dd}tj|dd}|dk r:|||S)N?r,rLr)endswithrr splitr?)rJrKr\rrr convert_pi s    z)_init_node_converters..convert_pics|dk r|t|dS)N)rM)rJrK)r[rr convert_textsz+_init_node_converters..convert_text)N)rr"r r r )rrFr]r^rbrcr)r[r<rBrGrrWrCrr2s   r2)name2codepointz&(\w+);cCs|sdSdd}t||S)NrXcSs2ytt|dStk r,|dSXdS)Nrr)unichrrdgrouprI)mrrrunescape_entity5sz!unescape..unescape_entity)handle_entities)r5rhrrrrM1srM)NN)NN)N)$__doc____all__reZlxmlrrZbs4rrr r r r r r0 ImportErrorrrrrcompile IGNORECASEr>r6r"rr2Z html.entitiesrdZhtmlentitydefssubrire NameErrorchrrMrrrrs: $        Uc