B o«µ] iã@sŠdZddlmZddlZddlZyddlmZddlmZWn$e k r`ddl mZmZYnXddl m Z ddl mZdd l mZmZdd l mZmZyeWnek rºeZYnXyeWnek rÜeZYnXyeWnek reefZYnXd d d ddddgZe dejejB¡Ze dej¡Ze dej¡j Z!e dej¡j Z"dd„Z#e d¡j$Z%e dejejB¡Z&e  'd¡Z(e j'ddeidZ)Gdd „d e*ƒZ+e+ƒZ,e,j-Z-e dej¡e d ej¡gZ.d!d"d#d$d%d&gZ/e d'ej¡e d(ej¡e d)¡gZ0d*gZ1e.e/e0e1fd+d„Z2d,d-„Z3d.d„Z4e2je4_d"d!d#gZ5d/gZ6d0e5e6ed1ƒfd2d„Z7d3d„Z8d4d5„Z9e d6ej¡Z:d7d8„Z;dS)9zcA cleanup tool for HTML. Removes unwanted tags and content. See the `Cleaner` class for details. é)Úabsolute_importN)Úurlsplit)Ú unquote_plus)rr)Úetree)Údefs)Ú fromstringÚXHTML_NAMESPACE)Ú xhtml_to_htmlÚ_transform_resultÚ clean_htmlÚcleanÚCleanerÚautolinkÚ autolink_htmlÚ word_breakÚword_break_htmlzexpression\s*\(.*?\)z @\s*importz^data:image/.+;base64z<(?:javascript|jscript|livescript|vbscript|data|about|mocha):cCst|ƒr dSt|ƒS)N)Ú_is_image_dataurlÚ_is_possibly_malicious_scheme)Ús©rúB/opt/alt/python37/lib64/python3.7/site-packages/lxml/html/clean.pyÚ_is_javascript_schemeNsrz[\s\x00-\x08\x0B\x0C\x0E-\x19]+z\[if[\s\n\r]+.*?][\s\n\r]*>zdescendant-or-self::*[@style]zÂdescendant-or-self::a [normalize-space(@href) and substring(normalize-space(@href),1,1) != '#'] |descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']Úx)Z namespacesc @sêeZdZdZdZdZdZdZdZdZ dZ dZ dZ dZ dZdZdZdZdZdZdZdZejZdZdZddhZdd „Zed d d d gd d d d dZdd„Zdd„Zdd„Z dd„Z!dd„Z"d"dd„Z#dd„Z$e% &de%j'¡j(Z)dd„Z*d d!„Z+dS)#r a Instances cleans the document of each of the possible offending elements. The cleaning is controlled by attributes; you can override attributes in a subclass, or set them in the constructor. ``scripts``: Removes any ``