1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os.path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing, contextmanager
24
25 from .common_imports import etree, StringIO, BytesIO, HelperTestCase
26 from .common_imports import fileInTestDir, fileUrlInTestDir, read_file, path2url
27 from .common_imports import SillyFileLike, LargeFileLikeUnicode, doctest, make_doctest
28 from .common_imports import canonicalize, _str, _bytes
29
# Version banner: emitted when this module is loaded so that the test log
# records which lxml / libxml / libxslt versions are being exercised.
print("")
print("TESTED VERSION: %s" % etree.__version__)
# Each value is wrapped in a 1-tuple so that %r formats the object as a
# whole; a tuple value such as sys.version_info would otherwise be
# unpacked as the format arguments.
print(" Python: %r" % (sys.version_info,))
print(" lxml.etree: %r" % (etree.LXML_VERSION,))
print(" libxml used: %r" % (etree.LIBXML_VERSION,))
print(" libxml compiled: %r" % (etree.LIBXML_COMPILED_VERSION,))
print(" libxslt used: %r" % (etree.LIBXSLT_VERSION,))
print(" libxslt compiled: %r" % (etree.LIBXSLT_COMPILED_VERSION,))
print("")
39
40 try:
41 _unicode = unicode
42 except NameError:
43
44 _unicode = str
45
46
47 @contextmanager
48 -def tmpfile():
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self.etree.Element
98 el = Element('name')
99 self.assertRaises(ValueError, Element, '{}')
100 self.assertRaises(ValueError, setattr, el, 'tag', '{}')
101
102 self.assertRaises(ValueError, Element, '{test}')
103 self.assertRaises(ValueError, setattr, el, 'tag', '{test}')
104
106 Element = self.etree.Element
107 self.assertRaises(ValueError, Element, 'p:name')
108 self.assertRaises(ValueError, Element, '{test}p:name')
109
110 el = Element('name')
111 self.assertRaises(ValueError, setattr, el, 'tag', 'p:name')
112
114 Element = self.etree.Element
115 self.assertRaises(ValueError, Element, "p'name")
116 self.assertRaises(ValueError, Element, 'p"name')
117
118 self.assertRaises(ValueError, Element, "{test}p'name")
119 self.assertRaises(ValueError, Element, '{test}p"name')
120
121 el = Element('name')
122 self.assertRaises(ValueError, setattr, el, 'tag', "p'name")
123 self.assertRaises(ValueError, setattr, el, 'tag', 'p"name')
124
126 Element = self.etree.Element
127 self.assertRaises(ValueError, Element, ' name ')
128 self.assertRaises(ValueError, Element, 'na me')
129 self.assertRaises(ValueError, Element, '{test} name')
130
131 el = Element('name')
132 self.assertRaises(ValueError, setattr, el, 'tag', ' name ')
133
141
149
151 Element = self.etree.Element
152 SubElement = self.etree.SubElement
153
154 el = Element('name')
155 self.assertRaises(ValueError, SubElement, el, "p'name")
156 self.assertRaises(ValueError, SubElement, el, "{test}p'name")
157
158 self.assertRaises(ValueError, SubElement, el, 'p"name')
159 self.assertRaises(ValueError, SubElement, el, '{test}p"name')
160
169
178
180 QName = self.etree.QName
181 self.assertRaises(ValueError, QName, '')
182 self.assertRaises(ValueError, QName, None)
183 self.assertRaises(ValueError, QName, None, None)
184 self.assertRaises(ValueError, QName, 'test', '')
185
192
194 QName = self.etree.QName
195 self.assertRaises(ValueError, QName, 'p:name')
196 self.assertRaises(ValueError, QName, 'test', 'p:name')
197
199 QName = self.etree.QName
200 self.assertRaises(ValueError, QName, ' name ')
201 self.assertRaises(ValueError, QName, 'na me')
202 self.assertRaises(ValueError, QName, 'test', ' name')
203
211
213
214 QName = self.etree.QName
215 qname1 = QName('http://myns', 'a')
216 a = self.etree.Element(qname1, nsmap={'p' : 'http://myns'})
217
218 qname2 = QName(a)
219 self.assertEqual(a.tag, qname1.text)
220 self.assertEqual(a.tag, qname1)
221 self.assertEqual(qname1.text, qname2.text)
222 self.assertEqual(qname1, qname2.text)
223 self.assertEqual(qname1.text, qname2)
224 self.assertEqual(qname1, qname2)
225
227
228 etree = self.etree
229 qname = etree.QName('http://myns', 'a')
230 a = etree.Element(qname, nsmap={'p' : 'http://myns'})
231 a.text = qname
232
233 self.assertEqual("p:a", a.text)
234
243
258
264
274
286
288 Element = self.etree.Element
289
290 keys = ["attr%d" % i for i in range(10)]
291 values = ["TEST-%d" % i for i in range(10)]
292 items = list(zip(keys, values))
293
294 root = Element("root")
295 for key, value in items:
296 root.set(key, value)
297 self.assertEqual(keys, root.attrib.keys())
298 self.assertEqual(values, root.attrib.values())
299
300 root2 = Element("root2", root.attrib,
301 attr_99='TOAST-1', attr_98='TOAST-2')
302 self.assertEqual(['attr_98', 'attr_99'] + keys,
303 root2.attrib.keys())
304 self.assertEqual(['TOAST-2', 'TOAST-1'] + values,
305 root2.attrib.values())
306
307 self.assertEqual(keys, root.attrib.keys())
308 self.assertEqual(values, root.attrib.values())
309
311
312
313 Element = self.etree.Element
314 root = Element("root")
315 self.assertRaises(TypeError, root.set, "newattr", 5)
316 self.assertRaises(TypeError, root.set, "newattr", object)
317 self.assertRaises(TypeError, root.set, "newattr", None)
318 self.assertRaises(TypeError, root.set, "newattr")
319
333
355
357 XML = self.etree.XML
358 xml = _bytes('<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>')
359
360 root = XML(xml)
361 self.etree.strip_elements(root, 'a')
362 self.assertEqual(_bytes('<test><x></x></test>'),
363 self._writeElement(root))
364
365 root = XML(xml)
366 self.etree.strip_elements(root, 'b', 'c', 'X', 'Y', 'Z')
367 self.assertEqual(_bytes('<test><a></a><x><a></a></x></test>'),
368 self._writeElement(root))
369
370 root = XML(xml)
371 self.etree.strip_elements(root, 'c')
372 self.assertEqual(_bytes('<test><a><b></b></a><x><a><b></b></a></x></test>'),
373 self._writeElement(root))
374
376 XML = self.etree.XML
377 xml = _bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>')
378
379 root = XML(xml)
380 self.etree.strip_elements(root, 'a')
381 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>'),
382 self._writeElement(root))
383
384 root = XML(xml)
385 self.etree.strip_elements(root, '{urn:a}b', 'c')
386 self.assertEqual(_bytes('<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
387 self._writeElement(root))
388
389 root = XML(xml)
390 self.etree.strip_elements(root, '{urn:a}*', 'c')
391 self.assertEqual(_bytes('<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
392 self._writeElement(root))
393
394 root = XML(xml)
395 self.etree.strip_elements(root, '{urn:a}*', 'c', with_tail=False)
396 self.assertEqual(_bytes('<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>'),
397 self._writeElement(root))
398
417
443
470
497
516
529
540
546
548 XML = self.etree.XML
549 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
550 self.assertEqual(root[0].target, "mypi")
551 self.assertEqual(root[0].get('my'), "1")
552 self.assertEqual(root[0].get('test'), " abc ")
553 self.assertEqual(root[0].get('quotes'), "' '")
554 self.assertEqual(root[0].get('only'), None)
555 self.assertEqual(root[0].get('names'), None)
556 self.assertEqual(root[0].get('nope'), None)
557
559 XML = self.etree.XML
560 root = XML(_bytes("<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>"))
561 self.assertEqual(root[0].target, "mypi")
562 self.assertEqual(root[0].attrib['my'], "1")
563 self.assertEqual(root[0].attrib['test'], " abc ")
564 self.assertEqual(root[0].attrib['quotes'], "' '")
565 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'only')
566 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'names')
567 self.assertRaises(KeyError, root[0].attrib.__getitem__, 'nope')
568
570
571 ProcessingInstruction = self.etree.ProcessingInstruction
572
573 a = ProcessingInstruction("PI", "ONE")
574 b = copy.deepcopy(a)
575 b.text = "ANOTHER"
576
577 self.assertEqual('ONE', a.text)
578 self.assertEqual('ANOTHER', b.text)
579
595
610
621
633
652
657
670
681
682 f = BytesIO('<a><!--A--><b><!-- B --><c/></b><!--C--></a>')
683 events = list(iterparse(f, events=('end', 'comment')))
684 root = events[-1][1]
685 self.assertEqual(6, len(events))
686 self.assertEqual(['A', ' B ', 'c', 'b', 'C', 'a'],
687 [ name(*item) for item in events ])
688 self.assertEqual(
689 _bytes('<a><!--A--><b><!-- B --><c/></b><!--C--></a>'),
690 tostring(root))
691
703
704 f = BytesIO('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>')
705 events = list(iterparse(f, events=('end', 'pi')))
706 root = events[-2][1]
707 self.assertEqual(8, len(events))
708 self.assertEqual([('pia','a'), ('pib','b'), ('pic','c'), 'c', 'b',
709 ('pid','d'), 'a', ('pie','e')],
710 [ name(*item) for item in events ])
711 self.assertEqual(
712 _bytes('<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>'),
713 tostring(ElementTree(root)))
714
729
735
737 iterparse = self.etree.iterparse
738 f = BytesIO('<a><b><c/></a>')
739 it = iterparse(f, events=('start', 'end'), recover=True)
740 events = [(ev, el.tag) for ev, el in it]
741 root = it.root
742 self.assertTrue(root is not None)
743
744 self.assertEqual(1, events.count(('start', 'a')))
745 self.assertEqual(1, events.count(('end', 'a')))
746
747 self.assertEqual(1, events.count(('start', 'b')))
748 self.assertEqual(1, events.count(('end', 'b')))
749
750 self.assertEqual(1, events.count(('start', 'c')))
751 self.assertEqual(1, events.count(('end', 'c')))
752
754 iterparse = self.etree.iterparse
755 f = BytesIO('<a><b><c/></d><b><c/></a></b>')
756 it = iterparse(f, events=('start', 'end'), recover=True)
757 events = [(ev, el.tag) for ev, el in it]
758 root = it.root
759 self.assertTrue(root is not None)
760
761 self.assertEqual(1, events.count(('start', 'a')))
762 self.assertEqual(1, events.count(('end', 'a')))
763
764 self.assertEqual(2, events.count(('start', 'b')))
765 self.assertEqual(2, events.count(('end', 'b')))
766
767 self.assertEqual(2, events.count(('start', 'c')))
768 self.assertEqual(2, events.count(('end', 'c')))
769
771 iterparse = self.etree.iterparse
772 f = BytesIO("""
773 <a> \n \n <b> b test </b> \n
774
775 \n\t <c> \n </c> </a> \n """)
776 iterator = iterparse(f, remove_blank_text=True)
777 text = [ (element.text, element.tail)
778 for event, element in iterator ]
779 self.assertEqual(
780 [(" b test ", None), (" \n ", None), (None, None)],
781 text)
782
784 iterparse = self.etree.iterparse
785 f = BytesIO('<a><b><d/></b><c/></a>')
786
787 iterator = iterparse(f, tag="b", events=('start', 'end'))
788 events = list(iterator)
789 root = iterator.root
790 self.assertEqual(
791 [('start', root[0]), ('end', root[0])],
792 events)
793
795 iterparse = self.etree.iterparse
796 f = BytesIO('<a><b><d/></b><c/></a>')
797
798 iterator = iterparse(f, tag="*", events=('start', 'end'))
799 events = list(iterator)
800 self.assertEqual(
801 8,
802 len(events))
803
805 iterparse = self.etree.iterparse
806 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
807
808 iterator = iterparse(f, tag="{urn:test:1}b", events=('start', 'end'))
809 events = list(iterator)
810 root = iterator.root
811 self.assertEqual(
812 [('start', root[0]), ('end', root[0])],
813 events)
814
816 iterparse = self.etree.iterparse
817 f = BytesIO('<a><b><d/></b><c/></a>')
818 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
819 events = list(iterator)
820 root = iterator.root
821 self.assertEqual(
822 [('start', root[0]), ('end', root[0])],
823 events)
824
825 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
826 iterator = iterparse(f, tag="{}b", events=('start', 'end'))
827 events = list(iterator)
828 root = iterator.root
829 self.assertEqual([], events)
830
832 iterparse = self.etree.iterparse
833 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
834 iterator = iterparse(f, tag="{urn:test:1}*", events=('start', 'end'))
835 events = list(iterator)
836 self.assertEqual(8, len(events))
837
839 iterparse = self.etree.iterparse
840 f = BytesIO('<a xmlns="urn:test:1"><b><d/></b><c/></a>')
841 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
842 events = list(iterator)
843 self.assertEqual([], events)
844
845 f = BytesIO('<a><b><d/></b><c/></a>')
846 iterator = iterparse(f, tag="{}*", events=('start', 'end'))
847 events = list(iterator)
848 self.assertEqual(8, len(events))
849
851 text = _str('Søk på nettet')
852 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
853 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
854 ).encode('iso-8859-1')
855
856 self.assertRaises(self.etree.ParseError,
857 list, self.etree.iterparse(BytesIO(xml_latin1)))
858
860 text = _str('Søk på nettet', encoding="UTF-8")
861 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
862 xml_latin1 = (_str('%s<a>%s</a>') % (wrong_declaration, text)
863 ).encode('iso-8859-1')
864
865 iterator = self.etree.iterparse(BytesIO(xml_latin1),
866 encoding="iso-8859-1")
867 self.assertEqual(1, len(list(iterator)))
868
869 a = iterator.root
870 self.assertEqual(a.text, text)
871
873 tostring = self.etree.tostring
874 f = BytesIO('<root><![CDATA[test]]></root>')
875 context = self.etree.iterparse(f, strip_cdata=False)
876 content = [ el.text for event,el in context ]
877
878 self.assertEqual(['test'], content)
879 self.assertEqual(_bytes('<root><![CDATA[test]]></root>'),
880 tostring(context.root))
881
885
890
909
910
911
934
935
936
938 assertEqual = self.assertEqual
939 assertFalse = self.assertFalse
940
941 events = []
class Target(object):
    """Parser target that logs element events and checks each tag.

    ``events``, ``assertFalse`` and ``assertEqual`` are taken from the
    enclosing scope.
    """

    def start(self, tag, attrib):
        """Log "start", then require no attributes and the tag "TAG"."""
        events.append("start")
        assertFalse(attrib)
        assertEqual("TAG", tag)

    def end(self, tag):
        """Log "end", then require the tag "TAG"."""
        events.append("end")
        assertEqual("TAG", tag)

    def close(self):
        """Return the marker string that stands in for a parse result."""
        return "DONE"
952
953 parser = self.etree.XMLParser(target=Target())
954 tree = self.etree.ElementTree()
955
956 self.assertRaises(TypeError,
957 tree.parse, BytesIO("<TAG/>"), parser=parser)
958 self.assertEqual(["start", "end"], events)
959
961
962 events = []
class Target(object):
    """Parser target that logs every callback and fails on ``</a>``.

    All callbacks append to the ``events`` list of the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log the opening of ``tag``; attributes are ignored."""
        events.append("start-" + tag)

    def data(self, data):
        """Log a chunk of character data."""
        events.append("data-" + data)

    def end(self, tag):
        """Log the closing of ``tag``; raise ValueError when it is 'a'."""
        # The event is recorded before raising, so the log still shows
        # the end tag that triggered the failure.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def close(self):
        """Log "close" and return the marker string "DONE"."""
        events.append("close")
        return "DONE"
975
976 parser = self.etree.XMLParser(target=Target())
977
978 try:
979 parser.feed(_bytes('<root>A<a>ca</a>B</root>'))
980 done = parser.close()
981 self.fail("error expected, but parsing succeeded")
982 except ValueError:
983 done = 'value error received as expected'
984
985 self.assertEqual(["start-root", "data-A", "start-a",
986 "data-ca", "end-a", "close"],
987 events)
988
990
991 events = []
class Target(object):
    """Event-logging parser target whose ``end`` rejects the tag 'a'.

    Every callback appends to the ``events`` list of the enclosing scope.
    """

    def start(self, tag, attrib):
        """Record a start event for ``tag``; attributes are not used."""
        events.append("start-" + tag)

    def data(self, data):
        """Record a text event."""
        events.append("data-" + data)

    def end(self, tag):
        """Record an end event, then raise ValueError for the tag 'a'."""
        # Append first: the failing end tag must still appear in the log.
        events.append("end-" + tag)
        if tag == 'a':
            raise ValueError("dead and gone")

    def close(self):
        """Record "close" and hand back the marker string "DONE"."""
        events.append("close")
        return "DONE"
1004
1005 parser = self.etree.XMLParser(target=Target())
1006
1007 try:
1008 done = self.etree.fromstring(_bytes('<root>A<a>ca</a>B</root>'),
1009 parser=parser)
1010 self.fail("error expected, but parsing succeeded")
1011 except ValueError:
1012 done = 'value error received as expected'
1013
1014 self.assertEqual(["start-root", "data-A", "start-a",
1015 "data-ca", "end-a", "close"],
1016 events)
1017
1019
1020 events = []
class Target(object):
    """Parser target that logs element, text and comment callbacks.

    Each callback appends a prefixed string to the ``events`` list of the
    enclosing scope; ``close`` logs nothing.
    """

    def start(self, tag, attrib):
        """Log the start of ``tag``; attributes are ignored."""
        events.append("start-" + tag)

    def data(self, data):
        """Log a chunk of character data."""
        events.append("data-" + data)

    def comment(self, text):
        """Log the text of a comment."""
        events.append("comment-" + text)

    def end(self, tag):
        """Log the end of ``tag``."""
        events.append("end-" + tag)

    def close(self):
        """Return the marker string "DONE" without logging."""
        return "DONE"
1032
1033 parser = self.etree.XMLParser(target=Target(), collect_ids=False)
1034
1035 parser.feed(_bytes('<!--a--><root xml:id="123">A<!--b-->'))
1036 parser.feed(_bytes('<sub xml:id="321"/>B</root>'))
1037 done = parser.close()
1038
1039 self.assertEqual("DONE", done)
1040 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1041 "start-sub", "end-sub", "data-B", "end-root"],
1042 events)
1043
1049 def end(self, tag):
1050 events.append("end-" + tag)
1051 def data(self, data):
1052 events.append("data-" + data)
1053 def comment(self, text):
1054 events.append("comment-" + text)
1055 def close(self):
1056 return "DONE"
1057
1058 parser = self.etree.XMLParser(target=Target())
1059
1060 parser.feed(_bytes('<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->'))
1061 done = parser.close()
1062
1063 self.assertEqual("DONE", done)
1064 self.assertEqual(["comment-a", "start-root", "data-A", "comment-b",
1065 "start-sub", "end-sub", "comment-c", "data-B",
1066 "end-root", "comment-d"],
1067 events)
1068
1070 events = []
class Target(object):
    """Parser target that logs element, text and processing-instruction
    callbacks.

    Each callback appends a prefixed string to the ``events`` list of the
    enclosing scope; ``close`` logs nothing.
    """

    def start(self, tag, attrib):
        """Log the start of ``tag``; attributes are ignored."""
        events.append("start-" + tag)

    def data(self, data):
        """Log a chunk of character data."""
        events.append("data-" + data)

    def pi(self, target, data):
        """Log a processing instruction as "pi-<target>-<data>"."""
        events.append("pi-" + target + "-" + data)

    def end(self, tag):
        """Log the end of ``tag``."""
        events.append("end-" + tag)

    def close(self):
        """Return the marker string "DONE" without logging."""
        return "DONE"
1082
1083 parser = self.etree.XMLParser(target=Target())
1084
1085 parser.feed(_bytes('<?test a?><root>A<?test b?>B</root><?test c?>'))
1086 done = parser.close()
1087
1088 self.assertEqual("DONE", done)
1089 self.assertEqual(["pi-test-a", "start-root", "data-A", "pi-test-b",
1090 "data-B", "end-root", "pi-test-c"],
1091 events)
1092
1094 events = []
class Target(object):
    """Parser target that logs start, end and text callbacks.

    Each callback appends a prefixed string to the ``events`` list of the
    enclosing scope; ``close`` logs nothing.
    """

    def start(self, tag, attrib):
        """Log the start of ``tag``; attributes are ignored."""
        events.append("start-" + tag)

    def data(self, data):
        """Log a chunk of character data."""
        events.append("data-" + data)

    def end(self, tag):
        """Log the end of ``tag``."""
        events.append("end-" + tag)

    def close(self):
        """Return the marker string "DONE" without logging."""
        return "DONE"
1104
1105 parser = self.etree.XMLParser(target=Target(),
1106 strip_cdata=False)
1107
1108 parser.feed(_bytes('<root>A<a><![CDATA[ca]]></a>B</root>'))
1109 done = parser.close()
1110
1111 self.assertEqual("DONE", done)
1112 self.assertEqual(["start-root", "data-A", "start-a",
1113 "data-ca", "end-a", "data-B", "end-root"],
1114 events)
1115
1117 events = []
class Target(object):
    """Parser target that logs start, end, text and close callbacks.

    Each callback appends to the ``events`` list of the enclosing scope.
    """

    def start(self, tag, attrib):
        """Log the start of ``tag``; attributes are ignored."""
        events.append("start-" + tag)

    def data(self, data):
        """Log a chunk of character data."""
        events.append("data-" + data)

    def end(self, tag):
        """Log the end of ``tag``."""
        events.append("end-" + tag)

    def close(self):
        """Log "close", then return the marker string "DONE"."""
        events.append("close")
        return "DONE"
1128
1129 parser = self.etree.XMLParser(target=Target(),
1130 recover=True)
1131
1132 parser.feed(_bytes('<root>A<a>ca</a>B</not-root>'))
1133 done = parser.close()
1134
1135 self.assertEqual("DONE", done)
1136 self.assertEqual(["start-root", "data-A", "start-a",
1137 "data-ca", "end-a", "data-B",
1138 "end-root", "close"],
1139 events)
1140
1150
1160
1169
1179
1181 iterwalk = self.etree.iterwalk
1182 root = self.etree.XML(_bytes('<a><b></b><c/></a>'))
1183
1184 iterator = iterwalk(root, events=('start','end'))
1185 events = list(iterator)
1186 self.assertEqual(
1187 [('start', root), ('start', root[0]), ('end', root[0]),
1188 ('start', root[1]), ('end', root[1]), ('end', root)],
1189 events)
1190
1200
1210
1224
1235
1237 iterwalk = self.etree.iterwalk
1238 root = self.etree.XML(_bytes('<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>'))
1239
1240 attr_name = '{testns}bla'
1241 events = []
1242 iterator = iterwalk(root, events=('start','end','start-ns','end-ns'))
1243 for event, elem in iterator:
1244 events.append(event)
1245 if event == 'start':
1246 if elem.tag != '{ns1}a':
1247 elem.set(attr_name, 'value')
1248
1249 self.assertEqual(
1250 ['start-ns', 'start', 'start', 'start-ns', 'start',
1251 'end', 'end-ns', 'end', 'end', 'end-ns'],
1252 events)
1253
1254 self.assertEqual(
1255 None,
1256 root.get(attr_name))
1257 self.assertEqual(
1258 'value',
1259 root[0].get(attr_name))
1260
1273
1275 iterwalk = self.etree.iterwalk
1276 root = self.etree.XML(_bytes('<a><b><c/></b><d><e/></d></a>'))
1277
1278 iterator = iterwalk(root, events=('start', 'end'))
1279 tags = []
1280 for event, elem in iterator:
1281 tags.append((event, elem.tag))
1282 if elem.tag in ('b', 'e'):
1283
1284 iterator.skip_subtree()
1285
1286 self.assertEqual(
1287 [('start', 'a'),
1288 ('start', 'b'), ('end', 'b'),
1289 ('start', 'd'),
1290 ('start', 'e'), ('end', 'e'),
1291 ('end', 'd'),
1292 ('end', 'a')],
1293 tags)
1294
1296 iterwalk = self.etree.iterwalk
1297 root = self.etree.XML(_bytes(
1298 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>'))
1299
1300 events = []
1301 iterator = iterwalk(root, events=('start','start-ns','end-ns'))
1302 for event, elem in iterator:
1303 if event in ('start-ns', 'end-ns'):
1304 events.append((event, elem))
1305 if event == 'start-ns' and elem == ('', 'nsb'):
1306 events.append('skip')
1307 iterator.skip_subtree()
1308 else:
1309 events.append((event, elem.tag))
1310
1311 self.assertEqual(
1312 [('start-ns', ('', 'ns1')),
1313 ('start', '{ns1}a'),
1314 ('start-ns', ('', 'nsb')),
1315 'skip',
1316 ('start', '{nsb}b'),
1317 ('end-ns', None),
1318 ('start-ns', ('', 'ns2')),
1319 ('start', '{ns2}d'),
1320 ('start', '{ns2}e'),
1321 ('end-ns', None),
1322 ('end-ns', None)
1323 ],
1324 events)
1325
1336
1338 parse = self.etree.parse
1339 parser = self.etree.XMLParser(dtd_validation=True)
1340 assertEqual = self.assertEqual
1341 test_url = _str("__nosuch.dtd")
1342
1343 class MyResolver(self.etree.Resolver):
1344 def resolve(self, url, id, context):
1345 assertEqual(url, test_url)
1346 return self.resolve_string(
1347 _str('''<!ENTITY myentity "%s">
1348 <!ELEMENT doc ANY>''') % url, context)
1349
1350 parser.resolvers.add(MyResolver())
1351
1352 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1353 tree = parse(StringIO(xml), parser)
1354 root = tree.getroot()
1355 self.assertEqual(root.text, test_url)
1356
1358 parse = self.etree.parse
1359 parser = self.etree.XMLParser(dtd_validation=True)
1360 assertEqual = self.assertEqual
1361 test_url = _str("__nosuch.dtd")
1362
1363 class MyResolver(self.etree.Resolver):
1364 def resolve(self, url, id, context):
1365 assertEqual(url, test_url)
1366 return self.resolve_string(
1367 (_str('''<!ENTITY myentity "%s">
1368 <!ELEMENT doc ANY>''') % url).encode('utf-8'),
1369 context)
1370
1371 parser.resolvers.add(MyResolver())
1372
1373 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1374 tree = parse(StringIO(xml), parser)
1375 root = tree.getroot()
1376 self.assertEqual(root.text, test_url)
1377
1379 parse = self.etree.parse
1380 parser = self.etree.XMLParser(dtd_validation=True)
1381 assertEqual = self.assertEqual
1382 test_url = _str("__nosuch.dtd")
1383
1384 class MyResolver(self.etree.Resolver):
1385 def resolve(self, url, id, context):
1386 assertEqual(url, test_url)
1387 return self.resolve_file(
1388 SillyFileLike(
1389 _str('''<!ENTITY myentity "%s">
1390 <!ELEMENT doc ANY>''') % url), context)
1391
1392 parser.resolvers.add(MyResolver())
1393
1394 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1395 tree = parse(StringIO(xml), parser)
1396 root = tree.getroot()
1397 self.assertEqual(root.text, test_url)
1398
1400 parse = self.etree.parse
1401 parser = self.etree.XMLParser(attribute_defaults=True)
1402 assertEqual = self.assertEqual
1403 test_url = _str("__nosuch.dtd")
1404
class MyResolver(self.etree.Resolver):
    """Resolver that answers the lookup with the test.dtd file name."""

    def resolve(self, url, id, context):
        """Check the requested URL, then resolve it to test.dtd by name."""
        # ``assertEqual`` and ``test_url`` come from the enclosing scope.
        assertEqual(url, test_url)
        dtd_path = fileInTestDir('test.dtd')
        return self.resolve_filename(dtd_path, context)
1410
1411 parser.resolvers.add(MyResolver())
1412
1413 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1414 tree = parse(StringIO(xml), parser)
1415 root = tree.getroot()
1416 self.assertEqual(
1417 root.attrib, {'default': 'valueA'})
1418 self.assertEqual(
1419 root[0].attrib, {'default': 'valueB'})
1420
1435
1436 parser.resolvers.add(MyResolver())
1437
1438 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1439 tree = parse(StringIO(xml), parser,
1440 base_url=fileUrlInTestDir('__test.xml'))
1441 root = tree.getroot()
1442 self.assertEqual(
1443 root.attrib, {'default': 'valueA'})
1444 self.assertEqual(
1445 root[0].attrib, {'default': 'valueB'})
1446
1448 parse = self.etree.parse
1449 parser = self.etree.XMLParser(attribute_defaults=True)
1450 assertEqual = self.assertEqual
1451 test_url = _str("__nosuch.dtd")
1452
class MyResolver(self.etree.Resolver):
    """Resolver that answers the lookup with an open test.dtd file."""

    def resolve(self, url, id, context):
        """Check the requested URL, then resolve it to an open file."""
        # ``assertEqual`` and ``test_url`` come from the enclosing scope.
        assertEqual(url, test_url)
        # Opened in binary mode and passed on to resolve_file(); it is
        # not closed in this method.
        dtd_file = open(fileInTestDir('test.dtd'), 'rb')
        return self.resolve_file(dtd_file, context)
1458
1459 parser.resolvers.add(MyResolver())
1460
1461 xml = _str('<!DOCTYPE a SYSTEM "%s"><a><b/></a>') % test_url
1462 tree = parse(StringIO(xml), parser)
1463 root = tree.getroot()
1464 self.assertEqual(
1465 root.attrib, {'default': 'valueA'})
1466 self.assertEqual(
1467 root[0].attrib, {'default': 'valueB'})
1468
1470 parse = self.etree.parse
1471 parser = self.etree.XMLParser(load_dtd=True)
1472 assertEqual = self.assertEqual
1473 test_url = _str("__nosuch.dtd")
1474
1475 class check(object):
1476 resolved = False
1477
1478 class MyResolver(self.etree.Resolver):
1479 def resolve(self, url, id, context):
1480 assertEqual(url, test_url)
1481 check.resolved = True
1482 return self.resolve_empty(context)
1483
1484 parser.resolvers.add(MyResolver())
1485
1486 xml = _str('<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>') % test_url
1487 self.assertRaises(etree.XMLSyntaxError, parse, StringIO(xml), parser)
1488 self.assertTrue(check.resolved)
1489
1496
1497 class MyResolver(self.etree.Resolver):
1498 def resolve(self, url, id, context):
1499 raise _LocalException
1500
1501 parser.resolvers.add(MyResolver())
1502
1503 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1504 self.assertRaises(_LocalException, parse, BytesIO(xml), parser)
1505
1522
1524 xml = _bytes('''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1525 <root>
1526 <child1/>
1527 <child2/>
1528 <child3> </child3>
1529 </root>''')
1530
1531 parser = self.etree.XMLParser(resolve_entities=False)
1532 root = etree.fromstring(xml, parser)
1533 self.assertEqual([ el.tag for el in root ],
1534 ['child1', 'child2', 'child3'])
1535
1536 root[0] = root[-1]
1537 self.assertEqual([ el.tag for el in root ],
1538 ['child3', 'child2'])
1539 self.assertEqual(root[0][0].text, ' ')
1540 self.assertEqual(root[0][0].name, 'nbsp')
1541
1557
1564
1566 Entity = self.etree.Entity
1567 self.assertRaises(ValueError, Entity, 'a b c')
1568 self.assertRaises(ValueError, Entity, 'a,b')
1569 self.assertRaises(ValueError, Entity, 'a\0b')
1570 self.assertRaises(ValueError, Entity, '#abc')
1571 self.assertRaises(ValueError, Entity, '#xxyz')
1572
1585
1606
1619
1631
1640
1649
1650
1660
1669
1671 Element = self.etree.Element
1672 SubElement = self.etree.SubElement
1673 root = Element('root')
1674 self.assertRaises(ValueError, root.append, root)
1675 child = SubElement(root, 'child')
1676 self.assertRaises(ValueError, child.append, root)
1677 child2 = SubElement(child, 'child2')
1678 self.assertRaises(ValueError, child2.append, root)
1679 self.assertRaises(ValueError, child2.append, child)
1680 self.assertEqual('child2', root[0][0].tag)
1681
1694
1707
1718
1729
1739
1749
1765
1781
1787
1802
1815
1830
1843
1858
1871
1886
1899
1900
1908
1909
1919
1920
1935
1936
1946
1947
1958
1985
1986
1988 self.assertRaises(TypeError, self.etree.dump, None)
1989
2002
2015
2036
2045
2047 XML = self.etree.XML
2048
2049 root = XML(_bytes('<doc><one/><two>Two</two>Hm<three/></doc>'))
2050 result = []
2051 for el in root.iterchildren(reversed=True):
2052 result.append(el.tag)
2053 self.assertEqual(['three', 'two', 'one'], result)
2054
2063
2072
2081
2083 XML = self.etree.XML
2084
2085 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2086 result = []
2087 for el in root.iterchildren(tag=['two', 'three']):
2088 result.append(el.text)
2089 self.assertEqual(['Two', 'Bla', None], result)
2090
2092 XML = self.etree.XML
2093
2094 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2095 result = []
2096 for el in root.iterchildren('two', 'three'):
2097 result.append(el.text)
2098 self.assertEqual(['Two', 'Bla', None], result)
2099
2101 XML = self.etree.XML
2102
2103 root = XML(_bytes('<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>'))
2104 result = []
2105 for el in root.iterchildren(reversed=True, tag=['two', 'three']):
2106 result.append(el.text)
2107 self.assertEqual([None, 'Bla', 'Two'], result)
2108
2129
2151
2153 Element = self.etree.Element
2154 SubElement = self.etree.SubElement
2155
2156 a = Element('a')
2157 b = SubElement(a, 'b')
2158 c = SubElement(a, 'c')
2159 d = SubElement(b, 'd')
2160 self.assertEqual(
2161 [b, a],
2162 list(d.iterancestors(tag=('a', 'b'))))
2163 self.assertEqual(
2164 [b, a],
2165 list(d.iterancestors('a', 'b')))
2166
2167 self.assertEqual(
2168 [],
2169 list(d.iterancestors(tag=('w', 'x', 'y', 'z'))))
2170 self.assertEqual(
2171 [],
2172 list(d.iterancestors('w', 'x', 'y', 'z')))
2173
2174 self.assertEqual(
2175 [],
2176 list(d.iterancestors(tag=('d', 'x'))))
2177 self.assertEqual(
2178 [],
2179 list(d.iterancestors('d', 'x')))
2180
2181 self.assertEqual(
2182 [b, a],
2183 list(d.iterancestors(tag=('b', '*'))))
2184 self.assertEqual(
2185 [b, a],
2186 list(d.iterancestors('b', '*')))
2187
2188 self.assertEqual(
2189 [b],
2190 list(d.iterancestors(tag=('b', 'c'))))
2191 self.assertEqual(
2192 [b],
2193 list(d.iterancestors('b', 'c')))
2194
2211
2213 Element = self.etree.Element
2214 SubElement = self.etree.SubElement
2215
2216 a = Element('a')
2217 b = SubElement(a, 'b')
2218 c = SubElement(a, 'c')
2219 d = SubElement(b, 'd')
2220 e = SubElement(c, 'e')
2221
2222 self.assertEqual(
2223 [],
2224 list(a.iterdescendants('a')))
2225 self.assertEqual(
2226 [],
2227 list(a.iterdescendants(tag='a')))
2228
2229 a2 = SubElement(e, 'a')
2230 self.assertEqual(
2231 [a2],
2232 list(a.iterdescendants('a')))
2233
2234 self.assertEqual(
2235 [a2],
2236 list(c.iterdescendants('a')))
2237 self.assertEqual(
2238 [a2],
2239 list(c.iterdescendants(tag='a')))
2240
2242 Element = self.etree.Element
2243 SubElement = self.etree.SubElement
2244
2245 a = Element('a')
2246 b = SubElement(a, 'b')
2247 c = SubElement(a, 'c')
2248 d = SubElement(b, 'd')
2249 e = SubElement(c, 'e')
2250
2251 self.assertEqual(
2252 [b, e],
2253 list(a.iterdescendants(tag=('a', 'b', 'e'))))
2254 self.assertEqual(
2255 [b, e],
2256 list(a.iterdescendants('a', 'b', 'e')))
2257
2258 a2 = SubElement(e, 'a')
2259 self.assertEqual(
2260 [b, a2],
2261 list(a.iterdescendants(tag=('a', 'b'))))
2262 self.assertEqual(
2263 [b, a2],
2264 list(a.iterdescendants('a', 'b')))
2265
2266 self.assertEqual(
2267 [],
2268 list(c.iterdescendants(tag=('x', 'y', 'z'))))
2269 self.assertEqual(
2270 [],
2271 list(c.iterdescendants('x', 'y', 'z')))
2272
2273 self.assertEqual(
2274 [b, d, c, e, a2],
2275 list(a.iterdescendants(tag=('x', 'y', 'z', '*'))))
2276 self.assertEqual(
2277 [b, d, c, e, a2],
2278 list(a.iterdescendants('x', 'y', 'z', '*')))
2279
2297
2314
2332
2356
2358 Element = self.etree.Element
2359 SubElement = self.etree.SubElement
2360
2361 a = Element('a')
2362 b = SubElement(a, 'b')
2363 c = SubElement(a, 'c')
2364 d = SubElement(b, 'd')
2365 self.assertEqual(
2366 [],
2367 list(a.itersiblings(tag='XXX')))
2368 self.assertEqual(
2369 [c],
2370 list(b.itersiblings(tag='c')))
2371 self.assertEqual(
2372 [c],
2373 list(b.itersiblings(tag='*')))
2374 self.assertEqual(
2375 [b],
2376 list(c.itersiblings(preceding=True, tag='b')))
2377 self.assertEqual(
2378 [],
2379 list(c.itersiblings(preceding=True, tag='c')))
2380
2382 Element = self.etree.Element
2383 SubElement = self.etree.SubElement
2384
2385 a = Element('a')
2386 b = SubElement(a, 'b')
2387 c = SubElement(a, 'c')
2388 d = SubElement(b, 'd')
2389 e = SubElement(a, 'e')
2390 self.assertEqual(
2391 [],
2392 list(a.itersiblings(tag=('XXX', 'YYY'))))
2393 self.assertEqual(
2394 [c, e],
2395 list(b.itersiblings(tag=('c', 'd', 'e'))))
2396 self.assertEqual(
2397 [b],
2398 list(c.itersiblings(preceding=True, tag=('b', 'b', 'c', 'd'))))
2399 self.assertEqual(
2400 [c, b],
2401 list(e.itersiblings(preceding=True, tag=('c', '*'))))
2402
2404 parseid = self.etree.parseid
2405 XML = self.etree.XML
2406 xml_text = _bytes('''
2407 <!DOCTYPE document [
2408 <!ELEMENT document (h1,p)*>
2409 <!ELEMENT h1 (#PCDATA)>
2410 <!ATTLIST h1 myid ID #REQUIRED>
2411 <!ELEMENT p (#PCDATA)>
2412 <!ATTLIST p someid ID #REQUIRED>
2413 ]>
2414 <document>
2415 <h1 myid="chapter1">...</h1>
2416 <p id="note1" class="note">...</p>
2417 <p>Regular paragraph.</p>
2418 <p xml:id="xmlid">XML:ID paragraph.</p>
2419 <p someid="warn1" class="warning">...</p>
2420 </document>
2421 ''')
2422
2423 tree, dic = parseid(BytesIO(xml_text))
2424 root = tree.getroot()
2425 root2 = XML(xml_text)
2426 self.assertEqual(self._writeElement(root),
2427 self._writeElement(root2))
2428 expected = {
2429 "chapter1" : root[0],
2430 "xmlid" : root[3],
2431 "warn1" : root[4]
2432 }
2433 self.assertTrue("chapter1" in dic)
2434 self.assertTrue("warn1" in dic)
2435 self.assertTrue("xmlid" in dic)
2436 self._checkIDDict(dic, expected)
2437
2439 XMLDTDID = self.etree.XMLDTDID
2440 XML = self.etree.XML
2441 xml_text = _bytes('''
2442 <!DOCTYPE document [
2443 <!ELEMENT document (h1,p)*>
2444 <!ELEMENT h1 (#PCDATA)>
2445 <!ATTLIST h1 myid ID #REQUIRED>
2446 <!ELEMENT p (#PCDATA)>
2447 <!ATTLIST p someid ID #REQUIRED>
2448 ]>
2449 <document>
2450 <h1 myid="chapter1">...</h1>
2451 <p id="note1" class="note">...</p>
2452 <p>Regular paragraph.</p>
2453 <p xml:id="xmlid">XML:ID paragraph.</p>
2454 <p someid="warn1" class="warning">...</p>
2455 </document>
2456 ''')
2457
2458 root, dic = XMLDTDID(xml_text)
2459 root2 = XML(xml_text)
2460 self.assertEqual(self._writeElement(root),
2461 self._writeElement(root2))
2462 expected = {
2463 "chapter1" : root[0],
2464 "xmlid" : root[3],
2465 "warn1" : root[4]
2466 }
2467 self.assertTrue("chapter1" in dic)
2468 self.assertTrue("warn1" in dic)
2469 self.assertTrue("xmlid" in dic)
2470 self._checkIDDict(dic, expected)
2471
2473 XMLDTDID = self.etree.XMLDTDID
2474 XML = self.etree.XML
2475 xml_text = _bytes('''
2476 <document>
2477 <h1 myid="chapter1">...</h1>
2478 <p id="note1" class="note">...</p>
2479 <p>Regular paragraph.</p>
2480 <p someid="warn1" class="warning">...</p>
2481 </document>
2482 ''')
2483
2484 root, dic = XMLDTDID(xml_text)
2485 root2 = XML(xml_text)
2486 self.assertEqual(self._writeElement(root),
2487 self._writeElement(root2))
2488 expected = {}
2489 self._checkIDDict(dic, expected)
2490
2492 XMLDTDID = self.etree.XMLDTDID
2493 XML = self.etree.XML
2494 xml_text = _bytes('''
2495 <!DOCTYPE document [
2496 <!ELEMENT document (h1,p)*>
2497 <!ELEMENT h1 (#PCDATA)>
2498 <!ATTLIST h1 myid ID #REQUIRED>
2499 <!ELEMENT p (#PCDATA)>
2500 <!ATTLIST p someid ID #REQUIRED>
2501 ]>
2502 <document>
2503 <h1 myid="chapter1">...</h1>
2504 <p id="note1" class="note">...</p>
2505 <p>Regular paragraph.</p>
2506 <p xml:id="xmlid">XML:ID paragraph.</p>
2507 <p someid="warn1" class="warning">...</p>
2508 </document>
2509 ''')
2510
2511 parser = etree.XMLParser(collect_ids=False)
2512 root, dic = XMLDTDID(xml_text, parser=parser)
2513 root2 = XML(xml_text)
2514 self.assertEqual(self._writeElement(root),
2515 self._writeElement(root2))
2516 self.assertFalse(dic)
2517 self._checkIDDict(dic, {})
2518
2520 self.assertEqual(len(dic),
2521 len(expected))
2522 self.assertEqual(sorted(dic.items()),
2523 sorted(expected.items()))
2524 if sys.version_info < (3,):
2525 self.assertEqual(sorted(dic.iteritems()),
2526 sorted(expected.iteritems()))
2527 self.assertEqual(sorted(dic.keys()),
2528 sorted(expected.keys()))
2529 if sys.version_info < (3,):
2530 self.assertEqual(sorted(dic.iterkeys()),
2531 sorted(expected.iterkeys()))
2532 if sys.version_info < (3,):
2533 self.assertEqual(sorted(dic.values()),
2534 sorted(expected.values()))
2535 self.assertEqual(sorted(dic.itervalues()),
2536 sorted(expected.itervalues()))
2537
2539 etree = self.etree
2540
2541 r = {'foo': 'http://ns.infrae.com/foo'}
2542 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2543 self.assertEqual(
2544 'foo',
2545 e.prefix)
2546 self.assertEqual(
2547 _bytes('<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>'),
2548 self._writeElement(e))
2549
2551 etree = self.etree
2552
2553 r = {None: 'http://ns.infrae.com/foo'}
2554 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2555 self.assertEqual(
2556 None,
2557 e.prefix)
2558 self.assertEqual(
2559 '{http://ns.infrae.com/foo}bar',
2560 e.tag)
2561 self.assertEqual(
2562 _bytes('<bar xmlns="http://ns.infrae.com/foo"></bar>'),
2563 self._writeElement(e))
2564
2566 etree = self.etree
2567
2568 r = {None: 'http://ns.infrae.com/foo', 'p': 'http://test/'}
2569 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2570 self.assertEqual(None, e.prefix)
2571 self.assertEqual('{http://ns.infrae.com/foo}bar', e.tag)
2572 self.assertEqual(
2573 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>'),
2574 self._writeElement(e))
2575
2577 etree = self.etree
2578
2579 r = {None: 'http://ns.infrae.com/foo',
2580 'hoi': 'http://ns.infrae.com/hoi'}
2581 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2582 e.set('{http://ns.infrae.com/hoi}test', 'value')
2583 self.assertEqual(
2584 _bytes('<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>'),
2585 self._writeElement(e))
2586
2588 etree = self.etree
2589
2590 root = etree.Element('{http://test/ns}root',
2591 nsmap={None: 'http://test/ns'})
2592 sub = etree.Element('{http://test/ns}sub',
2593 nsmap={'test': 'http://test/ns'})
2594
2595 sub.attrib['{http://test/ns}attr'] = 'value'
2596 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2597 self.assertEqual(
2598 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2599 etree.tostring(sub))
2600
2601 root.append(sub)
2602 self.assertEqual(
2603 _bytes('<root xmlns="http://test/ns">'
2604 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2605 '</root>'),
2606 etree.tostring(root))
2607
2609 etree = self.etree
2610
2611 root = etree.Element('root')
2612 sub = etree.Element('{http://test/ns}sub',
2613 nsmap={'test': 'http://test/ns'})
2614
2615 sub.attrib['{http://test/ns}attr'] = 'value'
2616 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2617 self.assertEqual(
2618 _bytes('<test:sub xmlns:test="http://test/ns" test:attr="value"/>'),
2619 etree.tostring(sub))
2620
2621 root.append(sub)
2622 self.assertEqual(
2623 _bytes('<root>'
2624 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2625 '</root>'),
2626 etree.tostring(root))
2627
2629 etree = self.etree
2630
2631 root = etree.Element('root')
2632 sub = etree.Element('{http://test/ns}sub',
2633 nsmap={None: 'http://test/ns'})
2634
2635 sub.attrib['{http://test/ns}attr'] = 'value'
2636 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2637 self.assertEqual(
2638 _bytes('<sub xmlns="http://test/ns" '
2639 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2640 etree.tostring(sub))
2641
2642 root.append(sub)
2643 self.assertEqual(
2644 _bytes('<root>'
2645 '<sub xmlns="http://test/ns"'
2646 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2647 '</root>'),
2648 etree.tostring(root))
2649
2651 etree = self.etree
2652
2653 root = etree.Element('{http://test/ns}root',
2654 nsmap={'test': 'http://test/ns',
2655 None: 'http://test/ns'})
2656 sub = etree.Element('{http://test/ns}sub',
2657 nsmap={None: 'http://test/ns'})
2658
2659 sub.attrib['{http://test/ns}attr'] = 'value'
2660 self.assertEqual(sub.attrib['{http://test/ns}attr'], 'value')
2661 self.assertEqual(
2662 _bytes('<sub xmlns="http://test/ns" '
2663 'xmlns:ns0="http://test/ns" ns0:attr="value"/>'),
2664 etree.tostring(sub))
2665
2666 root.append(sub)
2667 self.assertEqual(
2668 _bytes('<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2669 '<test:sub test:attr="value"/>'
2670 '</test:root>'),
2671 etree.tostring(root))
2672
2674 etree = self.etree
2675 r = {None: 'http://ns.infrae.com/foo',
2676 'hoi': 'http://ns.infrae.com/hoi'}
2677 e = etree.Element('{http://ns.infrae.com/foo}z', nsmap=r)
2678 tree = etree.ElementTree(element=e)
2679 etree.SubElement(e, '{http://ns.infrae.com/hoi}x')
2680 self.assertEqual(
2681 _bytes('<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>'),
2682 self._writeElement(e))
2683
2685 etree = self.etree
2686
2687 r = {None: 'http://ns.infrae.com/foo'}
2688 e1 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2689 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2690
2691 e1.append(e2)
2692
2693 self.assertEqual(
2694 None,
2695 e1.prefix)
2696 self.assertEqual(
2697 None,
2698 e1[0].prefix)
2699 self.assertEqual(
2700 '{http://ns.infrae.com/foo}bar',
2701 e1.tag)
2702 self.assertEqual(
2703 '{http://ns.infrae.com/foo}bar',
2704 e1[0].tag)
2705
2707 etree = self.etree
2708
2709 r = {None: 'http://ns.infrae.com/BAR'}
2710 e1 = etree.Element('{http://ns.infrae.com/BAR}bar', nsmap=r)
2711 e2 = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2712
2713 e1.append(e2)
2714
2715 self.assertEqual(
2716 None,
2717 e1.prefix)
2718 self.assertNotEqual(
2719 None,
2720 e2.prefix)
2721 self.assertEqual(
2722 '{http://ns.infrae.com/BAR}bar',
2723 e1.tag)
2724 self.assertEqual(
2725 '{http://ns.infrae.com/foo}bar',
2726 e2.tag)
2727
2729 ns_href = "http://a.b.c"
2730 one = self.etree.fromstring(
2731 _bytes('<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href))
2732 baz = one[0][0]
2733
2734 two = self.etree.fromstring(
2735 _bytes('<root xmlns:ns="%s"/>' % ns_href))
2736 two.append(baz)
2737 del one
2738
2739 self.assertEqual('{%s}baz' % ns_href, baz.tag)
2740 self.assertEqual(
2741 _bytes('<root xmlns:ns="%s"><ns:baz/></root>' % ns_href),
2742 self.etree.tostring(two))
2743
2757
2774
2785
2787 xml = ('<root>' +
2788 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2789 '<n64:x/>' + '</a>'*100 + '</root>').encode('utf8')
2790 root = self.etree.fromstring(xml)
2791 self.assertEqual(xml, self.etree.tostring(root))
2792 self.etree.cleanup_namespaces(root)
2793 self.assertEqual(
2794 b'<root>' + b'<a>'*64 + b'<a xmlns:n64="NS64">' + b'<a>'*35 +
2795 b'<n64:x/>' + b'</a>'*100 + b'</root>',
2796 self.etree.tostring(root))
2797
2799 xml = ('<root>' +
2800 ''.join('<a xmlns:n{n}="NS{n}">'.format(n=i) for i in range(100)) +
2801 '<n64:x xmlns:a="A" a:attr="X"/>' +
2802 '</a>'*100 +
2803 '</root>').encode('utf8')
2804 root = self.etree.fromstring(xml)
2805 self.assertEqual(xml, self.etree.tostring(root))
2806 self.etree.cleanup_namespaces(root, top_nsmap={'n64': 'NS64'})
2807 self.assertEqual(
2808 b'<root xmlns:n64="NS64">' + b'<a>'*100 +
2809 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>'*100 + b'</root>',
2810 self.etree.tostring(root))
2811
2813 xml = ('<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2814 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2815 '<foo>foo:bar</foo>'
2816 '</root>').encode('utf8')
2817 root = self.etree.fromstring(xml)
2818 self.assertEqual(xml, self.etree.tostring(root))
2819 self.etree.cleanup_namespaces(root, keep_ns_prefixes=['foo'])
2820 self.assertEqual(
2821 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2822 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2823 b'<foo>foo:bar</foo>'
2824 b'</root>',
2825 self.etree.tostring(root))
2826
2828 xml = ('<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2829 '<sub xmlns:foo="FOO">'
2830 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2831 '<foo>foo:bar</foo>'
2832 '</sub>'
2833 '</root>').encode('utf8')
2834 root = self.etree.fromstring(xml)
2835 self.assertEqual(xml, self.etree.tostring(root))
2836 self.etree.cleanup_namespaces(
2837 root,
2838 top_nsmap={'foo': 'FOO', 'unused1': 'UNUSED'},
2839 keep_ns_prefixes=['foo'])
2840 self.assertEqual(
2841 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2842 b'<sub>'
2843 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2844 b'<foo>foo:bar</foo>'
2845 b'</sub>'
2846 b'</root>',
2847 self.etree.tostring(root))
2848
2850 etree = self.etree
2851
2852 r = {None: 'http://ns.infrae.com/foo',
2853 'hoi': 'http://ns.infrae.com/hoi'}
2854 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=r)
2855 self.assertEqual(
2856 r,
2857 e.nsmap)
2858
2860 etree = self.etree
2861
2862 re = {None: 'http://ns.infrae.com/foo',
2863 'hoi': 'http://ns.infrae.com/hoi'}
2864 e = etree.Element('{http://ns.infrae.com/foo}bar', nsmap=re)
2865
2866 rs = {None: 'http://ns.infrae.com/honk',
2867 'top': 'http://ns.infrae.com/top'}
2868 s = etree.SubElement(e, '{http://ns.infrae.com/honk}bar', nsmap=rs)
2869
2870 r = re.copy()
2871 r.update(rs)
2872 self.assertEqual(re, e.nsmap)
2873 self.assertEqual(r, s.nsmap)
2874
2876 etree = self.etree
2877 el = etree.HTML('<hha:page-description>aa</hha:page-description>').find('.//page-description')
2878 self.assertEqual({'hha': None}, el.nsmap)
2879
2881 Element = self.etree.Element
2882 SubElement = self.etree.SubElement
2883
2884 a = Element('a')
2885 b = SubElement(a, 'b')
2886 c = SubElement(a, 'c')
2887 d = SubElement(b, 'd')
2888 e = SubElement(c, 'e')
2889 f = SubElement(c, 'f')
2890
2891 self.assertEqual(
2892 [a, b],
2893 list(a.getiterator('a', 'b')))
2894 self.assertEqual(
2895 [],
2896 list(a.getiterator('x', 'y')))
2897 self.assertEqual(
2898 [a, f],
2899 list(a.getiterator('f', 'a')))
2900 self.assertEqual(
2901 [c, e, f],
2902 list(c.getiterator('c', '*', 'a')))
2903 self.assertEqual(
2904 [],
2905 list(a.getiterator( (), () )))
2906
2908 Element = self.etree.Element
2909 SubElement = self.etree.SubElement
2910
2911 a = Element('a')
2912 b = SubElement(a, 'b')
2913 c = SubElement(a, 'c')
2914 d = SubElement(b, 'd')
2915 e = SubElement(c, 'e')
2916 f = SubElement(c, 'f')
2917
2918 self.assertEqual(
2919 [a, b],
2920 list(a.getiterator( ('a', 'b') )))
2921 self.assertEqual(
2922 [],
2923 list(a.getiterator( ('x', 'y') )))
2924 self.assertEqual(
2925 [a, f],
2926 list(a.getiterator( ('f', 'a') )))
2927 self.assertEqual(
2928 [c, e, f],
2929 list(c.getiterator( ('c', '*', 'a') )))
2930 self.assertEqual(
2931 [],
2932 list(a.getiterator( () )))
2933
2935 Element = self.etree.Element
2936 SubElement = self.etree.SubElement
2937
2938 a = Element('{a}a')
2939 b = SubElement(a, '{a}b')
2940 c = SubElement(a, '{a}c')
2941 d = SubElement(b, '{b}d')
2942 e = SubElement(c, '{a}e')
2943 f = SubElement(c, '{b}f')
2944 g = SubElement(c, 'g')
2945
2946 self.assertEqual(
2947 [a],
2948 list(a.getiterator('{a}a')))
2949 self.assertEqual(
2950 [],
2951 list(a.getiterator('{b}a')))
2952 self.assertEqual(
2953 [],
2954 list(a.getiterator('a')))
2955 self.assertEqual(
2956 [a,b,d,c,e,f,g],
2957 list(a.getiterator('*')))
2958 self.assertEqual(
2959 [f],
2960 list(c.getiterator('{b}*')))
2961 self.assertEqual(
2962 [d, f],
2963 list(a.getiterator('{b}*')))
2964 self.assertEqual(
2965 [g],
2966 list(a.getiterator('g')))
2967 self.assertEqual(
2968 [g],
2969 list(a.getiterator('{}g')))
2970 self.assertEqual(
2971 [g],
2972 list(a.getiterator('{}*')))
2973
2975 Element = self.etree.Element
2976 SubElement = self.etree.SubElement
2977
2978 a = Element('{a}a')
2979 b = SubElement(a, '{nsA}b')
2980 c = SubElement(b, '{nsB}b')
2981 d = SubElement(a, 'b')
2982 e = SubElement(a, '{nsA}e')
2983 f = SubElement(e, '{nsB}e')
2984 g = SubElement(e, 'e')
2985
2986 self.assertEqual(
2987 [b, c, d],
2988 list(a.getiterator('{*}b')))
2989 self.assertEqual(
2990 [e, f, g],
2991 list(a.getiterator('{*}e')))
2992 self.assertEqual(
2993 [a, b, c, d, e, f, g],
2994 list(a.getiterator('{*}*')))
2995
3020
3036
3053
3055 a = etree.Element("a")
3056 b = etree.SubElement(a, "b")
3057 c = etree.SubElement(a, "c")
3058 d1 = etree.SubElement(c, "d")
3059 d2 = etree.SubElement(c, "d")
3060 c.text = d1.text = 'TEXT'
3061
3062 tree = etree.ElementTree(a)
3063 self.assertEqual('.', tree.getelementpath(a))
3064 self.assertEqual('c/d[1]', tree.getelementpath(d1))
3065 self.assertEqual('c/d[2]', tree.getelementpath(d2))
3066
3067 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3068 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3069
3070 tree = etree.ElementTree(c)
3071 self.assertEqual('.', tree.getelementpath(c))
3072 self.assertEqual('d[2]', tree.getelementpath(d2))
3073 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3074
3075 tree = etree.ElementTree(b)
3076 self.assertEqual('.', tree.getelementpath(b))
3077 self.assertRaises(ValueError, tree.getelementpath, a)
3078 self.assertRaises(ValueError, tree.getelementpath, c)
3079 self.assertRaises(ValueError, tree.getelementpath, d2)
3080
3082 a = etree.Element("{http://ns1/}a")
3083 b = etree.SubElement(a, "{http://ns1/}b")
3084 c = etree.SubElement(a, "{http://ns1/}c")
3085 d1 = etree.SubElement(c, "{http://ns1/}d")
3086 d2 = etree.SubElement(c, "{http://ns2/}d")
3087 d3 = etree.SubElement(c, "{http://ns1/}d")
3088
3089 tree = etree.ElementTree(a)
3090 self.assertEqual('.', tree.getelementpath(a))
3091 self.assertEqual('{http://ns1/}c/{http://ns1/}d[1]',
3092 tree.getelementpath(d1))
3093 self.assertEqual('{http://ns1/}c/{http://ns2/}d',
3094 tree.getelementpath(d2))
3095 self.assertEqual('{http://ns1/}c/{http://ns1/}d[2]',
3096 tree.getelementpath(d3))
3097
3098 self.assertEqual(a, tree.find(tree.getelementpath(a)))
3099 self.assertEqual(b, tree.find(tree.getelementpath(b)))
3100 self.assertEqual(c, tree.find(tree.getelementpath(c)))
3101 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3102 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3103 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3104
3105 tree = etree.ElementTree(c)
3106 self.assertEqual('{http://ns1/}d[1]', tree.getelementpath(d1))
3107 self.assertEqual('{http://ns2/}d', tree.getelementpath(d2))
3108 self.assertEqual('{http://ns1/}d[2]', tree.getelementpath(d3))
3109 self.assertEqual(d1, tree.find(tree.getelementpath(d1)))
3110 self.assertEqual(d2, tree.find(tree.getelementpath(d2)))
3111 self.assertEqual(d3, tree.find(tree.getelementpath(d3)))
3112
3113 tree = etree.ElementTree(b)
3114 self.assertRaises(ValueError, tree.getelementpath, d1)
3115 self.assertRaises(ValueError, tree.getelementpath, d2)
3116
3123
3130
3139
3141 XML = self.etree.XML
3142 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>'))
3143 self.assertEqual(len(root.findall(".//{X}b")), 2)
3144 self.assertEqual(len(root.findall(".//{X}*")), 2)
3145 self.assertEqual(len(root.findall(".//b")), 3)
3146
3148 XML = self.etree.XML
3149 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3150 nsmap = {'xx': 'X'}
3151 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3152 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 2)
3153 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3154 nsmap = {'xx': 'Y'}
3155 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3156 self.assertEqual(len(root.findall(".//xx:*", namespaces=nsmap)), 1)
3157 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3158
3160 XML = self.etree.XML
3161 root = XML(_bytes('<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>'))
3162 nsmap = {'xx': 'X'}
3163 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3164 nsmap = {'xx': 'X', None: 'Y'}
3165 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3166 nsmap = {'xx': 'X', '': 'Y'}
3167 self.assertRaises(ValueError, root.findall, ".//xx:b", namespaces=nsmap)
3168
3175
3177 etree = self.etree
3178 e = etree.Element('foo')
3179 for i in range(10):
3180 etree.SubElement(e, 'a%s' % i)
3181 for i in range(10):
3182 self.assertEqual(
3183 i,
3184 e.index(e[i]))
3185 self.assertEqual(
3186 3, e.index(e[3], 3))
3187 self.assertRaises(
3188 ValueError, e.index, e[3], 4)
3189 self.assertRaises(
3190 ValueError, e.index, e[3], 0, 2)
3191 self.assertRaises(
3192 ValueError, e.index, e[8], 0, -3)
3193 self.assertRaises(
3194 ValueError, e.index, e[8], -5, -3)
3195 self.assertEqual(
3196 8, e.index(e[8], 0, -1))
3197 self.assertEqual(
3198 8, e.index(e[8], -12, -1))
3199 self.assertEqual(
3200 0, e.index(e[0], -12, -1))
3201
3203 etree = self.etree
3204 e = etree.Element('foo')
3205 for i in range(10):
3206 el = etree.SubElement(e, 'a%s' % i)
3207 el.text = "text%d" % i
3208 el.tail = "tail%d" % i
3209
3210 child0 = e[0]
3211 child1 = e[1]
3212 child2 = e[2]
3213
3214 e.replace(e[0], e[1])
3215 self.assertEqual(
3216 9, len(e))
3217 self.assertEqual(
3218 child1, e[0])
3219 self.assertEqual(
3220 child1.text, "text1")
3221 self.assertEqual(
3222 child1.tail, "tail1")
3223 self.assertEqual(
3224 child0.tail, "tail0")
3225 self.assertEqual(
3226 child2, e[1])
3227
3228 e.replace(e[-1], e[0])
3229 self.assertEqual(
3230 child1, e[-1])
3231 self.assertEqual(
3232 child1.text, "text1")
3233 self.assertEqual(
3234 child1.tail, "tail1")
3235 self.assertEqual(
3236 child2, e[0])
3237
3239 etree = self.etree
3240 e = etree.Element('foo')
3241 for i in range(10):
3242 etree.SubElement(e, 'a%s' % i)
3243
3244 new_element = etree.Element("test")
3245 new_element.text = "TESTTEXT"
3246 new_element.tail = "TESTTAIL"
3247 child1 = e[1]
3248 e.replace(e[0], new_element)
3249 self.assertEqual(
3250 new_element, e[0])
3251 self.assertEqual(
3252 "TESTTEXT",
3253 e[0].text)
3254 self.assertEqual(
3255 "TESTTAIL",
3256 e[0].tail)
3257 self.assertEqual(
3258 child1, e[1])
3259
3275
3293
3311
3329
3331 Element = self.etree.Element
3332 SubElement = self.etree.SubElement
3333 try:
3334 slice
3335 except NameError:
3336 print("slice() not found")
3337 return
3338
3339 a = Element('a')
3340 b = SubElement(a, 'b')
3341 c = SubElement(a, 'c')
3342 d = SubElement(a, 'd')
3343 e = SubElement(a, 'e')
3344
3345 x = Element('x')
3346 y = Element('y')
3347 z = Element('z')
3348
3349 self.assertRaises(
3350 ValueError,
3351 operator.setitem, a, slice(1,None,2), [x, y, z])
3352
3353 self.assertEqual(
3354 [b, c, d, e],
3355 list(a))
3356
3369
3371 XML = self.etree.XML
3372 root = XML(_bytes(
3373 '<?xml version="1.0"?>\n'
3374 '<root>' + '\n' * 65536 +
3375 '<p>' + '\n' * 65536 + '</p>\n' +
3376 '<br/>\n'
3377 '</root>'))
3378
3379 if self.etree.LIBXML_VERSION >= (2, 9):
3380 expected = [2, 131074, 131076]
3381 else:
3382 expected = [2, 65535, 65535]
3383
3384 self.assertEqual(expected, [el.sourceline for el in root.iter()])
3385
3393
3402
3412
3422
3428
3436
3442
3449
3455
3457 etree = self.etree
3458 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3459 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3460 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3461 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id)
3462
3463 xml = _bytes(xml_header + doctype_string + '<html><body></body></html>')
3464
3465 tree = etree.parse(BytesIO(xml))
3466 docinfo = tree.docinfo
3467 self.assertEqual(docinfo.encoding, "ascii")
3468 self.assertEqual(docinfo.xml_version, "1.0")
3469 self.assertEqual(docinfo.public_id, pub_id)
3470 self.assertEqual(docinfo.system_url, sys_id)
3471 self.assertEqual(docinfo.root_name, 'html')
3472 self.assertEqual(docinfo.doctype, doctype_string)
3473
3489
3501
3513
3519
3521 etree = self.etree
3522 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3523 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3524 doctype_string = _bytes('<!DOCTYPE html PUBLIC "%s" "%s">' % (pub_id, sys_id))
3525
3526 xml = _bytes('<!DOCTYPE root>\n<root/>')
3527 tree = etree.parse(BytesIO(xml))
3528 self.assertEqual(xml.replace(_bytes('<!DOCTYPE root>'), doctype_string),
3529 etree.tostring(tree, doctype=doctype_string))
3530
3532 etree = self.etree
3533 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3534 self.assertEqual(root.base, "http://no/such/url")
3535 self.assertEqual(
3536 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3537 root.base = "https://secret/url"
3538 self.assertEqual(root.base, "https://secret/url")
3539 self.assertEqual(
3540 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3541 "https://secret/url")
3542
3544 etree = self.etree
3545 root = etree.XML(_bytes("<root/>"), base_url="http://no/such/url")
3546 self.assertEqual(root.base, "http://no/such/url")
3547 self.assertEqual(
3548 root.get('{http://www.w3.org/XML/1998/namespace}base'), None)
3549 root.set('{http://www.w3.org/XML/1998/namespace}base',
3550 "https://secret/url")
3551 self.assertEqual(root.base, "https://secret/url")
3552 self.assertEqual(
3553 root.get('{http://www.w3.org/XML/1998/namespace}base'),
3554 "https://secret/url")
3555
3561
3566
3573
3587
3589 Element = self.etree.Element
3590
3591 a = Element('a')
3592 self.assertRaises(ValueError, setattr, a, "text", 'ha\0ho')
3593 self.assertRaises(ValueError, setattr, a, "tail", 'ha\0ho')
3594
3595 self.assertRaises(ValueError, Element, 'ha\0ho')
3596
3598 Element = self.etree.Element
3599
3600 a = Element('a')
3601 self.assertRaises(ValueError, setattr, a, "text",
3602 _str('ha\0ho'))
3603 self.assertRaises(ValueError, setattr, a, "tail",
3604 _str('ha\0ho'))
3605
3606 self.assertRaises(ValueError, Element,
3607 _str('ha\0ho'))
3608
3610 Element = self.etree.Element
3611
3612 a = Element('a')
3613 self.assertRaises(ValueError, setattr, a, "text", 'ha\x07ho')
3614 self.assertRaises(ValueError, setattr, a, "text", 'ha\x02ho')
3615
3616 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x07ho')
3617 self.assertRaises(ValueError, setattr, a, "tail", 'ha\x02ho')
3618
3619 self.assertRaises(ValueError, Element, 'ha\x07ho')
3620 self.assertRaises(ValueError, Element, 'ha\x02ho')
3621
3623 Element = self.etree.Element
3624
3625 a = Element('a')
3626 self.assertRaises(ValueError, setattr, a, "text",
3627 _str('ha\x07ho'))
3628 self.assertRaises(ValueError, setattr, a, "text",
3629 _str('ha\x02ho'))
3630
3631 self.assertRaises(ValueError, setattr, a, "tail",
3632 _str('ha\x07ho'))
3633 self.assertRaises(ValueError, setattr, a, "tail",
3634 _str('ha\x02ho'))
3635
3636 self.assertRaises(ValueError, Element,
3637 _str('ha\x07ho'))
3638 self.assertRaises(ValueError, Element,
3639 _str('ha\x02ho'))
3640
3642 Element = self.etree.Element
3643
3644 a = Element('a')
3645 self.assertRaises(ValueError, setattr, a, "text",
3646 _str('ha\u1234\x07ho'))
3647 self.assertRaises(ValueError, setattr, a, "text",
3648 _str('ha\u1234\x02ho'))
3649
3650 self.assertRaises(ValueError, setattr, a, "tail",
3651 _str('ha\u1234\x07ho'))
3652 self.assertRaises(ValueError, setattr, a, "tail",
3653 _str('ha\u1234\x02ho'))
3654
3655 self.assertRaises(ValueError, Element,
3656 _str('ha\u1234\x07ho'))
3657 self.assertRaises(ValueError, Element,
3658 _str('ha\u1234\x02ho'))
3659
3673
3678
3696
3716
3718 tostring = self.etree.tostring
3719 html = self.etree.fromstring(
3720 '<html><body>'
3721 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3722 '</body></html>',
3723 parser=self.etree.HTMLParser())
3724 self.assertEqual(html.tag, 'html')
3725 div = html.find('.//div')
3726 self.assertEqual(div.tail, '\r\n')
3727 result = tostring(div, method='html')
3728 self.assertEqual(
3729 result,
3730 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3731 result = tostring(div, method='html', with_tail=True)
3732 self.assertEqual(
3733 result,
3734 _bytes("<div><p>Some text<i>\r\n</i></p></div>\r\n"))
3735 result = tostring(div, method='html', with_tail=False)
3736 self.assertEqual(
3737 result,
3738 _bytes("<div><p>Some text<i>\r\n</i></p></div>"))
3739
3761
3763 tostring = self.etree.tostring
3764 XML = self.etree.XML
3765 ElementTree = self.etree.ElementTree
3766
3767 root = XML(_bytes("<root/>"))
3768
3769 tree = ElementTree(root)
3770 self.assertEqual(None, tree.docinfo.standalone)
3771
3772 result = tostring(root, xml_declaration=True, encoding="ASCII")
3773 self.assertEqual(result, _bytes(
3774 "<?xml version='1.0' encoding='ASCII'?>\n<root/>"))
3775
3776 result = tostring(root, xml_declaration=True, encoding="ASCII",
3777 standalone=True)
3778 self.assertEqual(result, _bytes(
3779 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>"))
3780
3781 tree = ElementTree(XML(result))
3782 self.assertEqual(True, tree.docinfo.standalone)
3783
3784 result = tostring(root, xml_declaration=True, encoding="ASCII",
3785 standalone=False)
3786 self.assertEqual(result, _bytes(
3787 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>"))
3788
3789 tree = ElementTree(XML(result))
3790 self.assertEqual(False, tree.docinfo.standalone)
3791
3811
3813 tostring = self.etree.tostring
3814 Element = self.etree.Element
3815 SubElement = self.etree.SubElement
3816
3817 a = Element('a')
3818 a.text = "A"
3819 a.tail = "tail"
3820 b = SubElement(a, 'b')
3821 b.text = "B"
3822 b.tail = _str("Søk på nettet")
3823 c = SubElement(a, 'c')
3824 c.text = "C"
3825
3826 result = tostring(a, method="text", encoding="UTF-16")
3827
3828 self.assertEqual(_str('ABSøk på nettetCtail').encode("UTF-16"),
3829 result)
3830
3832 tostring = self.etree.tostring
3833 Element = self.etree.Element
3834 SubElement = self.etree.SubElement
3835
3836 a = Element('a')
3837 a.text = _str('Søk på nettetA')
3838 a.tail = "tail"
3839 b = SubElement(a, 'b')
3840 b.text = "B"
3841 b.tail = _str('Søk på nettetB')
3842 c = SubElement(a, 'c')
3843 c.text = "C"
3844
3845 self.assertRaises(UnicodeEncodeError,
3846 tostring, a, method="text")
3847
3848 self.assertEqual(
3849 _str('Søk på nettetABSøk på nettetBCtail').encode('utf-8'),
3850 tostring(a, encoding="UTF-8", method="text"))
3851
3864
3880
3884
3899
3917
3930
3932 tostring = self.etree.tostring
3933 Element = self.etree.Element
3934 SubElement = self.etree.SubElement
3935
3936 a = Element('a')
3937 b = SubElement(a, 'b')
3938 c = SubElement(a, 'c')
3939 d = SubElement(c, 'd')
3940 self.assertTrue(isinstance(tostring(b, encoding=_unicode), _unicode))
3941 self.assertTrue(isinstance(tostring(c, encoding=_unicode), _unicode))
3942 self.assertEqual(_bytes('<b></b>'),
3943 canonicalize(tostring(b, encoding=_unicode)))
3944 self.assertEqual(_bytes('<c><d></d></c>'),
3945 canonicalize(tostring(c, encoding=_unicode)))
3946
3951
3966
3968 tostring = self.etree.tostring
3969 Element = self.etree.Element
3970 SubElement = self.etree.SubElement
3971
3972 a = Element('a')
3973 b = SubElement(a, 'b')
3974 c = SubElement(a, 'c')
3975
3976 result = tostring(a, encoding=_unicode)
3977 self.assertEqual(result, "<a><b/><c/></a>")
3978
3979 result = tostring(a, encoding=_unicode, pretty_print=False)
3980 self.assertEqual(result, "<a><b/><c/></a>")
3981
3982 result = tostring(a, encoding=_unicode, pretty_print=True)
3983 self.assertEqual(result, "<a>\n <b/>\n <c/>\n</a>\n")
3984
3996
class SubEl(etree.ElementBase):
    """Empty ``ElementBase`` subclass; adds no behaviour of its own.

    The class name is significant: the surrounding test asserts that
    instances created without arguments report the tag ``'SubEl'``.
    """
4000
4001 el1 = SubEl()
4002 el2 = SubEl()
4003 self.assertEqual('SubEl', el1.tag)
4004 self.assertEqual('SubEl', el2.tag)
4005 el1.other = el2
4006 el2.other = el1
4007
4008 del el1, el2
4009 gc.collect()
4010
4011
4025
4027 root = etree.Element('parent')
4028 c1 = etree.SubElement(root, 'child1')
4029 c2 = etree.SubElement(root, 'child2')
4030
4031 root.remove(c1)
4032 root.remove(c2)
4033 c1.addnext(c2)
4034 c1.tail = 'abc'
4035 c2.tail = 'xyz'
4036 del c1
4037
4038 c2.getprevious()
4039
4040 self.assertEqual('child1', c2.getprevious().tag)
4041 self.assertEqual('abc', c2.getprevious().tail)
4042
4043
4044
4045 - def _writeElement(self, element, encoding='us-ascii', compression=0):
4056
4101
# Register the resolver on a DTD-loading parser, parse the XInclude test
# document, run inclusion, and check which resolution kinds were recorded.
resolver = res()
dtd_parser = etree.XMLParser(load_dtd=True)
dtd_parser.resolvers.add(resolver)

tree = etree.parse(
    fileInTestDir('include/test_xinclude.xml'), parser=dtd_parser)

self.include(tree)

recorded = sorted(resolver.called.items())
self.assertEqual(
    [("dtd", True), ("include", True), ("input", True)],
    recorded)
4116
# Template document; each resolved copy includes the next numbered file.
data = textwrap.dedent('''
<doc xmlns:xi="http://www.w3.org/2001/XInclude">
<foo/>
<xi:include href="./test.xml" />
</doc>
''')


class Resolver(etree.Resolver):
    # Records every name that was resolved; each may be requested only once.
    called = {}

    def resolve(self, url, id, context):
        seen = self.called

        # The top-level input file: note it and fall back to default loading.
        if url.endswith("test_xinclude.xml"):
            assert not seen.get("input")
            seen["input"] = True
            return None

        # End of the chain: serve a document with no further include.
        if url.endswith('/test5.xml'):
            assert not seen.get("DONE")
            seen["DONE"] = True
            return self.resolve_string('<DONE/>', context)

        # Any other file: serve the template, pointing at the next file
        # numbered by how many names have been recorded so far.
        _dirname, filename = url.rsplit('/', 1)
        assert not seen.get(filename)
        seen[filename] = True
        next_name = 'test%d.xml' % len(seen)
        return self.resolve_string(
            data.replace('test.xml', next_name), context)


resolver = Resolver()
dtd_parser = etree.XMLParser(load_dtd=True)
dtd_parser.resolvers.add(resolver)

tree = etree.parse(
    fileInTestDir('include/test_xinclude.xml'), parser=dtd_parser)

self.include(tree)

recorded = sorted(resolver.called.items())
self.assertEqual(
    [("DONE", True), ("input", True), ("test.xml", True),
     ("test2.xml", True), ("test3.xml", True), ("test4.xml", True)],
    recorded)
4160
4165
4171
# C14N output written to a file-like object expands empty-element tags.
document = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
document.write_c14n(sink)
self.assertEqual(_bytes('<a><b></b></a>'), sink.getvalue())
4181
# C14N output written with compression=9 must gunzip to the canonical form.
document = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))
sink = BytesIO()
document.write_c14n(sink, compression=9)

compressed = BytesIO(sink.getvalue())
with closing(gzip.GzipFile(fileobj=compressed)) as gzfile:
    unpacked = gzfile.read()

expected = _bytes('<a>' + '<b></b>' * 200 + '</a>')
self.assertEqual(expected, unpacked)
4190
4198
4207
4225
4237
4249
# write_c14n() in inclusive mode (default and explicit), exclusive mode, and
# exclusive mode with an inclusive prefix list.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))


def written_c14n(**options):
    # Each call writes into a fresh buffer and returns its content.
    sink = BytesIO()
    tree.write_c14n(sink, **options)
    return sink.getvalue()


# Inclusive C14N keeps every namespace declaration on the root.
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    written_c14n())
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    written_c14n(exclusive=False))

# Exclusive C14N declares a namespace only where it is used.
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    written_c14n(exclusive=True))

# A prefix listed as inclusive is declared on the root again.
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>'),
    written_c14n(exclusive=True, inclusive_ns_prefixes=['z']))
4274
# tostring(method='c14n') on a whole tree: inclusive, exclusive, and exclusive
# with an inclusive prefix list.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))


def c14n(**options):
    return etree.tostring(tree, method='c14n', **options)


self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    c14n())
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    c14n(exclusive=False))
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    c14n(exclusive=True))

# The unused prefix 'y' is kept on the root once it is listed as inclusive.
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>'),
    c14n(exclusive=True, inclusive_ns_prefixes=['y']))
4291
# tostring(method='c14n') on individual elements: first the root element,
# then its first child.
tree = self.parse(_bytes(
    '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))


def c14n(element, **options):
    return etree.tostring(element, method='c14n', **options)


# Root element.
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    c14n(tree.getroot()))
self.assertEqual(
    _bytes('<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    c14n(tree.getroot(), exclusive=False))
self.assertEqual(
    _bytes('<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>'),
    c14n(tree.getroot(), exclusive=True))

# Child element: inclusive mode carries over all in-scope declarations,
# exclusive mode only the one the element uses.
self.assertEqual(
    _bytes('<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    c14n(tree.getroot()[0], exclusive=False))
self.assertEqual(
    _bytes('<z:b xmlns:z="http://cde"></z:b>'),
    c14n(tree.getroot()[0], exclusive=True))

# Listing 'y' as inclusive adds its declaration to the child's output.
self.assertEqual(
    _bytes('<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>'),
    c14n(tree.getroot()[0], exclusive=True, inclusive_ns_prefixes=['y']))
4315
4317 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
# Exclusive C14N with three inclusive prefixes keeps all three declarations.
document = self.parse(_bytes(
    '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>'))

canonical = etree.tostring(
    document, method='c14n', exclusive=True,
    inclusive_ns_prefixes=['x', 'y', 'z'])
self.assertEqual(
    _bytes('<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>'),
    canonical)
4324
# A plain write() to a file-like object round-trips the document unchanged.
document = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
document.write(sink)
self.assertEqual(_bytes('<a><b/></a>'), sink.getvalue())
4334
# The doctype string is written verbatim on its own line before the root.
document = self.parse(_bytes('<a><b/></a>'))
sink = BytesIO()
document.write(sink, doctype='HUHU')
self.assertEqual(_bytes('HUHU\n<a><b/></a>'), sink.getvalue())
4342
# Output written with compression=9 must gunzip back to the plain document.
markup = _bytes('<a>' + '<b/>' * 200 + '</a>')
document = self.parse(markup)
sink = BytesIO()
document.write(sink, compression=9)

compressed = BytesIO(sink.getvalue())
with closing(gzip.GzipFile(fileobj=compressed)) as gzfile:
    unpacked = gzfile.read()

self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked)
4351
# Compressed output with a doctype: the doctype line is part of the gzip
# payload, ahead of the root element.
document = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))
sink = BytesIO()
document.write(sink, compression=9, doctype='<!DOCTYPE a>')

compressed = BytesIO(sink.getvalue())
with closing(gzip.GzipFile(fileobj=compressed)) as gzfile:
    unpacked = gzfile.read()

self.assertEqual(
    _bytes('<!DOCTYPE a>\n<a>' + '<b/>' * 200 + '</a>'),
    unpacked)
4360
# Compare write() at compression levels 0 (explicit), default, 1 and 9.
tree = self.parse(_bytes('<a>' + '<b/>' * 200 + '</a>'))


def written(**options):
    # Serialise into a fresh buffer and return the raw bytes.
    sink = BytesIO()
    tree.write(sink, **options)
    return sink.getvalue()


def gunzip(payload):
    with closing(gzip.GzipFile(fileobj=BytesIO(payload))) as gzfile:
        return gzfile.read()


# compression=0 and the default must give the same uncompressed bytes.
s0 = written(compression=0)
self.assertEqual(written(), s0)

# Compressed output must not be larger than the plain output.
unpacked = {}
for level in (1, 9):
    packed = written(compression=level)
    self.assertTrue(len(packed) <= len(s0))
    unpacked[level] = gunzip(packed)

# Every variant carries the same document.
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), s0)
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked[1])
self.assertEqual(_bytes('<a>' + '<b/>' * 200 + '</a>'), unpacked[9])
4391
4399
4408
4416
4425
4428 etree = etree
4429
4451
4453 """This can't really be tested as long as there isn't a way to
4454 reset the logging setup ...
4455 """
# Install a PyErrorLog subclass as the global Python log, parse a document
# with mismatched tags, and check what the logger received.
parse = self.etree.parse

messages = []


class Logger(self.etree.PyErrorLog):
    # Collect only the formatted message text of every log entry.
    def log(self, entry, message, *args):
        messages.append(message)


def logged(fragment):
    # Messages containing the given fragment; an empty list is falsy.
    return [message for message in messages if fragment in message]


self.etree.use_global_python_log(Logger())

# BytesIO needs bytes: a text literal raises TypeError on Python 3 before the
# parser is reached, so the input goes through _bytes().
f = BytesIO(_bytes('<a><b></c></b></a>'))
try:
    parse(f)
except SyntaxError:
    # Expected: the document is not well-formed; only the log matters here.
    pass
finally:
    # Close the buffer even if parse() raises something unexpected.
    f.close()

self.assertTrue(logged('mismatch'))
self.assertTrue(logged(':PARSER:'))
self.assertTrue(logged(':ERR_TAG_NAME_MISMATCH:'))
self.assertTrue(logged(':1:15:'))
4479
4482 etree = etree
4483
4487
class Target(object):
    # Parser target whose callbacks return descriptive strings; the parser
    # reports these return values as the event payloads.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'


# No explicit event list is passed: only 'end' events show up below.
parser = self.etree.XMLPullParser(target=Target())
events = parser.read_events()

# Opening tags alone yield nothing, however often the iterator is drained.
parser.feed('<root><element>')
self.assertFalse(list(events))
self.assertFalse(list(events))

# Each further chunk yields exactly the 'end' events it completes.
for chunk, expected in (
        ('</element><child>', [('end', 'end(element)')]),
        ('</child>', [('end', 'end(child)')]),
        ('</root>', [('end', 'end(root)')])):
    parser.feed(chunk)
    self.assertEqual(expected, list(events))

self.assertFalse(list(events))
# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4511
class Target(object):
    # Parser target whose callbacks return descriptive strings; the parser
    # reports these return values as the event payloads.
    def start(self, tag, attrib):
        return 'start(%s)' % tag

    def end(self, tag):
        return 'end(%s)' % tag

    def close(self):
        return 'close()'


# Request both 'start' and 'end' events.
parser = self.etree.XMLPullParser(['start', 'end'], target=Target())
events = parser.read_events()

parser.feed('<root><element>')
self.assertEqual(
    [('start', 'start(root)'), ('start', 'start(element)')],
    list(events))
# Draining again without feeding yields nothing new.
self.assertFalse(list(events))

# Each further chunk yields exactly the events it triggers, in order.
for chunk, expected in (
        ('</element><child>',
         [('end', 'end(element)'), ('start', 'start(child)')]),
        ('</child>', [('end', 'end(child)')]),
        ('</root>', [('end', 'end(root)')])):
    parser.feed(chunk)
    self.assertEqual(expected, list(events))

self.assertFalse(list(events))
# close() hands back whatever the target's close() returned.
self.assertEqual('close()', parser.close())
4544
# Pull parser driving a stock TreeBuilder target with start/end events.
parser = self.etree.XMLPullParser(
    ['start', 'end'], target=etree.TreeBuilder())
events = parser.read_events()

parser.feed('<root><element>')
self.assert_event_tags(
    events, [('start', 'root'), ('start', 'element')])
# Draining again without feeding yields nothing new.
self.assertFalse(list(events))

# Each further chunk yields exactly the events it triggers, in order.
for chunk, expected in (
        ('</element><child>', [('end', 'element'), ('start', 'child')]),
        ('</child>', [('end', 'child')]),
        ('</root>', [('end', 'root')])):
    parser.feed(chunk)
    self.assert_event_tags(events, expected)

self.assertFalse(list(events))
# close() returns the element built by the target.
built_root = parser.close()
self.assertEqual('root', built_root.tag)
4566
class Target(etree.TreeBuilder):
    # TreeBuilder that renames every element as soon as it is closed.
    def end(self, tag):
        closed = super(Target, self).end(tag)
        closed.tag = closed.tag + '-huhu'
        return closed


parser = self.etree.XMLPullParser(['start', 'end'], target=Target())
events = parser.read_events()

# 'start' events still carry the original tag names.
parser.feed('<root><element>')
self.assert_event_tags(
    events, [('start', 'root'), ('start', 'element')])
self.assertFalse(list(events))

# 'end' events carry the tag as rewritten by Target.end().
for chunk, expected in (
        ('</element><child>',
         [('end', 'element-huhu'), ('start', 'child')]),
        ('</child>', [('end', 'child-huhu')]),
        ('</root>', [('end', 'root-huhu')])):
    parser.feed(chunk)
    self.assert_event_tags(events, expected)

self.assertFalse(list(events))
# The root returned by close() carries the renamed tag as well.
built_root = parser.close()
self.assertEqual('root-huhu', built_root.tag)
4594
4624
4625
if __name__ == '__main__':
    # Direct execution only prints a hint naming the runner script to use.
    hint = 'to test use test.py %s' % __file__
    print(hint)
4628