4 # Compare two XML files for structural and content equivalence. Used for
13 use XML::NamespaceSupport;
15 my $parser = XML::DOM::Parser->new;
16 my $d1 = $parser->parsefile($ARGV[0]);
17 my $d2 = $parser->parsefile($ARGV[1]);
18 my $nsup1 = XML::NamespaceSupport->new;
19 my $nsup2 = XML::NamespaceSupport->new;
21 compare($d1, $d2, $nsup1, $nsup2);
24 my ($n1, $n2, $nsup1, $nsup2) = @_;
26 if ($n1->getNodeType != $n2->getNodeType) {
27 printf STDERR "Node types don't match (%u vs. %u)\n",
28 $n1->getNodeType, $n2->getNodeType;
35 if ($n1->getNodeType == XML::DOM::ELEMENT_NODE) {
36 process_namespaces($n1, $nsup1);
37 process_namespaces($n2, $nsup2);
40 my ($nsuri1, undef, $lname1) = $nsup1->process_element_name($n1->getNodeName);
41 my ($nsuri2, undef, $lname2) = $nsup2->process_element_name($n2->getNodeName);
46 # compare element names
47 unless ($nsuri1 eq $nsuri2 && $lname1 eq $lname2) {
48 print STDERR "$nsuri1/$lname1 != $nsuri2/$lname2\n";
53 my $attrs1 = $n1->getAttributes;
54 my $attrs2 = $n2->getAttributes;
56 # this will need some special care, since we ignore xmlns= attributes; defer
57 # to its own function so it's easier to do comparison both ways
58 compare_attr_list($attrs1, $attrs2, $nsup1, $nsup2) if (defined($attrs1));
59 compare_attr_list($attrs2, $attrs1, $nsup2, $nsup1) if (defined($attrs2));
61 # if this is a text node, check the contents
62 if ($n1->getNodeType == XML::DOM::TEXT_NODE) {
63 my $d1 = $n1->getData;
64 my $d2 = $n2->getData;
66 # ignore leading/trailing whitespace
72 # compress other whitespace
79 print STDERR "$nsuri1/$lname1 has differing textual content ('$d1' vs. '$d2')\n";
84 # this element is ok, let's compare all children
85 my $c1 = $n1->getChildNodes;
86 my $c2 = $n2->getChildNodes;
91 # find all elements except comments and blanks (not perfect, since we don't get
92 # compression, but ok)
93 for my $i (0..($c1->getLength-1)) {
94 my $item = $c1->item($i);
95 next if ($item->getNodeType == XML::DOM::COMMENT_NODE);
96 next if ($item->getNodeType == XML::DOM::TEXT_NODE && $item->getData !~ /\S/);
100 for my $i (0..($c2->getLength-1)) {
101 my $item = $c2->item($i);
102 next if ($item->getNodeType == XML::DOM::COMMENT_NODE);
103 next if ($item->getNodeType == XML::DOM::TEXT_NODE && $item->getData !~ /\S/);
108 if (scalar @c1 != scalar @c2) {
109 print STDERR "$nsuri1/$lname1 has differing number of children\n";
113 for my $i (0..$#c1) {
114 compare($c1[$i], $c2[$i], $nsup1, $nsup2);
121 sub process_namespaces {
122 my ($node, $nsup) = @_;
124 my $attrs = $node->getAttributes;
125 return unless defined($attrs);
127 for my $attr ($attrs->getValues) {
128 my $name = $attr->getName;
129 if ($name =~ /^xmlns:(.*)$/) {
130 $nsup->declare_prefix($1, $attr->getValue);
135 sub compare_attr_list {
136 my ($attrs1, $attrs2, $nsup1, $nsup2) = @_;
138 for my $attr1 ($attrs1->getValues) {
139 my $name = $attr1->getName;
140 next if ($name =~ /^xmlns:(.*)$/);
142 my ($nsuri1, undef, $lname1) = $nsup1->process_attribute_name($attr1->getName);
145 if (!defined($attrs2)) {
146 # n2 has no attributes at all
147 print STDERR "Attribute $nsuri1/$lname1 exists on one side but not the other\n";
152 for my $attr2 ($attrs2->getValues) {
153 next if ($attr2->getName =~ /^xmlns:(.*)$/);
154 my ($nsuri2, undef, $lname2) = $nsup2->process_attribute_name($attr2->getName);
157 if ($nsuri1 eq $nsuri2 && $lname1 eq $lname2) {
158 $attr2_found = $attr2;
163 if (!defined($attr2_found)) {
164 print STDERR "Attribute $nsuri1/$lname1 exists on one side but not the other\n";
168 if ($attr1->getValue ne $attr2_found->getValue) {
169 print STDERR "Attribute $nsuri1/$lname1 has differing values\n";