C
C#2w ago
orlac

XmlReader skipping every other node while iterating

I have this code to parse a simple XML file:
/// <summary>
/// Streams the <c>compound</c> elements of the cppreference Doxygen tag file
/// and maps each one to a <see cref="Compound"/>.
/// </summary>
/// <returns>
/// One <see cref="Compound"/> per <c>compound</c> element yielded by
/// <c>StreamElementsAsync</c>, in the order they are read.
/// </returns>
private async IAsyncEnumerable<Compound> ParseCppReferenceIndexTags() {
    // NOTE(review): if DocsRootDir is a System.Uri, AbsolutePath is percent-encoded
    // and LocalPath may be what is wanted here - confirm against the declaration.
    string indexFilePath = Path.Join(
        this.DocsRootDir.AbsolutePath,
        @"cppreference-doxygen-local.tag.xml"
    );

    // The reader created in StreamElementsAsync is not configured to close its
    // input, so this method owns the stream and must dispose it. Read-only access
    // is enough for parsing and also works when the file is not writable.
    await using FileStream stream = new(
        indexFilePath,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        bufferSize: 4096,
        useAsync: true
    );

    await foreach (XElement elem in StreamElementsAsync(stream, "compound")) {
        var kind = elem.Attribute("kind")?.Value;

        Compound compoundTag = new() {
            Name = elem.Element("name")?.Value,
            Type = kind,
            FileName = elem.Element("filename")?.Value,
            // Only "file" compounds carry their namespace over.
            NameSpace = kind == "file"
                ? elem.Element("namespace")?.Value
                : null
        };

        // other stuff..
        yield return compoundTag;
    }
}

/// <summary>
/// Streams elements named <paramref name="matchName"/> from <paramref name="stream"/>,
/// yielding each one as an <see cref="XElement"/>.
/// </summary>
/// <remarks>
/// Known defect (the subject of this question): when matching elements are adjacent
/// siblings, only every other one is yielded. See the comments inside the loop.
/// </remarks>
private static async IAsyncEnumerable<XElement> StreamElementsAsync(Stream stream, string matchName) {
XmlReaderSettings settings = new() {
Async = true,
IgnoreWhitespace = true
};

using XmlReader reader = XmlReader.Create(stream, settings);
while (await reader.ReadAsync()) {
// XNode.ReadFromAsync consumes the whole element and leaves the reader positioned
// on the node that follows it. With IgnoreWhitespace = true and adjacent siblings,
// that node is already the next matching start tag.
// ReadToFollowing always advances before it tests for a match, so it moves past
// that start tag and lands on the one after it - hence every other element is skipped.
while (reader.ReadToFollowing(matchName))
if (await XNode.ReadFromAsync(reader, CancellationToken.None) is XElement elem)
yield return elem;
}
}
/// <summary>
/// Streams the <c>compound</c> elements of the cppreference Doxygen tag file
/// and maps each one to a <see cref="Compound"/>.
/// </summary>
/// <returns>
/// One <see cref="Compound"/> per <c>compound</c> element yielded by
/// <c>StreamElementsAsync</c>, in the order they are read.
/// </returns>
private async IAsyncEnumerable<Compound> ParseCppReferenceIndexTags() {
    // NOTE(review): if DocsRootDir is a System.Uri, AbsolutePath is percent-encoded
    // and LocalPath may be what is wanted here - confirm against the declaration.
    string indexFilePath = Path.Join(
        this.DocsRootDir.AbsolutePath,
        @"cppreference-doxygen-local.tag.xml"
    );

    // The reader created in StreamElementsAsync is not configured to close its
    // input, so this method owns the stream and must dispose it. Read-only access
    // is enough for parsing and also works when the file is not writable.
    await using FileStream stream = new(
        indexFilePath,
        FileMode.Open,
        FileAccess.Read,
        FileShare.Read,
        bufferSize: 4096,
        useAsync: true
    );

    await foreach (XElement elem in StreamElementsAsync(stream, "compound")) {
        var kind = elem.Attribute("kind")?.Value;

        Compound compoundTag = new() {
            Name = elem.Element("name")?.Value,
            Type = kind,
            FileName = elem.Element("filename")?.Value,
            // Only "file" compounds carry their namespace over.
            NameSpace = kind == "file"
                ? elem.Element("namespace")?.Value
                : null
        };

        // other stuff..
        yield return compoundTag;
    }
}

/// <summary>
/// Streams elements named <paramref name="matchName"/> from <paramref name="stream"/>,
/// yielding each one as an <see cref="XElement"/>.
/// </summary>
/// <remarks>
/// Known defect (the subject of this question): when matching elements are adjacent
/// siblings, only every other one is yielded. See the comments inside the loop.
/// </remarks>
private static async IAsyncEnumerable<XElement> StreamElementsAsync(Stream stream, string matchName) {
XmlReaderSettings settings = new() {
Async = true,
IgnoreWhitespace = true
};

using XmlReader reader = XmlReader.Create(stream, settings);
while (await reader.ReadAsync()) {
// XNode.ReadFromAsync consumes the whole element and leaves the reader positioned
// on the node that follows it. With IgnoreWhitespace = true and adjacent siblings,
// that node is already the next matching start tag.
// ReadToFollowing always advances before it tests for a match, so it moves past
// that start tag and lands on the one after it - hence every other element is skipped.
while (reader.ReadToFollowing(matchName))
if (await XNode.ReadFromAsync(reader, CancellationToken.None) is XElement elem)
yield return elem;
}
}
The XML file looks like this:
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<tagfile>
<compound kind="file">
<name>algorithm</name>
<filename>cpp/header/algorithm</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>any</name>
<filename>cpp/header/any</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>array</name>
<filename>cpp/header/array</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>atomic</name>
<filename>cpp/header/atomic</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>bit</name>
<filename>cpp/header/bit</filename>
<namespace>std</namespace>
</compound>
...
</tagfile>
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<tagfile>
<compound kind="file">
<name>algorithm</name>
<filename>cpp/header/algorithm</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>any</name>
<filename>cpp/header/any</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>array</name>
<filename>cpp/header/array</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>atomic</name>
<filename>cpp/header/atomic</filename>
<namespace>std</namespace>
</compound>
<compound kind="file">
<name>bit</name>
<filename>cpp/header/bit</filename>
<namespace>std</namespace>
</compound>
...
</tagfile>
The data eventually makes its way into a SQLite db, but for some reason, when it's iterating through the top-level compound nodes, it seems to be skipping every other node because of some bad logic in StreamElementsAsync(). Is there something I should be using other than reader.ReadToFollowing(matchName) in the while loop to traverse all of the top-level nodes? If that seems correct, does anyone happen to see anything else that might be off in the logic somewhere?
1 Reply
orlac
orlac2w ago
this seems to have fixed it:
/// <summary>
/// Streams every element named <paramref name="matchName"/> from an XML stream,
/// materializing one <see cref="XElement"/> at a time rather than loading the
/// whole document.
/// </summary>
/// <param name="stream">The XML input. This method does not close it.</param>
/// <param name="matchName">
/// Element name to match; compared against <see cref="XmlReader.Name"/>.
/// </param>
/// <param name="cancellationToken">
/// Optional token checked between nodes and passed to <c>XNode.ReadFromAsync</c>.
/// </param>
/// <returns>
/// The matching elements in document order. A match nested inside another match is
/// returned only as part of its ancestor's subtree, not as a separate item.
/// </returns>
private static async IAsyncEnumerable<XElement> StreamElementsAsync(
    Stream stream,
    string matchName,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default
) {
    XmlReaderSettings settings = new() {
        Async = true,
        IgnoreWhitespace = true
    };

    using XmlReader reader = XmlReader.Create(stream, settings);

    // A freshly created reader is not positioned on any node (NodeType is None),
    // so the first pass falls through to ReadAsync below.
    while (!reader.EOF) {
        cancellationToken.ThrowIfCancellationRequested();

        if (reader.NodeType == XmlNodeType.Element && reader.Name == matchName) {
            // ReadFromAsync consumes the whole element and leaves the reader on the
            // node that follows it. Do not advance again here, or an adjacent
            // matching sibling would be skipped.
            if (await XNode.ReadFromAsync(reader, cancellationToken) is XElement elem) {
                yield return elem;
            }
        } else {
            await reader.ReadAsync();
        }
    }
}
/// <summary>
/// Streams every element named <paramref name="matchName"/> from an XML stream,
/// materializing one <see cref="XElement"/> at a time rather than loading the
/// whole document.
/// </summary>
/// <param name="stream">The XML input. This method does not close it.</param>
/// <param name="matchName">
/// Element name to match; compared against <see cref="XmlReader.Name"/>.
/// </param>
/// <param name="cancellationToken">
/// Optional token checked between nodes and passed to <c>XNode.ReadFromAsync</c>.
/// </param>
/// <returns>
/// The matching elements in document order. A match nested inside another match is
/// returned only as part of its ancestor's subtree, not as a separate item.
/// </returns>
private static async IAsyncEnumerable<XElement> StreamElementsAsync(
    Stream stream,
    string matchName,
    [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default
) {
    XmlReaderSettings settings = new() {
        Async = true,
        IgnoreWhitespace = true
    };

    using XmlReader reader = XmlReader.Create(stream, settings);

    // A freshly created reader is not positioned on any node (NodeType is None),
    // so the first pass falls through to ReadAsync below.
    while (!reader.EOF) {
        cancellationToken.ThrowIfCancellationRequested();

        if (reader.NodeType == XmlNodeType.Element && reader.Name == matchName) {
            // ReadFromAsync consumes the whole element and leaves the reader on the
            // node that follows it. Do not advance again here, or an adjacent
            // matching sibling would be skipped.
            if (await XNode.ReadFromAsync(reader, cancellationToken) is XElement elem) {
                yield return elem;
            }
        } else {
            await reader.ReadAsync();
        }
    }
}