C
C#4mo ago
Moonlit

✅ How to clean up a stream of incomplete JSON data?

I'm working on adding some Discord capabilities to a bot I'm working on, but the data I get is really messy. I seem to just get a constant stream of data, but I don't see any actual identifier of when a message ends. So I was wondering if there is any way that I could keep appending the buffer to a StringBuilder, then try to extract the first JSON object, add it to a Channel, remove it from the StringBuilder, and keep looping. These are the 2 bits that I was working on so far... I've cut out a lot of stuff that didn't work.
No description
No description
21 Replies
Moonlit
MoonlitOP4mo ago
Here is an example of two and a half logs of data that I get:
[Discord] [Initialize] {"t":null,"s":null,"op":10,"d":{"heartbeat_interval":41250,"_trace":["[\"gateway-prd-us-east1-b-mzd7\",{\"micros\":0.0}]"]}}

[Discord] [Initialize] {"t":"READY","s":1,"op":0,"d":{"v":6,"user_settings":{},"user":{"verified":true,"username":"WittleMoonBot","mfa_enabled":true,"id":"REDACTED","global_name":null,"flags":0,"email":null,"discriminator":"9708","clan":null,"bot":true,"avatar":"6133452f2cf3d5e5b01147d0f3ee3857"},"session_type":"normal","session_id":"6e225c9baed4df287ee19b0b4d5c9f47","resume_gateway_url":"wss://gateway-us-east1-b.discord.gg","relationships":[],"private_channels":[],"presences":[],"guilds":[{"unavailable":true,"id":"1201942350667190332"}],"guild_join_requests":[],"geo_ordered_rtc_regions":["rotterdam","paris","frankfurt","frankfurt-two","milan"],"auth":{},"application":{"id":"REDACTED","flags":0},"_trace":["[\"gateway-prd-us-east1-b-mzd7\",{\"micros\":63972,\"calls\":[\"id_created\",{\"micros\":1152,\"calls\":[]},\"session_lookup_time\",{\"micros\":12967,\"calls\":[]},\"session_lookup_finished\",{\"micros\":12,\"calls\":[]},\"discord-sessions-prd-1-10\",{\"micros\":49581,\"calls\":[\"start_session\",{\"micros\":40583,\"calls\":[\"discord-api-rpc-85579b8f54-xnthg\",{\"micros\":36822,\"calls\":[\"get_user\",{\"micros\":5389},\"get_guilds\",{\"micros\":2989},\"send_scheduled_deletion_message\",{\"micros\":8},\"guild_join_requests\",{\"micros\":226},\"authorized_ip_coro\",{\"micros\":8}]}]},\"starting_guild_connect\",{\"micros\":43,\"calls\":[]},\"presence_started\",{\"micros\":245,\"calls\":[]},\"guilds_started\",{\"micros\":75,\"calls\":[]},\"guilds_connect\",{\"micros\":2,\"calls\":[]},\"presence_connect\",{\"micros\":8611,\"calls\":[]},\"connect_finished\",{\"micros\":8615,\"calls\":[]},\"build_ready\",{\"micros\":15,\"calls\":[]},\"clean_ready\",{\"micros\":0,\"calls\":[]},\"optimize_ready\",{\"micros\":1,\"calls\":[]},\"split_ready\",{\"micros\":0,\"calls\":[]}]}]}]"]}}

[Discord] [Initialize] {"t":"GUILD_CREATE","s":2,"op":0,"d":{"premium_tier":0,"splash":null,"application_id":null,"presences":[],"max_members":500000,"emojis":[{"version":1709808013726,"roles":[],"require_colons":true,"name":"Moonlit_Rave","managed":false,"id":"1215247599691505675","available":true,"animated":true},{"version":1722587833223,"roles":[],"require_colons":true, ...
[Discord] [Initialize] {"t":null,"s":null,"op":10,"d":{"heartbeat_interval":41250,"_trace":["[\"gateway-prd-us-east1-b-mzd7\",{\"micros\":0.0}]"]}}

[Discord] [Initialize] {"t":"READY","s":1,"op":0,"d":{"v":6,"user_settings":{},"user":{"verified":true,"username":"WittleMoonBot","mfa_enabled":true,"id":"REDACTED","global_name":null,"flags":0,"email":null,"discriminator":"9708","clan":null,"bot":true,"avatar":"6133452f2cf3d5e5b01147d0f3ee3857"},"session_type":"normal","session_id":"6e225c9baed4df287ee19b0b4d5c9f47","resume_gateway_url":"wss://gateway-us-east1-b.discord.gg","relationships":[],"private_channels":[],"presences":[],"guilds":[{"unavailable":true,"id":"1201942350667190332"}],"guild_join_requests":[],"geo_ordered_rtc_regions":["rotterdam","paris","frankfurt","frankfurt-two","milan"],"auth":{},"application":{"id":"REDACTED","flags":0},"_trace":["[\"gateway-prd-us-east1-b-mzd7\",{\"micros\":63972,\"calls\":[\"id_created\",{\"micros\":1152,\"calls\":[]},\"session_lookup_time\",{\"micros\":12967,\"calls\":[]},\"session_lookup_finished\",{\"micros\":12,\"calls\":[]},\"discord-sessions-prd-1-10\",{\"micros\":49581,\"calls\":[\"start_session\",{\"micros\":40583,\"calls\":[\"discord-api-rpc-85579b8f54-xnthg\",{\"micros\":36822,\"calls\":[\"get_user\",{\"micros\":5389},\"get_guilds\",{\"micros\":2989},\"send_scheduled_deletion_message\",{\"micros\":8},\"guild_join_requests\",{\"micros\":226},\"authorized_ip_coro\",{\"micros\":8}]}]},\"starting_guild_connect\",{\"micros\":43,\"calls\":[]},\"presence_started\",{\"micros\":245,\"calls\":[]},\"guilds_started\",{\"micros\":75,\"calls\":[]},\"guilds_connect\",{\"micros\":2,\"calls\":[]},\"presence_connect\",{\"micros\":8611,\"calls\":[]},\"connect_finished\",{\"micros\":8615,\"calls\":[]},\"build_ready\",{\"micros\":15,\"calls\":[]},\"clean_ready\",{\"micros\":0,\"calls\":[]},\"optimize_ready\",{\"micros\":1,\"calls\":[]},\"split_ready\",{\"micros\":0,\"calls\":[]}]}]}]"]}}

[Discord] [Initialize] {"t":"GUILD_CREATE","s":2,"op":0,"d":{"premium_tier":0,"splash":null,"application_id":null,"presences":[],"max_members":500000,"emojis":[{"version":1709808013726,"roles":[],"require_colons":true,"name":"Moonlit_Rave","managed":false,"id":"1215247599691505675","available":true,"animated":true},{"version":1722587833223,"roles":[],"require_colons":true, ...
ero
ero4mo ago
_webSocket is what here? A stream?
Moonlit
MoonlitOP4mo ago
It's a "ClientWebSocket" I'm kinda new to networking stuff, but it's whatever my connection to Discord is
ero
ero4mo ago
What in the... Well, I mean... That's just how that class works, I guess, yeah. What you can do instead is collect just the bytes and then create the string at the very end.
Moonlit
MoonlitOP4mo ago
How would I know when I've received the entire message though?
ero
ero4mo ago
When result is < buffer.Length, right?
Moonlit
MoonlitOP4mo ago
Yeah, I suppose so... If at any rare time a message ends up being the exact size of the buffer, it'd break the entire thing though? I guess it wouldn't break... It just wouldn't process any messages until new ones are sent to make the buffer not match again. So once I know all my JSON objects are complete, is there an easy JSON function to extract them all? I made a channel to put messages in. I'm thinking of having a task run independently on a separate thread to just pull from the channel.
ero
ero4mo ago
We usually recommend System.Text.Json for de/serialization. STJ can accept objects like streams to read its JSON from. But this web socket class is weird...
Moonlit
MoonlitOP4mo ago
If you have a suggestion for a better way of doing it, I'm all ears. Like I said, I'm pretty new... So I was just trying to be consistent with using sockets. What is STJ?
ero
ero4mo ago
System.Text.Json. I don't know too much about Discord bots. I usually just use one of the existing Discord libraries. I hear Remora.Discord and NetCord are good.
Moonlit
MoonlitOP4mo ago
Well, I'm trying to challenge myself, so I'm not using any bot libs. I want to use Discord's API directly. If I wanted to use a lib, I'd just run it in Discord.js anyway, which makes everything really simple. But I want to learn how to network, and these libs bypass all of that. It's just a question of whether ClientWebSocket is the right class. Also, if I am sticking with that... With your suggestion, am I meant to be creating a Queue of byte arrays to store the data before turning it all into a string?
ero
ero4mo ago
I was thinking of a simple list, and calling AddRange on it. Rent the buffer from the ArrayPool.
Moonlit
MoonlitOP4mo ago
Rent?
ero
ero4mo ago
var buffer = ArrayPool<byte>.Shared.Rent(8192);
var buffer = ArrayPool<byte>.Shared.Rent(8192);
You return it when you don't need it anymore, and your call stays allocation-free.
Moonlit
MoonlitOP4mo ago
Do you have an example of implementing that? Because I don't quite understand what that does or how it pairs with my _webSocket.ReceiveAsync
ero
ero4mo ago
My idea was something like this
var bytes = new List<byte>();
var buffer = new ArraySegment<byte>(ArrayPool<byte>.Shared.Rent(8192));

while (_webSocket.State == WebSocketState.Open)
{
var result = await _webSocket.ReceiveAsync(buffer, CancellationToken.None);
bytes.AddRange(buffer[..result.Count]);

// ...
}

var json = Encoding.UTF8.GetString(CollectionsMarshal.AsSpan(bytes));

ArrayPool<byte>.Shared.Return(buffer.Array);
var bytes = new List<byte>();
var buffer = new ArraySegment<byte>(ArrayPool<byte>.Shared.Rent(8192));

while (_webSocket.State == WebSocketState.Open)
{
var result = await _webSocket.ReceiveAsync(buffer, CancellationToken.None);
bytes.AddRange(buffer[..result.Count]);

// ...
}

var json = Encoding.UTF8.GetString(CollectionsMarshal.AsSpan(bytes));

ArrayPool<byte>.Shared.Return(buffer.Array);
You can probably also just statically allocate a buffer
Moonlit
MoonlitOP4mo ago
:Velion_Think: I see what you mean. I don't understand the shared bytes thing, but I think I get the general idea of how to get my bot working. I've also never heard of CollectionsMarshal, but I can look that up. Thanks for the love :Moon_JJ_Love:
ero
ero4mo ago
Don't worry about the array pool, it's unnecessary. I'd much rather recommend just having one persistent buffer, like `private static readonly byte[] _buffer = new byte[8192];`, and just use that. You're overwriting it all the time anyway.
Moonlit
MoonlitOP4mo ago
Okay so I think this is what you mean
No description
Moonlit
MoonlitOP4mo ago
I still need a way to split the JSON objects though. Just noticed the while condition is wrong.
Moonlit
MoonlitOP4mo ago
I think I got it! It seems to work :Moon_JJ_UwU: Thanks! Also, I think each message is actually just 1 JSON object... I think the guild create one is just absolutely huge. Thanks for all the help.
No description

Did you find this page helpful?