C
C# • 2y ago
ero

Parsing a string into a Vector of bytes most efficiently

Given a string with an even number of characters, what's the most efficient way to parse that into a Vector<byte> along with another Vector<byte> of bit masks? A sample input is 12 34 ?? 78 87 ?5 ?3 21 -- these are meant to be hex bytes -- which would get turned into < 0x12 0x34 0x00 0x78 0x87 0x05 0x03 0x21 > for the values, and < 0xFF 0xFF 0x00 0xFF 0xFF 0x0F 0x0F 0xFF > for the masks. Currently I'm doing the input.Length % 2 != 0 check first, of course, followed by Regex.Matches(input, @"..").Select(match => match.Value).ToList();. From here I do this to parse all of the bytes:
// Parses every two-character token in `bytes` into a value byte and a mask byte.
// A hex digit contributes its nibble to the value and 0xF to the matching mask
// nibble; any other character (e.g. '?') is a wildcard and contributes 0 to both.
// Each token is expected to hold at least two characters (indexes 0 and 1 are read).
int length = bytes.Length;

// Pad up to Vector<byte>.Count so a Vector<byte> can be built from the arrays.
if (length < Vector<byte>.Count)
{
    length = Vector<byte>.Count;
}

// New arrays are zero-filled, so wildcard nibbles and padding need no writes.
byte[] values = new byte[length];
byte[] masks = new byte[length];

for (int i = 0; i < bytes.Length; i++)
{
    var currByte = bytes[i];

    // First character is the upper nibble.
    if (TryParseNibble(currByte[0], out byte upper))
    {
        values[i] |= (byte)(upper << 4);
        masks[i] |= 0xF0;
    }

    // Second character is the lower nibble.
    if (TryParseNibble(currByte[1], out byte lower))
    {
        values[i] |= lower;
        masks[i] |= 0x0F;
    }
}

return (length, new(values), new(masks));

// Decodes a single ASCII hex digit without allocating a temporary string.
// Returns false (and nibble = 0) for any non-hex character.
static bool TryParseNibble(char c, out byte nibble)
{
    int digit = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    nibble = digit < 0 ? (byte)0 : (byte)digit;
    return digit >= 0;
}
// Parses every two-character token in `bytes` into a value byte and a mask byte.
// A hex digit contributes its nibble to the value and 0xF to the matching mask
// nibble; any other character (e.g. '?') is a wildcard and contributes 0 to both.
// Each token is expected to hold at least two characters (indexes 0 and 1 are read).
int length = bytes.Length;

// Pad up to Vector<byte>.Count so a Vector<byte> can be built from the arrays.
if (length < Vector<byte>.Count)
{
    length = Vector<byte>.Count;
}

// New arrays are zero-filled, so wildcard nibbles and padding need no writes.
byte[] values = new byte[length];
byte[] masks = new byte[length];

for (int i = 0; i < bytes.Length; i++)
{
    var currByte = bytes[i];

    // First character is the upper nibble.
    if (TryParseNibble(currByte[0], out byte upper))
    {
        values[i] |= (byte)(upper << 4);
        masks[i] |= 0xF0;
    }

    // Second character is the lower nibble.
    if (TryParseNibble(currByte[1], out byte lower))
    {
        values[i] |= lower;
        masks[i] |= 0x0F;
    }
}

return (length, new(values), new(masks));

// Decodes a single ASCII hex digit without allocating a temporary string.
// Returns false (and nibble = 0) for any non-hex character.
static bool TryParseNibble(char c, out byte nibble)
{
    int digit = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    nibble = digit < 0 ? (byte)0 : (byte)digit;
    return digit >= 0;
}
Obviously the big one is using Regex. Any ideas?
6 Replies
TimberStalker
TimberStalker • 2y ago
what type is the bytes variable?
ero
ero • 2y ago
string[]. Using the example, the contents are [ "12", "34", "??", "78", "87", "?5", "?3", "21" ]. Watch me optimize removing whitespace from a string:
TimberStalker
TimberStalker • 2y ago
why use question marks instead of 0. Is that just for the mask?
ero
ero • 2y ago
for the mask, yeah it should be any non-hexdigit character
// Copies every non-whitespace character of INPUT into a stack scratch buffer
// and returns the compacted text as a new string.
// NOTE(review): the stack allocation grows with INPUT.Length; a very long input
// could exhaust the stack — confirm callers only pass short strings.
char* output = stackalloc char[INPUT.Length];
int written = 0;

fixed (char* pInput = INPUT)
{
    for (int i = 0; i < INPUT.Length; i++)
    {
        char c = pInput[i];

        if (!char.IsWhiteSpace(c))
        {
            output[written++] = c;
        }
    }
}

// Pass the explicit length: the String(char*) constructor scans for a '\0'
// terminator, and the buffer has none when no character was skipped.
return new string(output, 0, written);
// Copies every non-whitespace character of INPUT into a stack scratch buffer
// and returns the compacted text as a new string.
// NOTE(review): the stack allocation grows with INPUT.Length; a very long input
// could exhaust the stack — confirm callers only pass short strings.
char* output = stackalloc char[INPUT.Length];
int written = 0;

fixed (char* pInput = INPUT)
{
    for (int i = 0; i < INPUT.Length; i++)
    {
        char c = pInput[i];

        if (!char.IsWhiteSpace(c))
        {
            output[written++] = c;
        }
    }
}

// Pass the explicit length: the String(char*) constructor scans for a '\0'
// terminator, and the buffer has none when no character was skipped.
return new string(output, 0, written);
🙃
TimberStalker
TimberStalker • 2y ago
you can change the logic inside the for loop to this
// Loop body for one token: builds values[i] / masks[i] from the characters of
// bytes[i]. j is the nibble position counted from the right, so the character
// at ^(j + 1) is shifted left by 4 * j bits.
var currByte = bytes[i];
for (int j = currByte.Length - 1; j >= 0; j--)
{
    char c = currByte[^(j + 1)];

    // Decode an ASCII hex digit; -1 marks a wildcard such as '?'.
    int nibble = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    if (nibble >= 0)
    {
        // Explicit casts: the shift yields an int, which does not implicitly
        // narrow to byte. Bits above the low 8 (tokens longer than two
        // characters) are discarded by the cast.
        values[i] |= (byte)(nibble << (4 * j));
        masks[i] |= (byte)(0xF << (4 * j));
    }
}
// Loop body for one token: builds values[i] / masks[i] from the characters of
// bytes[i]. j is the nibble position counted from the right, so the character
// at ^(j + 1) is shifted left by 4 * j bits.
var currByte = bytes[i];
for (int j = currByte.Length - 1; j >= 0; j--)
{
    char c = currByte[^(j + 1)];

    // Decode an ASCII hex digit; -1 marks a wildcard such as '?'.
    int nibble = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    if (nibble >= 0)
    {
        // Explicit casts: the shift yields an int, which does not implicitly
        // narrow to byte. Bits above the low 8 (tokens longer than two
        // characters) are discarded by the cast.
        values[i] |= (byte)(nibble << (4 * j));
        masks[i] |= (byte)(0xF << (4 * j));
    }
}
if you want to be funky you can do
// Loop body for one token: walks bytes[i] left to right, shifting the
// accumulated value and mask up by one nibble before merging the next character.
var currByte = bytes[i];
for (int j = 0; j < currByte.Length; j++)
{
    // Make room for the next nibble; a wildcard leaves these low bits at 0.
    values[i] <<= 4;
    masks[i] <<= 4;

    char c = currByte[j];

    // Decode an ASCII hex digit; -1 marks a wildcard such as '?'.
    int nibble = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    if (nibble >= 0)
    {
        // Explicit cast: nibble is an int and does not implicitly narrow to byte.
        values[i] |= (byte)nibble;
        masks[i] |= 0xF;
    }
}
// Loop body for one token: walks bytes[i] left to right, shifting the
// accumulated value and mask up by one nibble before merging the next character.
var currByte = bytes[i];
for (int j = 0; j < currByte.Length; j++)
{
    // Make room for the next nibble; a wildcard leaves these low bits at 0.
    values[i] <<= 4;
    masks[i] <<= 4;

    char c = currByte[j];

    // Decode an ASCII hex digit; -1 marks a wildcard such as '?'.
    int nibble = c switch
    {
        >= '0' and <= '9' => c - '0',
        >= 'A' and <= 'F' => c - 'A' + 10,
        >= 'a' and <= 'f' => c - 'a' + 10,
        _ => -1,
    };

    if (nibble >= 0)
    {
        // Explicit cast: nibble is an int and does not implicitly narrow to byte.
        values[i] |= (byte)nibble;
        masks[i] |= 0xF;
    }
}
Instead
ero
ero • 2y ago
char.IsAsciiHexDigit if anything