ASCII Map Compression

Just a simple RLE algorithm for a project in the works. Mainly just a silly project. Regardless, this algorithm is used to compress ASCII maps down to a smaller file size by run-length encoding them.

TL;DR
Small scale: uncompressed map is 2 KB, compressed map is 758 bytes.
Big(ger) scale: uncompressed map is 86 KB, compressed map is 42 KB.

So over half the size is saved. There is still room for improvement, but I can't be too bothered right now.
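As a tiny illustration of the format before the full maps below (my own made-up row, not one taken from the project):

row     = "############"   # 12 bytes uncompressed
encoded = "#(12)"          # the same run of '#', stored in 5 bytes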
4 Replies
Dumb Bird (2mo ago)
Here is an uncompressed map:
############
#.......a..#
#|.........#
#....`.....#
######################## #..........#
#.......e...#..........# #####'#########
#...........#.$........# ############### #.........a...#
#...........#..........# #..........s..# #.............#
#...........#..........# #.....\.......# #.............#
#...........#..........# #..$..........######### #.o....h......#
#...........+..........# #.............'.......# #.............#
#...........#..r.......# #.............#..V....######+###########
#...........#..........# #....P........#.......+..S...#
##################+###########..Q..........#.......#....f.#
#.J...C.....#.............#########......#
############### #...........'.............# #......#
#.............# #...........############### #.$....#
#.....u.......# #...........# ##############......#
#..Y..........# #...........# #............#......#
#.............# #.....r.....# #............+$.....#
#.............# #...........# #............#......#
#############'#######+######+############################+#
Here's that compressed:
(60)#(12)
(60)#.(7)a..#
(60)#|.(9)#
(60)#....`.(5)#
(6)#(24) (30)#.(10)#
(6)#.(7)e...#.(10)# (30)#(5)'#(9)
(6)#.(11)#.$.(8)# (5)#(15) (10)#.(9)a...#
(6)#.(11)#.(10)# (5)#.(10)s..# (10)#.(13)#
(6)#.(11)#.(10)# (5)#.(5)\.(7)# (10)#.(13)#
(6)#.(11)#.(10)# (5)#..$.(10)#(9) #.o....h.(6)#
(6)#.(11)+.(10)# (5)#.(13)'.(7)# #.(13)#
(6)#.(11)#..r.(7)# (5)#.(13)#..V....#(6)+#(11)
(6)#.(11)#.(10)# (5)#....P.(8)#.(7)+..S...#
(6)#(18)+#(11)..Q.(10)#.(7)#....f.#
(23)#.J...C.(5)#.(13)#(9).(6)#
(6)#(15) #.(11)'.(13)# (7)#.(6)#
(6)#.(13)# #.(11)#(15) (7)#.$....#
(6)#.(5)u.(7)# #.(11)# (8)#(14).(6)#
(6)#..Y.(10)# #.(11)# (8)#.(12)#.(6)#
(6)#.(13)# #.(5)r.(5)# (8)#.(12)+$.(5)#
(6)#.(13)# #.(11)# (8)#.(12)#.(6)#
(6)#(13)'#(7)+#(6)+#(28)+#
My algorithm differs slightly from your typical RLE: it only compresses what makes sense to compress. Instead of just doing something like #2 to repeat # twice, I do #(2), to keep cases where there really is a literal #2 somewhere in the map from getting messed up. The side effect is that the "shortened" form isn't always actually shorter; sometimes it's the same length or longer. This is the other place my algorithm differs: it won't replace a run with anything that takes more characters than, or the same number of characters as, the run itself (there's a quick example of this right after the code below). My poor code:
def rle_encode(map_lines):
    encoded_lines = []
    for line in map_lines:
        if not line:                      # keep empty lines as-is
            encoded_lines.append(line)
            continue
        encoded_line = []
        prev_char = line[0]
        count = 1
        for char in line[1:]:
            if char == prev_char:
                count += 1
            else:
                # only emit char(count) when it is actually shorter than the run
                if count > 1:
                    encoded_segment = f"{prev_char}({count})"
                    if len(encoded_segment) < count:
                        encoded_line.append(encoded_segment)
                    else:
                        encoded_line.append(prev_char * count)
                else:
                    encoded_line.append(prev_char)
                prev_char = char
                count = 1
        # handle the last run of the line
        if count > 1:
            encoded_segment = f"{prev_char}({count})"
            if len(encoded_segment) < count:
                encoded_line.append(encoded_segment)
            else:
                encoded_line.append(prev_char * count)
        else:
            encoded_line.append(prev_char)
        encoded_lines.append(''.join(encoded_line))
    return encoded_lines
def rle_decode(encoded_lines):
    decoded_lines = []
    for encoded_line in encoded_lines:
        decoded_line = []
        i = 0
        while i < len(encoded_line):
            char = encoded_line[i]
            # a run is written as char(count); anything else is a literal character
            if (i + 2 < len(encoded_line) and encoded_line[i + 1] == '('
                    and encoded_line[i + 2].isdigit()):
                # find the closing parenthesis and expand the run
                j = i + 2
                while encoded_line[j] != ')':
                    j += 1
                count = int(encoded_line[i + 2:j])
                decoded_line.append(char * count)
                i = j + 1
            else:
                decoded_line.append(char)
                i += 1
        decoded_lines.append(''.join(decoded_line))
    return decoded_lines
# example ASCII map
with open("map.txt") as f:
    map_lines = [line.rstrip("\n") for line in f]

# encoding the map
encoded_map = rle_encode(map_lines)
print("Encoded Map:")
for line in encoded_map:
    print(line)

# decoding the map
decoded_map = rle_decode(encoded_map)
print("\nDecoded Map:")
for line in decoded_map:
    print(line)
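A quick sanity check I would tack onto the script above (the sample lines here are made up, not pulled from map.txt): runs of four or fewer characters stay literal, because something like ".(4)" is no shorter than "....", and a literal "#2" in the map survives the round trip untouched.

# hypothetical sample, not part of the real map
sample = [
    "#####....ab",   # the run of five '#' is worth compressing, the four '.' are not
    "#..#2..###",    # a literal "#2" plus runs too short to bother with
]

encoded = rle_encode(sample)
print(encoded)                          # ['#(5)....ab', '#..#2..###']
assert rle_decode(encoded) == sample    # round trip is lossless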
This code is bad, as it's really meant for a test, not production
anic17 (4w ago)
You could compress it even more by using the characters with the corresponding ASCII number and then decoding them, like replacing 33 with ! (if readability isn't an issue), 48 with 0, etc.
Dumb Bird (4w ago)
Yes, I did think about that. This was more of a proof of concept, so I didn't feel the need to push it that far. For bigger numbers, though, I would likely have to split them across multiple characters.
anic17 (4w ago)
Each byte could represent a run of up to 256, because there isn't any need for length 0.
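A rough sketch of what that could look like (my own guess at the idea, not code from the project): each run becomes the character followed by a single count byte, stored as count - 1 so one byte covers runs of 1 through 256, with longer runs split into chunks. It assumes the map is pure ASCII, and readability of the output is gone, as noted above.

def rle_encode_bytes(map_lines):
    # hypothetical variant: every run is two bytes, the character then (run length - 1)
    out = bytearray()
    for line in map_lines:
        i = 0
        while i < len(line):
            char = line[i]
            run = 1
            while i + run < len(line) and line[i + run] == char and run < 256:
                run += 1
            out += char.encode("ascii")
            out.append(run - 1)          # 0 means a run of 1, 255 means a run of 256
            i += run
        out += b"\n\x00"                 # line break, stored as a run of 1
    return bytes(out)

def rle_decode_bytes(data):
    lines, current = [], []
    for i in range(0, len(data), 2):
        char, count = chr(data[i]), data[i + 1] + 1
        if char == "\n":
            lines.append("".join(current))
            current = []
        else:
            current.append(char * count)
    return lines

Since every run costs two bytes even when it's a single character, this only pays off on maps with plenty of long runs; mixing it with the keep-it-literal rule from the original encoder would probably do better.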