Program Dream•10mo ago

KMNIST

Similar to @earth's penguin EMNIST project, I am developing a CNN (Convolutional Neural Network) in Python to parse the KMNIST dataset. A quick summary of what this dataset consists of: 60,000 training and 10,000 testing examples of handwritten Kuzushiji Hiragana characters (cursive Japanese characters). Do I know Japanese? No. Is it a cool AI project? Yes.

4 Replies

assangexOP•10mo ago

This script pretty much grabs a random labeled image from the dataset, parses it, and sends it back to you with an ASCII version of it as well as a matplotlib version of it.

def parse():
    """Save random KMNIST samples to JSON and display one training image.

    Reads the two image files under ``samples/`` with ``idx2numpy``, divides
    the pixel values by 255.0 and reshapes every image into a row of
    28 * 28 values. One randomly chosen training row and one randomly chosen
    test row are written, together with their labels, to
    ``KMNIST-Train.json`` and ``KMNIST-Test.json``.

    A further random training image, taken from the ``mndata`` loader, is
    printed via ``mndata.display``, written to ``image.json`` /
    ``label.json`` and shown in a matplotlib window.

    Returns:
        tuple: ``(image, label)`` -- the displayed image as a NumPy array
        divided by 255.0, and its label wrapped in a NumPy array.
    """
    pixels_per_image = 28 * 28

    # Labels (and the list-style images used for display) come from mndata.
    _unused_test_images, test_labels = mndata.load_testing()
    train_images, train_labels = mndata.load_training()

    # Scaled copies of the raw image files, one flattened image per row.
    train_scaled = idx2numpy.convert_from_file('samples/train-images-idx3-ubyte') / 255.0
    test_scaled = idx2numpy.convert_from_file('samples/t10k-images-idx3-ubyte') / 255.0
    train_arr = train_scaled.reshape(-1, pixels_per_image)
    test_arr = test_scaled.reshape(-1, pixels_per_image)

    def save_random_sample(rows, labels, filename):
        """Write one randomly picked row and its label to *filename*."""
        pick = random.randrange(len(rows))
        sample = {'image': rows[pick].tolist(), 'label': labels[pick]}
        with open(filename, 'w') as out:
            json.dump(sample, out, indent=4)

    # Training sample first, then test sample.
    for rows, labels, filename in (
        (train_arr, train_labels, 'KMNIST-Train.json'),
        (test_arr, test_labels, 'KMNIST-Test.json'),
    ):
        save_random_sample(rows, labels, filename)

    # Pick the training image that gets printed, saved and plotted.
    chosen = random.randrange(len(train_arr))
    print(mndata.display(train_images[chosen]))

    image = np.array(train_images[chosen]) / 255.0
    label = np.array(train_labels[chosen])

    for payload, filename in ((image, 'image.json'), (label, 'label.json')):
        with open(filename, 'w') as out:
            json.dump(payload.tolist(), out, indent=4)

    # Grayscale preview with the label as the plot title.
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.title(f'Label: {label}')
    plt.show()

    return image, label

# Run the demo as soon as this snippet is executed.
parse()

def parse():
    """Dump random KMNIST samples as JSON and plot one training image.

    The image files in ``samples/`` are read through ``idx2numpy``, divided
    by 255.0 and reshaped so that each image is a single row of 28 * 28
    values. A random training row and a random test row are stored with
    their labels in ``KMNIST-Train.json`` and ``KMNIST-Test.json``.

    Another random training image from the ``mndata`` loader is then printed
    using ``mndata.display``, saved to ``image.json`` / ``label.json`` and
    displayed with matplotlib.

    Returns:
        tuple: ``(image, label)`` -- the plotted image as a NumPy array
        divided by 255.0, and its label wrapped in a NumPy array.
    """
    row_width = 28 * 28

    # mndata supplies the labels and the list-style images used for display.
    _ignored_images, test_labels = mndata.load_testing()
    train_images, train_labels = mndata.load_training()

    # One flattened, scaled image per row.
    raw_train = idx2numpy.convert_from_file('samples/train-images-idx3-ubyte')
    train_arr = (raw_train / 255.0).reshape(-1, row_width)
    raw_test = idx2numpy.convert_from_file('samples/t10k-images-idx3-ubyte')
    test_arr = (raw_test / 255.0).reshape(-1, row_width)

    def write_random_entry(matrix, labels, filename):
        """Store a randomly selected row of *matrix* and its label as JSON."""
        position = random.randrange(len(matrix))
        entry = {
            'image': matrix[position].tolist(),
            'label': labels[position],
        }
        with open(filename, 'w') as handle:
            json.dump(entry, handle, indent=4)

    # Save one training sample, then one test sample.
    write_random_entry(train_arr, train_labels, 'KMNIST-Train.json')
    write_random_entry(test_arr, test_labels, 'KMNIST-Test.json')

    # Select the training image to print, save and plot.
    selected = random.randrange(len(train_arr))
    print(mndata.display(train_images[selected]))

    image = np.array(train_images[selected]) / 255.0
    label = np.array(train_labels[selected])

    outputs = (('image.json', image), ('label.json', label))
    for filename, value in outputs:
        with open(filename, 'w') as handle:
            json.dump(value.tolist(), handle, indent=4)

    # Show the image in grayscale, titled with its label.
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.title(f'Label: {label}')
    plt.show()

    return image, label

# Run the demo as soon as this snippet is executed.
parse()

'''
This is the part of the code that actually loads, scales and flattens the dataset:


    train_file = 'samples/train-images-idx3-ubyte'
    test_file =  'samples/t10k-images-idx3-ubyte'
    test_images, test_labels = mndata.load_testing()
    train_images, train_labels = mndata.load_training()
    
    train_arr = idx2numpy.convert_from_file(train_file) / 255.0
    test_arr = idx2numpy.convert_from_file(test_file) / 255.0

    train_arr = train_arr.flatten().reshape(-1, 28 * 28)
    test_arr = test_arr.flatten().reshape(-1, 28 * 28)

'''

'''
This is the part of the code that actually loads, scales and flattens the dataset:


    train_file = 'samples/train-images-idx3-ubyte'
    test_file =  'samples/t10k-images-idx3-ubyte'
    test_images, test_labels = mndata.load_testing()
    train_images, train_labels = mndata.load_training()
    
    train_arr = idx2numpy.convert_from_file(train_file) / 255.0
    test_arr = idx2numpy.convert_from_file(test_file) / 255.0

    train_arr = train_arr.flatten().reshape(-1, 28 * 28)
    test_arr = test_arr.flatten().reshape(-1, 28 * 28)

'''

I made a testing script to put the parsed and flattened arrays into a json file, and it worked!

import numpy as np
import json 
import random
from mnist import MNIST
import idx2numpy
import matplotlib.pyplot as plt
mndata = MNIST('samples')

def test_parse():
    """Write the scaled, flattened KMNIST image arrays to JSON files.

    Reads ``samples/train-images-idx3-ubyte`` and
    ``samples/t10k-images-idx3-ubyte`` with ``idx2numpy``, divides the pixel
    values by 255.0, reshapes every image into a row of 28 * 28 values and
    writes the rows to ``KMNIST-FILETEST-Train.json`` and
    ``KMNIST-FILETEST-Test.json``. Each file holds a single JSON array of
    rows, so ``json.load`` returns a list of lists of floats.

    The rows are streamed to disk one at a time in compact form (one image
    per line). Converting the whole array with ``tolist()`` and dumping it
    with ``indent=4`` would hold every pixel as a Python float at once and
    put each float on its own indented line, which makes the files far
    larger than necessary.

    Prints ``Parsing Complete`` when both files have been written.
    """
    train_file = 'samples/train-images-idx3-ubyte'
    test_file = 'samples/t10k-images-idx3-ubyte'

    def _dump_rows(rows, filename):
        """Stream *rows* to *filename* as one JSON array, one row per line."""
        with open(filename, 'w') as f:
            f.write('[')
            for position, row in enumerate(rows):
                if position:
                    f.write(',\n')
                # Only a single row is converted to Python floats at a time.
                json.dump(row.tolist(), f)
            f.write(']\n')

    # NOTE: the mndata loader is not used here; only the idx2numpy arrays
    # are written, so the dataset is not parsed a second time.
    # reshape(-1, 784) already flattens each image into one row.
    train_arr = (idx2numpy.convert_from_file(train_file) / 255.0).reshape(-1, 28 * 28)
    test_arr = (idx2numpy.convert_from_file(test_file) / 255.0).reshape(-1, 28 * 28)

    _dump_rows(train_arr, 'KMNIST-FILETEST-Train.json')
    _dump_rows(test_arr, 'KMNIST-FILETEST-Test.json')

    print("Parsing Complete")

# Run the export as soon as this snippet is executed.
test_parse()

import numpy as np
import json 
import random
from mnist import MNIST
import idx2numpy
import matplotlib.pyplot as plt
mndata = MNIST('samples')

def test_parse():
    """Export the scaled, flattened KMNIST image arrays as JSON.

    The image files ``samples/train-images-idx3-ubyte`` and
    ``samples/t10k-images-idx3-ubyte`` are read with ``idx2numpy``, divided
    by 255.0 and reshaped into rows of 28 * 28 values. The rows are written
    to ``KMNIST-FILETEST-Train.json`` and ``KMNIST-FILETEST-Test.json``;
    each file contains one JSON array of rows, so ``json.load`` yields a
    list of lists of floats.

    Output is written row by row in compact form (one image per line)
    instead of calling ``tolist()`` on the full array and pretty-printing
    with ``indent=4``. That avoids materialising every pixel as a Python
    float at once and avoids one indented line per float in the output.

    Prints ``Parsing Complete`` once both files exist.
    """
    train_file = 'samples/train-images-idx3-ubyte'
    test_file = 'samples/t10k-images-idx3-ubyte'

    def _write_rows(matrix, filename):
        """Write *matrix* to *filename* as a JSON array, one row per line."""
        with open(filename, 'w') as f:
            f.write('[')
            first = True
            for row in matrix:
                if not first:
                    f.write(',\n')
                first = False
                # Convert just this row to Python floats before writing it.
                json.dump(row.tolist(), f)
            f.write(']\n')

    # NOTE: the mndata loader is not called here because none of its output
    # is written; only the idx2numpy arrays are exported.
    # reshape(-1, 784) is enough to flatten each image into one row.
    train_arr = (idx2numpy.convert_from_file(train_file) / 255.0).reshape(-1, 28 * 28)
    test_arr = (idx2numpy.convert_from_file(test_file) / 255.0).reshape(-1, 28 * 28)

    _write_rows(train_arr, 'KMNIST-FILETEST-Train.json')
    _write_rows(test_arr, 'KMNIST-FILETEST-Test.json')

    print("Parsing Complete")

# Run the export as soon as this snippet is executed.
test_parse()

...i think

assangexOP•10mo ago

So it ended up creating the files, and for a split second I was able to see floats :D but because of how much data there was per file, it crashed :(

assangexOP•10mo ago

output:

Dumb Bird•10mo ago

If you don't know Japanese, how do you verify that it's working correctly?

Gaming

Programming

KMNIST

Did you find this page helpful?