C
C#2y ago
Thinker

✅ Caching an IEnumerable

Simple question: I have an IEnumerable<T> which does some complex-ish calculations, and I would ideally like to cache the result of the enumerable, but while still allowing it to be lazy and not use something like ToArray() or ToList() which would enumerate the entire thing at once. So for instance, if the enumerable produces some elements using a Select, then if the user begins by enumerating the first three elements and nothing more, then those three elements would be cached and not be calculated again on successive enumerations. What would be the best way to do this?
15 Replies
Thinker
Thinker2y ago
var xs = new[] {1,2,3,4,5,6,7};

var cached = xs
.Where(x => x % 2 == 0)
.Select(x + 1)
.Cache();

... = cached.First();
... = cached.ElementAt(2); // This would not have to recalculate the first element
... = cached.ElementAt(1); // This would not have to recalculate any elements
var xs = new[] {1,2,3,4,5,6,7};

var cached = xs
.Where(x => x % 2 == 0)
.Select(x + 1)
.Cache();

... = cached.First();
... = cached.ElementAt(2); // This would not have to recalculate the first element
... = cached.ElementAt(1); // This would not have to recalculate any elements
^ something like this
mtreit
mtreit2y ago
Isn't that what yield gives you ? Write a wrapper that enumerates the wrapped IEnumerable using yield... I haven't actually done this and maybe that's not actually easy to implement, but it sprang to mind...
Thinker
Thinker2y ago
Idk what yield has to do with this. That's used to create iterator methods.
mtreit
mtreit2y ago
Well, if you are using yield to yield up members of the source sequence it will remember where it left off and I was thinking you could have some state inside some wrapper that keeps yielding from wherever the wrapped thing was instead of starting a new enumeration... Maybe it doesn't work, I didn't really think it through.
mtreit
mtreit2y ago
GitHub
reactive/Memoize.cs at main · dotnet/reactive
The Reactive Extensions for .NET. Contribute to dotnet/reactive development by creating an account on GitHub.
mtreit
mtreit2y ago
GitHub
reactive/Memoize.cs at 85f1eb7c53e27cccdbeee3e0b044916168843fcc · d...
The Reactive Extensions for .NET. Contribute to dotnet/reactive development by creating an account on GitHub.
Thinker
Thinker2y ago
Looks like what I had in mind I was thinking of doing something similar, although just using a simple list instead of whatever an IBuffer<T> is.
mtreit
mtreit2y ago
Give it a try, looks pretty nice to me...
Thinker
Thinker2y ago
ye
mtreit
mtreit2y ago
I wonder if I could use this Thonk
Thinker
Thinker2y ago
The implementation looks pretty straightforward, it's just the lock, exception handling, and stopping mechanisms which make it way less readable. Also I don't really know what the difference between caching and memoization is
mtreit
mtreit2y ago
memoization just means not redoing something you've already computed. Same thing as caching I think, depending on what you mean by caching - caching might involve things like cache eviction, I don't think that's a thing in memoization?
ero
ero2y ago
i just use something like this
public abstract class CachedEnumerable<TKey, TValue>
: IEnumerable<TValue>
where TKey : notnull
{
public int Count => _cache.Count;

public abstract IEnumerator<TValue> GetEnumerator();
protected abstract TKey GetKey(TValue value);

protected virtual bool CompareKeys(TKey searchedKey, TKey itemKey)
{
return searchedKey.Equals(itemKey);
}

public TValue this[TKey key]
{
get
{
if (TryGetValue(key, out TValue value))
{
return value;
}
else
{
throw new KeyNotFoundException(key);
}
}
protected set => _cache[key] = value;
}

public bool TryGetValue(TKey key, out TValue value)
{
if (_cache.TryGetValue(key, out value))
{
return true;
}

lock (_cache)
{
foreach (TValue item in this)
{
value = item;
TKey itemKey = GetKey(value);

_cache[itemKey] = value;

if (CompareKeys(key, itemKey))
{
return true;
}
}
}

value = default;
return false;
}

public void Clear()
{
lock (_cache)
{
_cache.Clear();
}
}

IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
public abstract class CachedEnumerable<TKey, TValue>
: IEnumerable<TValue>
where TKey : notnull
{
public int Count => _cache.Count;

public abstract IEnumerator<TValue> GetEnumerator();
protected abstract TKey GetKey(TValue value);

protected virtual bool CompareKeys(TKey searchedKey, TKey itemKey)
{
return searchedKey.Equals(itemKey);
}

public TValue this[TKey key]
{
get
{
if (TryGetValue(key, out TValue value))
{
return value;
}
else
{
throw new KeyNotFoundException(key);
}
}
protected set => _cache[key] = value;
}

public bool TryGetValue(TKey key, out TValue value)
{
if (_cache.TryGetValue(key, out value))
{
return true;
}

lock (_cache)
{
foreach (TValue item in this)
{
value = item;
TKey itemKey = GetKey(value);

_cache[itemKey] = value;

if (CompareKeys(key, itemKey))
{
return true;
}
}
}

value = default;
return false;
}

public void Clear()
{
lock (_cache)
{
_cache.Clear();
}
}

IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
but that probably doesn't suit your needs
Thinker
Thinker2y ago
Okay I came up with this
internal sealed class CachedEnumerable<T> : IEnumerable<T>, IDisposable
{
private readonly IEnumerator<T> enumerator;
private readonly List<T> cache;
private bool disposed;

public CachedEnumerable(IEnumerable<T> source)
{
enumerator = source.GetEnumerator();
cache = new();
disposed = false;
}

public IEnumerator<T> GetEnumerator()
{
foreach (var cached in cache) yield return cached;

if (disposed) yield break;
while (enumerator.MoveNext())
{
var item = enumerator.Current;
cache.Add(item);
yield return item;
}

Dispose();
}

IEnumerator IEnumerable.GetEnumerator() =>
GetEnumerator();

public void Dispose()
{
if (disposed) return;

enumerator.Dispose();
disposed = true;
}
}

public static partial class EnumerableExtensions
{
public static IEnumerable<T> Cache<T>(this IEnumerable<T> source) =>
new CachedEnumerable<T>(source);
}
internal sealed class CachedEnumerable<T> : IEnumerable<T>, IDisposable
{
private readonly IEnumerator<T> enumerator;
private readonly List<T> cache;
private bool disposed;

public CachedEnumerable(IEnumerable<T> source)
{
enumerator = source.GetEnumerator();
cache = new();
disposed = false;
}

public IEnumerator<T> GetEnumerator()
{
foreach (var cached in cache) yield return cached;

if (disposed) yield break;
while (enumerator.MoveNext())
{
var item = enumerator.Current;
cache.Add(item);
yield return item;
}

Dispose();
}

IEnumerator IEnumerable.GetEnumerator() =>
GetEnumerator();

public void Dispose()
{
if (disposed) return;

enumerator.Dispose();
disposed = true;
}
}

public static partial class EnumerableExtensions
{
public static IEnumerable<T> Cache<T>(this IEnumerable<T> source) =>
new CachedEnumerable<T>(source);
}
It's really simple but it works
Accord
Accord2y ago
Closed!