How to translate the WITH keyword in postgres when SELECT does not have FROM?

I'm trying to translate the following postgres function into pure Kysley:
CREATE OR REPLACE FUNCTION rand.similarity_search(model TEXT, needle TEXT)
RETURNS TABLE (
document_chunk_id INTEGER,
similarity REAL,
text TEXT
) AS $$
BEGIN
RETURN QUERY
WITH query AS (
SELECT pgml.embed(model, needle) AS embedding
)
SELECT document_chunk_embedding.document_chunk_id, pgml.cosine_similarity(document_chunk_embedding.embedding, query.embedding), document_chunk.text AS similarity
FROM rand.document_chunk_embedding
JOIN rand.document_chunk ON document_chunk_embedding.document_chunk_id = document_chunk.id, query
ORDER BY similarity DESC
LIMIT 5;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION rand.similarity_search(model TEXT, needle TEXT)
RETURNS TABLE (
document_chunk_id INTEGER,
similarity REAL,
text TEXT
) AS $$
BEGIN
RETURN QUERY
WITH query AS (
SELECT pgml.embed(model, needle) AS embedding
)
SELECT document_chunk_embedding.document_chunk_id, pgml.cosine_similarity(document_chunk_embedding.embedding, query.embedding), document_chunk.text AS similarity
FROM rand.document_chunk_embedding
JOIN rand.document_chunk ON document_chunk_embedding.document_chunk_id = document_chunk.id, query
ORDER BY similarity DESC
LIMIT 5;
END;
$$ LANGUAGE plpgsql;
I'm able to translate the main select body:
return db.selectFrom('document_chunk_embedding')
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', 'document_chunk.id')
.select(['document_chunk_embedding.document_chunk_id', 'document_chunk.text as similarity', db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding'])])
.orderBy('similarity', 'desc')
.limit(5)
.returningAll()
.execute();
return db.selectFrom('document_chunk_embedding')
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', 'document_chunk.id')
.select(['document_chunk_embedding.document_chunk_id', 'document_chunk.text as similarity', db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding'])])
.orderBy('similarity', 'desc')
.limit(5)
.returningAll()
.execute();
But I'm not sure how to translate the WITH part:
WITH query AS (
SELECT pgml.embed(model, needle) AS embedding
)
WITH query AS (
SELECT pgml.embed(model, needle) AS embedding
)
4 Replies
Theo Gravity
Theo Gravity2y ago
So far I have:
return db
.with('query', (qb) => qb.selectFrom('??').select([db.fn('pgml.embed', [sql.val(model), sql.val(text)]).as('embedding')]))
.selectFrom(['document_chunk_embedding', 'query'])
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', 'document_chunk.id')
.select(['document_chunk_embedding.document_chunk_id', 'document_chunk.text as similarity', db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding'])])
.orderBy('similarity', 'desc')
.limit(5)
.returningAll()
.execute();
return db
.with('query', (qb) => qb.selectFrom('??').select([db.fn('pgml.embed', [sql.val(model), sql.val(text)]).as('embedding')]))
.selectFrom(['document_chunk_embedding', 'query'])
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', 'document_chunk.id')
.select(['document_chunk_embedding.document_chunk_id', 'document_chunk.text as similarity', db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding'])])
.orderBy('similarity', 'desc')
.limit(5)
.returningAll()
.execute();
I think the problem is that the SELECT in question doesn't have a FROM in the WITH It seems query needs to be at the end of the join and can't be part of SELECT FROM Not sure how to do that Got it to all work, it feels like a huge hack though:
export async function similaritySearch({
db,
text,
model,
}: {
db: Kysely<SbDatabase>;
text: string;
model?: string;
}): Promise<SimilaritySearchRow[]> {
return (
db
.with(
'query',
() =>
// Ignoring since kysely doesn't know how to work with selects without FROM
// You can see what this query outputs to here: https://kyse.link/?p=s&i=hrcwDweUyG01KMCYp34j
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
sql`(SELECT pgml.embed(${sql.val(model)}, ${sql.val(text)}) as embedding)`,
)
.selectFrom('document_chunk_embedding')
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', sql.raw('document_chunk.id, query'))
.select([
'document_chunk_embedding.document_chunk_id',
'document_chunk.text as similarity',
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding']),
])
.orderBy('similarity', 'desc')
.limit(5)
.execute() as Promise<SimilaritySearchRow[]>
);
}
export async function similaritySearch({
db,
text,
model,
}: {
db: Kysely<SbDatabase>;
text: string;
model?: string;
}): Promise<SimilaritySearchRow[]> {
return (
db
.with(
'query',
() =>
// Ignoring since kysely doesn't know how to work with selects without FROM
// You can see what this query outputs to here: https://kyse.link/?p=s&i=hrcwDweUyG01KMCYp34j
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
sql`(SELECT pgml.embed(${sql.val(model)}, ${sql.val(text)}) as embedding)`,
)
.selectFrom('document_chunk_embedding')
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
.innerJoin('document_chunk', 'document_chunk_embedding.document_chunk_id', sql.raw('document_chunk.id, query'))
.select([
'document_chunk_embedding.document_chunk_id',
'document_chunk.text as similarity',
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
//@ts-ignore
db.fn('pgml.cosine_similarity', ['document_chunk_embedding.embedding', 'query.embedding']),
])
.orderBy('similarity', 'desc')
.limit(5)
.execute() as Promise<SimilaritySearchRow[]>
);
}
Igal
Igal2y ago
Hey 👋🏻 I've answered in github issue /w a playground link
Theo Gravity
Theo Gravity2y ago
Thanks Igal. I've responded. It's close but not the solution unfortunately
Theo Gravity
Theo Gravity2y ago
Linking the github issue thread so it can be traced: https://github.com/kysely-org/kysely/issues/517
GitHub
[Feature] SELECT without FROM · Issue #517 · kysely-org/kysely
I'd like support for the following use-case: WITH query AS ( SELECT pgml.embed(model, needle) AS embedding ) I've read from discord that this case isn't supported. Could you create a .s...
Want results from more Discord servers?
Add your server