K
Kysely•2y ago
Theo Gravity

Using Postgres function in an insert with a field as a select

I'm trying to translate the following SQL:
INSERT INTO sb.text_document_chunk (text_document_id, text)
SELECT NEW.id, sentence
FROM unnest(string_to_array(NEW.text_content, '. ')) AS sentence
WHERE sentence ~ '[[:alnum:]]';
INSERT INTO sb.text_document_chunk (text_document_id, text)
SELECT NEW.id, sentence
FROM unnest(string_to_array(NEW.text_content, '. ')) AS sentence
WHERE sentence ~ '[[:alnum:]]';
to:
db.insertInto('text_document_chunk').values((eb) => ({
text_document_id: document.id,
text: db
.selectFrom(eb.fn('unnest', [eb.fn('string_to_array', [document.text_content, '. '])]).as('sentence'))
.where('sentence', '~', '[[:alnum:]]'),
})).execute()
db.insertInto('text_document_chunk').values((eb) => ({
text_document_id: document.id,
text: db
.selectFrom(eb.fn('unnest', [eb.fn('string_to_array', [document.text_content, '. '])]).as('sentence'))
.where('sentence', '~', '[[:alnum:]]'),
})).execute()
It doesn't seem to like [document.text_content, '. '] and where('sentence'). It seems the array value has to be a ReferenceExpression type, but the documentation isn't very clear on what that is.
11 Replies
Theo Gravity
Theo GravityOP•2y ago
Would this be the correct version? One concern I have is does this properly handle issues around SQL injection?
db.insertInto('text_document_chunk').values((eb) => ({
text_document_id: document.id,
text: db
.selectFrom(
eb.fn('unnest', [eb.fn('string_to_array', [sql.lit(document.text_content), sql.lit('. ')])]).as('sentence'),
)
.where(sql.lit('sentence'), '~', '[[:alnum:]]').execute()
db.insertInto('text_document_chunk').values((eb) => ({
text_document_id: document.id,
text: db
.selectFrom(
eb.fn('unnest', [eb.fn('string_to_array', [sql.lit(document.text_content), sql.lit('. ')])]).as('sentence'),
)
.where(sql.lit('sentence'), '~', '[[:alnum:]]').execute()
Igal
Igal•2y ago
Hey 👋 Very close! You can use .expression instead:
import { sql } from "kysely"

await kysely
.insertInto("text_document_chunk")
.columns(["text_document_id", "text"])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>("unnest", [
eb.fn("string_to_array", [sql`NEW.text_content`, sql.lit(". ")]),
])
.as("sentence"),
)
.where("sentence", "~", "[[:alnum:]]")
.select([sql<string>`NEW.id`.as("text_document_id"), "sentence"]),
)
.execute()
import { sql } from "kysely"

await kysely
.insertInto("text_document_chunk")
.columns(["text_document_id", "text"])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>("unnest", [
eb.fn("string_to_array", [sql`NEW.text_content`, sql.lit(". ")]),
])
.as("sentence"),
)
.where("sentence", "~", "[[:alnum:]]")
.select([sql<string>`NEW.id`.as("text_document_id"), "sentence"]),
)
.execute()
https://kyse.link/?p=s&i=bYFbOw1HNX4D81GQJOHI
Theo Gravity
Theo GravityOP•2y ago
Thanks! In my translation, I'm not using NEW (trying to translate out of triggers / pg functions), and have document instead as the object with the values. So would it be sql.lit(document.text_content)?
Igal
Igal•2y ago
sql.lit is used to add literal values to the query (instead of passing values as parameters). passing a string to it will result in 'document.text_content' which not what you're looking for. You need to refer to "document.text_content" so sql.ref
Theo Gravity
Theo GravityOP•2y ago
ah
Igal
Igal•2y ago
or eb.ref if the referenced column is in query context. eb.ref is type-safe.
Theo Gravity
Theo GravityOP•2y ago
this is the contents of document.text_content i think i understand the mismatch, in the original fn, NEW.text_content is an actual column, whereas here I'm feeding in the content, which I don't think is allowed?
Igal
Igal•2y ago
Will continue tomorrow, good night 💤
Theo Gravity
Theo GravityOP•2y ago
thank you for your time Igal really helped! This works, but it's not ideal as sql.lit says it's susceptible to SQL injection. Is there a version that I can use that would be safe (eg parameter binding)? The document.text_content variable would come from user input, and the contents of that variable is whatever they wrote. We may or may not have sanitized it by the time it gets called here.
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql.lit(document.text_content), sql.lit('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql.lit(document.id).as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql.lit(document.text_content), sql.lit('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql.lit(document.id).as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
Reading the doc, doing:
sql`${value}`
sql`${value}`
should be the equiv? This does work for me
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql`${document.text_content}`, sql.lit('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql`${document.id}`.as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql`${document.text_content}`, sql.lit('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql`${document.id}`.as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
Igal
Igal•2y ago
You should always validate and sanitize user input! You should try sql.val instead of interpolation
Theo Gravity
Theo GravityOP•2y ago
Agreed! Thanks! Here's the final result:
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql.val(document.text_content), sql.val('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql.val(document.id).as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
export async function generateTextDocumentChunks(
db: Kysely<SbDatabase>,
document: TextDocumentRow,
): Promise<TextDocumentChunkRow[]> {
return db
.insertInto('text_document_chunk')
.columns(['text_document_id', 'text'])
.expression((eb) =>
eb
.selectFrom(
eb
.fn<{ sentence: string }>('unnest', [
eb.fn('string_to_array', [sql.val(document.text_content), sql.val('. ')]),
])
.as('sentence'),
)
.where('sentence', '~', '[[:alnum:]]')
.select([sql.val(document.id).as('text_document_id'), 'sentence']),
)
.returningAll()
.execute();
}
Want results from more Discord servers?
Add your server