SQLite 'WHERE table.relationship (one to many) > 0'

Hi! I'm trying to figure out how to query for rows that have a relationship at least defined once.
const allVideos = await db.query.videos.findMany({
where: // Not all videos have timestamps (one-to-many). Exclude those somehow
with: {
timestamps: true
},
});

// Below should not be done in memory.
return allVideos.filter(
(video) => video.timestamps.length > 0
);
const allVideos = await db.query.videos.findMany({
where: // Not all videos have timestamps (one-to-many). Exclude those somehow
with: {
timestamps: true
},
});

// Below should not be done in memory.
return allVideos.filter(
(video) => video.timestamps.length > 0
);
Can't seem to find any examples of this in the docs. Any help would be greatly appreciated, thanks.
67 Replies
rphlmr ⚡
rphlmr ⚡5mo ago
👋 you can use sub query in where if you want: https://drizzle.run/dxdp67ytednpzy3ercnabpfb
rphlmr ⚡
rphlmr ⚡5mo ago
const videosWithTimestamps = db
.select({ videoId: timestamps.videoId })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

console.log(
"all videos with timestamps",
await db.query.videos.findMany({
where: inArray(videos.id, videosWithTimestamps),
with: {
timestamps: true,
},
}),
);

// or

const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

console.log(
"all videos with timestamps 2",
await db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
with: {
timestamps: true,
},
}),
);
const videosWithTimestamps = db
.select({ videoId: timestamps.videoId })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

console.log(
"all videos with timestamps",
await db.query.videos.findMany({
where: inArray(videos.id, videosWithTimestamps),
with: {
timestamps: true,
},
}),
);

// or

const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

console.log(
"all videos with timestamps 2",
await db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
with: {
timestamps: true,
},
}),
);
dhakan
dhakanOP5mo ago
Thanks a bunch! Will try this out. Works ✅ It seems like in production (with 3000 videos), this query is suuuuuuper slow (takes 10 seconds or more):
export async function selectVideos(limit: number = 10) {
limit = Math.min(limit, 20);

// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
with: {
// timestamps: {
// orderBy: (timestamps, { asc }) => [asc(timestamps.startsAt)],
// },
thumbnailJob: true,
},
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
});
}
export async function selectVideos(limit: number = 10) {
limit = Math.min(limit, 20);

// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
with: {
// timestamps: {
// orderBy: (timestamps, { asc }) => [asc(timestamps.startsAt)],
// },
thumbnailJob: true,
},
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
});
}
Maybe you know @Raphaël M (@rphlmr) ⚡ if anything might be terrible in terms of performance 😄 Maybe some index I'm missing? Maybe the order by?
rphlmr ⚡
rphlmr ⚡5mo ago
Do you have indexes on anything involved in your relations? It makes a huge difference
dhakan
dhakanOP5mo ago
I don’t think I’ve looked at that! Was surprised this dataset causes this amount of lag Where would you optimize given the query above?
rphlmr ⚡
rphlmr ⚡5mo ago
timestamps.videoId and videos.id but I guess they are indexed (primary key?). Check in your DB in case it is not. fields and references of thumbnailJob relation videos.createdAt too + if not all columns of thumbnailJob are required (same for video), try to select what you really need (https://orm.drizzle.team/docs/rqb#partial-fields-select)
dhakan
dhakanOP5mo ago
They are indeed PKs. I’ll look at the others! Maybe the sorted by field of createdAt actually Indexing the two fields that are ordered made a huge difference. Dope! Takes on average 300ms which is still pretty slow though if you ask me, hmm... I'll try to optimize further 🙂
rphlmr ⚡
rphlmr ⚡5mo ago
If you can, try select and joins :p then you will be 1:1 with a "handmade" sql query
dhakan
dhakanOP5mo ago
Could you elaborate what you mean? I suck at this language 😂😂😂
rphlmr ⚡
rphlmr ⚡5mo ago
The query API will be reviewed again by the dev team because other people have complained about it and SQLite as well. Drizzle has 2 APIs: Query (the one you use) and Select (the "classic" one that looks like SQL)
rphlmr ⚡
rphlmr ⚡5mo ago
Drizzle ORM - Select
Drizzle ORM is a lightweight and performant TypeScript ORM with developer experience in mind.
dhakan
dhakanOP5mo ago
I see. Yeah I did not use sqlite to have slow read speeds So they are different from eachother? Interesting
rphlmr ⚡
rphlmr ⚡5mo ago
something like that maybe:
export async function selectVideos(limit: number = 10) {
limit = Math.min(limit, 20);

// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

return db
.select()
.from(videos)
.where(gt(numberOfTimestamps, 0))
.innerJoin(thumbnailJob)
.on(eq(thumbnailJob.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);

}
export async function selectVideos(limit: number = 10) {
limit = Math.min(limit, 20);

// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

return db
.select()
.from(videos)
.where(gt(numberOfTimestamps, 0))
.innerJoin(thumbnailJob)
.on(eq(thumbnailJob.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);

}
yes, the Query API will use some json db function to transform the result. The Select API will not but Drizzle handles the final mapping
dhakan
dhakanOP5mo ago
Man this is insanely more readable IMO
return db
.select()
.from(videos)
.where(gt(numberOfTimestamps, 0))
// .innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
return db
.select()
.from(videos)
.where(gt(numberOfTimestamps, 0))
// .innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
This query results in around 100ms, which is a great improvement. Still slow though maybe? Not sure. Uncommenting the innerJoin of the thumbnail jobs table makes it take up to 8 seconds to run. Not sure if the change I made from what you posted (no on was the issue). What you posted didn't compile because innerJoin expected a second argument. That inner join should be indexed because of PK/FK relationship, right?
rphlmr ⚡
rphlmr ⚡5mo ago
yes oupsy, yes innerJoin needs the "on" :tired: You start to love SQL.
dhakan
dhakanOP5mo ago
1. The inner join makes it ultra slow 2. The output is now a JSON of thumbnail_jobs instead of the video perspective thumbnailJob I guess those are the two things that are changed now
rphlmr ⚡
rphlmr ⚡5mo ago
the initial query could be improved. the sub query is no more needed here. It can be an innerJoin too does thumbnailJobs has an index on videoId ? a unique index maybe? If your database is huge and you have just added the index, I wonder if it is instant or needs some time to build it.
dhakan
dhakanOP5mo ago
Commenting out the where(subquery) reduces it (even with the thumbnail job inner join) to about 10ms(!!!), so I think you're on to something here. There's nothing more than a one to one. Maybe that makes it unique? Not sure how this ORM differentiates between 1tm and 1t1 relationships How would you conceptualize this? You would inner join on what condition? And what table? Trying to wrap my head around it
rphlmr ⚡
rphlmr ⚡5mo ago
innerJoin on timestamps with timestamps.videoId Except if you have a many-to-many relationship here (you will have as many rows as there are matches).
dhakan
dhakanOP5mo ago
There shouldn't be a many-to-many relationship anywhere in the model thus far
rphlmr ⚡
rphlmr ⚡5mo ago
you have a unique timestamps.videoId ?
dhakan
dhakanOP5mo ago
But that wouldn't restrict videos from only returning video rows based on the conditions of at least 1 timestamp existing? Yes, it's a PK UUID
rphlmr ⚡
rphlmr ⚡5mo ago
yes that's it.
dhakan
dhakanOP5mo ago
Are you saying yes that understanding is correct, or yes it would restrict? 😄
rphlmr ⚡
rphlmr ⚡5mo ago
Yes, for both, it will restrict, and I think this is what you understood. (sorry, not my native language 😬)
dhakan
dhakanOP5mo ago
Are you thinking something like:
return db
.select()
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.groupBy(videos.id)
.having(gt(count(timestamps.id), 0))
.orderBy(desc(videos.createdAt))
.limit(limit);
return db
.select()
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.groupBy(videos.id)
.having(gt(count(timestamps.id), 0))
.orderBy(desc(videos.createdAt))
.limit(limit);
rphlmr ⚡
rphlmr ⚡5mo ago
innerJoin is like => ok if we match, else, not this video in the final result leftJoin is like => ok if we match, else, would have null for the joined table results maybe, yes. Not sure about the having if timestamps.videoId is unique if there is no match on videos.id, this video will not be in the final result
dhakan
dhakanOP5mo ago
I'm remembering slowly how joins work by having this discussion 😄
rphlmr ⚡
rphlmr ⚡5mo ago
Did you feel like a superhero who regains his power?
dhakan
dhakanOP5mo ago
It should be unique as it's a FK!
rphlmr ⚡
rphlmr ⚡5mo ago
yes if it is not a composite fk 😄
dhakan
dhakanOP5mo ago
I think I feel like a bank person walking with a fancy suit to work, tripping on every piece of dog poop there is That's fair. It is not 😄
return db
.select()
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
return db
.select()
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
Dropped the group by, dropped the having
rphlmr ⚡
rphlmr ⚡5mo ago
looks good to me
dhakan
dhakanOP5mo ago
About 100 ms now
rphlmr ⚡
rphlmr ⚡5mo ago
Now, should I tell you how to change the result shape?
dhakan
dhakanOP5mo ago
Removing the joins nets like 3ms~~ It's completely broken, so that'd be great 😄
rphlmr ⚡
rphlmr ⚡5mo ago
how is the result? does it have the expected columns? select can tell what to get from the tables
dhakan
dhakanOP5mo ago
[
{
"videos": {
"id": "...",
"sourceId": "...",
"title": "誰もが1回は悶絶するDLCで追加された極悪な敵モブ10体を紹介したい【ELDEN RING実況】",
"description": "揺らぎない害悪の種族\n\nエルデンリングDLC初見\nhttps://www.youtube.com/playlist?list=PLX5bu-e6MBQElgr2wCO8WthunAQmVdrBa\n\nエルデンリング-2度目の初見-\nhttps://www.youtube.com/playlist?list=PLX5bu-e6MBQHxq5ud8XTdeUNyMWZOXafl\n\nELDEN RING公式\nhhttps://www.eldenring.jp/index.html\n\nSHADOW OF THE ERDTREE公式\nhttps://www.eldenring.jp/shadowoftheerdtree.html\n\nエンディング曲:「笑顔のテーマ」【楽曲提供(株式会社 光サプライズ)】\n\n#eldenring #エルデンリング#shadowoftheerdtree",
"duration": 836,
"thumbnail": "https://i.ytimg.com/vi/QC2FJnwUzB8/sddefault.jpg",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10"
},
"timestamps": {
"id": "...",
"originalText": "0:36 視聴者は前回から火の雨の瞳膜をつけてるから「たった5hitで敵を倒せるなんてすごい!」という感想になる",
"cleanedText": "視聴者は前回から火の雨の瞳膜をつけてるから「たった5hitで敵を倒せるなんてすごい!」という感想になる",
"startsAt": 36,
"duration": 112,
"timestamp": "0:36",
"sourceType": "COMMENT",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10",
"videoId": "..."
},
"thumbnail_jobs": {
"id": "d3e6e3bb-3c35-45d2-ac12-a813e0109080",
"status": "COMPLETED",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10",
"videoId": "..."
}
}
]
[
{
"videos": {
"id": "...",
"sourceId": "...",
"title": "誰もが1回は悶絶するDLCで追加された極悪な敵モブ10体を紹介したい【ELDEN RING実況】",
"description": "揺らぎない害悪の種族\n\nエルデンリングDLC初見\nhttps://www.youtube.com/playlist?list=PLX5bu-e6MBQElgr2wCO8WthunAQmVdrBa\n\nエルデンリング-2度目の初見-\nhttps://www.youtube.com/playlist?list=PLX5bu-e6MBQHxq5ud8XTdeUNyMWZOXafl\n\nELDEN RING公式\nhhttps://www.eldenring.jp/index.html\n\nSHADOW OF THE ERDTREE公式\nhttps://www.eldenring.jp/shadowoftheerdtree.html\n\nエンディング曲:「笑顔のテーマ」【楽曲提供(株式会社 光サプライズ)】\n\n#eldenring #エルデンリング#shadowoftheerdtree",
"duration": 836,
"thumbnail": "https://i.ytimg.com/vi/QC2FJnwUzB8/sddefault.jpg",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10"
},
"timestamps": {
"id": "...",
"originalText": "0:36 視聴者は前回から火の雨の瞳膜をつけてるから「たった5hitで敵を倒せるなんてすごい!」という感想になる",
"cleanedText": "視聴者は前回から火の雨の瞳膜をつけてるから「たった5hitで敵を倒せるなんてすごい!」という感想になる",
"startsAt": 36,
"duration": 112,
"timestamp": "0:36",
"sourceType": "COMMENT",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10",
"videoId": "..."
},
"thumbnail_jobs": {
"id": "d3e6e3bb-3c35-45d2-ac12-a813e0109080",
"status": "COMPLETED",
"createdAt": "2024-07-26 10:50:10",
"updatedAt": "2024-07-26 10:50:10",
"videoId": "..."
}
}
]
So the videos is newly nested (didn't exist as a level before). And timestamps is not an array anymore. And thumbnail_jobs is not camelcase singular 😄
rphlmr ⚡
rphlmr ⚡5mo ago
https://orm.drizzle.team/docs/goodies#get-typed-table-columns
return db
.select( { ...getTableColumns(videos), thumbnailJob: getTableColumns(thumbnailJobs) } )
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
return db
.select( { ...getTableColumns(videos), thumbnailJob: getTableColumns(thumbnailJobs) } )
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.orderBy(desc(videos.createdAt))
.limit(limit);
Drizzle ORM - Goodies
Drizzle ORM is a lightweight and performant TypeScript ORM with developer experience in mind.
rphlmr ⚡
rphlmr ⚡5mo ago
ohhh I think we miss something about timestamps here isn't it? can you have many timestamps for a video?
dhakan
dhakanOP5mo ago
Yes!!!
rphlmr ⚡
rphlmr ⚡5mo ago
so now you have as many videos as you have timestamp matches lol it's named a cardinality something so a groupby is required too So, to not lose your time, let's recap you have the same request time for both queries?
dhakan
dhakanOP5mo ago
I'm calcing the time elapsed with console.time and console.timeEnd and this is done locally
rphlmr ⚡
rphlmr ⚡5mo ago
are they equal or one is faster?
dhakan
dhakanOP5mo ago
When you say both queries, what do you mean? this one vs? there has been like 10 variants 😄
rphlmr ⚡
rphlmr ⚡5mo ago
the one using query vs the last with select
dhakan
dhakanOP5mo ago
Oh. The original one took 12 seconds or more. This one takes 300 ms (select is faster) but that was also without the indexes wait a second. They are about the same now. I think both are around 300 ms I'm lying to you...
// roughly 100 ms
return db
.select({
...getTableColumns(videos),
thumbnailJob: getTableColumns(thumbnailJobs),
})
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.groupBy(videos.id)
.orderBy(desc(videos.createdAt))
.limit(limit);
// roughly 100 ms
return db
.select({
...getTableColumns(videos),
thumbnailJob: getTableColumns(thumbnailJobs),
})
.from(videos)
.innerJoin(timestamps, eq(timestamps.videoId, videos.id))
.innerJoin(thumbnailJobs, eq(thumbnailJobs.videoId, videos.id))
.groupBy(videos.id)
.orderBy(desc(videos.createdAt))
.limit(limit);
the old one takes 300 ms (non-select)
rphlmr ⚡
rphlmr ⚡5mo ago
ok good (if I can say that lol) so it is useful I try to help you to finish this query with timestamps array
dhakan
dhakanOP5mo ago
Yes! I did try to add timestamps: getTableColumns(timestamps), but then it turned out to be 150ms, and it only returned an object
rphlmr ⚡
rphlmr ⚡5mo ago
oh I am curious about the returned values of this one (with groupBy) yes to array is not built in 😬 we need to agregate the results to group timestamps we can do that in plain javascript but the db can do that too (I'm sure for postgres, but sqlite ... 😄 )
dhakan
dhakanOP5mo ago
Trying to find the docs on this conversion... Any guidance? This query is way too slow 😦 I need to stop for now, but will continue to optimize this If I can't make this at least sub 50ms, then I feel something is off This database should be insanely quick to read... every step of the way is causing me problems. Never encountered something like this in the world of SQL. Always used postgres and maybe I should've stuck with that
rphlmr ⚡
rphlmr ⚡5mo ago
yes sorry I was building a helper:
import { SQL, sql } from "drizzle-orm";
import { SelectResultFields } from "drizzle-orm/query-builders/select.types";
import { SelectedFields } from "drizzle-orm/sqlite-core";

export function jsonObject<T extends SelectedFields>(shape: T) {
const chunks: SQL[] = [];

Object.entries(shape).forEach(([key, value]) => {
if (chunks.length > 0) {
chunks.push(sql.raw(`,`));
}

chunks.push(sql.raw(`'${key}',`));

chunks.push(sql`${value}`);
});

return sql<SelectResultFields<T>>`coalesce(json_object(${sql.join(
chunks,
)}), ${sql`json_object()`})`;
}

export function jsonAggObject<T extends SelectedFields>(shape: T) {
return sql<
SelectResultFields<T>[]
>`coalesce(json_group_array(${jsonObject(
shape,
)}), ${sql`json_array()`})`.mapWith(
(v) => JSON.parse(v) as SelectResultFields<T>[],
);
}

db
.select({
...getTableColumns(videos),
timestamps: jsonAggObject(getTableColumns(timestamps)),
})
import { SQL, sql } from "drizzle-orm";
import { SelectResultFields } from "drizzle-orm/query-builders/select.types";
import { SelectedFields } from "drizzle-orm/sqlite-core";

export function jsonObject<T extends SelectedFields>(shape: T) {
const chunks: SQL[] = [];

Object.entries(shape).forEach(([key, value]) => {
if (chunks.length > 0) {
chunks.push(sql.raw(`,`));
}

chunks.push(sql.raw(`'${key}',`));

chunks.push(sql`${value}`);
});

return sql<SelectResultFields<T>>`coalesce(json_object(${sql.join(
chunks,
)}), ${sql`json_object()`})`;
}

export function jsonAggObject<T extends SelectedFields>(shape: T) {
return sql<
SelectResultFields<T>[]
>`coalesce(json_group_array(${jsonObject(
shape,
)}), ${sql`json_array()`})`.mapWith(
(v) => JSON.parse(v) as SelectResultFields<T>[],
);
}

db
.select({
...getTableColumns(videos),
timestamps: jsonAggObject(getTableColumns(timestamps)),
})
dhakan
dhakanOP5mo ago
Hi again @Raphaël M (@rphlmr) ⚡ . Thanks again for the assistance. I tried your helper and it worked, but I couldn't manage to make it quicker than this:
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
extras: {
timestampsCount: sql`${numberOfTimestamps}`.as("timestampsCount"),
},
});
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(numberOfTimestamps, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
extras: {
timestampsCount: sql`${numberOfTimestamps}`.as("timestampsCount"),
},
});
This query takes roughly 130 ms for a limit of 12 videos (It turns out I don't actually need more than the count) 130 ms is still slow if you ask me... like, really slow haha but it's better than 300 that I had
rphlmr ⚡
rphlmr ⚡5mo ago
Hey thanks for reporting back. Could you share the generated sql of this request? (you can enable logger in drizzle https://orm.drizzle.team/docs/goodies#logging)
Drizzle ORM - Goodies
Drizzle ORM is a lightweight and performant TypeScript ORM with developer experience in mind.
dhakan
dhakanOP5mo ago
Does this help?
Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 158.676ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 117.11ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 113.088ms
Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 158.676ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 117.11ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 113.088ms
(default logging on localhost) Seems like these queries when running in production on Fly (same dataset) takes 350-400 ms 😦 (also with a limit of 12)
rphlmr ⚡
rphlmr ⚡5mo ago
It will depend on how far your server is from your database 😬 Formatted it gives that
SELECT "id",
"source_id",
"name",
"description",
"duration",
"thumbnail",
"created_at",
"updated_at",
(
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) AS "timestampsCount"
FROM "videos"
WHERE (
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) > 0
ORDER BY "videos"."created_at" DESC
LIMIT 12
SELECT "id",
"source_id",
"name",
"description",
"duration",
"thumbnail",
"created_at",
"updated_at",
(
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) AS "timestampsCount"
FROM "videos"
WHERE (
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) > 0
ORDER BY "videos"."created_at" DESC
LIMIT 12
for me, looks like there is too many sub select that query the same thing
dhakan
dhakanOP5mo ago
My db is running on a mounted volume in what’s called LiteFS. So every query runs on the same machine And the times printed above are from the ORM typescript code Hmmm, looks funky indeed!
rphlmr ⚡
rphlmr ⚡5mo ago
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(sql`"timestampsCount"`, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
extras: {
timestampsCount: sql`${numberOfTimestamps}`.as("timestampsCount"),
},
});
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

// Select only videos with at least 1 timestamp
return db.query.videos.findMany({
where: gt(sql`"timestampsCount"`, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit,
extras: {
timestampsCount: sql`${numberOfTimestamps}`.as("timestampsCount"),
},
});
i wonder if this could work
dhakan
dhakanOP5mo ago
Oooh you’re using the extras field? I’ll try that when I get back home! Really curious how the deployed version of identical data takes 3 times longer Will have to investigate that
rphlmr ⚡
rphlmr ⚡5mo ago
Ok works as expected in drizzle run
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

const timestampsCount = sql`${numberOfTimestamps}`
.mapWith(Number)
.as("timestampsCount");

await db.query.videos.findMany({
where: gt(timestampsCount, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit: 10,
extras: {
timestampsCount,
},
});
// Subquery for count of timestamps
const numberOfTimestamps = db
.select({ count: count(timestamps.id) })
.from(timestamps)
.where(eq(timestamps.videoId, videos.id));

const timestampsCount = sql`${numberOfTimestamps}`
.mapWith(Number)
.as("timestampsCount");

await db.query.videos.findMany({
where: gt(timestampsCount, 0),
orderBy: (videos, { desc }) => [desc(videos.createdAt)],
limit: 10,
extras: {
timestampsCount,
},
});
SELECT "id",
"name",
"created_at",
(
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) AS "timestampsCount"
FROM "videos"
WHERE "timestampsCount" > 0
ORDER BY "videos"."created_at" DESC
LIMIT 12
SELECT "id",
"name",
"created_at",
(
SELECT COUNT("id")
FROM "timestamps"
WHERE "timestamps"."video_id" = "videos"."id"
) AS "timestampsCount"
FROM "videos"
WHERE "timestampsCount" > 0
ORDER BY "videos"."created_at" DESC
LIMIT 12
dhakan
dhakanOP5mo ago
Great! Is it any quicker?
rphlmr ⚡
rphlmr ⚡5mo ago
nope, you have to split your declaration if you want to prevent errors on the column alias ex: you rename timestampsCount extra, it will still works of course you can where: gt(sql"timestampsCount", 0), But be careful, you have no guard against a misspelling. My personal rule: whether to split or not, do what you think is best for your future self
dhakan
dhakanOP5mo ago
Thanks Raphael. I’ve been talking to people in the Fly community, and I got an answer like this:
litefs just primarily handles the global replication + write node to primary right? If you're reading from your local sqlite file, it should take microseconds for simple queries and single digit ms for complex ones. I have queries that join/filter/select json props and they average ~5ms.
Something seems very off here I’m going to try to see what happens without drizzle
rphlmr ⚡
rphlmr ⚡5mo ago
I think it is more a driver issue than Drizzle itself because drizzle only build queries that are then passed to the driver. (i can be wrong though) Interesting reporting you are sharing here! @Andrew Sherman are you aware of issues like this? SQLite on fly litefs, better sqlite 3 driver, a query that is very long with a simple join (better with select api but not that fast), column indexed.
dhakan
dhakanOP5mo ago
Someone speculated that my deployed variant being slower might be Fly machine config (shared cpu instead of performance), but that doesn’t explain the 130 ms one locally… I must have overlooked one important detail: FK:s are NOT automatically indexed. I didn't actually know this. After adding the FK index to timestamps.video_id these are the results:
Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 3.937ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 2.905ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 2.351ms
Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 3.937ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 2.905ms

Query: select "id", "source_id", "name", "description", "duration", "thumbnail", "created_at", "updated_at", (select count("id") from "timestamps" where "timestamps"."video_id" = "videos"."id") as "timestampsCount" from "videos" where "timestampsCount" > ? order by "videos"."created_at" desc limit ? -- params: [0, 12]
selectVideos: 2.351ms
@Raphaël M (@rphlmr) ⚡ this is an improvement of astronomical proportions
rphlmr ⚡
rphlmr ⚡5mo ago
Wow ! Thanks for sharing. I thought they were too Actually they are implicitly on association table with a composite PK made with them, but it is not your case here. Today I learned! I know what to do after holidays on my client project 😅
dhakan
dhakanOP5mo ago
I learnt as well! Thanks a bunch for the back and forth. Really appreciated your help.
Want results from more Discord servers?
Add your server