Parent Document Retriever in Xata

Hello. Is it possible to set up a Parent Document Retriever with Xata as the vector store? https://python.langchain.com/v0.1/docs/modules/data_connection/retrievers/parent_document_retriever/
Parent Document Retriever | 🦜️🔗 LangChain
When splitting documents for retrieval, there are often conflicting desires:
2 Replies
kostas
kostas7mo ago
Hi, I gave it a try. It looks like ParentDocumentRetriever works just fine with Xata as the vector store. You just need to create a string column "doc_id" in the Xata table, which stores the generated parent document id on each child chunk, in addition to the standard "embedding" (vector), "content" (text) and "source" (string) columns. Here's an example:
import os
from langchain.storage import InMemoryStore
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.retrievers import ParentDocumentRetriever

from langchain_core.documents import Document
from langchain_community.vectorstores.xata import XataVectorStore
from langchain_openai import OpenAIEmbeddings

# Xata credentials and database endpoint. The target table ("vectors" below)
# needs a string "doc_id" column in addition to the standard "embedding"
# (vector), "content" (text) and "source" (string) columns.
api_key = os.environ["XATA_API_KEY"]
db_url = "https://ws-123456.us-east-1.xata.sh/db/test:main"

# Load the full-size parent documents.
loaders = [
    TextLoader("./lotr.txt"),
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

embeddings = OpenAIEmbeddings()

# Connect to the table without inserting anything: the retriever below is
# what indexes the child chunks. Seeding the store with
# XataVectorStore.from_documents(docs, ...) would also embed every whole
# parent document, so a similarity search could return an entire file
# instead of a small chunk.
vectorstore = XataVectorStore(
    api_key=api_key,
    db_url=db_url,
    embedding=embeddings,
    table_name="vectors",
)

# The storage layer for the parent documents
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=RecursiveCharacterTextSplitter(chunk_size=400),
)
# ids=None lets the retriever generate an id for each parent document.
retriever.add_documents(docs, ids=None)
print(list(store.yield_keys()))
# Querying the vector store directly returns the small child chunks.
sub_docs = vectorstore.similarity_search("hobbit")
print(sub_docs[0].page_content)
import os
from langchain.storage import InMemoryStore
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.retrievers import ParentDocumentRetriever

from langchain_core.documents import Document
from langchain_community.vectorstores.xata import XataVectorStore
from langchain_openai import OpenAIEmbeddings

# Xata credentials and database endpoint. The target table ("vectors" below)
# needs a string "doc_id" column in addition to the standard "embedding"
# (vector), "content" (text) and "source" (string) columns.
api_key = os.environ["XATA_API_KEY"]
db_url = "https://ws-123456.us-east-1.xata.sh/db/test:main"

# Load the full-size parent documents.
loaders = [
    TextLoader("./lotr.txt"),
]
docs = []
for loader in loaders:
    docs.extend(loader.load())

embeddings = OpenAIEmbeddings()

# Connect to the table without inserting anything: the retriever below is
# what indexes the child chunks. Seeding the store with
# XataVectorStore.from_documents(docs, ...) would also embed every whole
# parent document, so a similarity search could return an entire file
# instead of a small chunk.
vectorstore = XataVectorStore(
    api_key=api_key,
    db_url=db_url,
    embedding=embeddings,
    table_name="vectors",
)

# The storage layer for the parent documents
store = InMemoryStore()
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=RecursiveCharacterTextSplitter(chunk_size=400),
)
# ids=None lets the retriever generate an id for each parent document.
retriever.add_documents(docs, ids=None)
print(list(store.yield_keys()))
# Querying the vector store directly returns the small child chunks.
sub_docs = vectorstore.similarity_search("hobbit")
print(sub_docs[0].page_content)
Андрей
АндрейOP6mo ago
Thank you!!!
Want results from more Discord servers?
Add your server