"""KEBA

Installing required packages
"""

!pip install langchain
!pip install openai
!pip install docx2txt

"""Get an API key from https://openai.com/product"""

import os

# Placeholder: paste your OpenAI API key between the quotes (never commit a
# real key). The OpenAI-backed LangChain classes used later in this script
# read the key from this environment variable.
os.environ["OPENAI_API_KEY"] = ""

"""Subscribe to the free tier at https://www.pinecone.io/ and get an API key.

Importing necessary libraries
"""

!pip install pinecone-client

import os
import openai
import pinecone
from langchain.document_loaders import Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

"""Loading documents"""

# Path to the Word document that serves as the knowledge base.
file = "./data/mtd.docx"
loader = Docx2txtLoader(file)
documents = loader.load()

# Bare expression: in a notebook cell this displays the loaded documents.
documents

"""Splitting documents"""

def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split the loaded documents into smaller chunks.

    Args:
        documents: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum chunk size forwarded to the splitter
            (measured by the splitter's length function; characters by
            default according to the LangChain documentation).
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            the splitter.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)


# Chunk the loaded documents with the default settings and report how many
# chunks were produced.
docs = split_docs(documents)
print(len(docs))

"""Embedding documents with OpenAI"""

!pip install tiktoken -q

# Embedding client; used below both for this sanity check and for indexing.
embeddings = OpenAIEmbeddings()

# Sanity check: embed a short string. The bare len(...) expression displays
# the length of the returned embedding when run in a notebook cell.
query_result = embeddings.embed_query("Hello world")
len(query_result)

"""Vector search with Pinecone"""

# Pinecone credentials are taken from the environment when available so that
# secrets do not have to be written into the script. When the variables are
# unset the values fall back to the original empty-string placeholders.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", ""),
    environment=os.environ.get("PINECONE_ENVIRONMENT", ""),
)

# Name of the Pinecone index the document chunks are written to.
index_name = "keba"

# Embed the chunks and store them in the Pinecone index; the returned vector
# store is queried by the similarity-search helper later in the script.
index = Pinecone.from_documents(docs, embeddings, index_name=index_name)

"""Finding similar documents"""

def get_similiar_docs(query, k=2, score=False):
    """Look up the documents in the vector index most similar to ``query``.

    Args:
        query: Query text passed to the index's similarity search.
        k: Number of results requested from the index.
        score: When true, call ``similarity_search_with_score`` instead of
            ``similarity_search`` so that results carry their scores.

    Returns:
        Whatever the selected search method of the module-level ``index``
        returns.
    """
    if score:
        return index.similarity_search_with_score(query, k=k)
    return index.similarity_search(query, k=k)

"""Question answering using LangChain and OpenAI LLM"""

# Model used for answering. Earlier alternatives were "text-davinci-003" and
# "gpt-3.5-turbo"; only the final assignment ever took effect, so the
# overwritten assignments have been removed.
# NOTE(review): "gpt-4" is a chat model; confirm that the installed LangChain
# version's `OpenAI` wrapper supports it (newer versions expect `ChatOpenAI`).
model_name = "gpt-4"

llm = OpenAI(model_name=model_name)

# Question-answering chain of type "stuff" built on top of the LLM.
chain = load_qa_chain(llm, chain_type="stuff")

def get_answer(query):
    """Answer ``query`` using the most similar indexed documents as context.

    Args:
        query: The question to answer; also used as the similarity-search
            query.

    Returns:
        The result of running the module-level QA ``chain`` on the retrieved
        documents and the question.
    """
    similar_docs = get_similiar_docs(query)
    return chain.run(input_documents=similar_docs, question=query)

"""Example queries and answers"""

# Example question (German): "In which areas is training offered?"
query = "In welchen Bereichen wird ausgebildet?"
answer = get_answer(query)
print(answer)