# ✅ RAG JuJutsu PoC (Notebook with Joblib, FAISS, ChatGPT API)

In [None]:

!pip install --quiet openai langchain faiss-cpu PyPDF2 sentence-transformers joblib
!pip install ipywidgets==7.7.2
!jupyter nbextension enable --py widgetsnbextension
!jupyter notebook


usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: console dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert notebook qtconsole run server
troubleshoot trust

Jupyter command `jupyter-nbextension` not found.
[32m[I 2025-06-14 22:49:12.578 ServerApp][m Package notebook took 0.0000s to import

In [1]:

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_pdf_chunks(pdf_path, chunk_size=500, chunk_overlap=50):
    """Extract the text of a PDF and split it into chunks.

    Args:
        pdf_path: Location of the PDF, handed to ``PdfReader``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500 (the previously hard-coded value).
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 50 (the previously hard-coded value).

    Returns:
        The result of ``splitter.split_text`` applied to the text of all pages,
        where each page's text is followed by a newline.
    """
    reader = PdfReader(pdf_path)
    # A page with no extractable text may come back as None/"" (e.g. image-only
    # pages); treat it as empty instead of failing on `None + "\n"`.
    # Every page still contributes its trailing newline, exactly as before.
    raw_text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(raw_text)

# Chunk the source PDF and report how many chunks were produced.
pdf_path = "JuJutsu-Contexto-Significado-Conexiones-Historia.pdf"
chunks = load_pdf_chunks(pdf_path)
print(f"Loaded {len(chunks)} chunks")


Loaded 329 chunks


In [3]:

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import joblib

# Fail early with a clear message: an empty chunk list would otherwise surface as an
# opaque shape/index error a few lines further down.
if not chunks:
    raise ValueError("No text chunks to embed; check that the PDF produced any text.")

# Embed every chunk. FAISS only accepts C-contiguous float32 matrices, so make that
# explicit here instead of relying on the encoder's default output dtype.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = np.array(model.encode(chunks), dtype=np.float32, order="C")

# Flat L2 index whose dimensionality matches the embedding width.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Persist the three objects together as one tuple.
# NOTE: joblib serializes with pickle — only ever load a file you created yourself.
joblib.dump((model, chunks, index), "rag_model.joblib")
print("Model, chunks, and index serialized to rag_model.joblib")


Model, chunks, and index serialized to rag_model.joblib


In [5]:

import joblib

# Restore the (model, chunks, index) tuple from disk.
# NOTE(review): joblib.load unpickles arbitrary objects — only load files you trust.
restored = joblib.load("rag_model.joblib")
model, chunks, index = restored
print("Model, chunks, and index loaded from rag_model.joblib")


Model, chunks, and index loaded from rag_model.joblib


In [7]:

def search(query, k=3):
    """Return the stored chunks that the index reports as neighbours of ``query``.

    Relies on the notebook-level ``model`` (to embed the query), ``index``
    (to search) and ``chunks`` (indexed by the positions the index returns).

    Args:
        query: Text to embed and look up.
        k: Number of neighbours requested from the index. Defaults to 3.

    Returns:
        A list of chunks in the order the index returned them. It holds at most
        ``k`` items, and fewer when the index could not supply ``k`` neighbours.
    """
    # FAISS expects a float32 matrix of shape (n_queries, dim).
    query_vec = np.array(model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_vec, k)
    # FAISS pads missing neighbours with -1; without this filter chunks[-1] would
    # silently return the *last* chunk as if it were a real hit.
    return [chunks[i] for i in indices[0] if i >= 0]


In [23]:

import os

import openai
from openai import OpenAI

# SECURITY: an API key must never be stored in the notebook. The key that used to be
# hard-coded on this line has been exposed and should be revoked in the OpenAI dashboard.
# Prefer exporting OPENAI_API_KEY before starting Jupyter; otherwise prompt for it
# without echoing it to the screen or saving it in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

client = OpenAI()  # Uses env variable OPENAI_API_KEY

def chat_no_rag(question, model_name="gpt-4o", temperature=0.5, max_tokens=200):
    """Send ``question`` to the chat model on its own, with no retrieved context.

    Args:
        question: Sent verbatim as the content of a single user message.
        model_name: Value passed as ``model`` to the chat-completions call.
            Defaults to "gpt-4o".
        temperature: Value passed as ``temperature``. Defaults to 0.5.
        max_tokens: Value passed as ``max_tokens``. This caps the reply in
            tokens, not words. Defaults to 200.

    Returns:
        ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "user", "content": question}
        ],
        temperature=temperature,
        max_tokens=max_tokens,  # 👈 Limit output length (in tokens)
    )
    return response.choices[0].message.content

def chat_with_rag(question, retrieved_chunks):
    """Ask the chat model ``question`` with the retrieved chunks supplied as context.

    The chunks are joined with newlines and placed, together with the question,
    into one Spanish instruction prompt sent as a single user message.

    Returns:
        ``message.content`` of the first choice in the response.
    """
    context = "\n".join(retrieved_chunks)
    user_message = {
        "role": "user",
        "content": f"Usa el siguiente contexto para responder la pregunta:\n\n{context}\n\nPregunta: {question}",
    }
    request = dict(
        model="gpt-4o",
        messages=[user_message],
        temperature=0.3,
        max_tokens=200,  # 👈 cap on the length of the generated reply
    )
    completion = client.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content


def chat_with_rag_enhanced(question, retrieved_chunks):
    """Ask the chat model ``question`` using the retrieved chunks and a role-setting prompt.

    The chunks are joined with newlines and embedded in a Spanish prompt that
    frames the model as a martial-history expert; the whole prompt is sent as
    a single user message.

    Args:
        question: The question appended after the context.
        retrieved_chunks: Iterable of strings joined with newlines to form the context.

    Returns:
        ``message.content`` of the first choice in the response.
    """
    context = "\n".join(retrieved_chunks)
    # The accented words below were previously stored as mojibake ("histÃ³rico",
    # "precisiÃ³n"), which sent corrupted Spanish to the model.
    prompt = (
        "Eres un experto en historia marcial. "
        "Usa el siguiente contexto histórico para responder con precisión y detalle.\n\n"
        f"Contexto:\n{context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.2,
        max_tokens=200,  # 👈 Limit output length (in tokens)
    )
    return response.choices[0].message.content






In [25]:

# Example query: answer the same question three ways to compare the effect of retrieval.
# The query and labels were previously stored as mojibake; the garbled query text was
# what got embedded for retrieval and sent to the model.
query = "¿Cuál es el origen del JuJutsu en Japón?"
retrieved = search(query)

print("🔹 Sin RAG:")
print(chat_no_rag(query))

print("\n🔹 Con RAG:")
print(chat_with_rag(query, retrieved))

print("\n🔹 Con RAG + Prompt mejorado:")
print(chat_with_rag_enhanced(query, retrieved))


🔹 Sin RAG:


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}