Skip to main content
With the Chat API’s memory option, the SDK handles context retrieval, prompt construction, and LLM calls in one step. No need to build a RAG pipeline yourself.
import { NdxClient } from '@neuradex/sdk';

// Shared NeuraDex client used by the examples below.
// Credentials come from the environment; these env vars may be undefined
// if unset — presumably the SDK validates them at construction (confirm).
const client = new NdxClient({
  apiKey: process.env.NEURADEX_API_KEY,
  projectId: process.env.NEURADEX_PROJECT_ID,
});

/**
 * Answer a question using the Chat API's built-in memory option.
 * Context retrieval, prompt construction, and the LLM call are all
 * handled by the SDK — no manual RAG pipeline required.
 *
 * @param question - the user's question
 * @returns the generated answer text
 */
async function ragAnswer(question: string): Promise<string> {
  const systemPrompt =
    'Answer the question based on the following information. If information is insufficient, say so honestly.';

  const completion = client.chat.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: question },
    ],
    // Memory option: the SDK assembles context (budgeted to 3000 tokens,
    // including episodes) and injects it into the system message.
    memory: {
      enabled: true,
      maxTokens: 3000,
      includeEpisodes: true,
    },
  });

  return await completion.text;
}
The Chat API internally calls the Memory API’s getContext() and automatically injects the result into the system message. The code above achieves the same result as the manual RAG pipeline shown below.

Tool Auto-Execution Agent

With the Chat API’s automatic tool execution, the SDK handles the loop in which the LLM calls tools, receives their results, and re-infers.
// Chat request with automatic tool execution: the SDK runs the
// call-tool → feed-result → re-infer loop, up to maxToolRoundtrips times.
const stream = client.chat.create({
  model: 'gpt-4o',
  messages: [
    { role: 'system', content: 'You are an internal help desk assistant.' },
    { role: 'user', content: 'Check the booking status for Meeting Room A today' },
  ],
  tools: {
    checkRoomAvailability: {
      description: 'Check meeting room booking status',
      // JSON-Schema description of the tool's arguments; only `room`
      // is mandatory, `date` is optional.
      parameters: {
        type: 'object',
        properties: {
          room: { type: 'string', description: 'Room name' },
          date: { type: 'string', format: 'date' },
        },
        required: ['room'],
      },
      // Invoked by the SDK whenever the model requests this tool;
      // the stringified result is fed back for re-inference.
      execute: async ({ room, date }) => {
        const bookings = await roomApi.getBookings(room, date);
        return JSON.stringify(bookings);
      },
    },
  },
  memory: { enabled: true },
  maxToolRoundtrips: 3,
});

// Track tool execution in real-time
for await (const event of stream.fullStream) {
  switch (event.type) {
    case 'tool-call':
      console.log(`Executing: ${event.name}`);
      break;
    case 'text-delta':
      process.stdout.write(event.textDelta);
      break;
  }
}

RAG Pipeline (Manual)

A typical pattern for implementing RAG (Retrieval Augmented Generation) with an external LLM. This uses the Memory API for context retrieval and the Episodes API for recording Q&A history.
import { NdxClient } from '@neuradex/sdk';
import OpenAI from 'openai';

// NeuraDex client for the manual pipeline: memory.getContext(),
// episodes.create(), knowledge.* calls below all go through it.
const neuradex = new NdxClient({
  apiKey: process.env.NEURADEX_API_KEY,
  projectId: process.env.NEURADEX_PROJECT_ID,
});

// External LLM client — presumably picks up OPENAI_API_KEY from the
// environment, since no options are passed here (confirm).
const openai = new OpenAI();

/**
 * Manual RAG pipeline: fetch context from the Memory API, generate an
 * answer with an external LLM, then record the exchange as linked
 * episodes so it can inform future retrieval.
 *
 * @param question - the user's question
 * @returns the generated answer text
 */
async function ragAnswer(question: string): Promise<string> {
  // 1. Assemble context for the question
  const ctx = await neuradex.memory.getContext(question, {
    tokenBudget: 3000,
    includeEpisodes: true,
    maxDepth: 2,
  });

  const systemPrompt = `Answer the question based on the following information.
If information is insufficient, say so honestly.

${ctx.formatted}`;

  // 2. Generate the answer with the LLM
  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: question },
    ],
  });

  const answer = completion.choices[0].message.content ?? '';

  // 3. Persist the Q&A as episodes (for learning). The question must be
  // created first so the answer can reference it as its parent.
  const questionEpisode = await neuradex.episodes.create({
    actorType: 'user',
    episodeType: 'question',
    content: question,
    scopeType: 'project',
    scopeId: process.env.NEURADEX_PROJECT_ID,
    channel: 'api',
  });

  await neuradex.episodes.create({
    actorType: 'agent',
    episodeType: 'answer',
    content: answer,
    scopeType: 'project',
    scopeId: process.env.NEURADEX_PROJECT_ID,
    channel: 'api',
    parentEpisodeId: questionEpisode.id,
  });

  return answer;
}

Customer Support Bot

A pattern that maintains conversation history and generates context-aware responses, using Knowledge for reference information.
// One customer conversation: sessionId groups episodes for history
// retrieval; userId is recorded as the question's actorId.
interface ChatSession {
  sessionId: string;
  userId: string;
}

/**
 * Answer a customer query using session conversation history (episodes)
 * plus reference knowledge (memory context), record both sides of the
 * exchange, and report which knowledge items were used.
 *
 * @param query - the customer's message
 * @param session - identifies the conversation and the customer
 * @returns the generated answer and the titles of referenced knowledge
 */
async function handleCustomerQuery(
  query: string,
  session: ChatSession
): Promise<{ answer: string; sources: string[] }> {
  // 1+2. Session history and memory context are independent lookups —
  // fetch them in parallel instead of sequentially.
  const [history, context] = await Promise.all([
    neuradex.episodes.getBySession(session.sessionId),
    neuradex.memory.getContext(query, {
      tokenBudget: 4000,
      includeEpisodes: true,
    }),
  ]);

  // 3. Build prompt combining conversation history and context
  const conversationHistory = history.data
    .map(e => `${e.episodeType === 'question' ? 'Customer' : 'Agent'}: ${e.content}`)
    .join('\n');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      {
        role: 'system',
        content: `You are a helpful customer support agent.

## Reference Information
${context.formatted}

## Conversation History
${conversationHistory}`,
      },
      { role: 'user', content: query },
    ],
  });

  const answer = response.choices[0].message.content ?? '';

  // 4. Record conversation — question first, so the answer episode can
  // link back to it via parentEpisodeId.
  const questionEpisode = await neuradex.episodes.create({
    actorType: 'user',
    actorId: session.userId,
    episodeType: 'question',
    content: query,
    scopeType: 'project',
    scopeId: process.env.NEURADEX_PROJECT_ID,
    channel: 'widget',
    sessionId: session.sessionId,
  });

  await neuradex.episodes.create({
    actorType: 'agent',
    actorName: 'Support Bot',
    episodeType: 'answer',
    content: answer,
    scopeType: 'project',
    scopeId: process.env.NEURADEX_PROJECT_ID,
    channel: 'widget',
    sessionId: session.sessionId,
    parentEpisodeId: questionEpisode.id,
  });

  // 5. Return referenced knowledge titles (fall back to id when untitled)
  const sources = context.items
    .filter(item => item.type === 'knowledge')
    .map(item => item.title ?? item.id);

  return { answer, sources };
}

Bulk FAQ Import

A pattern for bulk importing FAQs from CSV or JSON.
import { parse } from 'csv-parse/sync';
import fs from 'fs';

// One row of the FAQ CSV; with `columns: true`, csv-parse maps CSV
// header names onto these fields.
interface FaqRecord {
  question: string;
  answer: string;
  category: string;
}

/**
 * Bulk-import FAQ rows from a CSV file into Knowledge.
 * Rows are uploaded in chunks of 50 via bulkCreate, with progress
 * logged after each chunk.
 *
 * @param filePath - path to a CSV file with question/answer/category columns
 */
async function importFaqFromCsv(filePath: string) {
  const csv = fs.readFileSync(filePath, 'utf-8');
  const records: FaqRecord[] = parse(csv, { columns: true });

  const chunkSize = 50;
  let totalCreated = 0;

  // Walk the records chunk by chunk rather than materializing a list
  // of chunks up front.
  for (let offset = 0; offset < records.length; offset += chunkSize) {
    const items = records.slice(offset, offset + chunkSize).map(record => ({
      title: record.question,
      content: record.answer,
      tags: ['faq', record.category],
    }));

    const created = await neuradex.knowledge.bulkCreate(items);
    totalCreated += created.length;

    console.log(`Progress: ${totalCreated}/${records.length}`);
  }

  console.log(`Import complete: ${totalCreated} items`);
}

Two-Step Search Pattern

A pattern that retrieves lightweight search results first, then fetches details as needed.
search() returns only title, tags, and score, making it fast. To optimize performance, call get() only when the full content is needed.
/**
 * Two-step search: run the lightweight search() first (title/tags/score
 * only), then fetch full details with get() for sufficiently relevant
 * hits. Detail fetches run in parallel.
 *
 * @param query - search text
 * @param topK - maximum number of lightweight results (default 5)
 * @param minScore - relevance cutoff for fetching details (default 0.7,
 *   matching the previous hard-coded threshold)
 * @returns lightweight results enriched with content and connections
 */
async function searchWithDetails(
  query: string,
  topK: number = 5,
  minScore: number = 0.7
) {
  // 1. Lightweight search (fast)
  const results = await neuradex.knowledge.search(query, { limit: topK });

  console.log(`Found ${results.length} results`);

  // 2. Get details only for high-scoring results
  const relevantResults = results.filter(r => r.score >= minScore);

  const detailedResults = await Promise.all(
    relevantResults.map(async result => {
      const detail = await neuradex.knowledge.get(result.id);
      return {
        ...result,
        content: detail.content,
        connectedKnowledge: detail.connectedKnowledge,
      };
    })
  );

  return detailedResults;
}

Error Handling

Recommended error handling pattern for production environments.
// Result wrapper for fallible calls: exactly one of `data` / `error`
// is populated.
interface ActionResult<T> {
  data?: T;
  error?: string;
}

/**
 * Run an async operation and convert any thrown value into an
 * ActionResult, logging the failure. Never throws itself.
 *
 * @param fn - the operation to run
 * @returns `{ data }` on success, `{ error }` on failure
 */
async function safeApiCall<T>(
  fn: () => Promise<T>
): Promise<ActionResult<T>> {
  try {
    return { data: await fn() };
  } catch (caught) {
    // Non-Error throwables carry no message; fall back to a fixed label.
    const message =
      caught instanceof Error ? caught.message : 'Unknown error';
    console.error('API Error:', message);
    return { error: message };
  }
}

// Usage
async function searchKnowledge(query: string) {
  const result = await safeApiCall(() =>
    neuradex.knowledge.search(query, { limit: 10 })
  );

  if (result.error) {
    // Handle error (display in UI, retry, etc.)
    return [];
  }

  return result.data;
}

Best Practices

  • Small models (GPT-3.5, etc.): tokenBudget: 2000
  • Large models (GPT-4, etc.): tokenBudget: 4000-8000
  • Disable episodes with includeEpisodes: false when not needed
  • Use search() first to narrow down candidates
  • Only use get() when details are needed
  • memory.getContext() is optimal for complex questions
  • Record Q&A for learning data
  • Group conversations with sessionId
  • Link questions and answers with parentEpisodeId
  • Chunk large data with bulkCreate()
  • Rebuild index with tasks.registerReindex() after updates

Next Steps

Chat API

Chat Completions with memory

Knowledge API

Detailed knowledge operations

Memory API

Context assembly details