Gemini Agent
Build an agent using Google’s Gemini function calling API that pushes ggui UIs to collect structured data from users.
npm install @ggui-ai/mcp-client @google/generative-ai

Set environment variables:
export GOOGLE_AI_API_KEY="AIza..."
export GGUI_API_KEY="ggui_sk_..."
export GGUI_APP_ID="app_..."

import { GoogleGenerativeAI } from "@google/generative-ai";
import { GguiClient } from "@ggui-ai/mcp-client";
/**
 * Reads a required environment variable.
 *
 * Fails fast with a descriptive error so a missing or empty variable is
 * reported at startup instead of being handed to an SDK constructor as
 * `undefined`.
 *
 * @param name - Name of the environment variable to read.
 * @returns The variable's non-empty value.
 * @throws Error if the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (value === undefined || value === "") {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

// Gemini SDK client, constructed from the Google AI API key.
const genAI = new GoogleGenerativeAI(requireEnv("GOOGLE_AI_API_KEY"));

// ggui client, constructed from the ggui API key and app id.
const ggui = new GguiClient({
  apiKey: requireEnv("GGUI_API_KEY"),
  appId: requireEnv("GGUI_APP_ID"),
});
// Define ggui as a Gemini function declaration.
// The model calls `show_ui` with a natural-language `prompt` (required) and an
// optional `context` object.
const tools = [
  {
    functionDeclarations: [
      {
        name: "show_ui",
        description:
          "Show an interactive UI to the user to collect structured data, display information, or present choices.",
        parameters: {
          type: "object",
          properties: {
            prompt: {
              type: "string",
              description: "Natural language description of the UI to generate",
            },
            context: {
              type: "object",
              description: "Data to pass into the UI",
            },
          },
          required: ["prompt"],
        },
      },
    ],
  },
];
/**
 * Pushes a ggui UI for the given prompt, waits for the session to complete,
 * and reports what the user submitted.
 *
 * @param args - Arguments from the model's `show_ui` call. `prompt` is used as
 *   both the story `intent` and `prompt`; `context` is forwarded unchanged.
 * @returns A JSON string: `{ status: "submitted", data }` carrying the payload
 *   of the first `data:submit` event, or `{ status: "no_submission" }` when no
 *   such event is present.
 * @throws Whatever `ggui.push`, `ggui.waitForCompletion`, or `ggui.close`
 *   reject with. The session is closed even when waiting fails.
 */
async function showUi(args: {
  prompt: string;
  context?: Record<string, unknown>;
}): Promise<string> {
  const { sessionId, url } = await ggui.push({
    story: {
      intent: args.prompt,
      prompt: args.prompt,
      context: args.context,
    },
  });

  console.log(`\n--- UI Ready: ${url} ---\n`);

  try {
    // NOTE(review): values are presumably milliseconds (2 s poll, 5 min cap) —
    // confirm against the ggui SDK.
    const events = await ggui.waitForCompletion(sessionId, {
      pollInterval: 2000,
      maxWait: 300_000,
    });

    const submitEvent = events.find((e) => e.type === "data:submit");
    return JSON.stringify(
      submitEvent
        ? { status: "submitted", data: submitEvent.payload }
        : { status: "no_submission" }
    );
  } finally {
    // Always release the session, including when waiting rejects; otherwise
    // the pushed session would be left open.
    await ggui.close(sessionId);
  }
}
/**
 * Sends one user message to Gemini, services any `show_ui` function calls the
 * model makes, and logs the assistant's final text reply.
 *
 * @param userMessage - The message that opens the chat session.
 * @returns Resolves after the assistant's reply (or "(no response)") is logged.
 */
async function chat(userMessage: string): Promise<void> {
  console.log(`\nUser: ${userMessage}`);

  const model = genAI.getGenerativeModel({
    model: "gemini-2.5-flash",
    tools,
    systemInstruction:
      "You are a helpful assistant. Use the show_ui function when you need to collect structured data from the user.",
  });

  const chatSession = model.startChat();
  let result = await chatSession.sendMessage(userMessage);
  let response = result.response;

  // Handle function calls in a loop until the model stops requesting them.
  for (;;) {
    const parts = response.candidates?.[0]?.content?.parts;
    if (!parts) break;

    // One model turn may carry several function calls; collect all of them so
    // each one gets a matching functionResponse.
    const calls = parts.flatMap((part) =>
      "functionCall" in part && part.functionCall ? [part.functionCall] : []
    );
    if (calls.length === 0) break;

    // Only show_ui is handled here; stop on anything else.
    if (calls.some((call) => call.name !== "show_ui")) break;

    const functionResponses = [];
    for (const call of calls) {
      // Awaited one at a time so each UI completes before the next is pushed.
      const toolResult = await showUi(
        call.args as { prompt: string; context?: Record<string, unknown> }
      );
      functionResponses.push({
        functionResponse: {
          name: "show_ui",
          // showUi returns a JSON string; the response field takes an object.
          response: JSON.parse(toolResult),
        },
      });
    }

    result = await chatSession.sendMessage(functionResponses);
    response = result.response;
  }

  const textPart = response.candidates?.[0]?.content?.parts?.find(
    (part) => "text" in part
  );
  console.log(
    `\nAssistant: ${textPart && "text" in textPart ? textPart.text : "(no response)"}`
  );
}
chat("I need to schedule a meeting with my team for next week").catch(console.error);

npx tsx gemini-agent.ts

Key Differences from Claude/OpenAI Examples
- Uses the `@google/generative-ai` SDK with the `functionDeclarations` format
- Function call results are sent as `functionResponse` parts
- Uses `model.startChat()` for multi-turn conversation
- The ggui SDK usage is identical — `push()`, `waitForCompletion()`, `close()`