Skip to content

Gemini Agent

Build an agent with Google’s Gemini function-calling API that pushes ggui UIs to collect structured data from users.

Terminal window
npm install @ggui-ai/mcp-client @google/generative-ai

Set environment variables:

Terminal window
export GOOGLE_AI_API_KEY="AIza..."
export GGUI_API_KEY="ggui_sk_..."
export GGUI_APP_ID="app_..."
gemini-agent.ts
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GguiClient } from "@ggui-ai/mcp-client";
/**
 * Reads a required environment variable.
 *
 * Failing here gives a clear message at startup instead of passing
 * `undefined` into an SDK constructor behind a non-null assertion.
 *
 * @param name - Name of the environment variable to read.
 * @returns The variable's non-empty value.
 * @throws Error if the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (value === undefined || value === "") {
    throw new Error(`Missing required environment variable: ${name}`);
  }
  return value;
}

// Gemini SDK entry point, authenticated with the Google AI API key.
const genAI = new GoogleGenerativeAI(requireEnv("GOOGLE_AI_API_KEY"));

// ggui client used to push UIs, wait for user interaction, and close sessions.
const ggui = new GguiClient({
  apiKey: requireEnv("GGUI_API_KEY"),
  appId: requireEnv("GGUI_APP_ID"),
});
// Function declaration that exposes ggui to Gemini as the `show_ui` tool.
// `prompt` is the only required argument; `context` is optional data for the UI.
// NOTE(review): `context` is declared as an object with no `properties` —
// confirm the Gemini API accepts a free-form object schema here.
const showUiDeclaration = {
  name: "show_ui",
  description:
    "Show an interactive UI to the user to collect structured data, display information, or present choices.",
  parameters: {
    type: "object",
    properties: {
      prompt: {
        type: "string",
        description: "Natural language description of the UI to generate",
      },
      context: {
        type: "object",
        description: "Data to pass into the UI",
      },
    },
    required: ["prompt"],
  },
};

// Tool list handed to the model; it carries the single `show_ui` declaration.
const tools = [{ functionDeclarations: [showUiDeclaration] }];
/**
 * Pushes a ggui UI built from the model's arguments, waits for the session to
 * complete, and reports what the user submitted.
 *
 * @param args - Arguments of the model's `show_ui` call: a natural-language
 *   `prompt` describing the UI and optional `context` data passed into it.
 * @returns A JSON string: `{ status: "submitted", data }` when a
 *   `data:submit` event was recorded, otherwise `{ status: "no_submission" }`.
 */
async function showUi(args: {
  prompt: string;
  context?: Record<string, unknown>;
}): Promise<string> {
  const { sessionId, url } = await ggui.push({
    story: {
      // The same text is sent as both intent and prompt.
      intent: args.prompt,
      prompt: args.prompt,
      context: args.context,
    },
  });
  console.log(`\n--- UI Ready: ${url} ---\n`);

  try {
    // Presumably milliseconds: poll every 2 s, give up after 5 min — confirm
    // against the ggui client documentation.
    const events = await ggui.waitForCompletion(sessionId, {
      pollInterval: 2000,
      maxWait: 300_000,
    });
    const submitEvent = events.find((e) => e.type === "data:submit");
    return JSON.stringify(
      submitEvent ? { status: "submitted", data: submitEvent.payload } : { status: "no_submission" }
    );
  } finally {
    // Always close the session, even when waiting fails, so it is not left open.
    await ggui.close(sessionId);
  }
}
/**
 * Sends one user message to Gemini, services the model's function calls until
 * it stops requesting them, then prints the model's final text reply.
 *
 * Every function call in a model turn gets a matching `functionResponse` part:
 * - `show_ui` calls are run one after another through `showUi`, so the user
 *   sees one UI at a time.
 * - Any other function name is answered with an error payload so the model can
 *   recover, rather than the loop ending without a reply.
 *
 * @param userMessage - The user's message that starts the conversation.
 */
async function chat(userMessage: string): Promise<void> {
  console.log(`\nUser: ${userMessage}`);
  const model = genAI.getGenerativeModel({
    model: "gemini-2.5-flash",
    tools,
    systemInstruction:
      "You are a helpful assistant. Use the show_ui function when you need to collect structured data from the user.",
  });
  const chatSession = model.startChat();
  let result = await chatSession.sendMessage(userMessage);
  let response = result.response;

  // Keep answering function calls until the model replies without any.
  while (true) {
    const parts = response.candidates?.[0]?.content?.parts ?? [];
    const functionCalls = parts.flatMap((part) =>
      "functionCall" in part && part.functionCall ? [part.functionCall] : []
    );
    if (functionCalls.length === 0) break;

    const functionResponses: { functionResponse: { name: string; response: object } }[] = [];
    for (const { name, args } of functionCalls) {
      if (name === "show_ui") {
        const toolResult = await showUi(
          args as { prompt: string; context?: Record<string, unknown> }
        );
        // showUi returns a JSON string; the SDK expects an object payload.
        functionResponses.push({
          functionResponse: { name, response: JSON.parse(toolResult) },
        });
      } else {
        functionResponses.push({
          functionResponse: { name, response: { error: `Unknown function: ${name}` } },
        });
      }
    }

    result = await chatSession.sendMessage(functionResponses);
    response = result.response;
  }

  // A reply may be split across several text parts; print them all.
  const textParts = (response.candidates?.[0]?.content?.parts ?? []).flatMap((part) =>
    "text" in part && typeof part.text === "string" ? [part.text] : []
  );
  console.log(`\nAssistant: ${textParts.length > 0 ? textParts.join("") : "(no response)"}`);
}
// Entry point: run one sample conversation and log any failure.
chat("I need to schedule a meeting with my team for next week").catch((error: unknown) => {
  console.error(error);
});
Terminal window
npx tsx gemini-agent.ts

Key Differences from Claude/OpenAI Examples

  • Uses @google/generative-ai SDK with functionDeclarations format
  • Function call results are sent as functionResponse parts
  • Uses model.startChat() for multi-turn conversation
  • The ggui SDK usage is identical — push(), waitForCompletion(), close()