إنشاء الصور باستخدام Gemini

يمكن لـ Gemini إنشاء الصور ومعالجتها بشكل حواري. يمكنك تقديم طلب إلى Gemini باستخدام نص أو صور أو مزيج من الاثنين معًا لإنجاز مهام مختلفة متعلقة بالصور، مثل إنشاء الصور وتعديلها. تتضمّن جميع الصور التي يتم إنشاؤها علامة مائية من SynthID.

قد لا تتوفّر ميزة إنشاء الصور في جميع المناطق والبلدان، لذا يُرجى مراجعة صفحة نماذج Gemini للحصول على مزيد من المعلومات.

إنشاء الصور (من نص إلى صورة)

يوضّح الرمز التالي كيفية إنشاء صورة استنادًا إلى طلب وصفي. يجب تضمين responseModalities: ["TEXT", "IMAGE"] في الإعدادات، إذ لا تتيح هذه النماذج إخراج الصور فقط.

Python

from google import genai
from google.genai import types
from PIL import Image
from io import BytesIO
import base64

# Text-to-image example: send a descriptive prompt, print any text the model
# returns, and save/show any image it returns.
client = genai.Client()

# Descriptive prompt for the image to generate.
contents = (
    'Hi, can you create a 3d rendered image of a pig with wings and a top hat '
    'flying over a happy futuristic scifi city with lots of greenery?'
)

# Both TEXT and IMAGE modalities are requested for this model.
generation_config = types.GenerateContentConfig(
    response_modalities=['TEXT', 'IMAGE'],
)

response = client.models.generate_content(
    model="gemini-2.0-flash-preview-image-generation",
    contents=contents,
    config=generation_config,
)

# Walk the parts of the first candidate: text parts are printed, inline-data
# parts are opened as an image, written to disk and displayed.
for part in response.candidates[0].content.parts:
    if part.text is not None:
        print(part.text)
        continue
    if part.inline_data is None:
        continue
    image = Image.open(BytesIO(part.inline_data.data))
    image.save('gemini-native-image.png')
    image.show()

JavaScript

import { GoogleGenAI, Modality } from "@google/genai";
import * as fs from "node:fs";

/**
 * Generates an image from a text prompt. Text parts of the response are
 * printed to stdout; an inline image part is decoded from base64 and written
 * to "gemini-native-image.png".
 *
 * @returns {Promise<void>} Resolves once every response part has been handled.
 * @throws {Error} If the response carries no candidate content parts.
 */
async function main() {
  const ai = new GoogleGenAI({});

  const contents =
    "Hi, can you create a 3d rendered image of a pig " +
    "with wings and a top hat flying over a happy " +
    "futuristic scifi city with lots of greenery?";

  // responseModalities must include Modality.IMAGE so the model can generate an image.
  const response = await ai.models.generateContent({
    model: "gemini-2.0-flash-preview-image-generation",
    contents,
    config: {
      responseModalities: [Modality.TEXT, Modality.IMAGE],
    },
  });

  // Fail with a clear message instead of a TypeError when nothing came back.
  const parts = response.candidates?.[0]?.content?.parts;
  if (parts == null) {
    throw new Error("The response did not contain any content parts.");
  }

  for (const part of parts) {
    // Based on the part type, either show the text or save the image.
    if (part.text) {
      console.log(part.text);
    } else if (part.inlineData) {
      const buffer = Buffer.from(part.inlineData.data, "base64");
      fs.writeFileSync("gemini-native-image.png", buffer);
      console.log("Image saved as gemini-native-image.png");
    }
  }
}

// Do not leave the promise floating: report the failure and exit non-zero.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Go

package main

import (
  "context"
  "fmt"
  "os"
  "google.golang.org/genai"
)

func main() {

  ctx := context.Background()
  client, err := genai.NewClient(ctx, nil)
  if err != nil {
      log.Fatal(err)
  }

  config := &genai.GenerateContentConfig{
      ResponseModalities: []string{"TEXT", "IMAGE"},
  }

  result, _ := client.Models.GenerateContent(
      ctx,
      "gemini-2.0-flash-preview-image-generation",
      genai.Text("Hi, can you create a 3d rendered image of a pig " +
                 "with wings and a top hat flying over a happy " +
                 "futuristic scifi city with lots of greenery?"),
      config,
  )

  for _, part := range result.Candidates[0].Content.Parts {
      if part.Text != "" {
          fmt.Println(part.Text)
      } else if part.InlineData != nil {
          imageBytes := part.InlineData.Data
          outputFilename := "gemini_generated_image.png"
          _ = os.WriteFile(outputFilename, imageBytes, 0644)
      }
  }
}

REST

# Request an image, then pull the base64 "data" field out of the JSON
# response and decode it into gemini-native-image.png.
# Every continued line must end in a backslash, including the first one;
# otherwise curl runs without a URL and the URL line is executed on its own.
curl -s -X POST \
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-preview-image-generation:generateContent" \
  -H "x-goog-api-key: $GEMINI_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "contents": [{
      "parts": [
        {"text": "Hi, can you create a 3d rendered image of a pig with wings and a top hat flying over a happy futuristic scifi city with lots of greenery?"}
      ]
    }],
    "generationConfig":{"responseModalities":["TEXT","IMAGE"]}
  }' \
  | grep -o '"data": "[^"]*"' \
  | cut -d'"' -f4 \
  | base64 --decode > gemini-native-image.png

صورة من إنشاء الذكاء الاصطناعي لخنزير طائر خيالي

تعديل الصور (تحويل النص والصورة إلى صورة)

لتعديل صورة، أضِف صورة كإدخال. يوضّح المثال التالي كيفية تحميل صور مشفّرة بتشفير base64. بالنسبة إلى الصور المتعددة والحِزم الأكبر حجمًا، راجِع قسم إدخال الصور.

Python

from google import genai
from google.genai import types
from PIL import Image
from io import BytesIO

import PIL.Image

# Image-editing example: send a text instruction together with an input image,
# print any text the model returns, and display any image it returns.
image = PIL.Image.open('/path/to/image.png')

client = genai.Client()

# Adjacent string literals are concatenated into ONE string. There must be no
# trailing comma inside the parentheses (that would build a tuple), and the
# first literal ends with a space so the two sentences do not run together.
text_input = ('Hi, This is a picture of me. '
              'Can you add a llama next to me?')

response = client.models.generate_content(
    model="gemini-2.0-flash-preview-image-generation",
    contents=[text_input, image],
    config=types.GenerateContentConfig(
      response_modalities=['TEXT', 'IMAGE']
    )
)

# Text parts are printed; inline-data parts are opened as an image and shown.
for part in response.candidates[0].content.parts:
  if part.text is not None:
    print(part.text)
  elif part.inline_data is not None:
    # Separate name so the input image loaded above is not overwritten.
    edited_image = Image.open(BytesIO(part.inline_data.data))
    edited_image.show()

JavaScript

import { GoogleGenAI, Modality } from "@google/genai";
import * as fs from "node:fs";

/**
 * Edits a local image according to a text instruction. The image at
 * "path/to/image.png" is read and sent as base64 inline data alongside the
 * prompt. Text parts of the response are printed to stdout; an inline image
 * part is decoded from base64 and written to "gemini-native-image.png".
 *
 * @returns {Promise<void>} Resolves once every response part has been handled.
 * @throws {Error} If the response carries no candidate content parts.
 */
async function main() {
  const ai = new GoogleGenAI({});

  // Load the image from the local file system and encode it as base64.
  const imagePath = "path/to/image.png";
  const base64Image = fs.readFileSync(imagePath).toString("base64");

  // Prepare the content parts: the instruction followed by the input image.
  const contents = [
    { text: "Can you add a llama next to the image?" },
    {
      inlineData: {
        mimeType: "image/png",
        data: base64Image,
      },
    },
  ];

  // responseModalities must include Modality.IMAGE so the model can generate an image.
  const response = await ai.models.generateContent({
    model: "gemini-2.0-flash-preview-image-generation",
    contents,
    config: {
      responseModalities: [Modality.TEXT, Modality.IMAGE],
    },
  });

  // Fail with a clear message instead of a TypeError when nothing came back.
  const parts = response.candidates?.[0]?.content?.parts;
  if (parts == null) {
    throw new Error("The response did not contain any content parts.");
  }

  for (const part of parts) {
    // Based on the part type, either show the text or save the image.
    if (part.text) {
      console.log(part.text);
    } else if (part.inlineData) {
      // Distinct name: the input image variables above are not shadowed.
      const outputBuffer = Buffer.from(part.inlineData.data, "base64");
      fs.writeFileSync("gemini-native-image.png", outputBuffer);
      console.log("Image saved as gemini-native-image.png");
    }
  }
}

// Do not leave the promise floating: report the failure and exit non-zero.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Go

package main

import (
 "context"
 "fmt"
 "os"
 "google.golang.org/genai"
)

func main() {

 ctx := context.Background()
 client, err := genai.NewClient(ctx, nil)
 if err != nil {
     log.Fatal(err)
 }

 imagePath := "/path/to/image.png"
 imgData, _ := os.ReadFile(imagePath)

 parts := []*genai.Part{
   genai.NewPartFromText("Hi, This is a picture of me. Can you add a llama next to me?"),
   &genai.Part{
     InlineData: &genai.Blob{
       MIMEType: "image/png",
       Data:     imgData,
     },
   },
 }

 contents := []*genai.Content{
   genai.NewContentFromParts(parts, genai.RoleUser),
 }

 config := &genai.GenerateContentConfig{
     ResponseModalities: []string{"TEXT", "IMAGE"},
 }

 result, _ := client.Models.GenerateContent(
     ctx,
     "gemini-2.0-flash-preview-image-generation",
     contents,
     config,
 )

 for _, part := range result.Candidates[0].Content.Parts {
     if part.Text != "" {
         fmt.Println(part.Text)
     } else if part.InlineData != nil {
         imageBytes := part.InlineData.Data
         outputFilename := "gemini_generated_image.png"
         _ = os.WriteFile(outputFilename, imageBytes, 0644)
     }
 }
}

REST

# Path of the JPEG image to edit.
IMG_PATH=/path/to/your/image1.jpeg

# Choose the base64 flag by implementation: the FreeBSD variant takes the
# file through --input, otherwise -w0 is used to get unwrapped output.
if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then
  B64FLAGS="--input"
else
  B64FLAGS="-w0"
fi

# stderr is deliberately NOT merged into the captured output, so a base64
# error message can never end up inside the image payload.
IMG_BASE64=$(base64 "$B64FLAGS" "$IMG_PATH")

# Send the instruction plus the inline image, then pull the base64 "data"
# field out of the JSON response and decode it into gemini-edited-image.png.
curl -X POST \
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-preview-image-generation:generateContent" \
    -H "x-goog-api-key: $GEMINI_API_KEY" \
    -H 'Content-Type: application/json' \
    -d "{
      \"contents\": [{
        \"parts\":[
            {\"text\": \"Hi, This is a picture of me. Can you add a llama next to me?\"},
            {
              \"inline_data\": {
                \"mime_type\":\"image/jpeg\",
                \"data\": \"$IMG_BASE64\"
              }
            }
        ]
      }],
      \"generationConfig\": {\"responseModalities\": [\"TEXT\", \"IMAGE\"]}
    }"  \
  | grep -o '"data": "[^"]*"' \
  | cut -d'"' -f4 \
  | base64 --decode > gemini-edited-image.png

أوضاع أخرى لإنشاء الصور

يتيح Gemini أوضاعًا أخرى للتفاعل مع الصور استنادًا إلى بنية الطلب والسياق، بما في ذلك:

من نص إلى صور ونص (متداخل): يعرض صورًا مع نص ذي صلة.
- مثال على الطلب: "أريد إنشاء وصفة مصوّرة لتحضير البايلا".
الصور والنص إلى صور ونص (متداخل): تستخدم هذه الميزة الصور والنصوص المُدخَلة لإنشاء صور ونصوص جديدة ذات صلة.
- مثال على الطلب: (مع صورة لغرفة مفروشة) "ما هي ألوان الأرائك الأخرى التي يمكن استخدامها في مساحتي؟ هل يمكنك تعديل الصورة؟"
تعديل الصور في عدة مراحل (محادثة): مواصلة إنشاء الصور أو تعديلها بشكل تفاعلي
- أمثلة على الطلبات: [حمِّل صورة سيارة زرقاء.] ، "حوِّل هذه السيارة إلى سيارة مكشوفة"، "غيِّر اللون إلى الأصفر".

القيود

للحصول على أفضل أداء، استخدِم اللغات التالية: الإنجليزية والإسبانية (المكسيك) واليابانية والصينية والهندية.
لا تتيح ميزة إنشاء الصور إدخال مقاطع صوتية أو فيديوهات.
قد لا يتم تشغيل ميزة إنشاء الصور في الحالات التالية:
- قد يعرض النموذج نصًا فقط. جرِّب أن تطلب بشكل صريح الحصول على نتائج على شكل صور (مثلاً، "أنشئ صورة" أو "قدِّم صورًا أثناء المحادثة" أو "عدِّل الصورة").
- قد يتوقف النموذج عن إنشاء الرد في منتصف العملية. يُرجى إعادة المحاولة أو تجربة طلب مختلف.
عند إنشاء نص لصورة، يعمل Gemini بشكل أفضل إذا أنشأت النص أولاً ثم طلبت صورة تتضمّن النص.
لا تتوفّر ميزة "إنشاء الصور" في بعض المناطق أو البلدان. يمكنك الاطّلاع على صفحة نماذج Gemini لمزيد من المعلومات.

حالات استخدام Imagen

بالإضافة إلى استخدام إمكانات إنشاء الصور المضمّنة في Gemini، يمكنك أيضًا الوصول إلى Imagen، نموذجنا المتخصّص في إنشاء الصور، من خلال Gemini API.

اختَر Gemini في الحالات التالية:

أنت بحاجة إلى صور ذات صلة بالسياق تستفيد من المعرفة العالمية والمنطق.
من المهم دمج النصوص والصور بسلاسة.
إذا كنت تريد تضمين مرئيات دقيقة في تسلسلات نصية طويلة
عندما تريد تعديل الصور بشكل حواري مع الحفاظ على السياق

اختَر Imagen في الحالات التالية:

تُعدّ جودة الصورة أو الواقعية الفوتوغرافية أو التفاصيل الفنية أو الأساليب المحدّدة (مثل الانطباعية أو الأنمي) من أهم الأولويات.
تنفيذ مهام تعديل متخصّصة، مثل تعديل خلفية المنتج أو زيادة كثافة بكسل الصورة
إضافة هوية العلامة التجارية أو الأسلوب أو إنشاء شعارات وتصاميم منتجات

ننصحك باستخدام Imagen 4 لإنشاء الصور باستخدام Imagen. اختَر Imagen 4 Ultra لحالات الاستخدام المتقدّمة أو عندما تحتاج إلى أفضل جودة للصور. يُرجى العِلم أنّه لا يمكن لـ Imagen 4 Ultra إنشاء أكثر من صورة واحدة في المرة الواحدة.

الخطوات التالية

اطّلِع على دليل Veo لمعرفة كيفية إنشاء فيديوهات باستخدام Gemini API.
لمزيد من المعلومات حول نماذج Gemini، يمكنك الاطّلاع على نماذج Gemini والنماذج التجريبية.