Skip to content

Commit 487b068

Browse files
Updated backend code to remove the DALL-E dependency
1 parent 69869bc commit 487b068

File tree

6 files changed

+71
-87
lines changed

6 files changed

+71
-87
lines changed

content-gen/src/backend/agents/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
This package provides utility functions used by the orchestrator.
55
"""
66

7-
from agents.image_content_agent import generate_dalle_image, generate_image
7+
from agents.image_content_agent import generate_image
88

99
__all__ = [
10-
"generate_dalle_image",
1110
"generate_image",
1211
]

content-gen/src/backend/agents/image_content_agent.py

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
"""Image Content Agent - Generates marketing images via DALL-E 3, gpt-image-1, or gpt-image-1.5.
1+
"""Image Content Agent - Generates marketing images.
22
33
Provides the generate_image function used by the orchestrator
4-
to create marketing images using either DALL-E 3, gpt-image-1, or gpt-image-1.5.
4+
to create marketing images using the image generation model.
55
"""
66

77
import logging
@@ -14,9 +14,9 @@
1414
logger = logging.getLogger(__name__)
1515

1616

17-
def _truncate_for_dalle(product_description: str, max_chars: int = 1500) -> str:
17+
def _truncate_for_image(product_description: str, max_chars: int = 1500) -> str:
1818
"""
19-
Truncate product descriptions to fit DALL-E's 4000 character limit.
19+
Truncate product descriptions for image-generation prompt limits.
2020
Extracts the most visually relevant information (colors, hex codes, finishes).
2121
2222
Args:
@@ -59,12 +59,12 @@ def _truncate_for_dalle(product_description: str, max_chars: int = 1500) -> str:
5959

6060
# If still too long, just truncate with ellipsis
6161
if len(result) > max_chars:
62-
result = result[:max_chars - 50] + '\n\n[Additional details truncated for DALL-E]'
62+
result = result[:max_chars - 50] + '\n\n[Additional details truncated for image generation]'
6363

6464
return result
6565

6666

67-
async def generate_dalle_image(
67+
async def generate_image(
6868
prompt: str,
6969
product_description: str = "",
7070
scene_description: str = "",
@@ -95,10 +95,10 @@ async def generate_dalle_image(
9595
logger.info(f"Using image generation model: {image_model}")
9696

9797
# Use appropriate generator based on model
98-
if image_model in ["gpt-image-1", "gpt-image-1.5"]:
99-
return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
100-
else:
98+
if image_model.lower().startswith("dall-e"):
10199
return await _generate_dalle_image(prompt, product_description, scene_description, size, quality)
100+
else:
101+
return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
102102

103103

104104
async def _generate_dalle_image(
@@ -127,9 +127,23 @@ async def _generate_dalle_image(
127127
size = size or app_settings.azure_openai.image_size
128128
quality = quality or app_settings.azure_openai.image_quality
129129

130-
# DALL-E 3 has a 4000 character limit for prompts
130+
# Map gpt-image values to DALL-E compatible values when needed
131+
quality_mapping = {
132+
"low": "standard",
133+
"medium": "standard",
134+
"high": "hd",
135+
"auto": "standard",
136+
}
137+
quality = quality_mapping.get(quality, quality)
138+
139+
size_mapping = {
140+
"1536x1024": "1792x1024",
141+
"1024x1536": "1024x1792",
142+
}
143+
size = size_mapping.get(size, size)
144+
131145
# Truncate product descriptions to essential visual info
132-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=1500)
146+
truncated_product_desc = _truncate_for_image(product_description, max_chars=1500)
133147

134148
# Also truncate the main prompt if it's too long
135149
main_prompt = prompt[:1000] if len(prompt) > 1000 else prompt
@@ -163,11 +177,11 @@ async def _generate_dalle_image(
163177
✓ Professional, polished marketing image
164178
"""
165179

166-
# Final safety check - DALL-E 3 has 4000 char limit
180+
# Final safety check before sending to image generation - if prompt is too long, truncate further and warn
167181
if len(full_prompt) > 3900:
168182
logger.warning(f"Prompt too long ({len(full_prompt)} chars), truncating...")
169183
# Reduce product context further
170-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=800)
184+
truncated_product_desc = _truncate_for_image(product_description, max_chars=800)
171185
full_prompt = f"""⚠️ ZERO TEXT IN IMAGE. NO WORDS. NO LETTERS. NO PRODUCT NAMES.
172186
173187
Create a PURELY VISUAL marketing image with no text whatsoever.
@@ -194,19 +208,18 @@ async def _generate_dalle_image(
194208
# Get token for Azure OpenAI
195209
token = await credential.get_token("https://cognitiveservices.azure.com/.default")
196210

197-
# Use the dedicated DALL-E endpoint if configured, otherwise fall back to main endpoint
198-
dalle_endpoint = app_settings.azure_openai.dalle_endpoint or app_settings.azure_openai.endpoint
199-
logger.info(f"Using DALL-E endpoint: {dalle_endpoint}")
211+
image_endpoint = app_settings.azure_openai.image_endpoint or app_settings.azure_openai.endpoint
212+
logger.info(f"Using endpoint: {image_endpoint}")
200213

201214
client = AsyncAzureOpenAI(
202-
azure_endpoint=dalle_endpoint,
215+
azure_endpoint=image_endpoint,
203216
azure_ad_token=token.token,
204217
api_version=app_settings.azure_openai.preview_api_version,
205218
)
206219

207220
try:
208221
response = await client.images.generate(
209-
model=app_settings.azure_openai.dalle_model,
222+
model=app_settings.azure_openai.image_model,
210223
prompt=full_prompt,
211224
size=size,
212225
quality=quality,
@@ -247,7 +260,7 @@ async def _generate_gpt_image(
247260
"""
248261
Generate a marketing image using gpt-image-1 or gpt-image-1.5.
249262
250-
gpt-image models have different capabilities than DALL-E 3:
263+
gpt-image models:
251264
- Supports larger prompt sizes
252265
- Different size options: 1024x1024, 1536x1024, 1024x1536, auto
253266
- Different quality options: low, medium, high, auto
@@ -265,27 +278,12 @@ async def _generate_gpt_image(
265278
"""
266279
brand = app_settings.brand_guidelines
267280

268-
# Use defaults from settings if not provided
269-
# Map DALL-E quality settings to gpt-image-1 or gpt-image-1.5 equivalents if needed
281+
# Image settings
270282
size = size or app_settings.azure_openai.image_size
271283
quality = quality or app_settings.azure_openai.image_quality
272284

273-
# Map DALL-E quality values to gpt-image-1 or gpt-image-1.5 equivalents
274-
quality_mapping = {
275-
"standard": "medium",
276-
"hd": "high",
277-
}
278-
quality = quality_mapping.get(quality, quality)
279-
280-
# Map DALL-E sizes to gpt-image-1 or gpt-image-1.5 equivalents if needed
281-
size_mapping = {
282-
"1024x1792": "1024x1536", # Closest equivalent
283-
"1792x1024": "1536x1024", # Closest equivalent
284-
}
285-
size = size_mapping.get(size, size)
286-
287285
# gpt-image-1 can handle larger prompts, so we can include more context
288-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=3000)
286+
truncated_product_desc = _truncate_for_image(product_description, max_chars=3000)
289287

290288
main_prompt = prompt[:2000] if len(prompt) > 2000 else prompt
291289
scene_desc = scene_description[:1000] if scene_description and len(scene_description) > 1000 else scene_description
@@ -330,9 +328,8 @@ async def _generate_gpt_image(
330328
# Get token for Azure OpenAI
331329
token = await credential.get_token("https://cognitiveservices.azure.com/.default")
332330

333-
# Use gpt-image-1 specific endpoint if configured, otherwise DALL-E endpoint, otherwise main endpoint
331+
# Use gpt-image-1 specific endpoint if configured, otherwise main endpoint
334332
image_endpoint = (app_settings.azure_openai.gpt_image_endpoint
335-
or app_settings.azure_openai.dalle_endpoint
336333
or app_settings.azure_openai.endpoint)
337334
logger.info(f"Using gpt-image-1 endpoint: {image_endpoint}")
338335

@@ -398,5 +395,5 @@ async def _generate_gpt_image(
398395
}
399396

400397

401-
# Alias for backwards compatibility
402-
generate_image = generate_dalle_image
398+
# Backward-compatible alias
399+
generate_dalle_image = generate_image

content-gen/src/backend/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class GeneratedImageContent(BaseModel):
108108
"""Generated marketing image content with compliance status."""
109109
image_base64: str = Field(description="Base64-encoded image data")
110110
image_url: Optional[str] = Field(default=None, description="URL if saved to Blob Storage")
111-
prompt_used: str = Field(description="DALL-E prompt that generated the image")
111+
prompt_used: str = Field(description="Image generation prompt that generated the image")
112112
alt_text: str = Field(description="Accessibility alt text for the image")
113113
compliance: ComplianceResult = Field(default_factory=ComplianceResult)
114114

content-gen/src/backend/orchestrator.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def _filter_system_prompt_from_response(response_text: str) -> str:
432432
"""
433433

434434
IMAGE_CONTENT_INSTRUCTIONS = f"""You are an Image Content Agent for MARKETING IMAGE GENERATION ONLY.
435-
Create detailed image prompts for DALL-E based on marketing requirements.
435+
Create detailed image prompts for GPT-Image based on marketing requirements.
436436
Your scope is strictly limited to marketing visuals: product images, ads, social media graphics, and promotional materials.
437437
Do not generate images for non-marketing purposes such as personal art, entertainment, or general creative projects.
438438
@@ -445,7 +445,7 @@ def _filter_system_prompt_from_response(response_text: str) -> str:
445445
- Ensure the prompt aligns with campaign objectives
446446
447447
Return JSON with:
448-
- "prompt": Detailed DALL-E prompt
448+
- "prompt": Detailed GPT-Image prompt
449449
- "style": Visual style description
450450
- "aspect_ratio": Recommended aspect ratio
451451
- "notes": Additional considerations
@@ -1249,11 +1249,6 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12491249
# Adapt API version and payload to the deployed image model
12501250
is_dalle3 = image_deployment.lower().startswith("dall-e")
12511251

1252-
if is_dalle3:
1253-
api_version = app_settings.azure_openai.preview_api_version or "2024-02-01"
1254-
else:
1255-
api_version = app_settings.azure_openai.image_api_version or "2025-04-01-preview"
1256-
12571252
logger.info(f"Calling Foundry direct image API: {image_api_url}")
12581253
logger.info(f"Prompt: {image_prompt[:200]}...")
12591254

@@ -1265,6 +1260,7 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12651260
# Build model-appropriate payload
12661261
if is_dalle3:
12671262
# dall-e-3: quality must be "standard" or "hd"; needs response_format; 4000-char prompt limit
1263+
api_version = app_settings.azure_openai.preview_api_version or "2024-02-01"
12681264
payload = {
12691265
"prompt": image_prompt[:4000],
12701266
"n": 1,
@@ -1274,11 +1270,12 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12741270
}
12751271
else:
12761272
# gpt-image-1 / gpt-image-1.5: quality is low/medium/high/auto; no response_format
1273+
api_version = app_settings.azure_openai.image_api_version or "2025-04-01-preview"
12771274
payload = {
12781275
"prompt": image_prompt,
12791276
"n": 1,
1280-
"size": "1024x1024",
1281-
"quality": "medium",
1277+
"size": app_settings.azure_openai.image_size or "1024x1024",
1278+
"quality": app_settings.azure_openai.image_quality or "medium",
12821279
}
12831280

12841281
async with httpx.AsyncClient(timeout=120.0) as client:
@@ -1505,13 +1502,13 @@ async def generate_content(
15051502
logger.info("Generating image via Foundry direct API...")
15061503
await self._generate_foundry_image(image_prompt, results)
15071504
else:
1508-
# Direct mode: use image agent to create prompt, then generate via DALL-E
1505+
# Direct mode: use image agent to create prompt, then generate via image generation model
15091506
image_response = await self._agents["image_content"].run(image_request)
15101507
results["image_prompt"] = str(image_response)
15111508

15121509
# Extract clean prompt from the response and generate actual image
15131510
try:
1514-
from agents.image_content_agent import generate_dalle_image
1511+
from agents.image_content_agent import generate_image
15151512

15161513
# Try to extract a clean prompt from the agent response
15171514
prompt_text = str(image_response)
@@ -1536,13 +1533,13 @@ async def generate_content(
15361533
exc_info=True
15371534
)
15381535

1539-
# Build product description for DALL-E context
1536+
# Build product description for image generation context
15401537
# Include detailed image descriptions if available for better color accuracy
15411538
product_description = detailed_image_context if detailed_image_context else product_context
15421539

1543-
# Generate the actual image using DALL-E
1544-
logger.info(f"Generating DALL-E image with prompt: {prompt_text[:200]}...")
1545-
image_result = await generate_dalle_image(
1540+
# Generate the actual image using image generation model
1541+
logger.info(f"Generating image with prompt: {prompt_text[:200]}...")
1542+
image_result = await generate_image(
15461543
prompt=prompt_text,
15471544
product_description=product_description,
15481545
scene_description=brief.visual_guidelines
@@ -1551,16 +1548,16 @@ async def generate_content(
15511548
if image_result.get("success"):
15521549
image_base64 = image_result.get("image_base64")
15531550
results["image_revised_prompt"] = image_result.get("revised_prompt")
1554-
logger.info("DALL-E image generated successfully")
1551+
logger.info("Image generated successfully")
15551552

15561553
# Save to blob storage
15571554
await self._save_image_to_blob(image_base64, results)
15581555
else:
1559-
logger.warning(f"DALL-E image generation failed: {image_result.get('error')}")
1556+
logger.warning(f"Image generation failed: {image_result.get('error')}")
15601557
results["image_error"] = image_result.get("error")
15611558

15621559
except Exception as img_error:
1563-
logger.exception(f"Error generating DALL-E image: {img_error}")
1560+
logger.exception(f"Error generating image: {img_error}")
15641561
results["image_error"] = str(img_error)
15651562

15661563
# Run compliance check
@@ -1713,7 +1710,7 @@ async def regenerate_image(
17131710
3. Maintains the campaign's tone and objectives
17141711
17151712
Return JSON with:
1716-
- "prompt": The new DALL-E prompt incorporating the modification
1713+
- "prompt": The new image generation prompt incorporating the modification
17171714
- "style": Visual style description
17181715
- "change_summary": Brief summary of what was changed
17191716
"""
@@ -1781,12 +1778,12 @@ async def regenerate_image(
17811778

17821779
# Generate the actual image
17831780
try:
1784-
from agents.image_content_agent import generate_dalle_image
1781+
from agents.image_content_agent import generate_image
17851782

17861783
product_description = detailed_image_context if detailed_image_context else product_context
17871784

1788-
logger.info(f"Generating modified DALL-E image: {prompt_text[:200]}...")
1789-
image_result = await generate_dalle_image(
1785+
logger.info(f"Generating modified image: {prompt_text[:200]}...")
1786+
image_result = await generate_image(
17901787
prompt=prompt_text,
17911788
product_description=product_description,
17921789
scene_description=brief.visual_guidelines
@@ -1795,14 +1792,14 @@ async def regenerate_image(
17951792
if image_result.get("success"):
17961793
image_base64 = image_result.get("image_base64")
17971794
results["image_revised_prompt"] = image_result.get("revised_prompt")
1798-
logger.info("Modified DALL-E image generated successfully")
1795+
logger.info("Modified image generated successfully")
17991796
await self._save_image_to_blob(image_base64, results)
18001797
else:
1801-
logger.warning(f"Modified DALL-E image generation failed: {image_result.get('error')}")
1798+
logger.warning(f"Modified image generation failed: {image_result.get('error')}")
18021799
results["image_error"] = image_result.get("error")
18031800

18041801
except Exception as img_error:
1805-
logger.exception(f"Error generating modified DALL-E image: {img_error}")
1802+
logger.exception(f"Error generating modified image: {img_error}")
18061803
results["image_error"] = str(img_error)
18071804

18081805
logger.info(f"Image regeneration complete. Has image: {bool(results.get('image_base64') or results.get('image_blob_url'))}")

content-gen/src/backend/services/blob_service.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ async def save_generated_image(
143143
content_type: str = "image/png"
144144
) -> str:
145145
"""
146-
Save a DALL-E generated image to blob storage.
146+
Save the generated image to blob storage.
147147
148148
Args:
149149
conversation_id: ID of the conversation that generated the image
@@ -205,8 +205,7 @@ async def generate_image_description(self, image_data: bytes) -> str:
205205
Generate a detailed text description of an image using GPT-5 Vision.
206206
207207
This is used to create descriptions of product images that can be
208-
used as context for DALL-E 3 image generation (since DALL-E 3
209-
cannot accept image inputs directly).
208+
used as context for image generation.
210209
211210
Args:
212211
image_data: Raw image bytes

0 commit comments

Comments
 (0)