Skip to content

Commit 487b068

Browse files
Updated backend code to remove the DALL-E dependency
1 parent 69869bc commit 487b068

File tree

6 files changed

+71
-87
lines changed

6 files changed

+71
-87
lines changed

content-gen/src/backend/agents/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
This package provides utility functions used by the orchestrator.
55
"""
66

7-
from agents.image_content_agent import generate_dalle_image, generate_image
7+
from agents.image_content_agent import generate_image
88

99
__all__ = [
10-
"generate_dalle_image",
1110
"generate_image",
1211
]

content-gen/src/backend/agents/image_content_agent.py

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
"""Image Content Agent - Generates marketing images via DALL-E 3, gpt-image-1, or gpt-image-1.5.
1+
"""Image Content Agent - Generates marketing images.
22
33
Provides the generate_image function used by the orchestrator
4-
to create marketing images using either DALL-E 3, gpt-image-1, or gpt-image-1.5.
4+
to create marketing images using the image generation model.
55
"""
66

77
import logging
@@ -14,9 +14,9 @@
1414
logger = logging.getLogger(__name__)
1515

1616

17-
def _truncate_for_dalle(product_description: str, max_chars: int = 1500) -> str:
17+
def _truncate_for_image(product_description: str, max_chars: int = 1500) -> str:
1818
"""
19-
Truncate product descriptions to fit DALL-E's 4000 character limit.
19+
Truncate product descriptions for image-generation prompt limits.
2020
Extracts the most visually relevant information (colors, hex codes, finishes).
2121
2222
Args:
@@ -59,12 +59,12 @@ def _truncate_for_dalle(product_description: str, max_chars: int = 1500) -> str:
5959

6060
# If still too long, just truncate with ellipsis
6161
if len(result) > max_chars:
62-
result = result[:max_chars - 50] + '\n\n[Additional details truncated for DALL-E]'
62+
result = result[:max_chars - 50] + '\n\n[Additional details truncated for image generation]'
6363

6464
return result
6565

6666

67-
async def generate_dalle_image(
67+
async def generate_image(
6868
prompt: str,
6969
product_description: str = "",
7070
scene_description: str = "",
@@ -95,10 +95,10 @@ async def generate_dalle_image(
9595
logger.info(f"Using image generation model: {image_model}")
9696

9797
# Use appropriate generator based on model
98-
if image_model in ["gpt-image-1", "gpt-image-1.5"]:
99-
return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
100-
else:
98+
if image_model.lower().startswith("dall-e"):
10199
return await _generate_dalle_image(prompt, product_description, scene_description, size, quality)
100+
else:
101+
return await _generate_gpt_image(prompt, product_description, scene_description, size, quality)
102102

103103

104104
async def _generate_dalle_image(
@@ -127,9 +127,23 @@ async def _generate_dalle_image(
127127
size = size or app_settings.azure_openai.image_size
128128
quality = quality or app_settings.azure_openai.image_quality
129129

130-
# DALL-E 3 has a 4000 character limit for prompts
130+
# Map gpt-image values to DALL-E compatible values when needed
131+
quality_mapping = {
132+
"low": "standard",
133+
"medium": "standard",
134+
"high": "hd",
135+
"auto": "standard",
136+
}
137+
quality = quality_mapping.get(quality, quality)
138+
139+
size_mapping = {
140+
"1536x1024": "1792x1024",
141+
"1024x1536": "1024x1792",
142+
}
143+
size = size_mapping.get(size, size)
144+
131145
# Truncate product descriptions to essential visual info
132-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=1500)
146+
truncated_product_desc = _truncate_for_image(product_description, max_chars=1500)
133147

134148
# Also truncate the main prompt if it's too long
135149
main_prompt = prompt[:1000] if len(prompt) > 1000 else prompt
@@ -163,11 +177,11 @@ async def _generate_dalle_image(
163177
✓ Professional, polished marketing image
164178
"""
165179

166-
# Final safety check - DALL-E 3 has 4000 char limit
180+
# Final safety check before sending to image generation - if prompt is too long, truncate further and warn
167181
if len(full_prompt) > 3900:
168182
logger.warning(f"Prompt too long ({len(full_prompt)} chars), truncating...")
169183
# Reduce product context further
170-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=800)
184+
truncated_product_desc = _truncate_for_image(product_description, max_chars=800)
171185
full_prompt = f"""⚠️ ZERO TEXT IN IMAGE. NO WORDS. NO LETTERS. NO PRODUCT NAMES.
172186
173187
Create a PURELY VISUAL marketing image with no text whatsoever.
@@ -194,19 +208,18 @@ async def _generate_dalle_image(
194208
# Get token for Azure OpenAI
195209
token = await credential.get_token("https://cognitiveservices.azure.com/.default")
196210

197-
# Use the dedicated DALL-E endpoint if configured, otherwise fall back to main endpoint
198-
dalle_endpoint = app_settings.azure_openai.dalle_endpoint or app_settings.azure_openai.endpoint
199-
logger.info(f"Using DALL-E endpoint: {dalle_endpoint}")
211+
image_endpoint = app_settings.azure_openai.image_endpoint or app_settings.azure_openai.endpoint
212+
logger.info(f"Using endpoint: {image_endpoint}")
200213

201214
client = AsyncAzureOpenAI(
202-
azure_endpoint=dalle_endpoint,
215+
azure_endpoint=image_endpoint,
203216
azure_ad_token=token.token,
204217
api_version=app_settings.azure_openai.preview_api_version,
205218
)
206219

207220
try:
208221
response = await client.images.generate(
209-
model=app_settings.azure_openai.dalle_model,
222+
model=app_settings.azure_openai.image_model,
210223
prompt=full_prompt,
211224
size=size,
212225
quality=quality,
@@ -247,7 +260,7 @@ async def _generate_gpt_image(
247260
"""
248261
Generate a marketing image using gpt-image-1 or gpt-image-1.5.
249262
250-
gpt-image models have different capabilities than DALL-E 3:
263+
gpt-image models:
251264
- Supports larger prompt sizes
252265
- Different size options: 1024x1024, 1536x1024, 1024x1536, auto
253266
- Different quality options: low, medium, high, auto
@@ -265,27 +278,12 @@ async def _generate_gpt_image(
265278
"""
266279
brand = app_settings.brand_guidelines
267280

268-
# Use defaults from settings if not provided
269-
# Map DALL-E quality settings to gpt-image-1 or gpt-image-1.5 equivalents if needed
281+
# Image settings
270282
size = size or app_settings.azure_openai.image_size
271283
quality = quality or app_settings.azure_openai.image_quality
272284

273-
# Map DALL-E quality values to gpt-image-1 or gpt-image-1.5 equivalents
274-
quality_mapping = {
275-
"standard": "medium",
276-
"hd": "high",
277-
}
278-
quality = quality_mapping.get(quality, quality)
279-
280-
# Map DALL-E sizes to gpt-image-1 or gpt-image-1.5 equivalents if needed
281-
size_mapping = {
282-
"1024x1792": "1024x1536", # Closest equivalent
283-
"1792x1024": "1536x1024", # Closest equivalent
284-
}
285-
size = size_mapping.get(size, size)
286-
287285
# gpt-image-1 can handle larger prompts, so we can include more context
288-
truncated_product_desc = _truncate_for_dalle(product_description, max_chars=3000)
286+
truncated_product_desc = _truncate_for_image(product_description, max_chars=3000)
289287

290288
main_prompt = prompt[:2000] if len(prompt) > 2000 else prompt
291289
scene_desc = scene_description[:1000] if scene_description and len(scene_description) > 1000 else scene_description
@@ -330,9 +328,8 @@ async def _generate_gpt_image(
330328
# Get token for Azure OpenAI
331329
token = await credential.get_token("https://cognitiveservices.azure.com/.default")
332330

333-
# Use gpt-image-1 specific endpoint if configured, otherwise DALL-E endpoint, otherwise main endpoint
331+
# Use gpt-image-1 specific endpoint if configured, otherwise main endpoint
334332
image_endpoint = (app_settings.azure_openai.gpt_image_endpoint
335-
or app_settings.azure_openai.dalle_endpoint
336333
or app_settings.azure_openai.endpoint)
337334
logger.info(f"Using gpt-image-1 endpoint: {image_endpoint}")
338335

@@ -398,5 +395,5 @@ async def _generate_gpt_image(
398395
}
399396

400397

401-
# Alias for backwards compatibility
402-
generate_image = generate_dalle_image
398+
# Backward-compatible alias
399+
generate_dalle_image = generate_image

content-gen/src/backend/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ class GeneratedImageContent(BaseModel):
108108
"""Generated marketing image content with compliance status."""
109109
image_base64: str = Field(description="Base64-encoded image data")
110110
image_url: Optional[str] = Field(default=None, description="URL if saved to Blob Storage")
111-
prompt_used: str = Field(description="DALL-E prompt that generated the image")
111+
prompt_used: str = Field(description="Image generation prompt that generated the image")
112112
alt_text: str = Field(description="Accessibility alt text for the image")
113113
compliance: ComplianceResult = Field(default_factory=ComplianceResult)
114114

content-gen/src/backend/orchestrator.py

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def _filter_system_prompt_from_response(response_text: str) -> str:
432432
"""
433433

434434
IMAGE_CONTENT_INSTRUCTIONS = f"""You are an Image Content Agent for MARKETING IMAGE GENERATION ONLY.
435-
Create detailed image prompts for DALL-E based on marketing requirements.
435+
Create detailed image prompts for GPT-Image based on marketing requirements.
436436
Your scope is strictly limited to marketing visuals: product images, ads, social media graphics, and promotional materials.
437437
Do not generate images for non-marketing purposes such as personal art, entertainment, or general creative projects.
438438
@@ -445,7 +445,7 @@ def _filter_system_prompt_from_response(response_text: str) -> str:
445445
- Ensure the prompt aligns with campaign objectives
446446
447447
Return JSON with:
448-
- "prompt": Detailed DALL-E prompt
448+
- "prompt": Detailed GPT-Image prompt
449449
- "style": Visual style description
450450
- "aspect_ratio": Recommended aspect ratio
451451
- "notes": Additional considerations
@@ -1249,11 +1249,6 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12491249
# Adapt API version and payload to the deployed image model
12501250
is_dalle3 = image_deployment.lower().startswith("dall-e")
12511251

1252-
if is_dalle3:
1253-
api_version = app_settings.azure_openai.preview_api_version or "2024-02-01"
1254-
else:
1255-
api_version = app_settings.azure_openai.image_api_version or "2025-04-01-preview"
1256-
12571252
logger.info(f"Calling Foundry direct image API: {image_api_url}")
12581253
logger.info(f"Prompt: {image_prompt[:200]}...")
12591254

@@ -1265,6 +1260,7 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12651260
# Build model-appropriate payload
12661261
if is_dalle3:
12671262
# dall-e-3: quality must be "standard" or "hd"; needs response_format; 4000-char prompt limit
1263+
api_version = app_settings.azure_openai.preview_api_version or "2024-02-01"
12681264
payload = {
12691265
"prompt": image_prompt[:4000],
12701266
"n": 1,
@@ -1274,11 +1270,12 @@ async def _generate_foundry_image(self, image_prompt: str, results: dict) -> Non
12741270
}
12751271
else:
12761272
# gpt-image-1 / gpt-image-1.5: quality is low/medium/high/auto; no response_format
1273+
api_version = app_settings.azure_openai.image_api_version or "2025-04-01-preview"
12771274
payload = {
12781275
"prompt": image_prompt,
12791276
"n": 1,
1280-
"size": "1024x1024",
1281-
"quality": "medium",
1277+
"size": app_settings.azure_openai.image_size or "1024x1024",
1278+
"quality": app_settings.azure_openai.image_quality or "medium",
12821279
}
12831280

12841281
async with httpx.AsyncClient(timeout=120.0) as client:
@@ -1505,13 +1502,13 @@ async def generate_content(
15051502
logger.info("Generating image via Foundry direct API...")
15061503
await self._generate_foundry_image(image_prompt, results)
15071504
else:
1508-
# Direct mode: use image agent to create prompt, then generate via DALL-E
1505+
# Direct mode: use image agent to create prompt, then generate via image generation model
15091506
image_response = await self._agents["image_content"].run(image_request)
15101507
results["image_prompt"] = str(image_response)
15111508

15121509
# Extract clean prompt from the response and generate actual image
15131510
try:
1514-
from agents.image_content_agent import generate_dalle_image
1511+
from agents.image_content_agent import generate_image
15151512

15161513
# Try to extract a clean prompt from the agent response
15171514
prompt_text = str(image_response)
@@ -1536,13 +1533,13 @@ async def generate_content(
15361533
exc_info=True
15371534
)
15381535

1539-
# Build product description for DALL-E context
1536+
# Build product description for image generation context
15401537
# Include detailed image descriptions if available for better color accuracy
15411538
product_description = detailed_image_context if detailed_image_context else product_context
15421539

1543-
# Generate the actual image using DALL-E
1544-
logger.info(f"Generating DALL-E image with prompt: {prompt_text[:200]}...")
1545-
image_result = await generate_dalle_image(
1540+
# Generate the actual image using image generation model
1541+
logger.info(f"Generating image with prompt: {prompt_text[:200]}...")
1542+
image_result = await generate_image(
15461543
prompt=prompt_text,
15471544
product_description=product_description,
15481545
scene_description=brief.visual_guidelines
@@ -1551,16 +1548,16 @@ async def generate_content(
15511548
if image_result.get("success"):
15521549
image_base64 = image_result.get("image_base64")
15531550
results["image_revised_prompt"] = image_result.get("revised_prompt")
1554-
logger.info("DALL-E image generated successfully")
1551+
logger.info("Image generated successfully")
15551552

15561553
# Save to blob storage
15571554
await self._save_image_to_blob(image_base64, results)
15581555
else:
1559-
logger.warning(f"DALL-E image generation failed: {image_result.get('error')}")
1556+
logger.warning(f"Image generation failed: {image_result.get('error')}")
15601557
results["image_error"] = image_result.get("error")
15611558

15621559
except Exception as img_error:
1563-
logger.exception(f"Error generating DALL-E image: {img_error}")
1560+
logger.exception(f"Error generating image: {img_error}")
15641561
results["image_error"] = str(img_error)
15651562

15661563
# Run compliance check
@@ -1713,7 +1710,7 @@ async def regenerate_image(
17131710
3. Maintains the campaign's tone and objectives
17141711
17151712
Return JSON with:
1716-
- "prompt": The new DALL-E prompt incorporating the modification
1713+
- "prompt": The new image generation prompt incorporating the modification
17171714
- "style": Visual style description
17181715
- "change_summary": Brief summary of what was changed
17191716
"""
@@ -1781,12 +1778,12 @@ async def regenerate_image(
17811778

17821779
# Generate the actual image
17831780
try:
1784-
from agents.image_content_agent import generate_dalle_image
1781+
from agents.image_content_agent import generate_image
17851782

17861783
product_description = detailed_image_context if detailed_image_context else product_context
17871784

1788-
logger.info(f"Generating modified DALL-E image: {prompt_text[:200]}...")
1789-
image_result = await generate_dalle_image(
1785+
logger.info(f"Generating modified image: {prompt_text[:200]}...")
1786+
image_result = await generate_image(
17901787
prompt=prompt_text,
17911788
product_description=product_description,
17921789
scene_description=brief.visual_guidelines
@@ -1795,14 +1792,14 @@ async def regenerate_image(
17951792
if image_result.get("success"):
17961793
image_base64 = image_result.get("image_base64")
17971794
results["image_revised_prompt"] = image_result.get("revised_prompt")
1798-
logger.info("Modified DALL-E image generated successfully")
1795+
logger.info("Modified image generated successfully")
17991796
await self._save_image_to_blob(image_base64, results)
18001797
else:
1801-
logger.warning(f"Modified DALL-E image generation failed: {image_result.get('error')}")
1798+
logger.warning(f"Modified image generation failed: {image_result.get('error')}")
18021799
results["image_error"] = image_result.get("error")
18031800

18041801
except Exception as img_error:
1805-
logger.exception(f"Error generating modified DALL-E image: {img_error}")
1802+
logger.exception(f"Error generating modified image: {img_error}")
18061803
results["image_error"] = str(img_error)
18071804

18081805
logger.info(f"Image regeneration complete. Has image: {bool(results.get('image_base64') or results.get('image_blob_url'))}")

content-gen/src/backend/services/blob_service.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ async def save_generated_image(
143143
content_type: str = "image/png"
144144
) -> str:
145145
"""
146-
Save a DALL-E generated image to blob storage.
146+
Save the generated image to blob storage.
147147
148148
Args:
149149
conversation_id: ID of the conversation that generated the image
@@ -205,8 +205,7 @@ async def generate_image_description(self, image_data: bytes) -> str:
205205
Generate a detailed text description of an image using GPT-5 Vision.
206206
207207
This is used to create descriptions of product images that can be
208-
used as context for DALL-E 3 image generation (since DALL-E 3
209-
cannot accept image inputs directly).
208+
used as context for image generation.
210209
211210
Args:
212211
image_data: Raw image bytes

0 commit comments

Comments
 (0)