import base64
import io
import math
from inspect import cleandoc
import numpy as np
import requests
import torch
from PIL import Image
from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from comfy.utils import common_upscale
from comfy_api_nodes.apis import (
    OpenAIImageEditRequest,
    OpenAIImageGenerationRequest,
    OpenAIImageGenerationResponse,
)
from comfy_api_nodes.apis.client import ApiEndpoint, HttpMethod, SynchronousOperation

def downscale_input(image):
    """Downscale an input image tensor so its area roughly matches the API output size."""
    samples = image.movedim(-1, 1)
    # Downscale input images to roughly the same size as the outputs.
    total = int(1536 * 1024)
    scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2]))
    if scale_by >= 1:
        return image
    width = round(samples.shape[3] * scale_by)
    height = round(samples.shape[2] * scale_by)

    s = common_upscale(samples, width, height, "lanczos", "disabled")
    s = s.movedim(1, -1)
    return s
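
# Worked example: a 2048x2048 input gives scale_by = sqrt(1572864 / 4194304) ~= 0.612,
# so it is resized to about 1254x1254 before upload; inputs already at or below the
# 1536*1024 target area pass through unchanged.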
def validate_and_cast_response(response):
    """Validate the API response and convert the returned images into a batched tensor."""
    # validate raw JSON response
    data = response.data
    if not data or len(data) == 0:
        raise Exception("No images returned from API endpoint")

    # Initialize list to store image tensors
    image_tensors = []

    # Process each image in the data array
    for image_data in data:
        image_url = image_data.url
        b64_data = image_data.b64_json

        if not image_url and not b64_data:
            raise Exception("No image was generated in the response")

        if b64_data:
            img_data = base64.b64decode(b64_data)
            img = Image.open(io.BytesIO(img_data))

        elif image_url:
            img_response = requests.get(image_url)
            if img_response.status_code != 200:
                raise Exception("Failed to download the image")
            img = Image.open(io.BytesIO(img_response.content))

        img = img.convert("RGBA")

        # Convert to numpy array, normalize to float32 between 0 and 1
        img_array = np.array(img).astype(np.float32) / 255.0
        img_tensor = torch.from_numpy(img_array)

        # Add to list of tensors
        image_tensors.append(img_tensor)

    return torch.stack(image_tensors, dim=0)
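
# Note: the stacked result has shape [N, H, W, 4] (RGBA, float32 in [0, 1]),
# matching ComfyUI's batched IMAGE tensor layout.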

class OpenAIDalle2(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 2 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for DALL·E",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31 - 1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "size": (IO.COMBO, {
                    "options": ["256x256", "512x512", "1024x1024"],
                    "default": "1024x1024",
                    "tooltip": "Image size",
                }),
                "n": (IO.INT, {
                    "default": 1,
                    "min": 1,
                    "max": 8,
                    "step": 1,
                    "display": "number",
                    "tooltip": "How many images to generate",
                }),
                "image": (IO.IMAGE, {
                    "default": None,
                    "tooltip": "Optional reference image for image editing.",
                }),
                "mask": (IO.MASK, {
                    "default": None,
                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, image=None, mask=None, n=1, size="1024x1024", auth_token=None):
        model = "dall-e-2"
        path = "/proxy/openai/images/generations"
        request_class = OpenAIImageGenerationRequest
        img_binary = None

        if image is not None and mask is not None:
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest

            input_tensor = image.squeeze().cpu()
            height, width, channels = input_tensor.shape
            rgba_tensor = torch.ones(height, width, 4, device="cpu")
            rgba_tensor[:, :, :channels] = input_tensor

            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")
            # Invert the mask into the alpha channel: the edits endpoint regenerates
            # transparent pixels, so white mask areas become transparent.
            rgba_tensor[:, :, 3] = 1 - mask.squeeze().cpu()

            rgba_tensor = downscale_input(rgba_tensor.unsqueeze(0)).squeeze()

            image_np = (rgba_tensor.numpy() * 255).astype(np.uint8)
            img = Image.fromarray(image_np)
            img_byte_arr = io.BytesIO()
            img.save(img_byte_arr, format="PNG")
            img_byte_arr.seek(0)
            img_binary = img_byte_arr
            img_binary.name = "image.png"
        elif image is not None or mask is not None:
            raise Exception("Dall-E 2 image editing requires an image AND a mask")

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                n=n,
                size=size,
                seed=seed,
            ),
            files={
                "image": img_binary,
            } if img_binary else None,
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)
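
    # Illustrative usage (normally the ComfyUI executor calls this with widget values):
    #   tensor, = OpenAIDalle2().api_call("a red fox", n=2, size="512x512", auth_token=token)
    #   tensor.shape  # e.g. torch.Size([2, 512, 512, 4])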

class OpenAIDalle3(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's DALL·E 3 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for DALL·E",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31 - 1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "quality": (IO.COMBO, {
                    "options": ["standard", "hd"],
                    "default": "standard",
                    "tooltip": "Image quality",
                }),
                "style": (IO.COMBO, {
                    "options": ["natural", "vivid"],
                    "default": "natural",
                    "tooltip": "Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images.",
                }),
                "size": (IO.COMBO, {
                    "options": ["1024x1024", "1024x1792", "1792x1024"],
                    "default": "1024x1024",
                    "tooltip": "Image size",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, style="natural", quality="standard", size="1024x1024", auth_token=None):
        model = "dall-e-3"

        # build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path="/proxy/openai/images/generations",
                method=HttpMethod.POST,
                request_model=OpenAIImageGenerationRequest,
                response_model=OpenAIImageGenerationResponse
            ),
            request=OpenAIImageGenerationRequest(
                model=model,
                prompt=prompt,
                quality=quality,
                size=size,
                style=style,
                seed=seed,
            ),
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)

class OpenAIGPTImage1(ComfyNodeABC):
    """
    Generates images synchronously via OpenAI's GPT Image 1 endpoint.

    Uses the proxy at /proxy/openai/images/generations. Returned URLs are short-lived,
    so download or cache results if you need to keep them.
    """

    def __init__(self):
        pass

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (IO.STRING, {
                    "multiline": True,
                    "default": "",
                    "tooltip": "Text prompt for GPT Image 1",
                }),
            },
            "optional": {
                "seed": (IO.INT, {
                    "default": 0,
                    "min": 0,
                    "max": 2**31 - 1,
                    "step": 1,
                    "display": "number",
                    "tooltip": "not implemented yet in backend",
                }),
                "quality": (IO.COMBO, {
                    "options": ["low", "medium", "high"],
                    "default": "low",
                    "tooltip": "Image quality, affects cost and generation time.",
                }),
                "background": (IO.COMBO, {
                    "options": ["opaque", "transparent"],
                    "default": "opaque",
                    "tooltip": "Return image with or without background",
                }),
                "size": (IO.COMBO, {
                    "options": ["auto", "1024x1024", "1024x1536", "1536x1024"],
                    "default": "auto",
                    "tooltip": "Image size",
                }),
                "n": (IO.INT, {
                    "default": 1,
                    "min": 1,
                    "max": 8,
                    "step": 1,
                    "display": "number",
                    "tooltip": "How many images to generate",
                }),
                "image": (IO.IMAGE, {
                    "default": None,
                    "tooltip": "Optional reference image for image editing.",
                }),
                "mask": (IO.MASK, {
                    "default": None,
                    "tooltip": "Optional mask for inpainting (white areas will be replaced)",
                }),
                "moderation": (IO.COMBO, {
                    "options": ["low", "auto"],
                    "default": "low",
                    "tooltip": "Moderation level",
                }),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG"
            }
        }

    RETURN_TYPES = (IO.IMAGE,)
    FUNCTION = "api_call"
    CATEGORY = "api node"
    DESCRIPTION = cleandoc(__doc__ or "")
    API_NODE = True

    def api_call(self, prompt, seed=0, quality="low", background="opaque", image=None, mask=None, n=1, size="1024x1024", auth_token=None, moderation="low"):
        model = "gpt-image-1"
        path = "/proxy/openai/images/generations"
        request_class = OpenAIImageGenerationRequest
        img_binaries = []
        mask_binary = None
        files = []

        if image is not None:
            path = "/proxy/openai/images/edits"
            request_class = OpenAIImageEditRequest

            batch_size = image.shape[0]

            for i in range(batch_size):
                single_image = image[i:i + 1]
                scaled_image = downscale_input(single_image).squeeze()

                image_np = (scaled_image.numpy() * 255).astype(np.uint8)
                img = Image.fromarray(image_np)
                img_byte_arr = io.BytesIO()
                img.save(img_byte_arr, format="PNG")
                img_byte_arr.seek(0)
                img_binary = img_byte_arr
                img_binary.name = f"image_{i}.png"

                img_binaries.append(img_binary)
                # A single input goes in the "image" field; batches are sent as
                # repeated "image[]" fields.
                if batch_size == 1:
                    files.append(("image", img_binary))
                else:
                    files.append(("image[]", img_binary))

        if mask is not None:
            if image is None:
                raise Exception("Cannot use a mask without an input image")
            if image.shape[0] != 1:
                raise Exception("Cannot use a mask with multiple images")
            if mask.shape[1:] != image.shape[1:-1]:
                raise Exception("Mask and Image must be the same size")

            batch, height, width = mask.shape
            rgba_mask = torch.zeros(height, width, 4, device="cpu")
            # The edits endpoint regenerates transparent pixels, so the inverted
            # mask (white = replace) becomes the alpha channel.
            rgba_mask[:, :, 3] = 1 - mask.squeeze().cpu()

            scaled_mask = downscale_input(rgba_mask.unsqueeze(0)).squeeze()

            mask_np = (scaled_mask.numpy() * 255).astype(np.uint8)
            mask_img = Image.fromarray(mask_np)
            mask_img_byte_arr = io.BytesIO()
            mask_img.save(mask_img_byte_arr, format="PNG")
            mask_img_byte_arr.seek(0)
            mask_binary = mask_img_byte_arr
            mask_binary.name = "mask.png"
            files.append(("mask", mask_binary))

        # Build the operation
        operation = SynchronousOperation(
            endpoint=ApiEndpoint(
                path=path,
                method=HttpMethod.POST,
                request_model=request_class,
                response_model=OpenAIImageGenerationResponse
            ),
            request=request_class(
                model=model,
                prompt=prompt,
                quality=quality,
                background=background,
                n=n,
                seed=seed,
                size=size,
                moderation=moderation,
            ),
            files=files if files else None,
            auth_token=auth_token
        )

        response = operation.execute()

        img_tensor = validate_and_cast_response(response)
        return (img_tensor,)

# A dictionary that contains all nodes you want to export with their names
# NOTE: names should be globally unique
NODE_CLASS_MAPPINGS = {
    "OpenAIDalle2": OpenAIDalle2,
    "OpenAIDalle3": OpenAIDalle3,
    "OpenAIGPTImage1": OpenAIGPTImage1,
}

# A dictionary that contains the friendly/humanly readable titles for the nodes
NODE_DISPLAY_NAME_MAPPINGS = {
    "OpenAIDalle2": "OpenAI DALL·E 2",
    "OpenAIDalle3": "OpenAI DALL·E 3",
    "OpenAIGPTImage1": "OpenAI GPT Image 1",
}
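
# ComfyUI registers these nodes by importing the two mappings above at startup.
# A minimal, hypothetical package __init__.py re-exporting them (module name
# assumed for illustration) might look like:
#
#     from .openai_image_nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS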