3232
3333def create_product_index_schema (index_name : str ) -> SearchIndex :
3434 """Create the schema for the product index.
35-
35+
3636 Args:
3737 index_name: Name of the index to create
38-
38+
3939 Returns:
4040 SearchIndex object with the schema
4141 """
@@ -127,7 +127,7 @@ def create_product_index_schema(index_name: str) -> SearchIndex:
127127
128128def create_index (index_client : SearchIndexClient , index_name : str ) -> None :
129129 """Create the search index if it doesn't exist.
130-
130+
131131 Args:
132132 index_client: Azure Search Index Client
133133 index_name: Name of the index to create
@@ -140,46 +140,45 @@ def create_index(index_client: SearchIndexClient, index_name: str) -> None:
140140
def generate_embeddings(openai_client: OpenAI, products: list[dict[str, Any]]) -> None:
    """Attach an embedding vector to every product, one request per product.

    The text embedded for a product is its name, its description and its
    categories (joined with single spaces), separated by single spaces. The
    embedding deployment name is read from the
    ``AZURE_OPENAI_EMBEDDING_DEPLOYMENT`` environment variable.

    Args:
        openai_client: OpenAI client; only ``embeddings.create`` is called.
        products: Product dictionaries providing ``name``, ``description`` and
            ``categories``. Each one gains an ``embedding`` key (modified in
            place).
    """
    print("Generating embeddings for products...")

    total = len(products)
    for done, item in enumerate(products, start=1):
        # Assemble the text that represents this product in vector space.
        name, description = item["name"], item["description"]
        categories = " ".join(item["categories"])
        embedding_input = f"{name} {description} {categories}"

        # One request per product; the first (only) result holds the vector.
        result = openai_client.embeddings.create(
            model=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"],
            input=embedding_input,
        )
        item["embedding"] = result.data[0].embedding

        # Progress report after every hundredth product.
        if done % 100 == 0:
            print(f" Generated embeddings for {done}/{total} products")

    print(f"Generated embeddings for all {total} products.")
165164
166165
def upload_products(search_client: SearchClient, products: list[dict[str, Any]]) -> None:
    """Upload products to the search index and verify every document.

    Documents are sent in batches of 1000. The per-document results returned
    by ``upload_documents`` are inspected, because a batch can be accepted
    while individual documents in it are rejected; previously those
    rejections went unnoticed and the function reported success regardless.

    Args:
        search_client: Azure Search Client
        products: List of product dictionaries

    Raises:
        RuntimeError: If any document was reported as not indexed. All
            batches are attempted before the error is raised.
    """
    print(f"Uploading {len(products)} products...")

    # Upload in batches of 1000 (Azure AI Search limit)
    batch_size = 1000
    failures = []
    for batch_number, start in enumerate(range(0, len(products), batch_size), start=1):
        batch = products[start : start + batch_size]
        results = search_client.upload_documents(documents=batch)

        # A client that returns nothing is treated as reporting no failures.
        rejected = [result for result in results or () if not result.succeeded]
        failures.extend(rejected)

        print(f"Uploaded batch {batch_number} ({len(batch)} products)")
        if rejected:
            print(f" Warning: {len(rejected)} products in batch {batch_number} failed to index")

    if failures:
        # Show a few concrete keys/errors so the failure is actionable.
        preview = ", ".join(f"{result.key}: {result.error_message}" for result in failures[:5])
        raise RuntimeError(
            f"{len(failures)} of {len(products)} products failed to upload ({preview})"
        )

    print(f"Successfully uploaded {len(products)} products.")
184183
185184
@@ -189,40 +188,39 @@ def main() -> None:
189188 search_service = os .environ ["AZURE_SEARCH_SERVICE" ]
190189 index_name = "zava-products-index"
191190 tenant_id = os .environ ["AZURE_TENANT_ID" ]
192-
191+
193192 # Create credential
194193 azure_credential = azure .identity .AzureCliCredential (tenant_id = tenant_id )
195-
194+
196195 # Create token provider for OpenAI
197196 token_provider = azure .identity .get_bearer_token_provider (
198197 azure_credential , "https://cognitiveservices.azure.com/.default"
199198 )
200-
199+
201200 # Create clients
202201 search_endpoint = f"https://{ search_service } .search.windows.net"
203202 index_client = SearchIndexClient (endpoint = search_endpoint , credential = azure_credential )
204203 search_client = SearchClient (endpoint = search_endpoint , index_name = index_name , credential = azure_credential )
205-
204+
206205 openai_client = OpenAI (
207- base_url = f"https://{ os .environ ['AZURE_OPENAI_SERVICE' ]} .openai.azure.com/openai/v1" ,
208- api_key = token_provider
206+ base_url = f"https://{ os .environ ['AZURE_OPENAI_SERVICE' ]} .openai.azure.com/openai/v1" , api_key = token_provider
209207 )
210-
208+
211209 # Create the index
212210 create_index (index_client , index_name )
213-
211+
214212 # Load product data
215213 print ("Loading product data from product_data_flat.json..." )
216214 with open ("zava_product_data/product_data_flat.json" ) as f :
217215 products = json .load (f )
218216 print (f"Loaded { len (products )} products." )
219-
217+
220218 # Generate embeddings
221219 generate_embeddings (openai_client , products )
222-
220+
223221 # Upload products
224222 upload_products (search_client , products )
225-
223+
226224 print ("\n ✓ All operations completed successfully!" )
227225 print (f" - Index: { index_name } " )
228226 print (f" - Products uploaded: { len (products )} " )
0 commit comments