@@ -192,7 +192,7 @@ def get_tasks(splitArrays, anchor_id, c, model_id, username, after):
192192 tasks = []
193193
194194 # Scenario 1: Empty DB, or just hunting for brand new messages
195- if len ( splitArrays ) == 0 :
195+ if not splitArrays :
196196 tasks .append (
197197 scrape_messages (
198198 c ,
@@ -207,20 +207,24 @@ def get_tasks(splitArrays, anchor_id, c, model_id, username, after):
207207
208208 # Scenarios 2 & 3: Dynamic Chunking
209209 for i , chunk in enumerate (splitArrays ):
210- is_first_chunk = i == 0
211- is_final_chunk = i == len (splitArrays ) - 1
212-
213- # The first chunk uses the teleport anchor.
214- # Subsequent chunks use the ID of the OLDEST message in the previous chunk.
215- start_id = (
216- anchor_id if is_first_chunk else splitArrays [i - 1 ][- 1 ].get ("post_id" )
217- )
210+
211+ # --- 1. Define the Start ID ---
212+ if i == 0 :
213+ start_id = None # Chunk 0: Teleport to the absolute newest message
214+ elif i == 1 :
215+ start_id = anchor_id # Chunk 1: Pick up at the cached anchor
216+ else :
217+ # Chunk 2+: Pick up at the tail of the previous chunk
218+ start_id = splitArrays [i - 1 ][- 1 ].get ("post_id" )
219+
220+ # --- 2. Define the Start Timestamp ---
218221 start_timestamp = (
219222 arrow .now ().float_timestamp
220- if is_first_chunk
223+ if i == 0
221224 else float (splitArrays [i - 1 ][- 1 ].get ("created_at" ))
222225 )
223226
227+ # --- 3. Build the Task ---
224228 tasks .append (
225229 scrape_messages (
226230 c ,
@@ -232,7 +236,7 @@ def get_tasks(splitArrays, anchor_id, c, model_id, username, after):
232236 {"post_id" : ele .get ("post_id" ), "timestamp" : ele .get ("created_at" )}
233237 for ele in chunk
234238 ],
235- is_last_chunk = is_final_chunk ,
239+ is_last_chunk = ( i == len ( splitArrays ) - 1 ) ,
236240 after = after ,
237241 )
238242 )
0 commit comments