Skip to content

Support syncing by label #19

Open
ithinkihaveacat wants to merge 4 commits into marcboeker:main from
ithinkihaveacat:sync-by-label
Open

Support syncing by label #19
ithinkihaveacat wants to merge 4 commits into marcboeker:main from
ithinkihaveacat:sync-by-label

Conversation

@ithinkihaveacat
Copy link
Contributor

# only sync messages with the label "Transactional"
$ uv run main.py sync --label Transactional --data-dir ./data

@marcboeker
Copy link
Owner

Thanks for the PR. I love the idea of filtering emails before syncing, but I think it would be more flexible if we simply added a query argument to the command-line tool. Then you could say:

uv run main.py sync --query "label:Transactional" --data-dir ./data

You would then also be able to sync only emails without attachments, or only those for a specific user, etc. WDYT?

@ithinkihaveacat
Copy link
Contributor Author

@marcboeker Yes, that makes sense, especially if that use case is documented in the README. I'll update this PR.

@ithinkihaveacat
Copy link
Contributor Author

Updated PR.

Comment on lines +309 to +344
service = _create_service(credentials)
labels = get_labels(service)
all_message_ids = []
base_query = query if query else ""

if not full_sync:
# Fetch messages newer than the last indexed
last = db.last_indexed()
if last:
query.append(f"after:{int(last.timestamp())}")
new_query = f"{base_query} after:{int(last.timestamp())}".strip()
logging.info(f"Fetching new messages with query: {new_query}")
all_message_ids.extend(
get_message_ids_from_gmail(service, new_query, check_shutdown)
)

# Fetch messages older than the first indexed (backfill)
first = db.first_indexed()
if first:
query.append(f"before:{int(first.timestamp())}")

service = _create_service(credentials)
labels = get_labels(service)
old_query = f"{base_query} before:{int(first.timestamp())}".strip()
logging.info(f"Backfilling old messages with query: {old_query}")
all_message_ids.extend(
get_message_ids_from_gmail(service, old_query, check_shutdown)
)

all_message_ids = get_message_ids_from_gmail(service, query, check_shutdown)
# If it's the very first sync, there's no last or first, so run with the base query
if not last and not first:
logging.info(f"Performing initial sync with query: {base_query}")
all_message_ids.extend(
get_message_ids_from_gmail(service, base_query, check_shutdown)
)
else:
# Full sync requested
logging.info(f"Performing full sync with query: {base_query}")
all_message_ids = get_message_ids_from_gmail(
service, base_query, check_shutdown
)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can simplify this a bit. WDYT?

# Collect the Gmail message IDs to sync. Start from an empty list so the name
# is always bound, even when the fetch below fails and is only logged.
all_message_ids = []
try:
    service = _create_service(credentials)
    labels = get_labels(service)
    base_query = query or ""

    if full_sync:
        # Full sync uses only the base query
        query_str = base_query
        logging.info(f"Performing full sync with query: {query_str}")
    else:
        # Incremental sync: restrict the base query to messages outside the
        # already-indexed range (newer than the last indexed message, or
        # older than the first one for backfill).
        last = db.last_indexed()
        first = db.first_indexed()
        filters = []
        if last:
            filters.append(f"after:{int(last.timestamp())}")
        if first:
            filters.append(f"before:{int(first.timestamp())}")

        # Gmail ANDs space-separated search terms, so a plain
        # "after:<last> before:<first>" would match nothing (presumably
        # `last` is never earlier than `first` -- verify against the db
        # helpers). Wrapping both terms in braces makes Gmail OR them, while
        # the base query stays ANDed with the whole group.
        if len(filters) > 1:
            date_filter = "{" + " ".join(filters) + "}"
        else:
            # Zero or one filter: nothing to group. With no indexed messages
            # at all this leaves just the base query (initial sync).
            date_filter = "".join(filters)

        query_str = f"{base_query} {date_filter}".strip()
        logging.info(f"Fetching messages with compound query: {query_str}")

    all_message_ids = get_message_ids_from_gmail(
        service, query_str, check_shutdown
    )
except Exception:
    # Best-effort: record the failure with its traceback and continue with
    # whatever all_message_ids currently holds (empty on failure).
    logging.exception("Failed to sync messages")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants