forked from mikeizbicki/twitter_postgres_parallel
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_tweets_sequential.sh
More file actions
executable file
·24 lines (20 loc) · 1.08 KB
/
load_tweets_sequential.sh
File metadata and controls
executable file
·24 lines (20 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/bin/bash
#
# Load every entry found under data/ into three PostgreSQL databases, one
# file at a time, and report the wall-clock time of each phase:
#
#   1. "denormalized"        - raw lines are COPY'd into tweets_jsonb(data)
#   2. "pg_normalized"       - load_tweets.py is run once per file
#   3. "pg_normalized_batch" - load_tweets_batch.py is run once per file
#
# Usage: run from the directory that contains data/, load_tweets.py and
# load_tweets_batch.py. The script takes no arguments.
#
# Failures of individual files are not fatal: each loop simply moves on to
# the next file, so check the output for errors from psql / python3.

# Connection strings for the three target databases (one port per database).
readonly DENORMALIZED_DB='postgresql://postgres:pass@localhost:1317'
readonly NORMALIZED_DB='postgresql://postgres:pass@localhost:1318'
readonly NORMALIZED_BATCH_DB='postgresql://postgres:pass@localhost:1320/'

# Collect the input paths into an array using NUL delimiters, so that paths
# containing spaces, newlines or glob characters stay intact instead of being
# word-split (which is what an unquoted "$(find ...)" string would do).
files=()
while IFS= read -r -d '' path; do
  files+=("$path")
done < <(find data/* -print0)

echo '================================================================================'
echo 'load denormalized'
echo '================================================================================'
time for file in "${files[@]}"; do
  # unzip -p streams the archive contents to stdout; sed deletes every literal
  # \u0000 escape sequence (presumably because PostgreSQL's jsonb type rejects
  # it - confirm against the table definition). The control characters \x01
  # and \x02 are used as CSV quote/delimiter so that each input line is taken
  # as a single value for the "data" column.
  unzip -p "$file" \
    | sed 's/\\u0000//g' \
    | psql "$DENORMALIZED_DB" -c "COPY tweets_jsonb (data) FROM STDIN csv quote e'\x01' delimiter e'\x02';"
done

echo '================================================================================'
echo 'load pg_normalized'
echo '================================================================================'
time for file in "${files[@]}"; do
  python3 load_tweets.py --db "$NORMALIZED_DB" --inputs "$file"
done

echo '================================================================================'
echo 'load pg_normalized_batch'
echo '================================================================================'
time for file in "${files[@]}"; do
  # -u keeps Python's stdout/stderr unbuffered so progress shows up immediately.
  python3 -u load_tweets_batch.py --db="$NORMALIZED_BATCH_DB" --inputs "$file"
done