Skip to content

Commit 949619b

Browse files
committed
fixed for readmes and robust04
1 parent 3a27e48 commit 949619b

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

index

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,14 @@ for collection in args.json["collections"]:
7373
{0}""".format(path).split(), env=my_env)
7474

7575
#grep out any readmes
76-
subprocess.run(["/bin/sh", "-c", "egrep -vi (readme|dtd) /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)
76+
subprocess.run(["/bin/sh", "-c", "egrep -vi 'readme' /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)
77+
78+
#grep out congressional record
79+
if name == "robust04":
80+
subprocess.run(["/bin/sh", "-c", "egrep -vi 'cr93|read|dtd' /work/terrier-core/etc/collection.spec > /work/terrier-core/etc/collection.spec.new; mv /work/terrier-core/etc/collection.spec.new /work/terrier-core/etc/collection.spec"], env=my_env)
81+
82+
print("Files to index...")
83+
subprocess.run(["wc", "-l", "/work/terrier-core/etc/collection.spec"])
7784

7885
cmd="""
7986
/work/terrier-core/bin/terrier batchindexing -p

0 commit comments

Comments
 (0)