File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -124,7 +124,7 @@ def flip_image(img_name, img_dir):
124124 return
125125
126126
127- def check_text (file_path , lim = 3 ):
127+ def check_text (file_path , lim = 3 , convert_to_grayscale = False ):
128128 """Checks an image for burned-in text.
129129
130130 Parameters
@@ -140,6 +140,8 @@ def check_text(file_path, lim=3):
140140 True if the image has more than lim words, else False.
141141 """
142142 img = cv2 .imread (file_path )
143+ if convert_to_grayscale :
144+ img = cv2 .cvtColor (img , cv2 .COLOR_BGR2GRAY )
143145 tess = pytesseract .image_to_data (img , output_type = Output .DICT )
144146 words = tess ['text' ]
145147 if np .any ([len (w ) > lim for w in words ]):
Original file line number Diff line number Diff line change 2828 type = int ,
2929 default = 3 ,
3030 help = 'maximum allowable number of words per image' )
31+ parser .add_argument ('--convert_to_grayscale' ,
32+ action = 'store_true' )
3133 parser .add_argument ('--no_multiprocessing' ,
3234 action = 'store_true' )
33- parser .set_defaults (no_multiprocessing = False )
35+ parser .set_defaults (no_multiprocessing = False ,
36+ convert_to_grayscale = False )
3437 args = parser .parse_args ()
3538
3639 # Setting globals
3740 IMG_DIR = args .img_dir
3841 TEXT_DIR = args .text_dir
3942 NUM_WORDS = args .num_words
4043 USE_MULTIPROCESSING = not args .no_multiprocessing
44+ GRAY = args .convert_to_grayscale
4145
4246 # Importing the data
4347 files = os .listdir (IMG_DIR )
4448
4549 # Checking the files
4650 if USE_MULTIPROCESSING :
4751 with Pool () as p :
48- input = [(IMG_DIR + f , NUM_WORDS ) for f in files ]
52+ input = [(IMG_DIR + f , NUM_WORDS , GRAY ) for f in files ]
4953 res1 = p .starmap (check_text , input )
5054 p .close ()
5155 p .join ()
5963 p .close ()
6064 p .join ()
6165 else :
62- res1 = [check_text (IMG_DIR + f , NUM_WORDS ) for f in files ]
66+ res1 = [check_text (IMG_DIR + f , NUM_WORDS , GRAY ) for f in files ]
6367 with_text = np .where (res1 )[0 ]
6468 to_move = [files [i ] for i in with_text ]
6569 res2 = [os .rename (IMG_DIR + f , TEXT_DIR + f ) for f in to_move ]
You can’t perform that action at this time.
0 commit comments