-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsecondCommentsTikTok.py
More file actions
193 lines (157 loc) · 9.01 KB
/
secondCommentsTikTok.py
File metadata and controls
193 lines (157 loc) · 9.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Using the TikTok Native App API to retrieve all the first-layer comments made on the posts.
# This file is different: when the first round of TikTok runs crashes,
# we create a list of the videos that were not scraped and run them here in this file.
# The comments are added to output1.json, then copied from there and pasted into our original output.json file.
# Most of the code in this file is identical to commentsTiktok.py,
# with the exception of how videos are fed into the scraping function.
# Importing all required libraries
import requests
import json
import utc
# post_uri = "https://www.tiktok.com/@hungrymanbutteranch/video/7219118527607983406" # TikTok reel link url
# Browser-like request headers sent with every call made by req().
# The values imitate desktop Chrome 129 on Windows (see 'user-agent' and the
# 'sec-ch-ua*' client hints below).
headers = {
    # Wildcard media range; the previous value '*/**' named a non-existent subtype.
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.9, fa;q=0.8',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://www.tiktok.com/explore',
    'sec-ch-ua': '"Google Chrome"; v="129", "Not-A?Brand"; v="8", "Chromium";v="129"',
    # Sec-CH-UA-Mobile is a structured-field boolean: '?0' means "not a mobile device".
    # The previous value '0' is not a valid boolean for this header.
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
}
# Creating a request to the TikTok API for the comments
def req(post_id, curs, timeout=30):
    """Request one page of comments for a video and return the decoded JSON.

    Args:
        post_id: Video id, substituted into the ``aweme_id`` query parameter.
        curs: Paging offset, substituted into the ``cursor`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        json.JSONDecodeError: If the body is not valid JSON (e.g. empty).
    """
    # NOTE(review): msToken, X-Bogus, _signature, verifyFp and device_id are
    # hard-coded; presumably they were captured from a browser session and may
    # stop working at some point - confirm before relying on this URL.
    # The "&region=CA" parameter had been corrupted to "®ion=CA" (the "&reg"
    # prefix was decoded as an HTML entity), which silently dropped the region.
    url = f'https://www.tiktok.com/api/comment/list/?WebIdLastTime=1729409061&aid=1988&app_language=en&app_name=tiktok_web&aweme_id={post_id}&browser_language=en-GB&browser_name=Mozilla&browser_online=true&browser_platform=MacIntel&browser_version=5.0%20%28Linux%3B%20Android%206.0%3B%20Nexus%205%20Build%2FMRA58N%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Chrome%2F128.0.0.0%20Mobile%20Safari%2F537.36&channel=tiktok_web&cookie_enabled=true&count=200&cursor={curs}&data_collection_enabled=true&device_id=7427755272205075973&device_platform=web_mobile&focus_state=false&from_page=video&history_len=3&is_fullscreen=true&is_page_visible=true&odinId=7427755310612186117&os=android&priority_region=&referer=&region=CA&screen_height=1146&screen_width=1534&tz_name=America%2FToronto&user_is_login=false&verifyFp=verify_latio6ct_5FzXpKng_5ZAZ_4unS_AkTf_iCW6vWcdOHIQ&webcast_language=en&msToken=qQqwH0ExH-xY6AzFnFs0j_wVEckhRYWRx333JYcTAeFVrG8lEVaWXPdpNNNjLuJEpba7iKL-0zawApLPRqtf5y2izVrEsx1vg5A738_qWf8YDQyUJ7pNcmCvcI9fBer50jhGlZYwgbQHOW3ISBksR6xPhkY=&X-Bogus=DFSzswVuqE0ANJoutQDZ3GhyS0lt&_signature=_02B4Z6wo00001VebAhQAAIDAZDtjuzmji8FXuwaAADL1f2'
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON error further down.
    response.raise_for_status()
    # Load the body into a parsable JSON structure.
    return json.loads(response.text)
# Creating a parser method to trim the API responses down to only the desired information
def parser(data, comments):
    """Extract the fields of interest from one page of comment data.

    Args:
        data: Decoded JSON response (a dict). Its 'comments' entry is read; a
            missing or null entry is treated as an empty page instead of
            raising.
        comments: List that receives one dict per parsed comment. It is
            mutated in place, and only after the whole page parsed cleanly.

    Returns:
        ``data`` itself, unchanged, so the caller can inspect its other fields.

    Raises:
        KeyError: If a comment object lacks one of the fields read by
            ``_parse_comment``.
    """
    page = data.get('comments') or []
    print("number of comments in this pull: \t")
    print(len(page))  # total number of comments in this pull
    # Parse the full page first so a failure midway leaves `comments` untouched.
    parsed = [_parse_comment(cm) for cm in page]
    comments.extend(parsed)
    return data


def _parse_comment(cm):
    """Build the output record for a single raw comment object."""
    com = cm['share_info']['desc']
    reply_num = cm['reply_comment_total']
    name = cm['user']['nickname']
    time = cm['create_time']
    title = cm['share_info']['title']
    urli = cm['share_info']['url']

    # Keep only what follows the first "comment: " marker. Splitting once
    # preserves comment bodies that themselves contain the marker text.
    if "comment: " in com:
        com = com.split("comment: ", 1)[1]
    # Fall back to the raw text field when the description yields nothing.
    if com == "":
        com = cm['text']

    if name == "":
        name = "anonymous"

    # Timestamps are stored as strings for easy parsing later; an empty
    # timestamp defaults to the current time.
    if time == "":
        time = str(utc.now())
    else:
        time = str(utc.fromtimestamp(time))

    if title == "":
        title = "Error couldn't be retrieved"
    if urli == "":
        urli = "Error couldn't be retrieved"

    return {
        "user_name": name,
        "comment": com,
        "replies": reply_num,
        "titles": title,
        "urlis": urli,
        "times": time,
        # ToDo Name of the company added here. Can be changed from a case to case basis
        # NOTE(review): the key spelling "comapany" is kept as-is; existing
        # output files presumably use it - confirm before renaming.
        "comapany": "BudLight",
    }
# Get comments from post function
def get_comments(posts):
    """Fetch the comments of every post and append them to output1.json.

    Args:
        posts: Iterable of video URLs whose last path segment is the video id.

    For each URL, pages are requested with ``req`` and fed to ``parser`` until
    the response no longer reports ``has_more == 1``. The collected comments
    are then appended to ``output1.json`` as one JSON array per post.
    """
    for post_url in posts:
        print(post_url)
        post_id = _post_id_from_url(post_url)
        comments = []  # all comments retrieved for this post
        curs = 0  # paging cursor, starts at the first page

        while True:
            raw_data = req(post_id, curs)
            same_data = parser(raw_data, comments)
            # A response without 'has_more' ends paging for this post rather
            # than raising KeyError and losing what was already collected.
            if same_data.get('has_more') == 1:
                curs += 200  # advance by the page size requested in req()
                print('moving to the next cursor')
            else:
                print('no more data available')
                break
        print()

        # Append mode: each post adds its own JSON array to the file.
        with open('output1.json', 'a', encoding='utf-8') as f:
            json.dump(comments, f, ensure_ascii=False, indent=4)
        print("\ndata has been saved into a JSON file")


def _post_id_from_url(post_url):
    """Return the last path segment of a video URL, ignoring query, fragment and trailing slash."""
    path = post_url.split("?", 1)[0].split("#", 1)[0]
    return path.rstrip("/").split("/")[-1]
# Videos whose comments still need to be scraped, one URL per line.
# The text block is split on whitespace, so URLs can be pasted in as-is.
posts = """
https://www.tiktok.com/@zakkittle/video/7223213680941485354
https://www.tiktok.com/@stephenamon_/video/7223839056256437510
https://www.tiktok.com/@sterling_archer_ba/video/7222793317883333930
https://www.tiktok.com/@newoldheads/video/7221932764721941802
https://www.tiktok.com/@silverbackgreybeard/video/7225656897964150062
https://www.tiktok.com/@darcealearlgates/video/7221956957366652203
https://www.tiktok.com/@funnycleanvideos/video/7227278269509389610
https://www.tiktok.com/@pjadz/video/7230545398425767211
https://www.tiktok.com/@_laura_elisa/video/7228036362782510378
https://www.tiktok.com/@chefload/video/7225757610274737450
https://www.tiktok.com/@bigboytater/video/7226340084356828462
https://www.tiktok.com/@scrizzapp/video/7225956910254542126
https://www.tiktok.com/@arnegeerdts/video/7230103582169730309
https://www.tiktok.com/@spencerjordan/video/7231701360624848170
https://www.tiktok.com/@throughbeingcooltattoo/video/7227528018653138219
https://www.tiktok.com/@miggys79/video/7223800498464230702
https://www.tiktok.com/@feralgigi/video/7222330177219513643
https://www.tiktok.com/@leighlou1970/video/7227963733866204458
https://www.tiktok.com/@newslitproject/video/7223500471430860074
https://www.tiktok.com/@realmattthewelder/video/7225367108186574123
https://www.tiktok.com/@thealabamaboss/video/7230690770984897838
https://www.tiktok.com/@moethunder092/video/7224282931450154246
https://www.tiktok.com/@nwslsoccer/video/7223824375068282155
""".split()

# Run the scrape for every URL listed above.
get_comments(posts)