Skip to content

Commit 44c6066

Browse files
Merge pull request #267 from Countly/fix/264-networking-deadlock-recovery
Fix/264 networking deadlock recovery
2 parents d79b4ac + dea12f1 commit 44c6066

8 files changed

Lines changed: 843 additions & 18 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
## XX.XX.XX
2+
* Fixed a bug where a non-JSON server response would cause a permanent networking deadlock, preventing all subsequent requests from being sent.
23
* Fixed a bug where a NullPointerException in SDKCore.recover() would permanently block SDK initialization when a crash file from a previous session existed on disk.
34

45
## 24.1.4

app-java/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ dependencies {
1616
//implementation "ly.count.sdk:java:${CLY_VERSION}"
1717
}
1818

19-
mainClassName = 'ly.count.java.demo.Sample'
19+
mainClassName = 'ly.count.java.demo.ReproduceIssue264'
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package ly.count.java.demo;
2+
3+
import com.sun.net.httpserver.HttpServer;
import java.io.File;
import java.io.OutputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicInteger;
import ly.count.sdk.java.Config;
import ly.count.sdk.java.Countly;
11+
12+
/**
13+
* Reproduces GitHub Issue #264:
14+
* "Non-JSON Server Response Causes Permanent Networking Deadlock"
15+
*
16+
* This app starts a local HTTP server that returns HTML (simulating a 502 error page),
17+
* initializes the Countly SDK against it, records events, and checks whether the SDK
18+
* gets permanently stuck.
19+
*
20+
* Run with: ./gradlew app-java:run
21+
* (after setting mainClassName = 'ly.count.java.demo.ReproduceIssue264' in app-java/build.gradle)
22+
*/
23+
public class ReproduceIssue264 {
24+
25+
public static void main(String[] args) throws Exception {
26+
AtomicInteger requestCount = new AtomicInteger(0);
27+
AtomicInteger successCount = new AtomicInteger(0);
28+
29+
// Start a local HTTP server that returns HTML for the first 3 requests,
30+
// then valid JSON for subsequent requests (simulating server recovery)
31+
HttpServer server = HttpServer.create(new InetSocketAddress(0), 0);
32+
int port = server.getAddress().getPort();
33+
34+
server.createContext("/", exchange -> {
35+
int count = requestCount.incrementAndGet();
36+
String body;
37+
int code;
38+
39+
if (count <= 3) {
40+
code = 502;
41+
body = "<html><body><h1>502 Bad Gateway</h1><p>The server is temporarily unavailable.</p></body></html>";
42+
System.out.println("[Mock Server] Request #" + count + " -> returning HTML 502 (simulating outage)");
43+
} else {
44+
code = 200;
45+
body = "{\"result\":\"Success\"}";
46+
successCount.incrementAndGet();
47+
System.out.println("[Mock Server] Request #" + count + " -> returning JSON 200 (server recovered)");
48+
}
49+
50+
exchange.sendResponseHeaders(code, body.length());
51+
OutputStream os = exchange.getResponseBody();
52+
os.write(body.getBytes());
53+
os.close();
54+
});
55+
56+
server.start();
57+
System.out.println("=== Issue #264 Reproduction ===");
58+
System.out.println("[Mock Server] Started on port " + port);
59+
System.out.println();
60+
61+
// Setup SDK storage directory
62+
String[] sdkStorageRootPath = { System.getProperty("user.home"), "__COUNTLY", "java_issue264" };
63+
File sdkStorageRootDirectory = new File(String.join(File.separator, sdkStorageRootPath));
64+
if (!(sdkStorageRootDirectory.exists() && sdkStorageRootDirectory.isDirectory())) {
65+
sdkStorageRootDirectory.mkdirs();
66+
}
67+
68+
// Initialize SDK pointing to our mock server
69+
Config config = new Config("http://localhost:" + port, "TEST_APP_KEY", sdkStorageRootDirectory)
70+
.setLoggingLevel(Config.LoggingLevel.WARN)
71+
.setDeviceIdStrategy(Config.DeviceIdStrategy.UUID)
72+
.enableFeatures(Config.Feature.Events, Config.Feature.Sessions)
73+
.setEventQueueSizeToSend(1);
74+
75+
Countly.instance().init(config);
76+
System.out.println("[SDK] Initialized against mock server");
77+
78+
// Start session (triggers first request -> will get HTML 502)
79+
Countly.session().begin();
80+
System.out.println("[SDK] Session started");
81+
82+
// Record an event (triggers another request -> will get HTML 502)
83+
Countly.instance().events().recordEvent("test_event_during_outage");
84+
System.out.println("[SDK] Event recorded");
85+
86+
// Wait for requests to be attempted
87+
System.out.println();
88+
System.out.println("[Test] Waiting 3 seconds for initial requests...");
89+
Thread.sleep(3000);
90+
91+
// Check if SDK is deadlocked via reflection (SDKCore.instance.networking is protected)
92+
boolean isSending = isNetworkingSending();
93+
94+
System.out.println();
95+
System.out.println("============================================================");
96+
if (isSending) {
97+
System.out.println(" BUG REPRODUCED: isSending() = true (DEADLOCKED!)");
98+
System.out.println(" The SDK is permanently stuck. No further requests");
99+
System.out.println(" will ever be sent, even when the server recovers.");
100+
} else {
101+
System.out.println(" FIX CONFIRMED: isSending() = false (recovered)");
102+
System.out.println(" The SDK handled the non-JSON response gracefully.");
103+
}
104+
System.out.println("============================================================");
105+
System.out.println();
106+
107+
// Try to trigger recovery by calling check
108+
System.out.println("[Test] Triggering networking check cycles (server now returns JSON)...");
109+
triggerNetworkingChecks(5);
110+
111+
int totalRequests = requestCount.get();
112+
int successes = successCount.get();
113+
114+
System.out.println();
115+
System.out.println("============================================================");
116+
System.out.println(" Total requests received by server: " + totalRequests);
117+
System.out.println(" Successful (JSON 200) responses: " + successes);
118+
if (successes > 0) {
119+
System.out.println(" SDK successfully retried after server recovered!");
120+
} else if (!isNetworkingSending()) {
121+
System.out.println(" SDK recovered from error. Requests will retry on");
122+
System.out.println(" the next timer tick (no deadlock).");
123+
} else {
124+
System.out.println(" SDK is STILL deadlocked. Bug confirmed.");
125+
}
126+
System.out.println("============================================================");
127+
128+
// Cleanup
129+
Countly.instance().stop();
130+
server.stop(0);
131+
System.out.println();
132+
System.out.println("[Done] Cleanup complete.");
133+
}
134+
135+
/**
136+
* Access SDKCore.instance.networking.isSending() via reflection
137+
* since these fields are protected/package-private.
138+
*/
139+
private static boolean isNetworkingSending() throws Exception {
140+
Class<?> sdkCoreClass = Class.forName("ly.count.sdk.java.internal.SDKCore");
141+
Field instanceField = sdkCoreClass.getDeclaredField("instance");
142+
instanceField.setAccessible(true);
143+
Object sdkCore = instanceField.get(null);
144+
145+
Field networkingField = sdkCoreClass.getDeclaredField("networking");
146+
networkingField.setAccessible(true);
147+
Object networking = networkingField.get(sdkCore);
148+
149+
return (boolean) networking.getClass().getMethod("isSending").invoke(networking);
150+
}
151+
152+
/**
153+
* Trigger SDKCore.instance.networking.check(config) via reflection.
154+
*/
155+
private static void triggerNetworkingChecks(int count) throws Exception {
156+
Class<?> sdkCoreClass = Class.forName("ly.count.sdk.java.internal.SDKCore");
157+
Field instanceField = sdkCoreClass.getDeclaredField("instance");
158+
instanceField.setAccessible(true);
159+
Object sdkCore = instanceField.get(null);
160+
161+
Field networkingField = sdkCoreClass.getDeclaredField("networking");
162+
networkingField.setAccessible(true);
163+
Object networking = networkingField.get(sdkCore);
164+
165+
Field configField = sdkCoreClass.getDeclaredField("config");
166+
configField.setAccessible(true);
167+
Object internalConfig = configField.get(sdkCore);
168+
169+
java.lang.reflect.Method checkMethod = networking.getClass().getMethod("check",
170+
Class.forName("ly.count.sdk.java.internal.InternalConfig"));
171+
172+
for (int i = 0; i < count; i++) {
173+
if (!isNetworkingSending()) {
174+
checkMethod.invoke(networking, internalConfig);
175+
}
176+
Thread.sleep(1000);
177+
}
178+
}
179+
}

sdk-java/src/main/java/ly/count/sdk/java/internal/Tasks.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public class Tasks {
2222
* Service which runs {@link Callable}s
2323
*/
2424
private final ExecutorService executor;
25-
private Long running = null;
25+
private volatile Long running = null;
2626

2727
/**
2828
* Map of {@link Future}s for {@link Callable}s not yet resolved
@@ -92,18 +92,20 @@ <T> Future<T> run(final Task<T> task, final Callback<T> callback) {
9292
@Override
9393
public T call() throws Exception {
9494
running = task.id;
95-
T result = task.call();
96-
synchronized (pending) {
97-
if (!task.id.equals(0L)) {
98-
pending.remove(task.id);
95+
try {
96+
T result = task.call();
97+
if (callback != null) {
98+
callback.call(result);
99+
}
100+
return result;
101+
} finally {
102+
synchronized (pending) {
103+
if (!task.id.equals(0L)) {
104+
pending.remove(task.id);
105+
}
106+
running = null;
99107
}
100-
running = null;
101-
// L.d("pending " + pending.keySet() + ", done running " + task.id);
102-
}
103-
if (callback != null) {
104-
callback.call(result);
105108
}
106-
return result;
107109
}
108110
});
109111

sdk-java/src/main/java/ly/count/sdk/java/internal/Transport.java

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ public Boolean send() {
342342
} catch (IOException e) {
343343
L.w("[network] Error while sending request " + request + " " + e);
344344
return false;
345+
} catch (Exception e) {
346+
L.e("[network] Unexpected error while sending request " + request + " " + e);
347+
return false;
345348
} finally {
346349
if (connection != null) {
347350
connection.disconnect();
@@ -354,12 +357,22 @@ public Boolean send() {
354357
Boolean processResponse(int code, String response, Long requestId) {
355358
L.i("[network] [processResponse] Code [" + code + "] response [" + response + "] for request[" + requestId + "]");
356359

357-
JSONObject jsonObject = new JSONObject(response);
358-
if (code >= 200 && code < 300 && jsonObject.has("result")) {
359-
L.d("[network] Success");
360-
return true;
361-
} else {
362-
L.w("[network] Fail: code :" + code + ", result: " + response);
360+
if (response == null) {
361+
L.w("[network] Null response for request [" + requestId + "]");
362+
return false;
363+
}
364+
365+
try {
366+
JSONObject jsonObject = new JSONObject(response);
367+
if (code >= 200 && code < 300 && jsonObject.has("result")) {
368+
L.d("[network] Success");
369+
return true;
370+
} else {
371+
L.w("[network] Fail: code :" + code + ", result: " + response);
372+
return false;
373+
}
374+
} catch (Exception e) {
375+
L.w("[network] Failed to parse response as JSON for request [" + requestId + "], response: [" + response + "], error: [" + e.getMessage() + "]");
363376
return false;
364377
}
365378
}

0 commit comments

Comments
 (0)