2 Commits

Author SHA1 Message Date
enricobuehler a3e1ea2b44 fix(android/ci): retry transient Play API failures in play-upload.py
android / android (push) Waiting to run
ci / bench (push) Waiting to run
ci / docs-site (push) Waiting to run
ci / web (push) Waiting to run
ci / rust (push) Waiting to run
deb / build-publish (push) Waiting to run
decky / build-publish (push) Waiting to run
docker / deploy-docs (push) Blocked by required conditions
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Waiting to run
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Waiting to run
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Waiting to run
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Waiting to run
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Waiting to run
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Waiting to run
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Waiting to run
apple / swift (push) Successful in 1m9s
apple / screenshots (push) Successful in 4m2s
The uploader only caught HTTPError — a URLError (TLS "EOF occurred in
violation of protocol", the failure that dropped two release uploads on
2026-07-02) or a Google 5xx killed the job outright. Retry those with
3/9/27 s backoff; 4xx still fails fast. The edits API is transactional
until commit, so re-sending is safe.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 23:05:27 +00:00
enricobuehler 6686fcdded fix(gamestream/tests): sender_delivers_batches flaked under CI load — burst overflowed the default socket buffer
apple / swift (push) Successful in 1m12s
apple / screenshots (push) Successful in 4m26s
windows-host / package (push) Successful in 6m25s
ci / rust (push) Successful in 5m5s
ci / web (push) Successful in 51s
ci / docs-site (push) Successful in 1m4s
android / android (push) Failing after 10m7s
deb / build-publish (push) Successful in 3m35s
decky / build-publish (push) Successful in 21s
docker / build-push (--build-arg FEDORA_VERSION=44, ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora44-rpm) (push) Successful in 6s
docker / build-push (., web/Dockerfile, punktfunk-web) (push) Successful in 5s
docker / build-push (ci, ci/fedora-rpm.Dockerfile, punktfunk-fedora-rpm) (push) Successful in 4s
docker / build-push (ci, ci/rust-ci.Dockerfile, punktfunk-rust-ci) (push) Successful in 4s
docker / build-push (docs-site, docs-site/Dockerfile, punktfunk-docs) (push) Successful in 3s
rpm / build-publish (bazzite, punktfunk-fedora-rpm) (push) Has started running
docker / deploy-docs (push) Waiting to run
ci / bench (push) Successful in 4m38s
rpm / build-publish (fedora-44, punktfunk-fedora44-rpm) (push) Has started running
The test fired a burst of 3×100 datagrams (1200 B each) into an undrained loopback socket: at
~2.5 KB kernel truesize each, the default ~212 KB rmem holds only ~80 of them, so on
a starved CI runner (parallel release builds) the kernel silently dropped the
overflow and the recv loop could never reach 300 — surfacing as WouldBlock
after the 3 s timeout. Size the burst (3×20) to fit the default buffer even
with zero concurrent draining, and give recv a starvation-tolerant 10 s.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-02 22:35:23 +00:00
2 changed files with 36 additions and 12 deletions
+24 -7
View File
@@ -37,13 +37,30 @@ def call(method, url, token=None, data=None, content_type=None, want_json=True):
headers["Authorization"] = f"Bearer {token}"
if content_type:
headers["Content-Type"] = content_type
req = urllib.request.Request(url, data=data, method=method, headers=headers)
try:
with urllib.request.urlopen(req, timeout=300) as r:
body = r.read()
except urllib.error.HTTPError as e:
raise ApiError(e.code, method, url, e.read().decode("utf-8", "replace"))
return json.loads(body) if (want_json and body) else body
# Transient-fault retries: googleapis.com occasionally drops the TLS session ("EOF
# occurred in violation of protocol" — failed two release uploads on 2026-07-02) or
# answers 5xx. Retry those with backoff; 4xx raises immediately (a real API error).
# The edits API is transactional until commit, so re-sending any of these is safe.
last = None
for attempt in range(4):
if attempt:
delay = 3**attempt
print(f"transient Play API failure ({last}); retry {attempt}/3 in {delay}s")
time.sleep(delay)
req = urllib.request.Request(url, data=data, method=method, headers=headers)
try:
with urllib.request.urlopen(req, timeout=300) as r:
body = r.read()
return json.loads(body) if (want_json and body) else body
except urllib.error.HTTPError as e:
if e.code >= 500:
last = f"HTTP {e.code}"
continue
raise ApiError(e.code, method, url, e.read().decode("utf-8", "replace"))
except urllib.error.URLError as e:
last = str(getattr(e, "reason", e))
continue
sys.exit(f"ERROR: {method} {url} still failing after retries: {last}")
def load_sa():
+12 -5
View File
@@ -820,8 +820,10 @@ mod tests {
#[test]
fn sender_delivers_batches() {
let rx_sock = UdpSocket::bind("127.0.0.1:0").unwrap();
// Generous: on a CI host saturated by parallel release builds, this thread can be
// starved for whole seconds between recv() wakeups.
rx_sock
.set_read_timeout(Some(Duration::from_secs(3)))
.set_read_timeout(Some(Duration::from_secs(10)))
.unwrap();
let tx_sock = UdpSocket::bind("127.0.0.1:0").unwrap();
tx_sock.connect(rx_sock.local_addr().unwrap()).unwrap();
@@ -837,10 +839,15 @@ mod tests {
)
.unwrap();
// 3 frames of 100 packets, content-tagged for verification.
// 3 frames of 20 packets, content-tagged for verification. The TOTAL burst must fit
// the receive socket's DEFAULT buffer even if this thread never drains concurrently
// (a starved CI runner): a 1200 B datagram costs ~2.5 KB kernel truesize, and the
// default rmem (~212 KB) holds only ~80 — a bigger burst gets silently dropped by
// the kernel and the test can never complete (the old 3×100 flaked exactly there).
const PER_FRAME: usize = 20;
let mut sent = Vec::new();
for f in 0..3u8 {
let batch: PacketBatch = (0..100u8)
let batch: PacketBatch = (0..PER_FRAME as u8)
.map(|i| {
let mut p = vec![0u8; 1200];
p[0] = f;
@@ -859,10 +866,10 @@ mod tests {
let n = rx_sock.recv(&mut buf).expect("packet within timeout");
assert_eq!(n, 1200);
let (f, i) = (buf[0] as usize, buf[1] as usize);
assert_eq!(&buf[..n], &sent[f * 100 + i][..], "payload intact");
assert_eq!(&buf[..n], &sent[f * PER_FRAME + i][..], "payload intact");
got += 1;
}
assert_eq!(got, 300);
assert_eq!(got, 3 * PER_FRAME);
assert!(running.load(Ordering::SeqCst), "no spurious client-gone");
}
}