diff --git a/translate/translate.go b/translate/translate.go index 73418d80..e407d367 100644 --- a/translate/translate.go +++ b/translate/translate.go @@ -15,9 +15,11 @@ package translate import ( "compress/flate" "compress/gzip" + "context" "crypto/rand" "encoding/hex" "encoding/json" + "errors" "fmt" "io" "net/http" @@ -27,6 +29,7 @@ import ( "strings" "sync" "time" + "unicode/utf8" "github.com/andybalholm/brotli" "github.com/imroc/req/v3" @@ -57,6 +60,26 @@ const ( impersonatedChromeMajor = "120" chromeExtensionVersion = "1.86.0" chromeExtensionID = "cofdbpoegempjloogbagkncekinflcnj" + + // oneshot enforces a 1500-character hard cap on the total length of + // the `text` array (sum across all items). Source: the extension's + // own `G.notLoggedIn = 1500` constant in background.js. The server + // returns 400 `{"errors":{"text":["text exceeds maximum length"]}}` + // past this; bail early to spare the upstream and give the caller a + // faster, less ambiguous error. + maxFreeTextLength = 1500 + + // oneshotTimeout caps how long we wait on a single translate request. + // Without an explicit timeout, a hung upstream connection would + // dangle indefinitely and the caller (e.g. browser extension) would + // sit on a spinner forever — observed in the field. + oneshotTimeout = 20 * time.Second + + // warmupTimeout caps the initial GET to www.deepl.com that seeds the + // cookie jar. Shorter than oneshotTimeout because warmup typically + // completes in well under a second; we'd rather skip a slow warmup + // (cookies are best-effort anyway) than block the first translation. + warmupTimeout = 5 * time.Second ) // instanceID mirrors the UUID the extension persists in chrome.storage on @@ -76,6 +99,14 @@ var ( cookieWarmer sync.Once ) +// oneshotClients caches one req.Client per proxy URL so all translate +// calls share the underlying TCP / TLS / HTTP/2 connection pool. +// Creating a fresh req.Client per request meant a brand-new TLS +// handshake every time (~200-400ms of overhead on top of DeepL's own +// ~1.5s processing latency). Reusing the client lets keep-alive + +// session tickets cut that to near zero on the warm path. +var oneshotClients sync.Map // map[string]*req.Client + func sharedCookieJar() http.CookieJar { cookieJarOnce.Do(func() { j, _ := cookiejar.New(nil) @@ -87,10 +118,15 @@ func sharedCookieJar() http.CookieJar { // warmCookies primes the shared jar by GETting www.deepl.com once. // The Set-Cookie response (userCountry / verifiedBot) lands on .deepl.com, // which is the eTLD+1 of oneshot-free.www.deepl.com, so subsequent POSTs -// to the oneshot endpoint will carry those cookies automatically. +// to the oneshot endpoint will carry those cookies automatically. The +// same request doubles as a TLS-handshake warmup: it leaves a live +// HTTP/2 connection to www.deepl.com in the client pool, which the +// first oneshot POST then resumes via TLS session tickets. func warmCookies(client *req.Client) { cookieWarmer.Do(func() { - _, _ = client.R().Get("https://www.deepl.com/translator") + ctx, cancel := context.WithTimeout(context.Background(), warmupTimeout) + defer cancel() + _, _ = client.R().SetContext(ctx).Get("https://www.deepl.com/translator") }) } @@ -239,8 +275,33 @@ type oneshotRequest struct { // headers (pragma, cache-control, upgrade-insecure-requests, sec-fetch-user) // that a fetch() never emits — wipe those so the WAF cannot tell us apart // on that axis. +// getOneshotClient returns a process-wide cached client for the given +// proxy URL, creating it on first use. Sharing the client across +// requests is the single biggest latency win we have on the warm path: +// it keeps the TLS / HTTP/2 connection in the pool so subsequent +// requests skip the handshake entirely. Kicks off cookie-jar warmup +// in the background on first creation so that the first real translate +// call lands on an already-established connection. +func getOneshotClient(proxyURL string) (*req.Client, error) { + if c, ok := oneshotClients.Load(proxyURL); ok { + return c.(*req.Client), nil + } + c, err := newOneshotClient(proxyURL) + if err != nil { + return nil, err + } + if actual, loaded := oneshotClients.LoadOrStore(proxyURL, c); loaded { + return actual.(*req.Client), nil + } + // First time we've seen this proxy. Kick warmup off in the + // background so the very first translate call can run in parallel + // with the TLS handshake to www.deepl.com. + go warmCookies(c) + return c, nil +} + func newOneshotClient(proxyURL string) (*req.Client, error) { - client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()) + client := req.C().ImpersonateChrome().SetCookieJar(sharedCookieJar()).SetTimeout(oneshotTimeout) for _, h := range []string{ "Pragma", "Cache-Control", @@ -270,11 +331,10 @@ func newOneshotClient(proxyURL string) (*req.Client, error) { // exactly. Omitting that header instead would put the request on a // different server-side auth branch. func callOneshot(endpoint string, body []byte, bearerToken, proxyURL string) (gjson.Result, int, error) { - client, err := newOneshotClient(proxyURL) + client, err := getOneshotClient(proxyURL) if err != nil { return gjson.Result{}, 0, err } - warmCookies(client) // no-op after the first translation in the process authValue := "None" if bearerToken != "" { @@ -349,6 +409,13 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string, }, nil } + if n := utf8.RuneCountInString(text); n > maxFreeTextLength { + return DeepLXTranslationResult{ + Code: http.StatusRequestEntityTooLarge, + Message: fmt.Sprintf("text exceeds maximum length: %d characters (anonymous oneshot limit is %d)", n, maxFreeTextLength), + }, nil + } + reqStruct := oneshotRequest{ Text: []string{text}, TargetLang: resolvedTarget, @@ -372,6 +439,16 @@ func TranslateByDeepLX(sourceLang, targetLang, text string, tagHandling string, id := time.Now().UnixMilli() result, status, err := callOneshot(endpoint, bodyBytes, dlSession, proxyURL) if err != nil { + // Map upstream timeouts to 504 so callers can distinguish "DeepL + // took too long" from other 503 failure modes (DNS, TLS, etc.). + var ue *url.Error + if errors.Is(err, context.DeadlineExceeded) || (errors.As(err, &ue) && ue.Timeout()) { + return DeepLXTranslationResult{ + ID: id, + Code: http.StatusGatewayTimeout, + Message: fmt.Sprintf("upstream DeepL request timed out after %s", oneshotTimeout), + }, nil + } return DeepLXTranslationResult{ ID: id, Code: http.StatusServiceUnavailable,