-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-28265: Fix JDBC timeout message for hive.query.timeout.seconds #6412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
eec11d2
ab425d3
c5399f3
eaae132
aeb5a7c
6c5d8e9
ac0063c
9449a43
2ca54af
32aafd2
8283cd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| package org.apache.hive.jdbc; | ||
|
|
||
| import org.apache.commons.lang3.StringUtils; | ||
| import org.apache.hadoop.hive.conf.HiveConf; | ||
| import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; | ||
| import org.apache.hive.jdbc.logs.InPlaceUpdateStream; | ||
| import org.apache.hive.service.cli.RowSet; | ||
|
|
@@ -57,6 +58,9 @@ | |
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.concurrent.TimeUnit; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
|
|
||
| import static org.apache.hadoop.hive.ql.ErrorMsg.CLIENT_POLLING_OPSTATUS_INTERRUPTED; | ||
|
|
||
|
|
@@ -70,6 +74,10 @@ public class HiveStatement implements java.sql.Statement { | |
|
|
||
| public static final String QUERY_CANCELLED_MESSAGE = "Query was cancelled."; | ||
|
|
||
| /** Last assignment wins if multiple appear (e.g. multi-line script). Uses find(), not full-string match. */ | ||
| private static final Pattern SET_HIVE_QUERY_TIMEOUT_SECONDS = Pattern.compile( | ||
| "(?i)set\\s+hive\\.query\\.timeout\\.seconds\\s*=\\s*([^;\\n]+)"); | ||
|
|
||
| private final HiveConnection connection; | ||
| private TCLIService.Iface client; | ||
| private Optional<TOperationHandle> stmtHandle; | ||
|
|
@@ -298,6 +306,7 @@ public void closeOnCompletion() throws SQLException { | |
| public boolean execute(String sql) throws SQLException { | ||
| runAsyncOnServer(sql); | ||
| TGetOperationStatusResp status = waitForOperationToComplete(); | ||
| trackSessionQueryTimeoutIfSet(sql); | ||
|
|
||
| // The query should be completed by now | ||
| if (!status.isHasResultSet() && stmtHandle.isPresent() && !stmtHandle.get().isHasResultSet()) { | ||
|
|
@@ -398,20 +407,120 @@ private TGetOperationStatusResp waitForResultSetStatus() throws SQLException { | |
| return statusResp; | ||
| } | ||
|
|
||
| /** | ||
| * When {@code SET hive.query.timeout.seconds=...} succeeds, remember the effective value on the | ||
| * connection so {@code TIMEDOUT_STATE} can report it if the server omits {@code errorMessage} | ||
| * (HIVE-28265). | ||
| */ | ||
| private void trackSessionQueryTimeoutIfSet(String sql) { | ||
| if (sql == null) { | ||
| return; | ||
| } | ||
| Matcher m = SET_HIVE_QUERY_TIMEOUT_SECONDS.matcher(sql); | ||
| Long lastSec = null; | ||
| while (m.find()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I found this PR interesting. Unfortunately, I have no time to finish this review as I am leaving for a long vacation. But this part made me suspicious, as I'm pretty sure we usually don't read Hive configuration this way. My fault, I should have started with this one. Just thinking out loud: as far as I can see, HiveStatement doesn't contain any reference to the Hive configuration. Creating a HiveConf object is not the first thing that comes to my mind, and I'm afraid that this way you ignore the actual HiveConf loaded in the HiveServer session. I'm sad that I have no time to debug it, but to me it looks suspicious. Anyway, good luck with the PR. If you still have open questions at the end of next week, I would be happy to help and learn this part of the code.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you @InvisibleProgrammer. This was reproducible. Could you please let me know of any settings or runtime configs that you feel could fix the issue? Could you also please let me know how to refactor this, as I am new to the community?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks again for pointing at HIVE-28265 first — we stayed focused on the wrong error message, not changing how timeout itself works. What we changed in this update
|
||
| try { | ||
| HiveConf conf = new HiveConf(); | ||
| conf.set(HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS.varname, m.group(1).trim()); | ||
| long sec = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_QUERY_TIMEOUT_SECONDS, TimeUnit.SECONDS); | ||
| lastSec = sec; | ||
| } catch (Exception e) { | ||
| LOG.debug("Could not parse session query timeout fragment: {}", m.group(0), e); | ||
| } | ||
| } | ||
| if (lastSec != null) { | ||
| connection.setSessionQueryTimeoutSeconds(lastSec); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * HIVE-28265: Prefer server error text unless it is empty or the known-broken "0 seconds" case; | ||
| * otherwise derive seconds from JDBC {@link #setQueryTimeout(int)} or last session SET. | ||
| */ | ||
| private String sqlTimeoutMessageForTimedOutState(String serverMessage) { | ||
| if (!needsLocalTimeoutMessageForTimedOut(serverMessage)) { | ||
| return serverMessage; | ||
| } | ||
| long effectiveSec = resolveEffectiveTimeoutSecondsForMessage(); | ||
| if (effectiveSec > 0) { | ||
| return "Query timed out after " + effectiveSec + " seconds"; | ||
| } | ||
| return "Query timed out"; | ||
| } | ||
|
|
||
| /** | ||
| * Decides whether the server-supplied timeout message must be replaced by a locally built one. | ||
| * | ||
| * @param timeoutMsg error message reported by the server; may be null | ||
| * @return true when the message is blank or contains "after 0 seconds" (case-insensitive) | ||
| */ | ||
| private boolean needsLocalTimeoutMessageForTimedOut(String timeoutMsg) { | ||
| if (StringUtils.isBlank(timeoutMsg)) { | ||
| return true; | ||
| } | ||
| return StringUtils.containsIgnoreCase(timeoutMsg, "after 0 seconds"); | ||
| } | ||
|
|
||
| private long resolveEffectiveTimeoutSecondsForMessage() { | ||
| if (queryTimeout > 0) { | ||
| return queryTimeout; | ||
| } | ||
| long tracked = connection.getSessionQueryTimeoutSeconds(); | ||
| if (tracked > 0) { | ||
| return tracked; | ||
| } | ||
| return 0L; | ||
| } | ||
|
|
||
| /** | ||
| * Creates the exception thrown when the operation is reported as {@code CANCELED_STATE}. | ||
| * The message is {@link #QUERY_CANCELLED_MESSAGE}, followed by a space and the server error | ||
| * message when the server supplied a non-empty one. | ||
| * | ||
| * @param statusResp status response whose error message is appended when present | ||
| * @return the exception to throw, carrying SQLState 01000 | ||
| */ | ||
| private SQLException sqlExceptionForCanceledState(TGetOperationStatusResp statusResp) { | ||
| final String detail = statusResp.getErrorMessage(); | ||
| final boolean hasDetail = detail != null && !detail.isEmpty(); | ||
| final String text = hasDetail ? QUERY_CANCELLED_MESSAGE + " " + detail : QUERY_CANCELLED_MESSAGE; | ||
| // 01000 -> warning | ||
| return new SQLException(text, "01000"); | ||
| } | ||
|
|
||
| /** | ||
| * Handles one GetOperationStatus response: progress update, Thrift status check, then terminal states. | ||
| * Extracted to keep {@link #waitForOperationToComplete()} smaller for static analysis (Sonar). | ||
| * | ||
| * <p>On {@code CLOSED_STATE} or {@code FINISHED_STATE} this sets {@code isOperationComplete} to true and | ||
| * {@code isLogBeingGenerated} to false. The in-progress states and a response without an operation | ||
| * state leave those fields untouched. | ||
| * | ||
| * @param statusResp the response returned by the server for the current poll | ||
| * @throws SQLException for {@code CANCELED_STATE}, {@code ERROR_STATE} and {@code UKNOWN_STATE}, and as a | ||
| * {@code SQLTimeoutException} for {@code TIMEDOUT_STATE}; presumably also raised by | ||
| * {@code Utils.verifySuccessWithInfo} when the Thrift status is not a success (confirm in Utils) | ||
| */ | ||
| private void processOperationStatusResponse(TGetOperationStatusResp statusResp) throws SQLException { | ||
| // Forward progress to the in-place update stream, but only while the operation is still running. | ||
| if (!isOperationComplete && inPlaceUpdateStream.isPresent()) { | ||
| inPlaceUpdateStream.get().update(statusResp.getProgressUpdateResponse()); | ||
| } | ||
| // Check the Thrift call status before looking at the operation state. | ||
| Utils.verifySuccessWithInfo(statusResp.getStatus()); | ||
| // No operation state in this response: nothing to decide on this poll. | ||
| if (!statusResp.isSetOperationState()) { | ||
| return; | ||
| } | ||
| switch (statusResp.getOperationState()) { | ||
| case CLOSED_STATE: | ||
| case FINISHED_STATE: | ||
| // Completed: mark the operation done and stop log generation. | ||
| isOperationComplete = true; | ||
| isLogBeingGenerated = false; | ||
| break; | ||
| case CANCELED_STATE: | ||
| throw sqlExceptionForCanceledState(statusResp); | ||
| case TIMEDOUT_STATE: | ||
| // Message comes from the server text, or is built locally when that text is unusable (HIVE-28265). | ||
| throw new SQLTimeoutException(sqlTimeoutMessageForTimedOutState(statusResp.getErrorMessage())); | ||
| case ERROR_STATE: | ||
| // Propagate the error message, SQLState and error code reported in the response. | ||
| throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(), statusResp.getErrorCode()); | ||
| case UKNOWN_STATE: | ||
| throw new SQLException("Unknown query", "HY000"); | ||
| case INITIALIZED_STATE: | ||
| case PENDING_STATE: | ||
| case RUNNING_STATE: | ||
| // Still in progress: the caller keeps polling. | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | ||
| TGetOperationStatusResp statusResp = null; | ||
|
|
||
| final TGetOperationStatusReq statusReq = new TGetOperationStatusReq(stmtHandle.get()); | ||
| statusReq.setGetProgressUpdate(inPlaceUpdateStream.isPresent()); | ||
| boolean progressUpdates = inPlaceUpdateStream.isPresent(); | ||
| statusReq.setGetProgressUpdate(progressUpdates); | ||
|
|
||
| // Progress bar is completed if there is nothing to request | ||
| if (inPlaceUpdateStream.isPresent()) { | ||
| if (progressUpdates) { | ||
| inPlaceUpdateStream.get().getEventNotifier().progressBarCompleted(); | ||
| } | ||
|
|
||
| LOG.debug("Waiting on operation to complete: Polling operation status"); | ||
|
|
||
| // Poll on the operation status, till the operation is complete | ||
| do { | ||
| try { | ||
| if (Thread.currentThread().isInterrupted()) { | ||
|
|
@@ -424,37 +533,7 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | |
| */ | ||
| statusResp = client.GetOperationStatus(statusReq); | ||
| LOG.debug("Status response: {}", statusResp); | ||
| if (!isOperationComplete && inPlaceUpdateStream.isPresent()) { | ||
| inPlaceUpdateStream.get().update(statusResp.getProgressUpdateResponse()); | ||
| } | ||
| Utils.verifySuccessWithInfo(statusResp.getStatus()); | ||
| if (statusResp.isSetOperationState()) { | ||
| switch (statusResp.getOperationState()) { | ||
| case CLOSED_STATE: | ||
| case FINISHED_STATE: | ||
| isOperationComplete = true; | ||
| isLogBeingGenerated = false; | ||
| break; | ||
| case CANCELED_STATE: | ||
| // 01000 -> warning | ||
| final String errMsg = statusResp.getErrorMessage(); | ||
| final String fullErrMsg = | ||
| (errMsg == null || errMsg.isEmpty()) ? QUERY_CANCELLED_MESSAGE : QUERY_CANCELLED_MESSAGE + " " + errMsg; | ||
| throw new SQLException(fullErrMsg, "01000"); | ||
| case TIMEDOUT_STATE: | ||
| throw new SQLTimeoutException("Query timed out after " + queryTimeout + " seconds"); | ||
| case ERROR_STATE: | ||
| // Get the error details from the underlying exception | ||
| throw new SQLException(statusResp.getErrorMessage(), statusResp.getSqlState(), | ||
| statusResp.getErrorCode()); | ||
| case UKNOWN_STATE: | ||
| throw new SQLException("Unknown query", "HY000"); | ||
| case INITIALIZED_STATE: | ||
| case PENDING_STATE: | ||
| case RUNNING_STATE: | ||
| break; | ||
| } | ||
| } | ||
| processOperationStatusResponse(statusResp); | ||
| } catch (SQLException e) { | ||
| isLogBeingGenerated = false; | ||
| throw e; | ||
|
|
@@ -464,8 +543,7 @@ TGetOperationStatusResp waitForOperationToComplete() throws SQLException { | |
| } | ||
| } while (!isOperationComplete); | ||
|
|
||
| // set progress bar to be completed when hive query execution has completed | ||
| if (inPlaceUpdateStream.isPresent()) { | ||
| if (progressUpdates) { | ||
| inPlaceUpdateStream.get().getEventNotifier().progressBarCompleted(); | ||
| } | ||
| return statusResp; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thinking out loud: I wonder if a connection can have a concurrency issue. I mean, you can have multiple individual connections to Hive, but inside a single connection, can we have multiple Hive statements running in parallel?
I have no such use case in mind, but let me ping Ayush about this question.
@ayushtkn , what do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A single JDBC Connection can be shared across multiple threads, and it is entirely possible to have multiple HiveStatement objects executing concurrently on the same connection (which maps to a single session on the HS2 side).
Maybe not via Beeline, but in HiveServer2 (HS2) a single JDBC Connection corresponds to a single HS2 session. You can absolutely execute multiple queries concurrently within the same session by spawning multiple threads on the client side, each using a different HiveStatement created from that single HiveConnection.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thx