acryldata/datahub-helm

Ingestion Issues while trying to persist in postgres

karavanis opened this issue · 4 comments

I am facing the issue below when running an ingestion. These are the logs from GMS. Any idea what may be wrong?

2023-06-16 08:57:41,163 [qtp1820383114-281] ERROR c.l.m.filter.RestliLoggingFilter:38 - Rest.li error:
com.linkedin.restli.server.RestLiServiceException: javax.persistence.PersistenceException: Error when batch flush on sql: update metadata_aspect_v2 set metadata=?, createdOn=?, createdBy=?, createdFor=?, systemmetadata=? where urn=? and aspect=? and version=?
	at com.linkedin.metadata.restli.RestliUtil.toTask(RestliUtil.java:42)
	at com.linkedin.metadata.restli.RestliUtil.toTask(RestliUtil.java:50)
	at com.linkedin.metadata.resources.entity.EntityResource.ingest(EntityResource.java:261)
	at jdk.internal.reflect.GeneratedMethodAccessor237.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at com.linkedin.restli.internal.server.RestLiMethodInvoker.doInvoke(RestLiMethodInvoker.java:177)
	at com.linkedin.restli.internal.server.RestLiMethodInvoker.invoke(RestLiMethodInvoker.java:333)
	at com.linkedin.restli.internal.server.filter.FilterChainDispatcherImpl.onRequestSuccess(FilterChainDispatcherImpl.java:47)
	at com.linkedin.restli.internal.server.filter.RestLiFilterChainIterator.onRequest(RestLiFilterChainIterator.java:86)
	at com.linkedin.restli.internal.server.filter.RestLiFilterChainIterator.lambda$onRequest$0(RestLiFilterChainIterator.java:73)
	at java.base/java.util.concurrent.CompletableFuture.uniAcceptNow(CompletableFuture.java:753)
	at java.base/java.util.concurrent.CompletableFuture.uniAcceptStage(CompletableFuture.java:731)
	at java.base/java.util.concurrent.CompletableFuture.thenAccept(CompletableFuture.java:2108)
	at com.linkedin.restli.internal.server.filter.RestLiFilterChainIterator.onRequest(RestLiFilterChainIterator.java:72)
	at com.linkedin.restli.internal.server.filter.RestLiFilterChain.onRequest(RestLiFilterChain.java:55)
	at com.linkedin.restli.server.BaseRestLiServer.handleResourceRequest(BaseRestLiServer.java:262)
	at com.linkedin.restli.server.RestRestLiServer.handleResourceRequestWithRestLiResponse(RestRestLiServer.java:294)
	at com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:262)
	at com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:232)
	at com.linkedin.restli.server.RestRestLiServer.doHandleRequest(RestRestLiServer.java:215)
	at com.linkedin.restli.server.RestRestLiServer.handleRequest(RestRestLiServer.java:171)
	at com.linkedin.restli.server.RestLiServer.handleRequest(RestLiServer.java:130)
	at com.linkedin.restli.server.DelegatingTransportDispatcher.handleRestRequest(DelegatingTransportDispatcher.java:70)
	at com.linkedin.r2.filter.transport.DispatcherRequestFilter.onRestRequest(DispatcherRequestFilter.java:70)
	at com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:76)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)
	at com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)
	at com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)
	at com.linkedin.r2.filter.transport.ServerQueryTunnelFilter.onRestRequest(ServerQueryTunnelFilter.java:58)
	at com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:76)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)
	at com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)
	at com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)
	at com.linkedin.r2.filter.message.rest.RestFilter.onRestRequest(RestFilter.java:50)
	at com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:76)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)
	at com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)
	at com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)
	at com.linkedin.r2.filter.FilterChainImpl.onRestRequest(FilterChainImpl.java:106)
	at com.linkedin.r2.filter.transport.FilterChainDispatcher.handleRestRequest(FilterChainDispatcher.java:75)
	at com.linkedin.r2.util.finalizer.RequestFinalizerDispatcher.handleRestRequest(RequestFinalizerDispatcher.java:61)
	at com.linkedin.r2.transport.http.server.HttpDispatcher.handleRequest(HttpDispatcher.java:101)
	at com.linkedin.r2.transport.http.server.AbstractR2Servlet.service(AbstractR2Servlet.java:105)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
	at com.linkedin.restli.server.RestliHandlerServlet.service(RestliHandlerServlet.java:21)
	at com.linkedin.restli.server.RestliHandlerServlet.handleRequest(RestliHandlerServlet.java:26)
	at org.springframework.web.context.support.HttpRequestHandlerServlet.service(HttpRequestHandlerServlet.java:73)
	at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
	at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
	at org.eclipse.jetty.servlet.ServletHandler$ChainEnd.doFilter(ServletHandler.java:1631)
	at com.datahub.auth.authentication.filter.AuthenticationFilter.doFilter(AuthenticationFilter.java:102)
	at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
	at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1601)
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:548)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
	at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)
	at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1624)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1594)
	at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
	at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:191)
	at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
	at org.eclipse.jetty.server.Server.handle(Server.java:516)
	at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487)
	at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732)
	at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479)
	at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
	at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
	at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
	at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
	at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
	at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
	at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
	at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
	at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409)
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
	at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
	at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: javax.persistence.PersistenceException: Error when batch flush on sql: update metadata_aspect_v2 set metadata=?, createdOn=?, createdBy=?, createdFor=?, systemmetadata=? where urn=? and aspect=? and version=?
	at io.ebean.config.dbplatform.SqlCodeTranslator.translate(SqlCodeTranslator.java:52)
	at io.ebean.config.dbplatform.DatabasePlatform.translate(DatabasePlatform.java:219)
	at io.ebeaninternal.server.transaction.TransactionManager.translate(TransactionManager.java:246)
	at io.ebeaninternal.server.transaction.JdbcTransaction.translate(JdbcTransaction.java:698)
	at io.ebeaninternal.server.transaction.JdbcTransaction.batchFlush(JdbcTransaction.java:680)
	at io.ebeaninternal.server.transaction.JdbcTransaction.internalBatchFlush(JdbcTransaction.java:796)
	at io.ebeaninternal.server.transaction.JdbcTransaction.flushCommitAndNotify(JdbcTransaction.java:1005)
	at io.ebeaninternal.server.transaction.JdbcTransaction.commit(JdbcTransaction.java:1057)
	at io.ebeaninternal.api.ScopeTrans.commitTransaction(ScopeTrans.java:136)
	at io.ebeaninternal.api.ScopedTransaction.commit(ScopedTransaction.java:110)
	at com.linkedin.metadata.entity.ebean.EbeanAspectDao.runInTransactionWithRetry(EbeanAspectDao.java:503)
	at com.linkedin.metadata.entity.EntityService.ingestAspectsToLocalDB(EntityService.java:659)
	at com.linkedin.metadata.entity.EntityService.wrappedIngestAspectsToLocalDB(EntityService.java:547)
	at com.linkedin.metadata.entity.EntityService.ingestAspects(EntityService.java:712)
	at com.linkedin.metadata.entity.EntityService.ingestSnapshotUnion(EntityService.java:1468)
	at com.linkedin.metadata.entity.EntityService.ingestEntity(EntityService.java:1373)
	at com.linkedin.metadata.resources.entity.EntityResource.lambda$ingest$5(EntityResource.java:262)
	at com.linkedin.metadata.restli.RestliUtil.toTask(RestliUtil.java:30)
	... 88 common frames omitted
Caused by: java.sql.BatchUpdateException: Batch entry 1 update metadata_aspect_v2 set metadata='{"paths":["/path_striped_out_from_the_logs_by_me"]}', createdOn='2023-06-16 08:57:40.914+00', createdBy='urn:li:corpuser:__datahub_system', createdFor=NULL, systemmetadata='{"registryVersion":"0.0.0.0-dev","runId":"fac34b55-5f39-4eb1-8464-262e0b407673","registryName":"unknownRegistry","lastObserved":1686905860909}' where urn='urn:li:dataset:(urn:li:dataPlatform:oracle,path.striped.out.from.the.logs.by.me)' and aspect='browsePaths' and version=0 was aborted: ERROR: could not serialize access due to concurrent update  Call getNextException to see other errors in the batch.
	at org.postgresql.jdbc.BatchResultHandler.handleError(BatchResultHandler.java:165)
	at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:2366)
	at org.postgresql.core.v3.QueryExecutorImpl.execute(QueryExecutorImpl.java:559)
	at org.postgresql.jdbc.PgStatement.internalExecuteBatch(PgStatement.java:887)
	at org.postgresql.jdbc.PgStatement.executeBatch(PgStatement.java:910)
	at org.postgresql.jdbc.PgPreparedStatement.executeBatch(PgPreparedStatement.java:1649)
	at io.ebean.datasource.delegate.PreparedStatementDelegator.executeBatch(PreparedStatementDelegator.java:357)
	at io.ebeaninternal.server.persist.BatchedPstmt.executeAndCheckRowCounts(BatchedPstmt.java:130)
	at io.ebeaninternal.server.persist.BatchedPstmt.executeBatch(BatchedPstmt.java:97)
	at io.ebeaninternal.server.persist.BatchedPstmtHolder.flush(BatchedPstmtHolder.java:124)
	at io.ebeaninternal.server.persist.BatchControl.flushPstmtHolder(BatchControl.java:206)
	at io.ebeaninternal.server.persist.BatchControl.executeNow(BatchControl.java:220)
	at io.ebeaninternal.server.persist.BatchedBeanHolder.executeNow(BatchedBeanHolder.java:100)
	at io.ebeaninternal.server.persist.BatchControl.flush(BatchControl.java:271)
	at io.ebeaninternal.server.persist.BatchControl.flush(BatchControl.java:227)
	at io.ebeaninternal.server.transaction.JdbcTransaction.batchFlush(JdbcTransaction.java:678)
	... 101 common frames omitted
Caused by: org.postgresql.util.PSQLException: ERROR: could not serialize access due to concurrent update
	at org.postgresql.core.v3.QueryExecutorImpl.receiveErrorResponse(QueryExecutorImpl.java:2675)
	at org.postgresql.core.v3.QueryExecutorImpl.processResults(QueryExecutorImpl.java:2365)
	... 115 common frames omitted

This is a known issue, and I prepared a fix for it over the last two days; see datahub-project/datahub#8257.

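Alternatively, until the fix is merged and released, you can set the max_threads parameter to 1 for the datahub-rest sink in the ingestion recipe. With a single thread the sink should no longer send the concurrent writes that trigger the "could not serialize access due to concurrent update" error: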

sink:
    type: datahub-rest
    config:
        server: 'http://datahub-datahub-gms:8080'
        max_threads: 1

The server must also be specified when you set the max_threads parameter, but its value depends on the name of the release or of the namespace (on our cluster the release and the namespace have the same name, so I am not sure which of the two it is derived from).

This worked for me. Thanks!

This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io