Slow blob fetch into java procedure
AlexBekhtin opened this issue · 5 comments
in DB:
create or alter procedure java_split (
dummy blob SUB_TYPE TEXT,
delimiter varchar(10)
) returns (item varchar(60))
external name 'org.firebirdsql.fbjava.examples.fbjava_example.FbSplit.split()'
engine java;Java:
public class FbSplit {
public static ExternalResultSet split() throws Exception {
final ProcedureContext context = ProcedureContext.get();
return new ExternalResultSet() {
ValuesMetadata outMetadata = context.getOutputMetadata();
Values outValues = context.getOutputValues();
Values inputValues = context.getInputValues();
String[] items_list = null;
int item_index;
@Override
public boolean fetch() throws Exception {
if (items_list == null) {
java.lang.Object blob_object = inputValues.getObject(1);
// FBBlob blob = (FBBlob) blob_object;
java.sql.Blob blob = (java.sql.Blob) blob_object;
byte[] bytes_data = blob.getBytes(1, (int) (blob.length())); // TO SLOW
String blob_string = new String(bytes_data);
items_list = blob_string.split(inputValues.getObject(2).toString());
}
if (item_index < items_list.length) {
outValues.setObject(1, items_list[item_index]);
item_index++;
return true;
}
return false;
}
};
}
}then call
select * from java_split(6 000 000 characters length blob, ',')40 sec execution time
but if I call from standalone java application ~1-2 sec
env:
java 8
jaybird 2.2.10
@AlexBekhtin can you please test Jaybird 3 with embedded connection string in standalone application?
By your code looks like as this should be registered in Jaybird project.
Maybe @mrotteveel knows something about.
Jaybird is currently allocating a new ByteBuffer for each segment retrieved. This might be unnecessary overhead, I have http://tracker.firebirdsql.org/browse/JDBC-378 open to investigate that. Also, Jaybird currently defaults to retrieval in segments of 16KB, increasing that to 32KB might also help (connection property blobBufferSize).
I will see if I can reproduce the performance problem with native or embedded and investigate options to alleviate that.
Just did a quick test on Jaybird 3 (HEAD of master) with a 6MB (binary) blob. Retrieval with PURE_JAVA is +/- 100ms, NATIVE = +/- 200ms, LOCAL = +/- 75ms, EMBEDDED +/- 50ms. So from a perspective from Jaybird everything seems fine. I will test this weekend if I can reproduce it with fb/java.
I tested again and have no difference (isql embedded connection + jaybird 3.0.0-SNAPSHOT).
I tried to re-write the code:
FBBlob blob = (FBBlob) blob_object;
// java.sql.Blob blob = (java.sql.Blob) blob_object;
if (true) {
throw new Exception("blob length is " + Long.toString(blob.length()));
}
byte[] bytes_data = blob.getBytes(1, (int) (blob.length())); // TO SLOWand have new abnormal result
select * from java_split((select list(word,',') from words_test), ',')
-- java.lang.Exception: blob length is 6645056
-- it's correct size. In the first time I tasted like this
recreate table blob_table (
blob_field blob sub_type text
);
insert into blob_table
select list(word,',') from words_test
select * from java_split((select blob_field from blob_table), ',')
-- java.lang.Exception: blob length is 4287993088
-- ???
execute block as
declare variable cnt int;
declare variable blob_data blob sub_type text;
begin
select blob_field from blob_table into :blob_data;
select count(*) from java_split(:blob_data, ',')
into :cnt;
end
-- java.lang.Exception: blob length is 4287993088I also have spurious results when trying to use this on blobs from a table. It looks like the reported length is wrong for those blobs.