Skip to content

Commit ebef39f

Browse files
authored
fix: double type is not handled correctly in TSZ compression on Windows (#34847)
* fix: failed compress cases on Windows (test_compress_basic.py, test_compress_alter_table.py)
* fix: address AI review comments
1 parent 6e2bc39 commit ebef39f

16 files changed

Lines changed: 88 additions & 83 deletions

contrib/TSZ/sz/inc/ByteToolkit.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,13 @@ extern "C" {
1515
#endif
1616

1717
#include <stdio.h>
18+
#include <stdint.h>
1819

19-
int bytesToInt_bigEndian(unsigned char* bytes);
20-
void intToBytes_bigEndian(unsigned char *b, unsigned int num);
20+
int bytesToInt32_bigEndian(unsigned char* bytes);
21+
void int32ToBytes_bigEndian(unsigned char *b, unsigned int num);
2122

22-
long bytesToLong_bigEndian(unsigned char* b);
23-
void longToBytes_bigEndian(unsigned char *b, long num);
23+
uint64_t bytesToInt64_bigEndian(unsigned char* b);
24+
void int64ToBytes_bigEndian(unsigned char *b, uint64_t num);
2425

2526
short getExponent_float(float value);
2627
short getPrecisionReqLength_float(float precision);

contrib/TSZ/sz/inc/CompressElement.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ extern "C" {
1919
typedef struct DoubleValueCompressElement
2020
{
2121
double data;
22-
long curValue;
22+
uint64_t curValue;
2323
unsigned char curBytes[8]; //big_endian
2424
int reqBytesLength;
2525
int resiBitsLength;
@@ -28,7 +28,7 @@ typedef struct DoubleValueCompressElement
2828
typedef struct FloatValueCompressElement
2929
{
3030
float data; // diffValue + medianValue
31-
int curValue; // diff int value
31+
uint32_t curValue; // diff int value
3232
unsigned char curBytes[4]; // dif bytes value diffValue->iValue big_endian
3333
int reqBytesLength;
3434
int resiBitsLength;

contrib/TSZ/sz/inc/Huffman.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#ifndef _Huffman_H
1111
#define _Huffman_H
1212

13+
#include <stdint.h>
14+
1315
#ifdef __cplusplus
1416
extern "C" {
1517
#endif
@@ -32,7 +34,7 @@ typedef struct HuffmanTree {
3234
node *qqq, *qq; //the root node of the HuffmanTree is qq[1]
3335
int n_nodes; //n_nodes is for compression
3436
int qend;
35-
unsigned long **code;
37+
uint64_t **code;
3638
unsigned char *cout;
3739
int n_inode; //n_inode is for decompression
3840
int maxBitCount;
@@ -45,7 +47,7 @@ node new_node(HuffmanTree *huffmanTree, size_t freq, unsigned int c, node a, nod
4547
node new_node2(HuffmanTree *huffmanTree, unsigned int c, unsigned char t);
4648
void qinsert(HuffmanTree *huffmanTree, node n);
4749
node qremove(HuffmanTree *huffmanTree);
48-
void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2);
50+
void build_code(HuffmanTree *huffmanTree, node n, int len, uint64_t out1, uint64_t out2);
4951
void init(HuffmanTree *huffmanTree, int *s, size_t length);
5052
void init_static(HuffmanTree *huffmanTree, int *s, size_t length);
5153
void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out, size_t *outSize);

contrib/TSZ/sz/inc/dataCompression.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ float min_f(float a, float b);
3939
float max_f(float a, float b);
4040
double getRealPrecision_double(double valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
4141
double getRealPrecision_float(float valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
42-
double getRealPrecision_int(long valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
42+
double getRealPrecision_int(int64_t valueRangeSize, int errBoundMode, double absErrBound, double relBoundRatio, int *status);
4343
void symTransform_8bytes(unsigned char data[8]);
4444
void symTransform_2bytes(unsigned char data[2]);
4545
void symTransform_4bytes(unsigned char data[4]);

contrib/TSZ/sz/inc/sz.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,15 @@ typedef union lint32
115115

116116
typedef union lint64
117117
{
118-
long lvalue;
119-
unsigned long ulvalue;
118+
int64_t lvalue;
119+
uint64_t ulvalue;
120120
unsigned char byte[8];
121121
} lint64;
122122

123123
typedef union ldouble
124124
{
125125
double value;
126-
unsigned long lvalue;
126+
uint64_t lvalue;
127127
unsigned char byte[8];
128128
} ldouble;
129129

contrib/TSZ/sz/inc/utility.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ extern "C" {
1818

1919

2020
int is_lossless_compressed_data(unsigned char* compressedBytes, size_t cmpSize);
21-
unsigned long sz_lossless_compress(int losslessCompressor, unsigned char* data, unsigned long dataLength, unsigned char* compressBytes);
22-
unsigned long sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, unsigned long cmpSize, unsigned char** oriData, unsigned long targetOriSize);
21+
size_t sz_lossless_compress(int losslessCompressor, unsigned char* data, size_t dataLength, unsigned char* compressBytes);
22+
size_t sz_lossless_decompress(int losslessCompressor, unsigned char* compressBytes, size_t cmpSize, unsigned char** oriData, size_t targetOriSize);
2323

2424

2525
#ifdef __cplusplus

contrib/TSZ/sz/src/ByteToolkit.c

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <string.h>
1212
#include "sz.h"
1313

14-
INLINE int bytesToInt_bigEndian(unsigned char* bytes)
14+
INLINE int bytesToInt32_bigEndian(unsigned char* bytes)
1515
{
1616
int res;
1717
unsigned char* des = (unsigned char*)&res;
@@ -26,7 +26,7 @@ INLINE int bytesToInt_bigEndian(unsigned char* bytes)
2626
* @unsigned char *b the variable to store the converted bytes (length=4)
2727
* @unsigned int num
2828
* */
29-
INLINE void intToBytes_bigEndian(unsigned char *b, unsigned int num)
29+
INLINE void int32ToBytes_bigEndian(unsigned char *b, unsigned int num)
3030
{
3131
unsigned char* sou =(unsigned char*)&num;
3232
b[0] = sou[3];
@@ -38,10 +38,10 @@ INLINE void intToBytes_bigEndian(unsigned char *b, unsigned int num)
3838
/**
3939
* @endianType: refers to the endian_type of unsigned char* b.
4040
* */
41-
INLINE long bytesToLong_bigEndian(unsigned char* b) {
41+
INLINE uint64_t bytesToInt64_bigEndian(unsigned char* b) {
4242

43-
long temp = 0;
44-
long res = 0;
43+
uint64_t temp = 0;
44+
uint64_t res = 0;
4545

4646
res <<= 8;
4747
temp = b[0] & 0xff;
@@ -79,11 +79,9 @@ INLINE long bytesToLong_bigEndian(unsigned char* b) {
7979

8080
}
8181

82-
INLINE void longToBytes_bigEndian(unsigned char *b, long num)
82+
INLINE void int64ToBytes_bigEndian(unsigned char *b, uint64_t num)
8383
{
8484
unsigned char* sou = (unsigned char*)&num;
85-
if(sizeof(num) == 8) {
86-
// 8 bytes
8785
b[7] = sou[0];
8886
b[6] = sou[1];
8987
b[5] = sou[2];
@@ -92,13 +90,6 @@ INLINE void longToBytes_bigEndian(unsigned char *b, long num)
9290
b[2] = sou[5];
9391
b[1] = sou[6];
9492
b[0] = sou[7];
95-
} else {
96-
memset(b, 0, 4);
97-
b[7] = sou[0];
98-
b[6] = sou[1];
99-
b[5] = sou[2];
100-
b[4] = sou[3];
101-
}
10293
}
10394

10495
//TODO: debug: lfBuf.lvalue could be actually little_endian....
@@ -132,9 +123,9 @@ INLINE short getExponent_double(double value)
132123

133124
ldouble lbuf;
134125
lbuf.value = value;
135-
long lvalue = lbuf.lvalue;
126+
uint64_t lvalue = lbuf.lvalue;
136127

137-
int expValue = (int)((lvalue & 0x7FF0000000000000) >> 52);
128+
int expValue = (int)((lvalue & 0x7FF0000000000000ULL) >> 52);
138129
expValue -= 1023;
139130
return (short)expValue;
140131
}
@@ -143,11 +134,11 @@ INLINE short getPrecisionReqLength_double(double precision)
143134
{
144135
ldouble lbuf;
145136
lbuf.value = precision;
146-
long lvalue = lbuf.lvalue;
137+
uint64_t lvalue = lbuf.lvalue;
147138

148-
int expValue = (int)((lvalue & 0x7FF0000000000000) >> 52);
139+
int expValue = (int)((lvalue & 0x7FF0000000000000ULL) >> 52);
149140
expValue -= 1023;
150-
// unsigned char the1stManBit = (unsigned char)((lvalue & 0x0008000000000000) >> 51);
141+
// unsigned char the1stManBit = (unsigned char)((lvalue & 0x0008000000000000ULL) >> 51);
151142
// if(the1stManBit==1)
152143
// expValue--;
153144
return (short)expValue;
@@ -263,18 +254,18 @@ INLINE size_t bytesToSize(unsigned char* bytes, int size_type)
263254
{
264255
size_t result = 0;
265256
if(size_type == 4)
266-
result = bytesToInt_bigEndian(bytes);//4
257+
result = bytesToInt32_bigEndian(bytes);//4
267258
else
268-
result = bytesToLong_bigEndian(bytes);//8
259+
result = bytesToInt64_bigEndian(bytes);//8
269260
return result;
270261
}
271262

272263
INLINE void sizeToBytes(unsigned char* outBytes, size_t size, int size_type)
273264
{
274265
if(size_type == 4)
275-
intToBytes_bigEndian(outBytes, (unsigned int)size);//4
266+
int32ToBytes_bigEndian(outBytes, (unsigned int)size);//4
276267
else
277-
longToBytes_bigEndian(outBytes, (unsigned long)size);//8
268+
int64ToBytes_bigEndian(outBytes, (int64_t)size);//8
278269
}
279270

280271
void convertSZParamsToBytes(sz_params* params, unsigned char* result, char optQuantMode)

contrib/TSZ/sz/src/Huffman.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ HuffmanTree* createHuffmanTree(int stateNum)
2323

2424
huffmanTree->pool = (struct node_t*)malloc(huffmanTree->allNodes*2*sizeof(struct node_t));
2525
huffmanTree->qqq = (node*)malloc(huffmanTree->allNodes*2*sizeof(node));
26-
huffmanTree->code = (unsigned long**)malloc(huffmanTree->stateNum*sizeof(unsigned long*));
26+
huffmanTree->code = (uint64_t**)malloc(huffmanTree->stateNum*sizeof(uint64_t*));
2727
huffmanTree->cout = (unsigned char *)malloc(huffmanTree->stateNum*sizeof(unsigned char));
2828

2929
memset(huffmanTree->pool, 0, huffmanTree->allNodes*2*sizeof(struct node_t));
3030
memset(huffmanTree->qqq, 0, huffmanTree->allNodes*2*sizeof(node));
31-
memset(huffmanTree->code, 0, huffmanTree->stateNum*sizeof(unsigned long*));
31+
memset(huffmanTree->code, 0, huffmanTree->stateNum*sizeof(uint64_t*));
3232
memset(huffmanTree->cout, 0, huffmanTree->stateNum*sizeof(unsigned char));
3333
huffmanTree->qq = huffmanTree->qqq - 1;
3434
huffmanTree->n_nodes = 0;
@@ -119,10 +119,10 @@ node qremove(HuffmanTree* huffmanTree)
119119
* @out2 should be 0 as well.
120120
* @index: the index of the byte
121121
* */
122-
void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, unsigned long out2)
122+
void build_code(HuffmanTree *huffmanTree, node n, int len, uint64_t out1, uint64_t out2)
123123
{
124124
if (n->t) {
125-
huffmanTree->code[n->c] = (unsigned long*)malloc(2*sizeof(unsigned long));
125+
huffmanTree->code[n->c] = (uint64_t*)malloc(2*sizeof(uint64_t));
126126
if(len<=64)
127127
{
128128
if(len == 0)
@@ -234,14 +234,14 @@ void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out,
234234
byteSizep = bitSize/8; //it's used to move the pointer p for next data
235235
if(byteSize<=8)
236236
{
237-
longToBytes_bigEndian(p, (huffmanTree->code[state])[0]);
237+
int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]);
238238
p += byteSizep;
239239
}
240240
else //byteSize>8
241241
{
242-
longToBytes_bigEndian(p, (huffmanTree->code[state])[0]);
242+
int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]);
243243
p += 8;
244-
longToBytes_bigEndian(p, (huffmanTree->code[state])[1]);
244+
int64ToBytes_bigEndian(p, (huffmanTree->code[state])[1]);
245245
p += (byteSizep - 8);
246246
}
247247
*outSize += byteSize;
@@ -254,8 +254,8 @@ void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out,
254254
{
255255
p++;
256256
//(*outSize)++;
257-
long newCode = (huffmanTree->code[state])[0] << lackBits;
258-
longToBytes_bigEndian(p, newCode);
257+
uint64_t newCode = (huffmanTree->code[state])[0] << lackBits;
258+
int64ToBytes_bigEndian(p, newCode);
259259

260260
if(bitSize<=64)
261261
{
@@ -279,7 +279,7 @@ void encode(HuffmanTree *huffmanTree, int *s, size_t length, unsigned char *out,
279279
p++;
280280
//(*outSize)++;
281281
newCode = (huffmanTree->code[state])[1] << lackBits;
282-
longToBytes_bigEndian(p, newCode);
282+
int64ToBytes_bigEndian(p, newCode);
283283
bitSize -= lackBits;
284284
byteSize = bitSize%8==0 ? bitSize/8 : bitSize/8+1;
285285
byteSizep = bitSize/8;
@@ -706,9 +706,9 @@ void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned c
706706
//printf("treeByteSize = %d\n", treeByteSize);
707707

708708
*out = (unsigned char*)malloc(length*sizeof(int)+treeByteSize);
709-
intToBytes_bigEndian(buffer, nodeCount);
709+
int32ToBytes_bigEndian(buffer, nodeCount);
710710
memcpy(*out, buffer, 4);
711-
intToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals
711+
int32ToBytes_bigEndian(buffer, huffmanTree->stateNum/2); //real number of intervals
712712
memcpy(*out+4, buffer, 4);
713713
memcpy(*out+8, treeBytes, treeByteSize);
714714
free(treeBytes);
@@ -724,13 +724,13 @@ void encode_withTree(HuffmanTree* huffmanTree, int *s, size_t length, unsigned c
724724
void decode_withTree(HuffmanTree* huffmanTree, unsigned char *s, size_t targetLength, int *out)
725725
{
726726
size_t encodeStartIndex;
727-
size_t nodeCount = bytesToInt_bigEndian(s);
727+
size_t nodeCount = bytesToInt32_bigEndian(s);
728728
node root = reconstruct_HuffTree_from_bytes_anyStates(huffmanTree,s+8, nodeCount);
729729

730730
//sdi: Debug
731731
/* build_code(root, 0, 0, 0);
732732
int i;
733-
unsigned long code_1, code_2;
733+
uint64_t code_1, code_2;
734734
for (i = 0; i < stateNum; i++)
735735
if (code[i])
736736
{

contrib/TSZ/sz/src/TightDataPointStorageD.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,13 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi
8484

8585
for (i = 0; i < 4; i++)
8686
byteBuf[i] = flatBytes[index++];
87-
int max_quant_intervals = bytesToInt_bigEndian(byteBuf);// 4
87+
int max_quant_intervals = bytesToInt32_bigEndian(byteBuf);// 4
8888

8989
pde_params->maxRangeRadius = max_quant_intervals/2;
9090

9191
for (i = 0; i < 4; i++)
9292
byteBuf[i] = flatBytes[index++];
93-
(*this)->intervals = bytesToInt_bigEndian(byteBuf);// 4
93+
(*this)->intervals = bytesToInt32_bigEndian(byteBuf);// 4
9494

9595
for (i = 0; i < 8; i++)
9696
byteBuf[i] = flatBytes[index++];
@@ -136,7 +136,7 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi
136136
if ((*this)->ifAdtFse == 0) {
137137
(*this)->typeArray = &flatBytes[index];
138138
//retrieve the number of states (i.e., stateNum)
139-
(*this)->allNodes = bytesToInt_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum
139+
(*this)->allNodes = bytesToInt32_bigEndian((*this)->typeArray); //the first 4 bytes store the stateNum
140140
(*this)->stateNum = ((*this)->allNodes+1)/2;
141141
index+=(*this)->typeArray_size;
142142
} else {
@@ -147,10 +147,21 @@ int new_TightDataPointStorageD_fromFlatBytes(TightDataPointStorageD **this, unsi
147147
index+=(*this)->transCodeBits_size;
148148
}
149149

150-
// todo need check length
151-
(*this)->residualMidBits_size = flatBytesLength - 1 - 1 - MetaDataByteLength - pde_exe->SZ_SIZE_TYPE - 4 - 4 - 4 - 1 - 8
152-
- pde_exe->SZ_SIZE_TYPE - pde_exe->SZ_SIZE_TYPE
153-
- (*this)->leadNumArray_size - (*this)->exactMidBytes_size;
150+
// todo need check length
151+
(*this)->residualMidBits_size = flatBytesLength // total length of flatBytes
152+
- 1 // version
153+
- 1 // sameByte
154+
- MetaDataByteLength_double // meta data
155+
- pde_exe->SZ_SIZE_TYPE // data series length
156+
- 4 // max quant intervals
157+
- 4 // intervals
158+
- sizeof(double) // median value
159+
- 1 // req length
160+
- sizeof(double) // real precision
161+
- pde_exe->SZ_SIZE_TYPE // leadNumArray_size
162+
- pde_exe->SZ_SIZE_TYPE // exactMidBytes_size
163+
- (*this)->leadNumArray_size // data length of leadNumArray
164+
- (*this)->exactMidBytes_size; // data length of exactMidBytes
154165
if ((*this)->ifAdtFse == 0) {
155166
(*this)->residualMidBits_size = (*this)->residualMidBits_size - (*this)->typeArray_size - pde_exe->SZ_SIZE_TYPE ;
156167
} else {
@@ -250,11 +261,11 @@ void convertTDPStoBytes_double(TightDataPointStorageD* tdps, unsigned char* byte
250261

251262
for(i = 0;i<exe_params->SZ_SIZE_TYPE;i++)//ST: 4 or 8 bytes
252263
bytes[k++] = dsLengthBytes[i];
253-
intToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals);
264+
int32ToBytes_bigEndian(max_quant_intervals_Bytes, confparams_cpr->max_quant_intervals);
254265
for(i = 0;i<4;i++)//4
255266
bytes[k++] = max_quant_intervals_Bytes[i];
256267

257-
intToBytes_bigEndian(intervalsBytes, tdps->intervals);
268+
int32ToBytes_bigEndian(intervalsBytes, tdps->intervals);
258269
for(i = 0;i<4;i++)//4
259270
bytes[k++] = intervalsBytes[i];
260271

@@ -326,9 +337,9 @@ bool convertTDPStoFlatBytes_double(TightDataPointStorageD *tdps, unsigned char*
326337
unsigned char dsLengthBytes[8];
327338

328339
if(exe_params->SZ_SIZE_TYPE==4)
329-
intToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
340+
int32ToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//4
330341
else
331-
longToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8
342+
int64ToBytes_bigEndian(dsLengthBytes, tdps->dataSeriesLength);//8
332343

333344
unsigned char sameByte = tdps->allSameData==1?(unsigned char)1:(unsigned char)0;
334345
//sameByte = sameByte | (confparams_cpr->szMode << 1);

0 commit comments

Comments
 (0)