From 84e8458de0cb33333e53f59894254def1ac69830 Mon Sep 17 00:00:00 2001 From: Martin Date: Thu, 18 May 2017 18:47:09 +0200 Subject: [PATCH] Option to use memcpy for unaligned data access --- miniz_tdef.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/miniz_tdef.c b/miniz_tdef.c index 37ac70c..ef6e088 100644 --- a/miniz_tdef.c +++ b/miniz_tdef.c @@ -737,13 +737,29 @@ static int tdefl_flush_block(tdefl_compressor *d, int flush) } #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#ifdef MINIZ_UNALIGNED_USE_MEMCPY +static inline mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +static inline mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +#else #define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p) +#define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p) +#endif static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; - mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s); MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; @@ -767,14 +783,14 @@ static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahe if (!dist) break; q = (const mz_uint16 *)(d->m_dict + probe_pos); - if (TDEFL_READ_UNALIGNED_WORD(q) != s01) + if (TDEFL_READ_UNALIGNED_WORD2(q) != s01) continue; p = s; probe_len = 32; do { - } while ((TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0)); + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); if (!probe_len) { *pMatch_dist = dist; @@ -883,8 +899,8 @@ static mz_bool tdefl_compress_fast(tdefl_compressor *d) mz_uint32 probe_len = 32; do { - } while ((TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0)); + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); if (!probe_len) cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;