提交 4ca9eabc 编写于 作者: A AntiTopQuark 提交者: wangzelin.wzl

support expr WEIGHT_STRING() in mysql mode. master->3_1_opensource_release

上级 0d3a41c3
......@@ -852,6 +852,7 @@ size_t ObCharset::sortkey(ObCollationType collation_type, const char* str, int64
OB_MAX_WEIGHT,
reinterpret_cast<const unsigned char*>(str),
str_len,
0,
&is_valid_unicode_tmp);
is_valid_unicode = is_valid_unicode_tmp;
}
......
......@@ -72,6 +72,38 @@ extern "C" {
/* A helper macros for "need at least n bytes" */
#define OB_CS_ERR_TOOSMALLN(n) (-100 - (n))
/*
  Flags for strxfrm.

  Bit layout: the six low bits select weight levels; the same six bits
  shifted left by OB_STRXFRM_DESC_SHIFT request descending order for a level,
  and shifted left by OB_STRXFRM_REVERSE_SHIFT request reversed order.
  Code such as (OB_STRXFRM_DESC_LEVEL1 << level) relies on every per-level
  flag being a single, consecutive bit.
*/
#define OB_STRXFRM_LEVEL1 0x00000001 /* for primary weights */
#define OB_STRXFRM_LEVEL2 0x00000002 /* for secondary weights */
#define OB_STRXFRM_LEVEL3 0x00000004 /* for tertiary weights */
#define OB_STRXFRM_LEVEL4 0x00000008 /* fourth level weights */
#define OB_STRXFRM_LEVEL5 0x00000010 /* fifth level weights */
#define OB_STRXFRM_LEVEL6 0x00000020 /* sixth level weights */
#define OB_STRXFRM_LEVEL_ALL 0x0000003F /* Bit OR for the above six */
#define OB_STRXFRM_NLEVELS 6 /* Number of possible levels*/
#define OB_STRXFRM_PAD_WITH_SPACE 0x00000040 /* if pad result with spaces */
#define OB_STRXFRM_PAD_TO_MAXLEN 0x00000080 /* if pad tail(for filesort) */
#define OB_STRXFRM_DESC_LEVEL1 0x00000100 /* if desc order for level1 */
#define OB_STRXFRM_DESC_LEVEL2 0x00000200 /* if desc order for level2 */
#define OB_STRXFRM_DESC_LEVEL3 0x00000400 /* if desc order for level3 (was 0x300 == LEVEL1|LEVEL2) */
#define OB_STRXFRM_DESC_LEVEL4 0x00000800 /* if desc order for level4 */
#define OB_STRXFRM_DESC_LEVEL5 0x00001000 /* if desc order for level5 */
#define OB_STRXFRM_DESC_LEVEL6 0x00002000 /* if desc order for level6 */
#define OB_STRXFRM_DESC_SHIFT 8
#define OB_STRXFRM_UNUSED_00004000 0x00004000 /* for future extensions */
#define OB_STRXFRM_UNUSED_00008000 0x00008000 /* for future extensions */
#define OB_STRXFRM_REVERSE_LEVEL1 0x00010000 /* if reverse order for level1 */
#define OB_STRXFRM_REVERSE_LEVEL2 0x00020000 /* if reverse order for level2 */
#define OB_STRXFRM_REVERSE_LEVEL3 0x00040000 /* if reverse order for level3 */
#define OB_STRXFRM_REVERSE_LEVEL4 0x00080000 /* if reverse order for level4 */
#define OB_STRXFRM_REVERSE_LEVEL5 0x00100000 /* if reverse order for level5 */
#define OB_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */
#define OB_STRXFRM_REVERSE_SHIFT 16
#define _MY_U 01 /* Upper case */
#define _MY_L 02 /* Lower case */
#define _MY_NMR 04 /* Numeral (digit) */
......@@ -209,8 +241,9 @@ typedef struct ObCollationHandler {
// strnxfrm() - makes a sort key suitable for memcmp() corresponding
// to the given string
size_t (*strnxfrm)(const struct ObCharsetInfo*, unsigned char* dst, size_t dst_len, uint32_t nweights,
const unsigned char* src, size_t srclen, int* is_valid_unicode);
const unsigned char* src, size_t srclen, unsigned int flags ,int* is_valid_unicode);
// strnxfrmlen() - get the length of the result of weight_string()
size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
// like_range() - creates a LIKE range, for optimizer
int (*like_range)(const struct ObCharsetInfo* cs, const char* str, size_t str_len, int w_prefix, int w_one,
int w_many, size_t res_length, char* min_str, char* max_str, size_t* min_len, size_t* max_len);
......@@ -288,7 +321,12 @@ size_t ob_scan_8bit(const char* str, const char* end, int sq);
/* For 8-bit character set */
int ob_like_range_simple(const ObCharsetInfo* cs, const char* str, size_t str_len, int escape, int w_one, int w_many,
size_t res_length, char* min_str, char* max_str, size_t* min_length, size_t* max_length);
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *end,
unsigned int flags, unsigned int level);
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
unsigned char *str, unsigned char *frm_str_ptr, unsigned char *end,
unsigned int nweights, unsigned int flags, unsigned int level);
int64_t ob_strntoll(const char* str, size_t str_len, int base, char** end, int* err);
int64_t ob_strntoull(const char* str, size_t str_len, int base, char** end, int* err);
......@@ -333,14 +371,20 @@ uint32_t ob_convert(char* to, uint32_t to_length, const ObCharsetInfo* to_cs, co
const ObCharsetInfo* from_cs, uint32_t* errors);
size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo* cs, unsigned char* dst, size_t dstlen, uint32_t nweights,
const unsigned char* src, size_t srclen, int* is_valid_unicode);
const unsigned char* src, size_t srclen, unsigned int flags , int* is_valid_unicode);
size_t ob_strnxfrm_unicode(const ObCharsetInfo* cs, unsigned char* dst, size_t dstlen, uint32_t nweights,
const unsigned char* src, size_t src_len, int* is_valid_unicode);
const unsigned char* src, size_t src_len, unsigned int flags , int* is_valid_unicode);
int ob_wildcmp_unicode(const ObCharsetInfo* cs, const char* str, const char* str_end, const char* wildstr,
const char* wildend, int escape, int w_one, int w_many, uint32_t** weights);
size_t ob_strnxfrmlen_simple(const struct ObCharsetInfo *, size_t);
size_t ob_strnxfrmlen_unicode_full_bin(const struct ObCharsetInfo *, size_t);
size_t ob_strnxfrmlen_utf8mb4(const struct ObCharsetInfo *, size_t);
extern void right_to_die_or_duty_to_live_c();
#ifdef __cplusplus
......
......@@ -637,7 +637,7 @@ static int ob_strnncollsp_binary(const ObCharsetInfo* cs __attribute__((unused))
}
static size_t ob_strnxfrm_8bit_bin(const ObCharsetInfo* cs __attribute__((unused)), unsigned char* dst, size_t dstlen,
uint32_t nweights, const unsigned char* src, size_t srclen, int* is_valid_unicode)
uint32_t nweights, const unsigned char* src, size_t srclen, unsigned int flags, int* is_valid_unicode)
{
*is_valid_unicode = 1;
srclen = (srclen < dstlen ? srclen : dstlen);
......@@ -645,7 +645,8 @@ static size_t ob_strnxfrm_8bit_bin(const ObCharsetInfo* cs __attribute__((unused
if (dst != src && srclen > 0) {
memcpy(dst, src, srclen);
}
return srclen;
return ob_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
nweights - srclen, flags, 0);
}
#define likeconv(s, A) (A)
......@@ -826,6 +827,7 @@ static ObCollationHandler ob_collation_binary_handler = {
ob_strnncoll_binary,
ob_strnncollsp_binary,
ob_strnxfrm_8bit_bin,
ob_strnxfrmlen_simple,
ob_like_range_simple,
ob_wildcmp_bin,
ob_instr_bin,
......
......@@ -454,6 +454,65 @@ size_t ob_scan_8bit(const char* str, const char* end, int sq)
}
}
/*
  Apply the DESC and/or REVERSE transformation for one weight level to the
  bytes in [str, end), in place.

  @param str    first byte of the range
  @param end    one past the last byte of the range
  @param flags  strxfrm flags; only the DESC and REVERSE bits for `level`
                are inspected
  @param level  zero-based level index, used to shift the LEVEL1 flag bits

  DESC replaces every byte with its bitwise complement; REVERSE mirrors the
  byte order; with both set the range is mirrored and complemented.
  The range is walked with indices rather than by decrementing `end`, so no
  pointer before the start of the buffer is ever formed (this happened for
  empty and single-byte ranges with the pointer-based loops).
*/
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *end,
                                 unsigned int flags, unsigned int level)
{
  const int want_desc = (flags & (OB_STRXFRM_DESC_LEVEL1 << level)) != 0;
  const int want_reverse = (flags & (OB_STRXFRM_REVERSE_LEVEL1 << level)) != 0;
  size_t head = 0;
  size_t tail = 0;

  /* Nothing to do for an empty/invalid range or when neither flag is set. */
  if (!str || !end || end <= str || (!want_desc && !want_reverse)) {
    return;
  }
  tail = (size_t) (end - str);

  if (want_desc && want_reverse) {
    /* Mirror and complement; a middle byte is simply complemented. */
    while (head < tail) {
      unsigned char left;
      unsigned char right;
      tail--;
      left = str[head];
      right = str[tail];
      str[head] = (unsigned char) ~right;
      str[tail] = (unsigned char) ~left;
      head++;
    }
  } else if (want_desc) {
    for (head = 0; head < tail; head++) {
      str[head] = (unsigned char) ~str[head];
    }
  } else {
    /* Mirror only; a middle byte stays where it is. */
    while (head + 1 < tail) {
      unsigned char left;
      tail--;
      left = str[head];
      str[head] = str[tail];
      str[tail] = left;
      head++;
    }
  }
}
/*
  Finish a sort key: optionally pad it, apply DESC/REVERSE for `level`, then
  optionally pad the tail up to the end of the buffer.

  @param cs           charset; cs->mbminlen, cs->pad_char and cs->cset->fill are used
  @param str          start of the sort key buffer
  @param frm_end_ptr  one past the last byte produced so far
  @param end          one past the last byte of the buffer
  @param nweights     number of weights still to pad when PAD_WITH_SPACE is set
  @param flags        strxfrm flags
  @param level        zero-based level passed on to ob_strxfrm_desc_and_reverse()
  @return number of bytes of the key, counted from `str`

  Lengths are computed in size_t: the previous unsigned int arithmetic
  truncated the remaining buffer size and could wrap nweights * mbminlen.
*/
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
                                       unsigned char *str, unsigned char *frm_end_ptr, unsigned char *end,
                                       unsigned int nweights, unsigned int flags, unsigned int level)
{
  if (nweights && frm_end_ptr < end && (flags & OB_STRXFRM_PAD_WITH_SPACE))
  {
    const size_t room = (size_t) (end - frm_end_ptr);
    const size_t wanted = (size_t) nweights * cs->mbminlen;
    const size_t fill_str_len = room < wanted ? room : wanted;
    cs->cset->fill(cs, (char*) frm_end_ptr, fill_str_len, cs->pad_char);
    frm_end_ptr += fill_str_len;
  }
  /* DESC/REVERSE covers the weights and the space padding, but not the max-length tail. */
  ob_strxfrm_desc_and_reverse(str, frm_end_ptr, flags, level);
  if ((flags & OB_STRXFRM_PAD_TO_MAXLEN) && frm_end_ptr < end)
  {
    const size_t fill_str_len = (size_t) (end - frm_end_ptr);
    cs->cset->fill(cs, (char*) frm_end_ptr, fill_str_len, cs->pad_char);
    frm_end_ptr = end;
  }
  return (size_t) (frm_end_ptr - str);
}
/*
  Returns the number of bytes required for strnxfrm():
  `len` scaled by cs->strxfrm_multiply, where a multiplier of zero is
  treated as one.
*/
size_t ob_strnxfrmlen_simple(const ObCharsetInfo *cs, size_t len)
{
  if (cs->strxfrm_multiply) {
    return len * cs->strxfrm_multiply;
  }
  return len;
}
//========================================================================
int ob_like_range_simple(const ObCharsetInfo* cs, const char* str, size_t str_len, int escape, int w_one, int w_many,
......
......@@ -10435,9 +10435,47 @@ static int ob_strnncollsp_utf8mb4(
}
return res;
}
/*
  Write up to `nweights` two-byte pads (0x00 0x20) starting at str_ptr,
  never writing at or past str_ptr_end. The last pad may be cut to the
  single byte 0x00 when only one byte of room is left.

  @return number of bytes written
*/
static size_t ob_strxfrm_pad_nweights_unicode(uchar *str_ptr, uchar *str_ptr_end, size_t nweights)
{
  uchar *cursor;
  ob_charset_assert(str_ptr && str_ptr <= str_ptr_end);
  cursor = str_ptr;
  while (cursor < str_ptr_end && nweights)
  {
    *cursor++ = 0x00;
    if (cursor < str_ptr_end)
    {
      *cursor++ = 0x20;
    }
    nweights--;
  }
  return cursor - str_ptr;
}
/*
  Fill [str_ptr, str_ptr_end) with the repeating byte pair 0x00 0x20.
  If the range has an odd length the final byte written is 0x00.

  @return number of bytes written
*/
static size_t ob_strxfrm_pad_unicode(uchar *str_ptr, uchar *str_ptr_end)
{
  uchar *cursor = str_ptr;
  ob_charset_assert(str_ptr && str_ptr <= str_ptr_end);
  while (cursor < str_ptr_end)
  {
    *cursor++ = 0x00;
    if (cursor < str_ptr_end)
    {
      *cursor++ = 0x20;
    }
  }
  return cursor - str_ptr;
}
/*
  Post-process a unicode sort key in three steps:
    1. if PAD_WITH_SPACE is set, append up to `nweights` 0x00 0x20 pads;
    2. apply DESC/REVERSE for level 0 to the bytes from *dst0 to *d_start;
    3. if PAD_TO_MAXLEN is set, pad the rest of the buffer up to *d_end.

  @param d_start  in/out: current write position, advanced by the padding
  @param d_end    end of the destination buffer
  @param nweights number of weights to pad in step 1
  @param flags    strxfrm flags
  @param dst0     start of the sort key
*/
void ob_strnxfrm_unicode_help(uchar **d_start,
                              uchar **d_end,
                              uint nweights,
                              uint flags,
                              uchar **dst0)
{
  if ((flags & OB_STRXFRM_PAD_WITH_SPACE) && nweights && *d_start < *d_end)
  {
    *d_start += ob_strxfrm_pad_nweights_unicode(*d_start, *d_end, nweights);
  }

  ob_strxfrm_desc_and_reverse(*dst0, *d_start, flags, 0);

  if (*d_start < *d_end && (flags & OB_STRXFRM_PAD_TO_MAXLEN))
  {
    *d_start += ob_strxfrm_pad_unicode(*d_start, *d_end);
  }
}
size_t ob_strnxfrm_unicode(const ObCharsetInfo* cs, unsigned char* dst, size_t dst_len, uint32_t nweights,
const unsigned char* src, size_t src_len, int* is_valid_unicode)
const unsigned char* src, size_t src_len, unsigned int flags ,int* is_valid_unicode)
{
ob_wc_t wchar = 0;
int cur_len = 0;
......@@ -10463,7 +10501,7 @@ size_t ob_strnxfrm_unicode(const ObCharsetInfo* cs, unsigned char* dst, size_t d
}
dst += cur_len;
}
// ob_strnxfrm_unicode_help(&dst, &dst_end, nweights, &dst0);
ob_strnxfrm_unicode_help(&dst, &dst_end, nweights, flags , &dst_begin);
return dst - dst_begin;
}
......@@ -10669,7 +10707,7 @@ static void ob_hash_sort_utf8mb4(const ObCharsetInfo* cs, const unsigned char* s
//======================================================================
size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo* cs, unsigned char* dst, size_t dstlen, uint32_t nweights,
const unsigned char* src, size_t srclen, int* is_valid_unicode)
const unsigned char* src, size_t srclen, unsigned int flags, int* is_valid_unicode)
{
ob_wc_t wc;
unsigned char* dst0 = dst;
......@@ -10693,10 +10731,46 @@ size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo* cs, unsigned char* dst,
}
dst += res;
}
if (flags & OB_STRXFRM_PAD_WITH_SPACE)
{
for ( ; dst < de && nweights; nweights--)
{
*dst++= 0x00;
if (dst < de)
{
*dst++= 0x00;
if (dst < de)
*dst++= 0x20;
}
}
}
ob_strxfrm_desc_and_reverse(dst0, dst, flags, 0);
if (flags & OB_STRXFRM_PAD_TO_MAXLEN)
{
while (dst < de)
{
*dst++= 0x00;
if (dst < de)
{
*dst++= 0x00;
if (dst < de)
*dst++= 0x20;
}
}
}
return dst - dst0;
}
/*
  Sort key length estimate for `len` input bytes: (len * 2 + 2) / 4.
  The charset argument is not used.
*/
size_t ob_strnxfrmlen_utf8mb4(const ObCharsetInfo *cs __attribute__((unused)), size_t len)
{
  const size_t scaled = len * 2 + 2;
  return scaled / 4;
}
/*
  Sort key length estimate for `len` input bytes:
  three bytes for each (len + 3) / cs->mbmaxlen units.
*/
size_t ob_strnxfrmlen_unicode_full_bin(const ObCharsetInfo *cs, size_t len)
{
  const size_t units = (len + 3) / cs->mbmaxlen;
  return units * 3;
}
//======================================================================
ObCharsetHandler ob_charset_utf8mb4_handler = {ob_ismbchar_utf8mb4,
......@@ -10721,6 +10795,7 @@ static ObCollationHandler ob_collation_utf8mb4_general_ci_handler = {
ob_strnncoll_utf8mb4,
ob_strnncollsp_utf8mb4,
ob_strnxfrm_unicode,
ob_strnxfrmlen_utf8mb4,
ob_like_range_mb,
ob_wildcmp_utf8mb4,
ob_instr_mb,
......@@ -10731,6 +10806,7 @@ static ObCollationHandler ob_collation_utf8mb4_bin_handler = {
ob_strnncoll_mb_bin,
ob_strnncollsp_mb_bin,
ob_strnxfrm_unicode_full_bin,
ob_strnxfrmlen_unicode_full_bin,
ob_like_range_mb,
ob_wildcmp_mb_bin,
ob_instr_mb,
......
......@@ -866,6 +866,7 @@
#define N_TO_SINGLE_BYTE "to_single_byte"
#define N_TO_MULTI_BYTE "to_multi_byte"
#define N_WEIGHT_STRING "weight_string"
#define N_TO_NCHAR "to_nchar"
#define N_LNNVL "lnnvl"
......
......@@ -268,6 +268,8 @@ ob_set_subtarget(ob_sql engine
engine/expr/ob_expr_greatest.cpp
engine/expr/ob_expr_hex.cpp
engine/expr/ob_expr_hextoraw.cpp
engine/expr/ob_expr_weight_string.h
engine/expr/ob_expr_weight_string.cpp
engine/expr/ob_expr_host_ip.cpp
engine/expr/ob_expr_ifnull.cpp
engine/expr/ob_expr_in.cpp
......
......@@ -185,6 +185,7 @@
#include "ob_expr_to_multi_byte.h"
#include "ob_expr_convert_tz.h"
#include "ob_expr_degrees.h"
#include "ob_expr_weight_string.h"
namespace oceanbase {
using namespace common;
......@@ -716,7 +717,7 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = {
ObExprIsIpv4Compat::calc_is_ipv4_compat, /* 455 */
ObExprInetAton::calc_inet_aton, /* 456 */
ObExprInet6Ntoa::calc_inet6_ntoa, /* 457 */
NULL, // ObExprWeightString::eval_weight_string, /* 458 */
ObExprWeightString::eval_weight_string, /* 458 */
ObExprConvertTZ::eval_convert_tz, /* 459 */
ObExprCrc32::calc_crc32_expr /* 460 */
};
......
......@@ -269,6 +269,7 @@
#include "sql/engine/expr/ob_expr_bit_length.h"
#include "sql/engine/expr/ob_expr_convert_tz.h"
#include "sql/engine/expr/ob_expr_degrees.h"
#include "sql/engine/expr/ob_expr_weight_string.h"
using namespace oceanbase::common;
namespace oceanbase {
......@@ -679,6 +680,7 @@ void ObExprOperatorFactory::register_expr_operators()
REG_OP(ObExprTimeFormat);
REG_OP(ObExprTimestamp);
REG_OP(ObExprDegrees);
REG_OP(ObExprWeightString);
// register oracle system function
REG_OP_ORCL(ObExprSysConnectByPath);
REG_OP_ORCL(ObExprTimestampNvl);
......
// Copyright (c) 2015-2016 Alibaba Inc. All Rights Reserved.
// Author:
// jiajingzhe.jjz@alibaba-inc.com
// Normalizer:
//
#define USING_LOG_PREFIX SQL_ENG
#define OB_MAX_WEIGHT OB_MAX_VARCHAR_LENGTH
#include <string.h>
#include "sql/parser/ob_item_type.h"
#include "sql/engine/expr/ob_expr_weight_string.h"
#include "sql/engine/expr/ob_expr_operator.h"
#include "share/object/ob_obj_cast.h"
#include "lib/oblog/ob_log.h"
#include "sql/engine/expr/ob_datum_cast.h"
#include "ob_expr_util.h"
using namespace oceanbase::common;
namespace oceanbase
{
namespace sql
{
// Registers the operator as T_FUN_SYS_WEIGHT_STRING under the name
// N_WEIGHT_STRING, accepting MORE_THAN_ZERO parameters.
ObExprWeightString::ObExprWeightString(ObIAllocator &alloc)
    : ObStringExprOperator(alloc,
                           T_FUN_SYS_WEIGHT_STRING,
                           N_WEIGHT_STRING,
                           MORE_THAN_ZERO)
{}

// Nothing to release.
ObExprWeightString::~ObExprWeightString()
{}
// Deduce the result type of WEIGHT_STRING().
//
// The expression takes exactly five parameters:
//   [0] input expression, [1] result length, [2] nweights, [3] flags,
//   [4] as_binary marker.
// The result is always a varchar with CS_TYPE_BINARY collation; only its
// maximum length and collation level depend on the parameters.
//
// @param type         [out] deduced result type
// @param types_stack  parameter types (and constant values)
// @param param_num    number of entries in types_stack, must be 5
// @param type_ctx     type deduction context
// @return OB_SUCCESS, OB_INVALID_ARGUMENT_NUM when the parameter count is wrong,
//         OB_ERR_INVALID_TYPE_FOR_OP for row/invalid input,
//         OB_ERR_UNEXPECTED when the session or charset info is missing
int ObExprWeightString::calc_result_typeN(ObExprResType &type,
                                          ObExprResType *types_stack,
                                          int64_t param_num,
                                          ObExprTypeCtx &type_ctx) const
{
  int ret = OB_SUCCESS;
  const int64_t WEIGHT_STRING_PARAM_NUM = 5;
  CK (OB_NOT_NULL(type_ctx.get_session()));
  if (OB_SUCCESS != ret) {
    // CK has already recorded the failure; do not touch the parameters
  } else if (OB_ISNULL(types_stack) || OB_UNLIKELY(WEIGHT_STRING_PARAM_NUM != param_num)) {
    // types_stack[1], [2] and [4] are read below, so the count must be verified first
    ret = OB_INVALID_ARGUMENT_NUM;
    LOG_WARN("invalid argument num", K(ret), K(param_num));
  } else if (NOT_ROW_DIMENSION != row_dimension_ || ObMaxType == types_stack[0].get_type()) {
    ret = OB_ERR_INVALID_TYPE_FOR_OP;
  } else {
    if (types_stack[0].get_type() > ObUNumberType) {
      // the input is not a numeric type: evaluate it as a varchar
      type_ctx.set_cast_mode(type_ctx.get_cast_mode() | CM_NULL_ON_WARN);
      types_stack[0].set_calc_type(ObVarcharType);
    }
    int max_length = OB_MAX_VARBINARY_LENGTH; // The maximum length of the result of WEIGHT_STRING()
    int result_length = types_stack[1].get_param().get_int();
    int nweight = types_stack[2].get_param().get_int();
    bool as_binary = types_stack[4].get_param().get_int();
    ObCollationLevel coll_level = CS_LEVEL_INVALID;
    if (as_binary) {
      coll_level = CS_LEVEL_IMPLICIT;
    } else {
      coll_level = types_stack[0].get_collation_level();
    }
    if (types_stack[0].get_type() == ObDateTimeType ||
        types_stack[0].get_type() == ObTimestampType ||
        types_stack[0].get_type() == ObDateType ||
        types_stack[0].get_type() == ObTimeType) {
      // date/time inputs: max_length is the length of the input type
      max_length = types_stack[0].get_length();
    } else if (result_length > 0) {
      max_length = result_length;
    } else if (as_binary) {
      // AS BINARY: nweight is the max_length of the result
      max_length = nweight;
    } else {
      // character input: derive max_length from cs->mbmaxlen
      ObCollationType collation_type = types_stack[0].get_collation_type();
      const ObCharsetInfo *cs = ObCharset::get_charset(collation_type);
      if (OB_ISNULL(cs)) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected null charset info", K(ret), K(collation_type));
      } else {
        // NOTE(review): mbmaxlen is applied twice here; kept as is - confirm it is intended
        max_length = cs->mbmaxlen * max(nweight, types_stack[0].get_length()*cs->mbmaxlen);
      }
    }
    if (OB_SUCCESS == ret) {
      // deduced result type
      type.set_varchar();
      type.set_collation_type(CS_TYPE_BINARY);
      type.set_collation_level(coll_level);
      type.set_length(max_length);
    }
  }
  return ret;
}
// Compute WEIGHT_STRING() for the ObObj based engine.
//
// Parameters in objs_array: [0] input string, [1] result length,
// [2] nweights, [3] flags, [4] as_binary marker.
// The result is NULL when any parameter is NULL or when the input is a
// numeric type (same rule as eval_weight_string()).
//
// @param result      [out] sort key as varchar, or NULL
// @param objs_array  the five parameters
// @param param_num   number of parameters, must be 5
// @param expr_ctx    supplies calc_buf_, the allocator for the output buffer
// @return OB_SUCCESS, OB_NOT_INIT when calc_buf_ is missing,
//         OB_INVALID_ARGUMENT_NUM, OB_ERR_UNEXPECTED when no charset info is
//         available, or OB_ALLOCATE_MEMORY_FAILED
int ObExprWeightString::calc_resultN(common::ObObj &result , const common::ObObj *objs_array,
                                     int64_t param_num,common::ObExprCtx &expr_ctx) const
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(expr_ctx.calc_buf_)) {
    // stop here: the buffer is dereferenced below
    ret = OB_NOT_INIT;
    LOG_WARN("varchar buffer not init", K(ret));
  } else if (OB_ISNULL(objs_array) || param_num != 5) {
    ret = OB_INVALID_ARGUMENT_NUM;
    result.set_null();
    LOG_ERROR("invalid argument num",K(ret), K(param_num), KP(objs_array));
  } else if (OB_UNLIKELY(objs_array[0].is_null()) ||
             OB_UNLIKELY(objs_array[1].is_null()) ||
             OB_UNLIKELY(objs_array[2].is_null()) ||
             OB_UNLIKELY(objs_array[3].is_null()) ||
             OB_UNLIKELY(objs_array[4].is_null()) ||
             objs_array[0].get_type() <= ObUNumberType) {
    // NULL parameter, or a numeric input which carries no string payload
    result.set_null();
  } else {
    const ObString str = objs_array[0].get_string();
    int result_length = objs_array[1].get_int();
    int nweights = objs_array[2].get_int();
    int flags = objs_array[3].get_int();
    bool as_binary = objs_array[4].get_int();
    ObCollationType collation_type = CS_TYPE_INVALID;
    if (as_binary) {
      collation_type = CS_TYPE_BINARY;
    } else {
      collation_type = objs_array[0].get_collation_type();
    }
    const ObCharsetInfo *cs = ObCharset::get_charset(collation_type);
    if (OB_ISNULL(cs) || OB_ISNULL(cs->coll)) {
      ret = OB_ERR_UNEXPECTED;
      result.set_null();
      LOG_WARN("unexpected null charset info", K(ret), K(collation_type));
    } else {
      flags = ob_strxfrm_flag_normalize(flags, cs->levels_for_order);
      // calc the length of result
      size_t frm_length = 0;
      size_t tmp_length = 0;
      if (result_length > 0) {
        tmp_length = result_length;
      } else {
        tmp_length = cs->coll->strnxfrmlen(cs, cs->mbmaxlen*max(str.length() , nweights));
      }
      int is_valid_unicode_tmp = 1;
      char *out_buf = NULL;
      if (OB_ISNULL(out_buf = static_cast<char*>(expr_ctx.calc_buf_->alloc(tmp_length)))) {
        result.set_null();
        ret = OB_ALLOCATE_MEMORY_FAILED;
        LOG_ERROR("alloc memory failed",K(ret), K(out_buf), K(tmp_length));
      } else {
        // nweights == 0 means "not limited": fall back to the buffer length
        frm_length = cs->coll->strnxfrm(cs,
                                        reinterpret_cast<uchar *>(out_buf),
                                        tmp_length,
                                        nweights ? nweights: tmp_length,
                                        reinterpret_cast<const uchar *>(str.ptr()),
                                        str.length(),
                                        flags,
                                        &is_valid_unicode_tmp);
        result.set_varchar(out_buf,frm_length);
      }
    }
  }
  return ret;
}
int ObExprWeightString::eval_weight_string(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum)
{
int ret = OB_SUCCESS;
ObDatum *arg = NULL;
ObDatum *result_length_arg = NULL;
ObDatum *nweights_arg = NULL;
ObDatum *flags_arg = NULL;
ObDatum *as_binary_arg = NULL;
if (OB_FAIL(expr.args_[0]->eval(ctx, arg)) ||
OB_FAIL(expr.args_[1]->eval(ctx, result_length_arg)) ||
OB_FAIL(expr.args_[2]->eval(ctx, nweights_arg)) ||
OB_FAIL(expr.args_[3]->eval(ctx, flags_arg)) ||
OB_FAIL(expr.args_[4]->eval(ctx, as_binary_arg))) {
LOG_WARN("eval arg failed", K(ret),
KP(arg),
KP(result_length_arg),
KP(nweights_arg),
KP(flags_arg),
KP(as_binary_arg));
} else if (arg->is_null() ||
arg->get_string() == NULL ||
expr.args_[0]->datum_meta_.type_ <= ObUNumberType ) {
// The input string is NULL or numeric
res_datum.set_null();
} else {
const ObString str = arg->get_string();
int result_length = result_length_arg->get_int();
int nweights = nweights_arg->get_int();
int flags = flags_arg->get_int();
bool as_binary = as_binary_arg->get_int();
// Get the character set and collation information of the input string
ObCollationType collation_type = CS_TYPE_INVALID;
if (as_binary) {
collation_type = CS_TYPE_BINARY;
} else {
collation_type = expr.args_[0]->datum_meta_.cs_type_;
}
const ObCharsetInfo *cs = ObCharset::get_charset(collation_type);
flags = ob_strxfrm_flag_normalize(flags, cs->levels_for_order);
// calc the length of result
size_t frm_length = 0;
size_t tmp_length = 0;
if (result_length > 0) {
tmp_length = result_length;
} else {
tmp_length = cs->coll->strnxfrmlen(cs, cs->mbmaxlen*max(str.length() , nweights));
}
int is_valid_unicode_tmp = 1;
char *out_buf = expr.get_str_res_mem(ctx, tmp_length);
if (OB_ISNULL(out_buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
} else {
frm_length = cs->coll->strnxfrm(cs,
reinterpret_cast<uchar *>(out_buf),
tmp_length,
nweights ? nweights: tmp_length,
reinterpret_cast<const uchar *>(str.ptr()),
str.length(),
flags,
&is_valid_unicode_tmp);
res_datum.set_string(out_buf,frm_length);
}
}
return ret;
}
// Code generation hook: binds the runtime expression to eval_weight_string().
// Neither the code generation context nor the raw expression is consulted.
int ObExprWeightString::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr,
                                ObExpr &rt_expr) const
{
  int ret = OB_SUCCESS;
  UNUSED(raw_expr);
  UNUSED(op_cg_ctx);
  rt_expr.eval_func_ = ObExprWeightString::eval_weight_string;
  return ret;
}
// Normalize WEIGHT_STRING() flags against the number of levels a collation
// supports.
//
// @param flags    raw flags: level bits, DESC/REVERSE bits per level, pad bits
// @param maximum  number of levels supported by the collation
// @return flags in which
//   - if no level bit was given, levels 1..maximum are selected and only the
//     pad bits of the input are kept;
//   - otherwise every requested level above `maximum` is folded into level
//     `maximum`, the DESC/REVERSE bits are kept for the resulting level bits,
//     and the pad bits are kept.
//
// `maximum` is clamped to OB_STRXFRM_NLEVELS so that the default-level table
// is never indexed out of bounds.
uint64_t ObExprWeightString::ob_strxfrm_flag_normalize(uint64_t flags, uint64_t maximum)
{
  // Levels above OB_STRXFRM_NLEVELS do not exist; clamping does not change the
  // folding below because every level index is already < OB_STRXFRM_NLEVELS.
  if (maximum > OB_STRXFRM_NLEVELS) {
    maximum = OB_STRXFRM_NLEVELS;
  }
  const uint64_t flag_pad = flags & (OB_STRXFRM_PAD_WITH_SPACE | OB_STRXFRM_PAD_TO_MAXLEN);
  /* If levels are omitted, then 1-maximum is assumed*/
  if (!(flags & OB_STRXFRM_LEVEL_ALL)) {
    static const uint64_t def_level_flags[] = {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F};
    flags = def_level_flags[maximum] | flag_pad;
  } else {
    const uint64_t flag_lev = flags & OB_STRXFRM_LEVEL_ALL;
    const uint64_t flag_dsc = (flags >> OB_STRXFRM_DESC_SHIFT) & OB_STRXFRM_LEVEL_ALL;
    const uint64_t flag_rev = (flags >> OB_STRXFRM_REVERSE_SHIFT) & OB_STRXFRM_LEVEL_ALL;
    /*
      If any level number is greater than the maximum,
      it is treated as the maximum.
    */
    maximum--;  // highest zero-based level index; wraps for 0, which disables folding
    flags = 0;
    for (uint64_t i = 0; i < OB_STRXFRM_NLEVELS; i++) {
      const uint64_t src_bit = static_cast<uint64_t>(1) << i;
      if (flag_lev & src_bit) {
        const uint64_t dst_bit = static_cast<uint64_t>(1) << std::min(i, maximum);
        flags |= dst_bit;
        flags |= (flag_dsc & dst_bit) << OB_STRXFRM_DESC_SHIFT;
        flags |= (flag_rev & dst_bit) << OB_STRXFRM_REVERSE_SHIFT;
      }
    }
    flags |= flag_pad;
  }
  return flags;
}
}
}
/*
* Copyright (c) 2021 OceanBase
* OceanBase is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_WEGHT_STRING_H_
#define OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_WEGHT_STRING_H_
#include "sql/engine/expr/ob_expr_operator.h"
#include "share/object/ob_obj_cast.h"
namespace oceanbase
{
namespace sql
{
// Expression operator for the WEIGHT_STRING() system function.
// It is a string expression operator; the parameter list it works on has
// five entries: input, result length, nweights, flags, as_binary marker.
class ObExprWeightString : public ObStringExprOperator
{
public:
explicit ObExprWeightString(common::ObIAllocator &alloc);
virtual ~ObExprWeightString() override;
// Computes the sort key from ObObj parameters; see the definition for the
// parameter layout.
virtual int calc_resultN(common::ObObj &result,
const common::ObObj *objs,
int64_t param_num,
common::ObExprCtx &expr_ctx) const override;
// Deduces the result type (a varchar with binary collation) and its length.
virtual int calc_result_typeN(ObExprResType &type,
ObExprResType *types,
int64_t param_num,
common::ObExprTypeCtx &type_ctx) const override;
// Evaluation function installed by cg_expr(); the last parameter receives
// the result datum.
static int eval_weight_string(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &epr_datum);
// Binds rt_expr.eval_func_ to eval_weight_string().
virtual int cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const override;
private:
// Normalizes level/DESC/REVERSE/pad flags against the number of levels
// (`maximum`) supported by the collation.
static uint64_t ob_strxfrm_flag_normalize(uint64_t flags, uint64_t maximum);
DISALLOW_COPY_AND_ASSIGN(ObExprWeightString);
};
}
}
#endif /* OCEANBASE_SQL_ENGINE_EXPR_OB_EXPR_WEGHT_STRING_H_ */
\ No newline at end of file
......@@ -494,6 +494,7 @@ struct ObSqlTraits {
bool is_modify_tenant_stmt_;
bool is_cause_implicit_commit_;
bool is_commit_stmt_;
bool has_weight_string_func_stmt_; // sql中是否包含weight_string函数
ObItemType stmt_type_;
ObSqlTraits();
......@@ -504,10 +505,11 @@ struct ObSqlTraits {
is_modify_tenant_stmt_ = false;
is_cause_implicit_commit_ = false;
is_commit_stmt_ = false;
has_weight_string_func_stmt_ = false;
stmt_type_ = T_INVALID;
}
TO_STRING_KV(
K(is_readonly_stmt_), K(is_modify_tenant_stmt_), K(is_cause_implicit_commit_), K(is_commit_stmt_), K(stmt_type_));
K(is_readonly_stmt_), K(is_modify_tenant_stmt_), K(is_cause_implicit_commit_), K(is_commit_stmt_),K(has_weight_string_func_stmt_), K(stmt_type_));
};
template <typename ValueType>
......
......@@ -436,6 +436,7 @@ typedef enum ObItemType {
T_FUN_SYS_IS_IPV4_MAPPED = 726,
T_FUN_SYS_IS_IPV4_COMPAT = 727,
T_FUN_SYS_INETATON = 728,
T_FUN_SYS_WEIGHT_STRING = 729,
T_FUN_SYS_CRC32 = 730,
///< @note add new mysql only function type before this line
T_MYSQL_ONLY_SYS_MAX_OP = 800,
......
......@@ -430,6 +430,8 @@ END_P SET_VAR DELIMITER
%type <node> opt_force_purge
%type <node> opt_sql_throttle_for_priority opt_sql_throttle_using_cond sql_throttle_one_or_more_metrics sql_throttle_metric get_format_unit
%type <node> opt_copy_id opt_backup_dest opt_preview opt_backup_backup_dest opt_tenant_info opt_with_active_piece
%type <node> ws_nweights opt_ws_as_char opt_ws_levels ws_level_flag_desc ws_level_flag_reverse ws_level_flags ws_level_list ws_level_list_item ws_level_number ws_level_range ws_level_list_or_range
%start sql_stmt
%%
////////////////////////////////////////////////////////////////
......@@ -2496,6 +2498,56 @@ MOD '(' expr ',' expr ')'
{
$$ = $1;
}
| WEIGHT_STRING '(' expr opt_ws_as_char opt_ws_levels ')'
{
ParseNode *zeroNode1 = NULL;
malloc_terminal_node(zeroNode1, result->malloc_pool_, T_INT);
zeroNode1->value_ = 0;
zeroNode1->is_hidden_const_ = 1;
if($4->value_ > 0){
$5->value_ |= OB_STRXFRM_PAD_WITH_SPACE;
}
ParseNode *falseNode = NULL;
malloc_terminal_node(falseNode, result->malloc_pool_, T_INT);
falseNode->value_ = 0;
falseNode->is_hidden_const_ = 1;
ParseNode *params = NULL;
malloc_non_terminal_node(params, result->malloc_pool_, T_EXPR_LIST , 5, $3 , zeroNode1 , $4, $5 ,falseNode);
make_name_node($$, result->malloc_pool_, "weight_string");
malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params);
}
| WEIGHT_STRING '(' expr AS BINARY ws_nweights ')'
{
ParseNode *zeroNode1 = NULL;
malloc_terminal_node(zeroNode1, result->malloc_pool_, T_INT);
zeroNode1->value_ = 0;
zeroNode1->is_hidden_const_ = 1;
ParseNode *padNode = NULL;
malloc_terminal_node(padNode, result->malloc_pool_, T_INT);
padNode->value_ = OB_STRXFRM_PAD_WITH_SPACE;
padNode->is_hidden_const_ = 1;
ParseNode *trueNode = NULL;
malloc_terminal_node(trueNode, result->malloc_pool_, T_INT);
trueNode->value_ = 1;
trueNode->is_hidden_const_ = 1;
ParseNode *params = NULL;
malloc_non_terminal_node(params, result->malloc_pool_, T_EXPR_LIST , 5, $3 , zeroNode1 , $6, padNode ,trueNode);
make_name_node($$, result->malloc_pool_, "weight_string");
malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params);
}
| WEIGHT_STRING '(' expr ',' INTNUM ',' INTNUM ',' INTNUM ',' INTNUM ')'
{
ParseNode *params = NULL;
malloc_non_terminal_node(params, result->malloc_pool_, T_EXPR_LIST , 5, $3 , $5 , $7, $9 ,$11);
make_name_node($$, result->malloc_pool_, "weight_string");
malloc_non_terminal_node($$, result->malloc_pool_, T_FUN_SYS, 2, $$, params);
}
;
sys_interval_func:
......@@ -2745,6 +2797,160 @@ opt_separator:
malloc_non_terminal_node($$, result->malloc_pool_, T_SEPARATOR_CLAUSE, 1, $2);
}
;
/* Optional "AS CHARACTER (N)" clause of WEIGHT_STRING():
   yields the ws_nweights node when present, otherwise a hidden-constant
   T_INT node with value 0. */
opt_ws_as_char:
/* EMPTY */
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->is_hidden_const_ = 1;
$$->value_ = 0;
$$->param_num_ = 1;
}
| AS CHARACTER ws_nweights
{
$$ = $3;
}
;
/* Optional "LEVEL ..." clause of WEIGHT_STRING():
   yields the flag node built by ws_level_list_or_range, otherwise a
   hidden-constant T_INT node with value 0 (no level requested). */
opt_ws_levels:
/* EMPTY */
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->is_hidden_const_ = 1;
$$->value_ = 0;
$$->param_num_ = 1;
}
| LEVEL ws_level_list_or_range
{
(void)($1);
$$ = $2;
}
;
/* A LEVEL specification is either a comma separated list or a range;
   the flag node of the chosen alternative is passed through unchanged. */
ws_level_list_or_range:
ws_level_list
{
$$ = $1;
}
| ws_level_range
{
$$ = $1;
}
;
/* Comma separated list of level items; the flag values of all items are
   OR-ed together into a new T_INT node. */
ws_level_list:
ws_level_list_item
{
$$ = $1;
}
| ws_level_list ',' ws_level_list_item
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = $3->value_ | $1->value_;
$$->param_num_ = 1;
}
;
/* One level with its optional ASC/DESC/REVERSE flags.
   $1 is the zero-based level index; bit 0 (the level bit) and the flag bits
   of $2 are shifted left by that index. */
ws_level_list_item:
ws_level_number ws_level_flags
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = (1 | $2->value_) << $1->value_ ;
$$->param_num_ = 1;
}
;
/* Level range "a - b": sets the level bit of every zero-based index from
   a to b inclusive. When b is smaller than a, only level a is selected. */
ws_level_range:
ws_level_number '-' ws_level_number
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
uint32_t res = 0;
uint32_t start = $1->value_ ;
uint32_t end = $3->value_ ;
if (end < start) {
end = start;
}
for ( ; start <= end; start++) {
res |= (1 << start);
}
$$->value_ = res;
$$->param_num_ = 1;
}
;
/* A level number: the INTNUM is clamped to [1, OB_STRXFRM_NLEVELS] and then
   converted to a zero-based index (value - 1). */
ws_level_number:
INTNUM
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
if ($1->value_ < 1) {
$$->value_ = 1;
} else if ($1->value_ > OB_STRXFRM_NLEVELS) {
$$->value_ = OB_STRXFRM_NLEVELS;
} else{
$$->value_ = $1->value_;
}
$$->value_ = $$->value_ - 1;
$$->param_num_ = 1;
}
;
/* Optional per-level flags: nothing (value 0), ASC/DESC, REVERSE, or
   ASC/DESC followed by REVERSE (both values OR-ed). */
ws_level_flags:
/* empty */
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = 0;
$$->param_num_ = 1;
}
| ws_level_flag_desc
{
$$= $1;
}
| ws_level_flag_desc ws_level_flag_reverse
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = $1->value_ | $2->value_;
$$->param_num_ = 1;
}
| ws_level_flag_reverse
{
$$ = $1 ;
}
;
/* Parenthesized weight count "(N)"; values below 1 are rejected with a
   parse error, otherwise the INTNUM node itself is returned. */
ws_nweights:
'(' INTNUM ')'
{
if ($2->value_ < 1) {
yyerror(&@1, result, "Incorrect arguments to WEIGHT_STRING()\n");
YYABORT_PARSE_SQL_ERROR;
}
$$ = $2;
}
;
/* Ordering flag of a level: ASC contributes no bit, DESC contributes the
   level-1 DESC bit (1 << OB_STRXFRM_DESC_SHIFT), which ws_level_list_item
   later shifts to the actual level. */
ws_level_flag_desc:
ASC
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = 0;
$$->param_num_ = 1;
}
| DESC
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = 1 << OB_STRXFRM_DESC_SHIFT;
$$->param_num_ = 1;
}
;
/* REVERSE flag of a level: contributes the level-1 REVERSE bit
   (1 << OB_STRXFRM_REVERSE_SHIFT), which ws_level_list_item later shifts to
   the actual level. */
ws_level_flag_reverse:
REVERSE
{
malloc_terminal_node($$, result->malloc_pool_, T_INT);
$$->value_ = 1 << OB_STRXFRM_REVERSE_SHIFT;
$$->param_num_ = 1;
}
;
/*****************************************************************************
*
......@@ -14253,4 +14459,4 @@ int obsql_mysql_fast_parse(ParseResult *p)
p->no_param_sql_[len] = '\0';
}
return ret;
}
}
\ No newline at end of file
......@@ -380,6 +380,7 @@ const char* get_type_name(int type)
case T_FUN_SYS_IS_IPV4_COMPAT : return "T_FUN_SYS_IS_IPV4_COMPAT";
case T_FUN_SYS_INETATON : return "T_FUN_SYS_INETATON";
case T_FUN_SYS_CRC32 : return "T_FUN_SYS_CRC32";
case T_FUN_SYS_WEIGHT_STRING : return "T_FUN_SYS_WEIGHT_STRING";
case T_MYSQL_ONLY_SYS_MAX_OP : return "T_MYSQL_ONLY_SYS_MAX_OP";
case T_FUN_SYS_CONNECT_BY_PATH : return "T_FUN_SYS_CONNECT_BY_PATH";
case T_FUN_SYS_SYSTIMESTAMP : return "T_FUN_SYS_SYSTIMESTAMP";
......
......@@ -538,7 +538,7 @@ int ObSqlParameterization::transform_tree(TransformTreeCtx& ctx, const ObSQLSess
}
if (OB_SUCC(ret)) {
if (OB_FAIL(mark_tree(ctx.tree_))) {
if (OB_FAIL(mark_tree(ctx.tree_ , *ctx.sql_info_))) {
SQL_PC_LOG(WARN, "fail to mark function tree", K(ctx.tree_), K(ret));
}
}
......@@ -593,12 +593,17 @@ int ObSqlParameterization::check_and_generate_param_info(
if (sql_info.total_ != raw_params.count()) {
ret = OB_NOT_SUPPORTED;
#if !defined(NDEBUG)
SQL_PC_LOG(ERROR,
if ( sql_info.sql_traits_.has_weight_string_func_stmt_ ) {
// do nothing
}
else {
SQL_PC_LOG(ERROR,
"const number of fast parse and normal parse is different",
"fast_parse_const_num",
raw_params.count(),
"normal_parse_const_num",
sql_info.total_);
}
#endif
}
ObPCParam* pc_param = NULL;
......@@ -1059,7 +1064,7 @@ int ObSqlParameterization::mark_args(ParseNode* arg_tree, const bool* mark_arr,
// After mark this node, it has following mechanism:
// If a node is marked as cannot be parameterized,
// CUREENT NODE AND ALL NODES OF IT'S SUBTREE cannot be parameterized.
int ObSqlParameterization::mark_tree(ParseNode* tree)
int ObSqlParameterization::mark_tree(ParseNode *tree ,SqlInfo &sql_info)
{
int ret = OB_SUCCESS;
if (NULL == tree) {
......@@ -1086,6 +1091,14 @@ int ObSqlParameterization::mark_tree(ParseNode* tree)
if (OB_FAIL(mark_args(node[1], mark_arr, ARGS_NUMBER_THREE))) {
SQL_PC_LOG(WARN, "fail to mark substr arg", K(ret));
}
}else if (0 == func_name.case_compare("weight_string")
&& (5 == node[1]->num_child_)) {
const int64_t ARGS_NUMBER_FIVE = 5;
bool mark_arr[ARGS_NUMBER_FIVE] = {0, 1, 1, 1, 1}; //0表示参数化, 1 表示不参数化
sql_info.sql_traits_.has_weight_string_func_stmt_ = true;
if (OB_FAIL(mark_args(node[1], mark_arr, ARGS_NUMBER_FIVE))) {
SQL_PC_LOG(WARN, "fail to mark weight_string arg", K(ret));
}
} else if ((0 == func_name.case_compare("str_to_date") // STR_TO_DATE(str,format)
|| 0 == func_name.case_compare("date_format") // DATE_FORMAT(date,format)
||
......
......@@ -113,7 +113,7 @@ private:
static int add_not_param_flag(const ParseNode* node, SqlInfo& sql_info);
static int add_varchar_charset(const ParseNode* node, SqlInfo& sql_info);
static int mark_args(ParseNode* arg_tree, const bool* mark_arr, int64_t arg_num);
static int mark_tree(ParseNode* tree);
static int mark_tree(ParseNode *tree, SqlInfo &sql_info);
static int get_related_user_vars(const ParseNode* tree, common::ObIArray<common::ObString>& user_vars);
static int get_select_item_param_info(const common::ObIArray<ObPCParam*>& raw_params, ParseNode* tree,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册