// vnode.h
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef TDENGINE_VNODE_H
#define TDENGINE_VNODE_H

#ifdef __cplusplus
extern "C" {
#endif

#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdint.h>
#include <syslog.h>

#include "tglobalcfg.h"
#include "tidpool.h"
#include "tlog.h"
#include "tmempool.h"
#include "trpc.h"
#include "tsclient.h"
#include "tsdb.h"
#include "tsdb.h"
#include "tsocket.h"
#include "ttime.h"
#include "ttimer.h"
#include "tutil.h"
#include "vnodeCache.h"
#include "vnodeFile.h"
#include "vnodeShell.h"

// file header sizes (NOTE(review): presumably in bytes -- confirm with the file code)
#define TSDB_FILE_HEADER_LEN 512
#define TSDB_FILE_HEADER_VERSION_SIZE 32

// cache position field: the mask 0x1FFF has exactly the low 13 (TSDB_CACHE_POS_BITS) bits set
#define TSDB_CACHE_POS_BITS 13
#define TSDB_CACHE_POS_MASK 0x1FFF

// action codes, numbered from 0; TSDB_ACTION_MAX is one past the last action code
#define TSDB_ACTION_INSERT 0
#define TSDB_ACTION_IMPORT 1
#define TSDB_ACTION_DELETE 2
#define TSDB_ACTION_UPDATE 3
#define TSDB_ACTION_MAX 4

// Data source identifiers; the values are spelled out and match the implicit 0..4 numbering.
enum _data_source {
  TSDB_DATA_SOURCE_METER = 0,
  TSDB_DATA_SOURCE_VNODE = 1,
  TSDB_DATA_SOURCE_SHELL = 2,
  TSDB_DATA_SOURCE_QUEUE = 3,
  TSDB_DATA_SOURCE_LOG   = 4,
};

// Sync command codes; the values are spelled out and match the implicit 0..3 numbering.
enum _sync_cmd {
  TSDB_SYNC_CMD_FILE   = 0,
  TSDB_SYNC_CMD_CACHE  = 1,
  TSDB_SYNC_CMD_CREATE = 2,
  TSDB_SYNC_CMD_REMOVE = 3,
};

// Meter state codes; the values are spelled out and match the implicit 0..4 numbering.
enum _meter_state {
  TSDB_METER_STATE_READY     = 0,
  TSDB_METER_STATE_IMPORTING = 1,
  TSDB_METER_STATE_UPDATING  = 2,
  TSDB_METER_STATE_DELETING  = 3,
  TSDB_METER_STATE_DELETED   = 4,
};

// An (offset, length) pair stored as two bit-fields: 48 + 16 = 64 bits in total.
// SVnodeObj.meterIndex points to an array of these.
// NOTE(review): both fields are declared with a signed type; how int64_t bit-fields are
// packed and signed is implementation-defined -- confirm the intended value ranges.
typedef struct {
  int64_t offset : 48;
  int64_t length : 16;
} SMeterObjHeader;

// Variable-length buffer: a 64-bit `len` followed by a flexible array member `data`.
// NOTE(review): presumably `len` is the number of bytes in `data` -- confirm at the allocation sites.
typedef struct {
  int64_t len;
  char    data[];
} SData;

// Per-vnode state. The global `vnodeList` declared in this header points to SVnodeObj entries.
// The fields fall into groups: identity/config/peers, commit bookkeeping, stream state,
// file descriptors and file names, log file state, and runtime handles/counters.
typedef struct {
  int       vnode;
  SVnodeCfg cfg;
  // SDiskDesc   tierDisk[TSDB_MAX_TIER];
  SVPeerDesc          vpeers[TSDB_VNODES_SUPPORT];
  SVnodeStatisticInfo vnodeStatistic;
  char                selfIndex;
  char                status;
  char                accessState;  // Vnode access state, Readable/Writable
  char                syncStatus;
  char                commitInProcess;
  pthread_t           commitThread;
  // minimum uncommitted key; it may be smaller than commitFirstKey
  TSKEY               firstKey;
  // minimum key for a commit file; it shall be xxxx00000, calculated from fileId
  TSKEY commitFirstKey;
  // maximum key for a commit file; it shall be xxxx99999, calculated from fileId
  TSKEY commitLastKey;
  int   commitFileId;
  TSKEY lastCreate;
  TSKEY lastRemove;
  // last key for the whole vnode, updated by every insert operation
  TSKEY lastKey;
  uint64_t version;

  // stream state
  int   streamRole;
  int   numOfStreams;
  void *streamTimer;

  // data files
  TSKEY           lastKeyOnFile;  // maximum key on the last file; it shall be xxxx99999
  int             fileId;
  int             badFileId;
  int             numOfFiles;
  int             maxFiles;
  int             maxFile1;
  int             maxFile2;
  int             nfd;  // temp head file FD
  int             hfd;  // head file FD
  int             lfd;  // last file FD
  int             tfd;  // temp last file FD
  int             dfd;  // data file FD
  int64_t         dfSize;
  int64_t         lfSize;
  uint64_t *      fmagic;  // hold magic number for each file
  char            cfn[TSDB_FILENAME_LEN];
  char            nfn[TSDB_FILENAME_LEN];
  char            lfn[TSDB_FILENAME_LEN];  // last file name
  char            tfn[TSDB_FILENAME_LEN];  // temp last file name
  pthread_mutex_t vmutex;

  // log file state
  // NOTE(review): pMem/pWrite/mappingSize suggest a memory-mapped log -- confirm in the commit code
  int             logFd;
  char *          pMem;
  char *          pWrite;
  pthread_mutex_t logMutex;
  char            logFn[TSDB_FILENAME_LEN];
  char            logOFn[TSDB_FILENAME_LEN];
  int64_t         mappingSize;
  int64_t         mappingThreshold;

  // runtime handles and counters
  void *         commitTimer;
  void **        meterList;
  void *         pCachePool;
  void *         pQueue;
  pthread_t      thread;
  int            peersOnline;
  int            shellConns;
  int            meterConns;
  struct _qinfo *pQInfoList;

  TAOS *           dbConn;
  SMeterObjHeader *meterIndex;
} SVnodeObj;

// One column entry; SMeterObj.schema points to SColumn.
// NOTE(review): presumably `bytes` is the width of one value and `type` a data-type code -- confirm.
typedef struct SColumn {
  short colId;
  short bytes;
  char  type;
} SColumn;

// Per-meter object: identity (uid, meterId, sid, vnode), schema sizing, key bookkeeping,
// packed algorithm/state bit-fields and runtime pointers (SQL text, stream, cache, schema).
// NOTE(review): the four bit-fields use plain `char`, whose signedness as a bit-field is
// implementation-defined -- confirm that no value relies on the top bit.
typedef struct _meter_obj {
  uint64_t uid;
  char     meterId[TSDB_METER_ID_LEN];
  int      sid;
  short    vnode;
  short    numOfColumns;
  short    bytesPerPoint;
  short    maxBytes;
  int32_t  pointsPerBlock;
  int32_t  pointsPerFileBlock;
  int      freePoints;
  TSKEY    lastKey;        // updated by insert operation
  TSKEY    lastKeyOnFile;  // last key on file, updated by commit action
  TSKEY    timeStamp;      // delete or added time
  uint64_t commitCount;
  int32_t  sversion;
  short    sqlLen;
  char     searchAlgorithm : 4;
  char     compAlgorithm : 4;
  char     state : 5;   // deleted or added, 1: added
  char     status : 3;  // 0: ok, 1: stop stream computing

  char     reserved[16];
  int      numOfQueries;
  char *   pSql;
  void *   pStream;
  void *   pCache;
  SColumn *schema;
} SMeterObj;

// Message header followed by a variable-length payload in the flexible array member `cont`.
// NOTE(review): presumably `len` is the size of `cont` and `action` holds a TSDB_ACTION_* code -- confirm.
typedef struct {
  char     type;
  char     pversion;  // protocol version
  char     action;    // insert, import, delete, update
  int32_t  sversion;  // only for insert
  int32_t  sid;
  int32_t  len;
  uint64_t lastVersion;  // latest version
  char     cont[];
} SVMsgHeader;

/*
 * QInfo.signature protocol.
 *
 * Every macro below takes a pointer `x` to a structure with a 64-bit
 * `signature` member. `x` is expanded more than once, so it must not have
 * side effects.
 *
 * The signature tells whether a query is executing, in which case it is not
 * safe to release the QInfo yet:
 *   signature == (uint64_t)x             idle, the structure can be released safely
 *   signature == TSDB_QINFO_QUERY_FLAG   a query is executing
 *
 * TSDB_QINFO_RESET_SIG(x)
 *   Plain (non-atomic) store of the object's own address into the signature.
 *
 * TSDB_QINFO_SET_QUERY_FLAG(x)
 *   Atomic compare-and-swap from "own address" to TSDB_QINFO_QUERY_FLAG.
 *   Evaluates to the value the signature held before the call, so the swap
 *   happened iff the result equals (uint64_t)x. The expansion carries no
 *   trailing semicolon and can therefore be used inside an expression.
 *
 * TSDB_WAIT_TO_SAFE_DROP_QINFO(x)
 *   Atomically swaps the signature from "own address" to 0. While the
 *   signature is TSDB_QINFO_QUERY_FLAG (the query has not reached a safe point
 *   yet) it sleeps 1 ms via taosMsleep() and retries. The caller releases the
 *   resources of the query afterwards.
 */
#define TSDB_QINFO_QUERY_FLAG 0x1
#define TSDB_QINFO_RESET_SIG(x) ((x)->signature = (uint64_t)(x))
#define TSDB_QINFO_SET_QUERY_FLAG(x) \
  __sync_val_compare_and_swap(&((x)->signature), (uint64_t)(x), TSDB_QINFO_QUERY_FLAG)

// Kept as a brace block (not do/while(0)) so that existing call sites, with or
// without a trailing semicolon, keep compiling. The compare value is cast to
// uint64_t to match the type of `signature`.
#define TSDB_WAIT_TO_SAFE_DROP_QINFO(x)                                                                  \
  {                                                                                                      \
    while (__sync_val_compare_and_swap(&((x)->signature), (uint64_t)(x), 0) == TSDB_QINFO_QUERY_FLAG) { \
      taosMsleep(1);                                                                                     \
    }                                                                                                    \
  }

// forward declaration only; the structure is defined elsewhere
struct tSQLBinaryExpr;

// Filter description for one column: the filter message plus two column indexes and the
// per-scan-round "required" flags.
typedef struct SColumnFilter {
  SColumnFilterMsg data;
  int16_t          colIdx;
  int16_t          colIdxInBuf;

  /*
   * req[0]: whether the column is required in the first round of scanning a data block
   * req[1]: whether the column is required in the secondary scan
   */
  int16_t req[2];
} SColumnFilter;

// Filter callback: receives the column filter and two value pointers and returns a bool.
// NOTE(review): `bool` requires <stdbool.h>, which this header does not include directly --
// presumably it arrives through one of the project headers; confirm.
typedef bool (*__filter_func_t)(SColumnFilter *pFilter, char *val1, char *val2);

// A column filter bundled with the function that evaluates it and the raw data it reads.
typedef struct SColumnFilterInfo {
  SColumnFilter   pFilter;
  int16_t         elemSize;  // element size in pData
  __filter_func_t fp;        // filter function
  char *          pData;     // raw data, as the input for filter function
} SColumnFilterInfo;

// State of one query: scan position (file handles, block/slot/pos), time range (skey/ekey),
// output description (expressions, filters, limits) and result buffers.
// Fields marked "only for query in file" / "only for import" are used only in that mode.
typedef struct {
  short       numOfCols;
  SOrderVal   order;
  char        keyIsMet;  // if key is met, it will be set
  char        over;
  int         fileId;  // only for query in file
  int         hfd;     // only for query in file, head file handle
  int         dfd;     // only for query in file, data file handle
  int         lfd;     // only for query in file, last file handle
  SCompBlock *pBlock;  // only for query in file
  SField **   pFields;
  int         numOfBlocks;      // only for query in file
  int         blockBufferSize;  // length of pBlock buffer
  int         currentSlot;
  int         firstSlot;
  int         slot;
  int         pos;
  TSKEY       key;
  int         compBlockLen;  // only for import
  int64_t     blockId;
  TSKEY       skey;
  TSKEY       ekey;
  int64_t     nAggTimeInterval;
  char        intervalTimeUnit;  // interval data type, used for daytime revise

  int16_t numOfOutputCols;
  int16_t interpoType;
  int16_t checkBufferInLoop;  // check if the buffer is full during scan each block

  SLimitVal limit;
  int32_t   rowSize;
  // row size of each loaded data from disk; the value is used to prepare the buffer
  int32_t   dataRowSize;
  SSqlGroupbyExpr * pGroupbyExpr;
  SSqlFunctionExpr *pSelectExpr;

  SColumnFilter *colList;

  int32_t            numOfFilterCols;
  SColumnFilterInfo *pFilterInfo;

  int64_t *defaultVal;

  TSKEY lastKey;
  // buffer info
  int64_t pointsRead;    // the number of points returned
  int64_t pointsToRead;  // maximum number of points to read
  int64_t pointsOffset;  // the number of points offset to save read data
  SData **sdata;
  SData * tsData;  // timestamp column/primary key column
} SQuery;

// Connection security settings: two one-byte settings and two TSDB_KEY_LEN-sized key buffers.
// vnodeCreateMeterObj() takes a pointer to this structure.
// NOTE(review): presumably `spi` and `encrypt` select the authentication / encryption mode -- confirm.
typedef struct {
  char spi;
  char encrypt;
  char secret[TSDB_KEY_LEN];
  char cipheringKey[TSDB_KEY_LEN];
} SConnSec;

// A buffer with a cursor, a counter and its own mutex.
// NOTE(review): presumably `offset` is the current position inside `buffer`, `trans` counts
// queued transactions and `qmutex` guards all fields -- confirm against the users of this type.
typedef struct {
  char *          buffer;
  char *          offset;
  int             trans;
  int             bufferSize;
  pthread_mutex_t qmutex;
} STranQueue;

// internal globals (declarations only; the objects are defined in the implementation files)

// sizes, limits and counters
extern int tsMeterSizeOnFile;
extern uint32_t tsRebootTime;
extern int tsMaxVnode;
extern int tsOpenVnodes;

// opaque handles
extern void *rpcQhandle;
extern void *dmQhandle;
extern void *queryQhandle;
extern void *vnodeTmrCtrl;

// vnode objects
extern SVnodeObj *vnodeList;

// read API

// table of key-search functions
// NOTE(review): presumably indexed by SMeterObj.searchAlgorithm -- confirm
extern int (*vnodeSearchKeyFunc[])(char *pValue, int num, TSKEY key, int order);

// query entry points; both return a void pointer and take an `int *code` out-parameter
void *vnodeQueryInTimeRange(SMeterObj **       pMeterObj,
                            SSqlGroupbyExpr *  pGroupbyExpr,
                            SSqlFunctionExpr * sqlExprs,
                            SQueryMeterMsg *   pQueryMsg,
                            int *              code);
void *vnodeQueryOnMultiMeters(SMeterObj **       pMeterObj,
                              SSqlGroupbyExpr *  pGroupbyExpr,
                              SSqlFunctionExpr * pSqlExprs,
                              SQueryMeterMsg *   pQueryMsg,
                              int *              code);

// assistant/tool functions
SSqlGroupbyExpr * vnodeCreateGroupbyExpr(SQueryMeterMsg *pQuery, int32_t *code);
SSqlFunctionExpr *vnodeCreateSqlFunctionExpr(SQueryMeterMsg *pQuery, int32_t *code);
bool              vnodeValidateExprColumnInfo(SQueryMeterMsg *pQueryMsg, SSqlFuncExprMsg *pExprMsg);
bool              vnodeIsValidVnodeCfg(SVnodeCfg *pCfg);
int32_t           vnodeConvertQueryMeterMsg(SQueryMeterMsg *pQuery);

// functions that take an untyped handle
int32_t vnodeGetResultSize(void *handle, int32_t *numOfRows);
int32_t vnodeCopyQueryResultToMsg(void *handle, char *data, int32_t numOfRows);
int64_t vnodeGetOffsetVal(void *thandle);
bool    vnodeHasRemainResults(void *handle);
int     vnodeRetrieveQueryResult(void *handle, int *pNum, char *argv[]);
int     vnodeSaveQueryResult(void *handle, char *data);
int     vnodeRetrieveQueryInfo(void *handle, int *numOfRows, int *rowSize, int16_t *timePrec);

// QInfo lifetime
void vnodeFreeQInfo(void *, bool);
void vnodeFreeQInfoInQueue(void *param);
bool vnodeIsQInfoValid(void *param);

// takes a scheduler message (SSchedMsg)
void vnodeQueryData(SSchedMsg *pMsg);

// meter API

// per-vnode open/close
int  vnodeOpenMetersVnode(int vnode);
void vnodeCloseMetersVnode(int vnode);

// meter object creation/removal
int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec);
int vnodeRemoveMeterObj(int vnode, int sid);

// point insertion; vnodeInsertPoints and vnodeImportPoints share one signature
int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *, int sversion,
                      int *numOfPoints);
int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, void *, int sversion,
                      int *numOfPoints);
int vnodeInsertBufferedPoints(int vnode);

// saving meter objects and vnode configuration
int vnodeSaveAllMeterObjToFile(int vnode);
int vnodeSaveMeterObjToFile(SMeterObj *pObj);
int vnodeSaveVnodeCfg(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc);
int vnodeSaveVnodeInfo(int vnode);

// cache API

// cache pool of a vnode
void *   vnodeOpenCachePool(int vnode);
void     vnodeCloseCachePool(int vnode);
uint64_t vnodeGetPoolCount(SVnodeObj *pVnode);

// per-meter cache info and cache blocks
void *vnodeAllocateCacheInfo(SMeterObj *pObj);
void  vnodeFreeCacheInfo(SMeterObj *pObj);
int   vnodeAllocateCacheBlock(SMeterObj *pObj);
int   vnodeFreeCacheBlock(SCacheBlock *pCacheBlock);
int   vnodeIsCacheCommitted(SMeterObj *pObj);

// insert/query against the cache
int   vnodeInsertPointToCache(SMeterObj *pObj, char *pData);
int   vnodeQueryFromCache(SMeterObj *pObj, SQuery *pQuery);
void  vnodeSearchPointInCache(SMeterObj *pObj, SQuery *pQuery);
TSKEY vnodeGetFirstKey(int vnode);

// commit
void      vnodeSetCommitQuery(SMeterObj *pObj, SQuery *pQuery);
void      vnodeUpdateCommitInfo(SMeterObj *pObj, int slot, int pos, uint64_t count);
void      vnodeCommitOver(SVnodeObj *pVnode);
pthread_t vnodeCreateCommitThread(SVnodeObj *pVnode);
void      vnodeCancelCommit(SVnodeObj *pVnode);
void      vnodeProcessCommitTimer(void *param, void *tmrId);

// cache sync over a file descriptor
int vnodeSyncRetrieveCache(int vnode, int fd);
int vnodeSyncRestoreCache(int vnode, int fd);

// stream
void vnodeCloseStream(SVnodeObj *pVnode);

// file API

int vnodeInitFile(int vnode);

// query/search in file
int vnodeQueryFromFile(SMeterObj *pObj, SQuery *pQuery);
int vnodeSearchPointInFile(SMeterObj *pObj, SQuery *pQuery);

// commit to file; vnodeCommitToFile has a thread-entry shaped signature (void * -> void *)
void *vnodeCommitToFile(void *param);
void *vnodeCommitMultiToFile(SVnodeObj *pVnode, int ssid, int esid);
int   vnodeOpenCommitFiles(SVnodeObj *pVnode, int noTempLast);
void  vnodeCloseCommitFiles(SVnodeObj *pVnode);

// file sync over a file descriptor
int vnodeSyncRetrieveFile(int vnode, int fd, uint32_t fileId, uint64_t *fmagic);
int vnodeSyncRestoreFile(int vnode, int sfd);

// block I/O
int vnodeWriteBlockToFile(SMeterObj *pObj, SCompBlock *pBlock, SData *data[], SData *cdata[],
                          int pointsRead);
int vnodeReadCompBlockToMem(SMeterObj *pObj, SQuery *pQuery, SData *sdata[]);
int vnodeReadLastBlockToMem(SMeterObj *pObj, SCompBlock *pBlock, SData *sdata[]);

// vnode API
//
// Changes relative to the previous revision of this section:
//  - vnodeCleanUpPeer is declared with (void): in C an empty parameter list does not
//    form a prototype, so the compiler could not check call arguments.
//  - the redundant re-declaration of vnodeSaveMeterObjToFile was dropped; it is
//    declared once, in the meter API section of this header.

// peer module init/cleanup
int vnodeInitPeer(int numOfThreads);

void vnodeCleanUpPeer(void);

// per-vnode peer open/close
int vnodeOpenPeerVnode(int vnode);

void vnodeClosePeerVnode(int vnode);

void *vnodeGetMeterPeerConnection(SMeterObj *pObj, int index);

// `action` is a char code; NOTE(review): presumably one of TSDB_ACTION_* -- confirm
int vnodeForwardToPeer(SMeterObj *pObj, char *msg, int msgLen, char action, int sversion);

void vnodeCloseAllSyncFds(int vnode);

void vnodeConfigVPeers(int vnode, int numOfPeers, SVPeerDesc peerDesc[]);

// sync
void vnodeStartSyncProcess(SVnodeObj *pVnode);

void vnodeCancelSync(int vnode);

void vnodeListPeerStatus(char *buffer);

void vnodeCheckOwnStatus(SVnodeObj *pVnode);

int vnodeRecoverFromPeer(SVnodeObj *pVnode, int fileId);

// vnodes API
//
// Functions that take no arguments are declared with (void): in C an empty parameter
// list does not form a prototype, so the compiler could not check call arguments.

int vnodeInitVnodes(void);

int vnodeInitStore(void);

void vnodeCleanUpVnodes(void);

// create/remove one vnode, identified by its number
void vnodeRemoveVnode(int vnode);

int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc);

// create/remove the stream of one meter object
void vnodeCreateStream(SMeterObj *pObj);

void vnodeRemoveStream(SMeterObj *pObj);

// shell API
//
// Functions that take no arguments are declared with (void): in C an empty parameter
// list does not form a prototype, so the compiler could not check call arguments.

int vnodeInitShell(void);

void vnodeCleanUpShell(void);

// per-vnode open/close, identified by vnode number
int vnodeOpenShellVnode(int vnode);

void vnodeCloseShellVnode(int vnode);

// mgmt
//
// Functions that take no arguments are declared with (void): in C an empty parameter
// list does not form a prototype, so the compiler could not check call arguments.

int vnodeInitMgmt(void);

void vnodeCleanUpMgmt(void);

// missed create/remove messages, exchanged through a file descriptor
int vnodeRetrieveMissedCreateMsg(int vnode, int fd, uint64_t stime);

int vnodeRestoreMissedCreateMsg(int vnode, int fd);

int vnodeRetrieveMissedRemoveMsg(int vid, int fd, uint64_t stime);

int vnodeRestoreMissedRemoveMsg(int vnode, int fd);

int vnodeProcessBufferedCreateMsgs(int vnode);

// configuration messages
int vnodeSendVpeerCfgMsg(int vnode);

int vnodeSendMeterCfgMsg(int vnode, int sid);

int vnodeMgmtConns(void);

// commit

// per-vnode commit init/cleanup and commit log handling
int  vnodeInitCommit(int vnode);
void vnodeCleanUpCommit(int vnode);
int  vnodeRenewCommitLog(int vnode);
void vnodeRemoveCommitLog(int vnode);
int  vnodeWriteToCommitLog(SMeterObj *pObj, char action, char *cont, int contLen, int sversion);

// table of action handlers; the element signature matches vnodeInsertPoints / vnodeImportPoints
// NOTE(review): presumably indexed by the TSDB_ACTION_* codes -- confirm
extern int (*vnodeProcessAction[])(SMeterObj *, char *, int, char, void *, int, int *);

// tables of compression / decompression functions; both share one parameter list
// NOTE(review): the index is presumably a column data type -- confirm at the definition
extern int (*pCompFunc[])(const char *const input, int inputSize, const int elements,
                          char *const output, int outputSize, char algorithm,
                          char *const buffer, int bufferSize);
extern int (*pDecompFunc[])(const char *const input, int compressedSize, const int elements,
                            char *const output, int outputSize, char algorithm,
                            char *const buffer, int bufferSize);

// global variables provided by mgmt (declarations only)
extern char mgmtStatus;
extern char mgmtDirectory[];  // incomplete array type: the size is known only at the definition
extern const int16_t vnodeFileVersion;

#ifdef __cplusplus
}
#endif

#endif  // TDENGINE_VNODE_H