output_room_events_table.go 9.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15 16 17
package storage

import (
18
	"context"
19 20
	"database/sql"

21
	log "github.com/sirupsen/logrus"
22
	"github.com/lib/pq"
23
	"github.com/matrix-org/dendrite/common"
R
Robert Swain 已提交
24
	"github.com/matrix-org/dendrite/syncapi/types"
K
Kegsay 已提交
25
	"github.com/matrix-org/gomatrixserverlib"
26 27 28
)

// outputRoomEventsSchema is the SQL executed by prepare before any statement is
// prepared. It creates, if they do not already exist, the syncapi_stream_id
// sequence, the syncapi_output_room_events table whose id column defaults to
// the next value of that sequence, and a unique index on event_id.
const outputRoomEventsSchema = `
-- This sequence is shared between all the tables generated from kafka logs.
CREATE SEQUENCE IF NOT EXISTS syncapi_stream_id;

-- Stores output room events received from the roomserver.
CREATE TABLE IF NOT EXISTS syncapi_output_room_events (
    -- An incrementing ID which denotes the position in the log that this event resides at.
    -- NB: 'serial' makes no guarantees to increment by 1 every time, only that it increments.
    --     This isn't a problem for us since we just want to order by this field.
    id BIGINT PRIMARY KEY DEFAULT nextval('syncapi_stream_id'),
    -- The event ID for the event
    event_id TEXT NOT NULL,
    -- The 'room_id' key for the event.
    room_id TEXT NOT NULL,
    -- The JSON for the event. Stored as TEXT because this should be valid UTF-8.
    event_json TEXT NOT NULL,
    -- A list of event IDs which represent a delta of added/removed room state. This can be NULL
    -- if there is no delta.
    add_state_ids TEXT[],
    remove_state_ids TEXT[]
);
-- for event selection
CREATE UNIQUE INDEX IF NOT EXISTS syncapi_event_id_idx ON syncapi_output_room_events(event_id);
`

// insertEventSQL inserts one row into syncapi_output_room_events and returns
// the id assigned to it.
const insertEventSQL = "INSERT INTO syncapi_output_room_events" +
	" ( room_id, event_id, event_json, add_state_ids, remove_state_ids)" +
	" VALUES ($1, $2, $3, $4, $5) RETURNING id"
K
Kegsay 已提交
57 58

// selectEventsSQL fetches the id and JSON of every stored event whose event_id
// is contained in the array passed as $1.
const selectEventsSQL = "SELECT id, event_json FROM syncapi_output_room_events" +
	" WHERE event_id = ANY($1)"
60

61
// selectRecentEventsSQL fetches at most $4 events of room $1 whose id lies in
// the range ($2, $3], newest first.
const selectRecentEventsSQL = "SELECT id, event_json" +
	" FROM syncapi_output_room_events" +
	" WHERE room_id = $1 AND id > $2 AND id <= $3" +
	" ORDER BY id DESC" +
	" LIMIT $4"
65

66
// selectMaxEventIDSQL fetches the largest id in syncapi_output_room_events.
// MAX yields NULL when the table has no rows.
const selectMaxEventIDSQL = "SELECT MAX(id) FROM syncapi_output_room_events"
68

K
Kegsay 已提交
69 70
// selectStateInRangeSQL fetches every row with an id in the range ($1, $2]
// that carries a state delta (add_state_ids or remove_state_ids is non-NULL).
// Rows are returned in ascending id order, i.e. the order they were received,
// so that the state updates can be applied correctly.
const selectStateInRangeSQL = "SELECT id, event_json, add_state_ids, remove_state_ids FROM syncapi_output_room_events" +
	" WHERE (id > $1 AND id <= $2)" +
	" AND (add_state_ids IS NOT NULL OR remove_state_ids IS NOT NULL)" +
	" ORDER BY id ASC"

76
// outputRoomEventsStatements holds the prepared statements that operate on the
// syncapi_output_room_events table. All fields are populated by prepare.
type outputRoomEventsStatements struct {
	insertEventStmt        *sql.Stmt // prepared from insertEventSQL
	selectEventsStmt       *sql.Stmt // prepared from selectEventsSQL
	selectMaxEventIDStmt   *sql.Stmt // prepared from selectMaxEventIDSQL
	selectRecentEventsStmt *sql.Stmt // prepared from selectRecentEventsSQL
	selectStateInRangeStmt *sql.Stmt // prepared from selectStateInRangeSQL
}

func (s *outputRoomEventsStatements) prepare(db *sql.DB) (err error) {
	_, err = db.Exec(outputRoomEventsSchema)
	if err != nil {
		return
	}
	if s.insertEventStmt, err = db.Prepare(insertEventSQL); err != nil {
		return
	}
K
Kegsay 已提交
92 93 94
	if s.selectEventsStmt, err = db.Prepare(selectEventsSQL); err != nil {
		return
	}
95
	if s.selectMaxEventIDStmt, err = db.Prepare(selectMaxEventIDSQL); err != nil {
96 97
		return
	}
98 99 100
	if s.selectRecentEventsStmt, err = db.Prepare(selectRecentEventsSQL); err != nil {
		return
	}
K
Kegsay 已提交
101 102 103
	if s.selectStateInRangeStmt, err = db.Prepare(selectStateInRangeSQL); err != nil {
		return
	}
104 105 106
	return
}

107
// selectStateInRange returns the state events between the two given stream positions, exclusive of oldPos, inclusive of newPos.
K
Kegsay 已提交
108 109
// Results are bucketed based on the room ID. If the same state is overwritten multiple times between the
// two positions, only the most recent state is returned.
110
func (s *outputRoomEventsStatements) selectStateInRange(
111
	ctx context.Context, txn *sql.Tx, oldPos, newPos types.StreamPosition,
112
) (map[string]map[string]bool, map[string]streamEvent, error) {
113 114 115
	stmt := common.TxStmt(txn, s.selectStateInRangeStmt)

	rows, err := stmt.QueryContext(ctx, oldPos, newPos)
K
Kegsay 已提交
116
	if err != nil {
117
		return nil, nil, err
K
Kegsay 已提交
118 119 120 121 122 123
	}
	// Fetch all the state change events for all rooms between the two positions then loop each event and:
	//  - Keep a cache of the event by ID (99% of state change events are for the event itself)
	//  - For each room ID, build up an array of event IDs which represents cumulative adds/removes
	// For each room, map cumulative event IDs to events and return. This may need to a batch SELECT based on event ID
	// if they aren't in the event ID cache. We don't handle state deletion yet.
124
	eventIDToEvent := make(map[string]streamEvent)
K
Kegsay 已提交
125 126 127 128 129 130

	// RoomID => A set (map[string]bool) of state event IDs which are between the two positions
	stateNeeded := make(map[string]map[string]bool)

	for rows.Next() {
		var (
131
			streamPos  int64
K
Kegsay 已提交
132 133 134 135
			eventBytes []byte
			addIDs     pq.StringArray
			delIDs     pq.StringArray
		)
136
		if err := rows.Scan(&streamPos, &eventBytes, &addIDs, &delIDs); err != nil {
137
			return nil, nil, err
K
Kegsay 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
		}
		// Sanity check for deleted state and whine if we see it. We don't need to do anything
		// since it'll just mark the event as not being needed.
		if len(addIDs) < len(delIDs) {
			log.WithFields(log.Fields{
				"since":   oldPos,
				"current": newPos,
				"adds":    addIDs,
				"dels":    delIDs,
			}).Warn("StateBetween: ignoring deleted state")
		}

		// TODO: Handle redacted events
		ev, err := gomatrixserverlib.NewEventFromTrustedJSON(eventBytes, false)
		if err != nil {
153
			return nil, nil, err
K
Kegsay 已提交
154 155 156 157 158 159 160 161 162 163 164 165 166
		}
		needSet := stateNeeded[ev.RoomID()]
		if needSet == nil { // make set if required
			needSet = make(map[string]bool)
		}
		for _, id := range delIDs {
			needSet[id] = false
		}
		for _, id := range addIDs {
			needSet[id] = true
		}
		stateNeeded[ev.RoomID()] = needSet

167
		eventIDToEvent[ev.EventID()] = streamEvent{ev, types.StreamPosition(streamPos)}
K
Kegsay 已提交
168 169
	}

170
	return stateNeeded, eventIDToEvent, nil
K
Kegsay 已提交
171 172
}

173 174 175
// MaxID returns the ID of the last inserted event in this table. 'txn' is optional. If it is not supplied,
// then this function should only ever be used at startup, as it will race with inserting events if it is
// done afterwards. If there are no inserted events, 0 is returned.
176
func (s *outputRoomEventsStatements) selectMaxEventID(
177 178
	ctx context.Context, txn *sql.Tx,
) (id int64, err error) {
179
	var nullableID sql.NullInt64
180
	stmt := common.TxStmt(txn, s.selectMaxEventIDStmt)
181
	err = stmt.QueryRowContext(ctx).Scan(&nullableID)
182 183 184
	if nullableID.Valid {
		id = nullableID.Int64
	}
185 186 187
	return
}

188 189
// InsertEvent into the output_room_events table. addState and removeState are an optional list of state event IDs. Returns the position
// of the inserted event.
190 191 192 193 194 195 196 197 198 199 200 201
func (s *outputRoomEventsStatements) insertEvent(
	ctx context.Context, txn *sql.Tx,
	event *gomatrixserverlib.Event, addState, removeState []string,
) (streamPos int64, err error) {
	stmt := common.TxStmt(txn, s.insertEventStmt)
	err = stmt.QueryRowContext(
		ctx,
		event.RoomID(),
		event.EventID(),
		event.JSON(),
		pq.StringArray(addState),
		pq.StringArray(removeState),
202 203
	).Scan(&streamPos)
	return
204
}
K
Kegsay 已提交
205

206
// RecentEventsInRoom returns the most recent events in the given room, up to a maximum of 'limit'.
207
func (s *outputRoomEventsStatements) selectRecentEvents(
208 209
	ctx context.Context, txn *sql.Tx,
	roomID string, fromPos, toPos types.StreamPosition, limit int,
210
) ([]streamEvent, error) {
211 212
	stmt := common.TxStmt(txn, s.selectRecentEventsStmt)
	rows, err := stmt.QueryContext(ctx, roomID, fromPos, toPos, limit)
213 214 215
	if err != nil {
		return nil, err
	}
E
Erik Johnston 已提交
216
	defer rows.Close() // nolint: errcheck
217
	events, err := rowsToStreamEvents(rows)
218 219 220 221 222 223
	if err != nil {
		return nil, err
	}
	// reverse the order because [0] is the newest event due to the ORDER BY in SQL-land. The reverse order makes [0] the oldest event,
	// which is correct for /sync responses.
	return reverseEvents(events), nil
224 225
}

K
Kegsay 已提交
226 227
// Events returns the events for the given event IDs. Returns an error if any one of the event IDs given are missing
// from the database.
228 229 230 231 232
func (s *outputRoomEventsStatements) selectEvents(
	ctx context.Context, txn *sql.Tx, eventIDs []string,
) ([]streamEvent, error) {
	stmt := common.TxStmt(txn, s.selectEventsStmt)
	rows, err := stmt.QueryContext(ctx, pq.StringArray(eventIDs))
233 234 235
	if err != nil {
		return nil, err
	}
E
Erik Johnston 已提交
236
	defer rows.Close() // nolint: errcheck
237
	return rowsToStreamEvents(rows)
238 239
}

240
func rowsToStreamEvents(rows *sql.Rows) ([]streamEvent, error) {
241
	var result []streamEvent
242
	for rows.Next() {
243 244 245 246 247
		var (
			streamPos  int64
			eventBytes []byte
		)
		if err := rows.Scan(&streamPos, &eventBytes); err != nil {
K
Kegsay 已提交
248 249
			return nil, err
		}
250
		// TODO: Handle redacted events
K
Kegsay 已提交
251 252 253 254
		ev, err := gomatrixserverlib.NewEventFromTrustedJSON(eventBytes, false)
		if err != nil {
			return nil, err
		}
255
		result = append(result, streamEvent{ev, types.StreamPosition(streamPos)})
K
Kegsay 已提交
256 257 258
	}
	return result, nil
}
259

260
func reverseEvents(input []streamEvent) (output []streamEvent) {
261 262 263 264 265
	for i := len(input) - 1; i >= 0; i-- {
		output = append(output, input[i])
	}
	return
}