/Users/deen/code/yugabyte-db/src/yb/integration-tests/create-table-itest.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // The following only applies to changes made to this file as part of YugaByte development. |
19 | | // |
20 | | // Portions Copyright (c) YugaByte, Inc. |
21 | | // |
22 | | // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except |
23 | | // in compliance with the License. You may obtain a copy of the License at |
24 | | // |
25 | | // http://www.apache.org/licenses/LICENSE-2.0 |
26 | | // |
27 | | // Unless required by applicable law or agreed to in writing, software distributed under the License |
28 | | // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express |
29 | | // or implied. See the License for the specific language governing permissions and limitations |
30 | | // under the License. |
31 | | // |
32 | | |
33 | | #include <map> |
34 | | #include <memory> |
35 | | #include <set> |
36 | | #include <string> |
37 | | |
38 | | #include <glog/stl_logging.h> |
39 | | #include <gtest/gtest.h> |
40 | | |
41 | | #include "yb/client/client_fwd.h" |
42 | | #include "yb/client/client-test-util.h" |
43 | | #include "yb/client/table.h" |
44 | | #include "yb/client/table_creator.h" |
45 | | #include "yb/client/table_info.h" |
46 | | |
47 | | #include "yb/common/common.pb.h" |
48 | | #include "yb/common/transaction.h" |
49 | | #include "yb/common/wire_protocol-test-util.h" |
50 | | |
51 | | #include "yb/integration-tests/external_mini_cluster-itest-base.h" |
52 | | #include "yb/integration-tests/external_mini_cluster.h" |
53 | | |
54 | | #include "yb/master/master_client.pb.h" |
55 | | #include "yb/master/master_defaults.h" |
56 | | #include "yb/master/master_util.h" |
57 | | |
58 | | #include "yb/tserver/tserver_service.pb.h" |
59 | | |
60 | | #include "yb/util/metrics.h" |
61 | | #include "yb/util/path_util.h" |
62 | | #include "yb/util/tsan_util.h" |
63 | | |
64 | | using std::multimap; |
65 | | using std::set; |
66 | | using std::string; |
67 | | using std::vector; |
68 | | using strings::Substitute; |
69 | | using yb::client::YBTableType; |
70 | | using yb::client::YBTableName; |
71 | | |
72 | | METRIC_DECLARE_entity(server); |
73 | | METRIC_DECLARE_entity(tablet); |
74 | | METRIC_DECLARE_gauge_int64(is_raft_leader); |
75 | | METRIC_DECLARE_histogram(handler_latency_yb_tserver_TabletServerAdminService_CreateTablet); |
76 | | METRIC_DECLARE_histogram(handler_latency_yb_tserver_TabletServerAdminService_DeleteTablet); |
77 | | |
78 | | DECLARE_int32(ycql_num_tablets); |
79 | | DECLARE_int32(yb_num_shards_per_tserver); |
80 | | |
81 | | namespace yb { |
82 | | |
83 | | static const YBTableName kTableName(YQL_DATABASE_CQL, "my_keyspace", "test-table"); |
84 | | |
85 | | class CreateTableITest : public ExternalMiniClusterITestBase { |
86 | | public: |
87 | | Status CreateTableWithPlacement( |
88 | | const master::ReplicationInfoPB& replication_info, const string& table_suffix, |
89 | 0 | const YBTableType table_type = YBTableType::YQL_TABLE_TYPE) { |
90 | 0 | auto db_type = master::GetDatabaseTypeForTable( |
91 | 0 | client::ClientToPBTableType(table_type)); |
92 | 0 | RETURN_NOT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), db_type)); |
93 | 0 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
94 | 0 | client::YBSchema client_schema(client::YBSchemaFromSchema(yb::GetSimpleTestSchema())); |
95 | 0 | if (table_type != YBTableType::REDIS_TABLE_TYPE) { |
96 | 0 | table_creator->schema(&client_schema); |
97 | 0 | } |
98 | 0 | return table_creator->table_name( |
99 | 0 | YBTableName(db_type, |
100 | 0 | kTableName.namespace_name(), |
101 | 0 | Substitute("$0:$1", kTableName.table_name(), table_suffix))) |
102 | 0 | .replication_info(replication_info) |
103 | 0 | .table_type(table_type) |
104 | 0 | .wait(true) |
105 | 0 | .Create(); |
106 | 0 | } |
107 | | |
108 | | Result<bool> VerifyTServerTablets(int idx, int num_tablets, int num_leaders, |
109 | 34 | const std::string& table_name, bool verify_leaders) { |
110 | 34 | auto tablets = VERIFY_RESULT(cluster_->GetTablets(cluster_->tablet_server(idx))); |
111 | | |
112 | 34 | int leader_count = 0, tablet_count = 0; |
113 | 301 | for (const auto& tablet : tablets) { |
114 | 301 | if (tablet.table_name() != table_name) { |
115 | 120 | continue; |
116 | 120 | } |
117 | 181 | if (tablet.state() != tablet::RaftGroupStatePB::RUNNING) { |
118 | 0 | return false; |
119 | 0 | } |
120 | 181 | tablet_count++; |
121 | 181 | if (tablet.is_leader()) { |
122 | 65 | leader_count++; |
123 | 65 | } |
124 | 181 | } |
125 | 34 | LOG(INFO) << "For table " << table_name << ", on tserver " << idx << " number of leaders " |
126 | 34 | << leader_count << " number of tablets " << tablet_count; |
127 | 34 | if ((verify_leaders && leader_count != num_leaders) || tablet_count != num_tablets) { |
128 | 12 | return false; |
129 | 12 | } |
130 | 22 | return true; |
131 | 22 | } |
132 | | |
133 | | void PreparePlacementInfo(const std::unordered_map<string, int>& zone_to_replica_count, |
134 | 4 | int num_replicas, master::PlacementInfoPB* placement_info) { |
135 | 4 | placement_info->set_num_replicas(num_replicas); |
136 | 10 | for (const auto& zone_and_count : zone_to_replica_count) { |
137 | 10 | auto* pb = placement_info->add_placement_blocks(); |
138 | 10 | pb->mutable_cloud_info()->set_placement_cloud("c"); |
139 | 10 | pb->mutable_cloud_info()->set_placement_region("r"); |
140 | 10 | pb->mutable_cloud_info()->set_placement_zone(zone_and_count.first); |
141 | 10 | pb->set_min_num_replicas(zone_and_count.second); |
142 | 10 | } |
143 | 4 | } |
144 | | |
145 | 1 | void AddTServerInZone(const string& zone) { |
146 | 1 | vector<std::string> flags = { |
147 | 1 | "--placement_cloud=c", |
148 | 1 | "--placement_region=r", |
149 | 1 | "--placement_zone=" + zone |
150 | 1 | }; |
151 | 1 | ASSERT_OK(cluster_->AddTabletServer(true, flags)); |
152 | 1 | } |
153 | | }; |
154 | | |
155 | | // TODO(bogdan): disabled until ENG-2687 |
156 | 0 | TEST_F(CreateTableITest, DISABLED_TestCreateRedisTable) { |
157 | 0 | const string cloud = "aws"; |
158 | 0 | const string region = "us-west-1"; |
159 | 0 | const string zone = "a"; |
160 | |
|
161 | 0 | const int kNumReplicas = 3; |
162 | 0 | vector<string> flags = {Substitute("--placement_cloud=$0", cloud), |
163 | 0 | Substitute("--placement_region=$0", region), |
164 | 0 | Substitute("--placement_zone=$0", zone)}; |
165 | 0 | ASSERT_NO_FATALS(StartCluster(flags, flags, kNumReplicas)); |
166 | |
|
167 | 0 | master::ReplicationInfoPB replication_info; |
168 | 0 | replication_info.mutable_live_replicas()->set_num_replicas(kNumReplicas); |
169 | 0 | auto* placement_block = replication_info.mutable_live_replicas()->add_placement_blocks(); |
170 | 0 | auto* cloud_info = placement_block->mutable_cloud_info(); |
171 | 0 | cloud_info->set_placement_cloud(cloud); |
172 | 0 | cloud_info->set_placement_region(region); |
173 | 0 | cloud_info->set_placement_zone(zone); |
174 | 0 | placement_block->set_min_num_replicas(kNumReplicas); |
175 | | |
176 | | // Successful table create. |
177 | 0 | ASSERT_OK( |
178 | 0 | CreateTableWithPlacement(replication_info, "success_base", YBTableType::REDIS_TABLE_TYPE)); |
179 | 0 | } |
180 | | |
181 | | // TODO(bogdan): disabled until ENG-2687 |
182 | 0 | TEST_F(CreateTableITest, DISABLED_TestCreateWithPlacement) { |
183 | 0 | const string cloud = "aws"; |
184 | 0 | const string region = "us-west-1"; |
185 | 0 | const string zone = "a"; |
186 | |
|
187 | 0 | const int kNumReplicas = 3; |
188 | 0 | vector<string> flags = {Substitute("--placement_cloud=$0", cloud), |
189 | 0 | Substitute("--placement_region=$0", region), |
190 | 0 | Substitute("--placement_zone=$0", zone)}; |
191 | 0 | ASSERT_NO_FATALS(StartCluster(flags, flags, kNumReplicas)); |
192 | |
|
193 | 0 | master::ReplicationInfoPB replication_info; |
194 | 0 | replication_info.mutable_live_replicas()->set_num_replicas(kNumReplicas); |
195 | 0 | auto* placement_block = replication_info.mutable_live_replicas()->add_placement_blocks(); |
196 | 0 | auto* cloud_info = placement_block->mutable_cloud_info(); |
197 | 0 | cloud_info->set_placement_cloud(cloud); |
198 | 0 | cloud_info->set_placement_region(region); |
199 | 0 | cloud_info->set_placement_zone(zone); |
200 | 0 | placement_block->set_min_num_replicas(kNumReplicas); |
201 | | |
202 | | // Successful table create. |
203 | 0 | ASSERT_OK(CreateTableWithPlacement(replication_info, "success_base")); |
204 | | |
205 | | // Cannot create table with 4 replicas when only 3 TS available. |
206 | 0 | { |
207 | 0 | auto copy_replication_info = replication_info; |
208 | 0 | copy_replication_info.mutable_live_replicas()->set_num_replicas(kNumReplicas + 1); |
209 | 0 | Status s = CreateTableWithPlacement(copy_replication_info, "fail_num_replicas"); |
210 | 0 | ASSERT_TRUE(s.IsInvalidArgument()); |
211 | 0 | } |
212 | | |
213 | | // Cannot create table in locations we have no servers. |
214 | 0 | { |
215 | 0 | auto copy_replication_info = replication_info; |
216 | 0 | auto* new_placement = |
217 | 0 | copy_replication_info.mutable_live_replicas()->mutable_placement_blocks(0); |
218 | 0 | new_placement->mutable_cloud_info()->set_placement_zone("b"); |
219 | 0 | Status s = CreateTableWithPlacement(copy_replication_info, "fail_zone"); |
220 | 0 | ASSERT_TRUE(s.IsTimedOut()); |
221 | 0 | } |
222 | | |
223 | | // Set cluster config placement and test table placement interaction. Right now, this should fail |
224 | | // instantly, as we do not support cluster and table level at the same time. |
225 | 0 | ASSERT_OK(client_->SetReplicationInfo(replication_info)); |
226 | 0 | { |
227 | 0 | Status s = CreateTableWithPlacement(replication_info, "fail_table_placement"); |
228 | 0 | ASSERT_TRUE(s.IsInvalidArgument()); |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | // Regression test for an issue seen when we fail to create a majority of the |
233 | | // replicas in a tablet. Previously, we'd still consider the tablet "RUNNING" |
234 | | // on the master and finish the table creation, even though that tablet would |
235 | | // be stuck forever with its minority never able to elect a leader. |
236 | 1 | TEST_F(CreateTableITest, TestCreateWhenMajorityOfReplicasFailCreation) { |
237 | 1 | const int kNumReplicas = 3; |
238 | 1 | const int kNumTablets = 1; |
239 | 1 | vector<string> ts_flags; |
240 | 1 | vector<string> master_flags; |
241 | 1 | master_flags.push_back("--tablet_creation_timeout_ms=1000"); |
242 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas)); |
243 | | |
244 | | // Shut down 2/3 of the tablet servers. |
245 | 1 | cluster_->tablet_server(1)->Shutdown(); |
246 | 1 | cluster_->tablet_server(2)->Shutdown(); |
247 | | |
248 | | // Try to create a single-tablet table. |
249 | | // This won't succeed because we can't create enough replicas to get |
250 | | // a quorum. |
251 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), |
252 | 1 | kTableName.namespace_type())); |
253 | 1 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
254 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
255 | 1 | ASSERT_OK(table_creator->table_name(kTableName) |
256 | 1 | .schema(&client_schema) |
257 | 1 | .num_tablets(kNumTablets) |
258 | 1 | .wait(false) |
259 | 1 | .Create()); |
260 | | |
261 | | // Sleep until we've seen a couple retries on our live server. |
262 | 1 | int64_t num_create_attempts = 0; |
263 | 36 | while (num_create_attempts < 3) { |
264 | 35 | SleepFor(MonoDelta::FromMilliseconds(100)); |
265 | 35 | num_create_attempts = ASSERT_RESULT(cluster_->tablet_server(0)->GetInt64Metric( |
266 | 35 | &METRIC_ENTITY_server, |
267 | 35 | "yb.tabletserver", |
268 | 35 | &METRIC_handler_latency_yb_tserver_TabletServerAdminService_CreateTablet, |
269 | 35 | "total_count")); |
270 | 35 | LOG(INFO) << "Waiting for the master to retry creating the tablet 3 times... " |
271 | 35 | << num_create_attempts << " RPCs seen so far"; |
272 | | |
273 | | // The CreateTable operation should still be considered in progress, even though |
274 | | // we'll be successful at creating a single replica. |
275 | 35 | bool in_progress = false; |
276 | 35 | ASSERT_OK(client_->IsCreateTableInProgress(kTableName, &in_progress)); |
277 | 35 | ASSERT_TRUE(in_progress); |
278 | 35 | } |
279 | | |
280 | | // Once we restart the servers, we should succeed at creating a healthy |
281 | | // replicated tablet. |
282 | 1 | ASSERT_OK(cluster_->tablet_server(1)->Restart()); |
283 | 1 | ASSERT_OK(cluster_->tablet_server(2)->Restart()); |
284 | | |
285 | | // We should eventually finish the table creation we started earlier. |
286 | 1 | bool in_progress = false; |
287 | 1 | while (in_progress) { |
288 | 0 | LOG(INFO) << "Waiting for the master to successfully create the table..."; |
289 | 0 | ASSERT_OK(client_->IsCreateTableInProgress(kTableName, &in_progress)); |
290 | 0 | SleepFor(MonoDelta::FromMilliseconds(100)); |
291 | 0 | } |
292 | | |
293 | | // The server that was up from the beginning should be left with only |
294 | | // one tablet, eventually, since the tablets which failed to get created |
295 | | // properly should get deleted. |
296 | 1 | vector<string> tablets; |
297 | 1 | int wait_iter = 0; |
298 | 2 | while (tablets.size() != kNumTablets && wait_iter++ < 100) { |
299 | 1 | LOG(INFO) << "Waiting for only " << kNumTablets << " tablet(s) to be left on TS 0. " |
300 | 1 | << "Currently have: " << tablets; |
301 | 1 | SleepFor(MonoDelta::FromMilliseconds(100)); |
302 | 1 | tablets = inspect_->ListTabletsWithDataOnTS(0); |
303 | 1 | } |
304 | 2 | ASSERT_EQ(tablets.size(), kNumTablets) << "Tablets on TS0: " << tablets; |
305 | 1 | } |
306 | | |
307 | | // Ensure that, when a table is created, |
308 | | // the tablets are well spread out across the machines in the cluster. |
309 | 1 | TEST_F(CreateTableITest, TestSpreadReplicasEvenly) { |
310 | 1 | const int kNumServers = 10; |
311 | 1 | const int kNumTablets = 20; |
312 | 1 | vector<string> ts_flags; |
313 | 1 | vector<string> master_flags; |
314 | 1 | ts_flags.push_back("--never_fsync"); // run faster on slow disks |
315 | 1 | master_flags.push_back("--enable_load_balancing=false"); // disable load balancing moves |
316 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumServers)); |
317 | | |
318 | 0 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), |
319 | 0 | kTableName.namespace_type())); |
320 | 0 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
321 | 0 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
322 | 0 | ASSERT_OK(table_creator->table_name(kTableName) |
323 | 0 | .schema(&client_schema) |
324 | 0 | .num_tablets(kNumTablets) |
325 | 0 | .Create()); |
326 | | |
327 | | // Load should be equal on all the 10 servers without any deviation. |
328 | 0 | for (int ts_idx = 0; ts_idx < kNumServers; ts_idx++) { |
329 | 0 | auto num_replicas = inspect_->ListTabletsOnTS(ts_idx).size(); |
330 | 0 | LOG(INFO) << "TS " << ts_idx << " has " << num_replicas << " tablets"; |
331 | 0 | ASSERT_EQ(num_replicas, 6); |
332 | 0 | } |
333 | 0 | } |
334 | | |
335 | 1 | TEST_F(CreateTableITest, TestNoAllocBlacklist) { |
336 | 1 | const int kNumServers = 4; |
337 | 1 | const int kNumTablets = 24; |
338 | 1 | vector<string> ts_flags; |
339 | 1 | vector<string> master_flags; |
340 | 1 | ts_flags.push_back("--never_fsync"); // run faster on slow disks |
341 | 1 | master_flags.push_back("--enable_load_balancing=false"); // disable load balancing moves |
342 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumServers)); |
343 | | // add TServer to blacklist |
344 | 1 | ASSERT_OK(cluster_->AddTServerToBlacklist(cluster_->master(), cluster_->tablet_server(1))); |
345 | | // create table |
346 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), |
347 | 1 | kTableName.namespace_type())); |
348 | 1 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
349 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
350 | 1 | ASSERT_OK(table_creator->table_name(kTableName) |
351 | 1 | .schema(&client_schema) |
352 | 1 | .num_tablets(kNumTablets) |
353 | 1 | .Create()); |
354 | | // check that no tablets have been allocated to blacklisted TServer |
355 | 1 | ASSERT_EQ(inspect_->ListTabletsOnTS(1).size(), 0); |
356 | 1 | } |
357 | | |
358 | 1 | TEST_F(CreateTableITest, TableColocationRemoteBootstrapTest) { |
359 | 1 | const int kNumReplicas = 3; |
360 | 1 | string parent_table_id; |
361 | 1 | string tablet_id; |
362 | 1 | vector<string> ts_flags; |
363 | 1 | vector<string> master_flags; |
364 | | |
365 | 1 | ts_flags.push_back("--follower_unavailable_considered_failed_sec=3"); |
366 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas)); |
367 | 1 | ASSERT_OK( |
368 | 1 | client_->CreateNamespace("colocation_test", boost::none /* db */, "" /* creator */, |
369 | 1 | "" /* ns_id */, "" /* src_ns_id */, |
370 | 1 | boost::none /* next_pg_oid */, nullptr /* txn */, true)); |
371 | | |
372 | 1 | { |
373 | 1 | string ns_id; |
374 | 1 | auto namespaces = ASSERT_RESULT(client_->ListNamespaces(boost::none)); |
375 | 2 | for (const auto& ns : namespaces) { |
376 | 2 | if (ns.name() == "colocation_test") { |
377 | 1 | ns_id = ns.id(); |
378 | 1 | break; |
379 | 1 | } |
380 | 2 | } |
381 | 1 | ASSERT_FALSE(ns_id.empty()); |
382 | 1 | parent_table_id = ns_id + master::kColocatedParentTableIdSuffix; |
383 | 1 | } |
384 | | |
385 | 1 | { |
386 | 1 | google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets; |
387 | 1 | ASSERT_OK(WaitFor( |
388 | 1 | [&]() -> bool { |
389 | 1 | EXPECT_OK(client_->GetTabletsFromTableId(parent_table_id, 0, &tablets)); |
390 | 1 | return tablets.size() == 1; |
391 | 1 | }, |
392 | 1 | MonoDelta::FromSeconds(30), "Create colocated tablet")); |
393 | 1 | tablet_id = tablets[0].tablet_id(); |
394 | 1 | } |
395 | | |
396 | 1 | string rocksdb_dir = JoinPathSegments( |
397 | 1 | cluster_->data_root(), "ts-1", "yb-data", "tserver", "data", "rocksdb", |
398 | 1 | "table-" + parent_table_id, "tablet-" + tablet_id); |
399 | 1 | string wal_dir = JoinPathSegments( |
400 | 1 | cluster_->data_root(), "ts-1", "yb-data", "tserver", "wals", "table-" + parent_table_id, |
401 | 1 | "tablet-" + tablet_id); |
402 | 100 | std::function<Result<bool>()> dirs_exist = [&] { |
403 | 100 | return Env::Default()->FileExists(rocksdb_dir) && Env::Default()->FileExists(wal_dir); |
404 | 100 | }; |
405 | | |
406 | 1 | ASSERT_OK(WaitFor(dirs_exist, MonoDelta::FromSeconds(30), "Create data and wal directories")); |
407 | | |
408 | | // Stop a tablet server and create a new tablet server. This will trigger a remote bootstrap on |
409 | | // the new tablet server. |
410 | 1 | cluster_->tablet_server(2)->Shutdown(); |
411 | 1 | ASSERT_OK(cluster_->AddTabletServer()); |
412 | 1 | ASSERT_OK(cluster_->WaitForTabletServerCount(4, MonoDelta::FromSeconds(20))); |
413 | | |
414 | | // Remote bootstrap should create the correct tablet directory for the new tablet server. |
415 | 1 | rocksdb_dir = JoinPathSegments( |
416 | 1 | cluster_->data_root(), "ts-4", "yb-data", "tserver", "data", "rocksdb", |
417 | 1 | "table-" + parent_table_id, "tablet-" + tablet_id); |
418 | 1 | wal_dir = JoinPathSegments( |
419 | 1 | cluster_->data_root(), "ts-4", "yb-data", "tserver", "wals", "table-" + parent_table_id, |
420 | 1 | "tablet-" + tablet_id); |
421 | 1 | ASSERT_OK(WaitFor(dirs_exist, MonoDelta::FromSeconds(100), "Create data and wal directories")); |
422 | 1 | } |
423 | | |
424 | | // Skipping in TSAN because of an error with initdb in TSAN when ysql is enabled |
425 | 1 | TEST_F(CreateTableITest, YB_DISABLE_TEST_IN_TSAN(TablegroupRemoteBootstrapTest)) { |
426 | 1 | const int kNumReplicas = 3; |
427 | 1 | string parent_table_id; |
428 | 1 | string tablet_id; |
429 | 1 | vector<string> ts_flags; |
430 | 1 | vector<string> master_flags; |
431 | 1 | string namespace_name = "tablegroup_test_namespace_name"; |
432 | 1 | TablegroupId tablegroup_id = "tablegroup_test_id00000000000000"; |
433 | 1 | TablespaceId tablespace_id = ""; |
434 | 1 | string namespace_id; |
435 | | |
436 | 1 | ts_flags.push_back("--follower_unavailable_considered_failed_sec=3"); |
437 | 1 | ts_flags.push_back("--ysql_beta_feature_tablegroup=true"); |
438 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas, 1 /* masters */, |
439 | 1 | true /* enable_ysql (allows load balancing) */)); |
440 | | |
441 | 1 | ASSERT_OK(client_->CreateNamespace(namespace_name, YQL_DATABASE_PGSQL, "" /* creator */, |
442 | 1 | "" /* ns_id */, "" /* src_ns_id */, |
443 | 1 | boost::none /* next_pg_oid */, nullptr /* txn */, false)); |
444 | | |
445 | 1 | { |
446 | 1 | auto namespaces = ASSERT_RESULT(client_->ListNamespaces(boost::none)); |
447 | 8 | for (const auto& ns : namespaces) { |
448 | 8 | if (ns.name() == namespace_name) { |
449 | 1 | namespace_id = ns.id(); |
450 | 1 | break; |
451 | 1 | } |
452 | 8 | } |
453 | 1 | ASSERT_FALSE(namespace_id.empty()); |
454 | 1 | } |
455 | | |
456 | | // Since this is just for testing purposes, we do not bother generating a valid PgsqlTablegroupId |
457 | 1 | ASSERT_OK( |
458 | 1 | client_->CreateTablegroup(namespace_name, namespace_id, tablegroup_id, tablespace_id)); |
459 | | |
460 | | // Now want to ensure that the newly created tablegroup shows up in the list. |
461 | 1 | auto exists = ASSERT_RESULT(client_->TablegroupExists(namespace_name, tablegroup_id)); |
462 | 1 | ASSERT_TRUE(exists); |
463 | 1 | parent_table_id = tablegroup_id + master::kTablegroupParentTableIdSuffix; |
464 | | |
465 | 1 | { |
466 | 1 | google::protobuf::RepeatedPtrField<master::TabletLocationsPB> tablets; |
467 | 1 | ASSERT_OK(WaitFor( |
468 | 1 | [&]() -> bool { |
469 | 1 | EXPECT_OK(client_->GetTabletsFromTableId(parent_table_id, 0, &tablets)); |
470 | 1 | return tablets.size() == 1; |
471 | 1 | }, |
472 | 1 | MonoDelta::FromSeconds(30), "Create tablegroup tablet")); |
473 | 1 | tablet_id = tablets[0].tablet_id(); |
474 | 1 | } |
475 | | |
476 | 1 | string rocksdb_dir = JoinPathSegments( |
477 | 1 | cluster_->data_root(), "ts-1", "yb-data", "tserver", "data", "rocksdb", |
478 | 1 | "table-" + parent_table_id, "tablet-" + tablet_id); |
479 | 1 | string wal_dir = JoinPathSegments( |
480 | 1 | cluster_->data_root(), "ts-1", "yb-data", "tserver", "wals", "table-" + parent_table_id, |
481 | 1 | "tablet-" + tablet_id); |
482 | 79 | std::function<Result<bool>()> dirs_exist = [&] { |
483 | 79 | return Env::Default()->FileExists(rocksdb_dir) && Env::Default()->FileExists(wal_dir); |
484 | 79 | }; |
485 | | |
486 | 1 | ASSERT_OK(WaitFor(dirs_exist, MonoDelta::FromSeconds(30), "Create data and wal directories")); |
487 | | |
488 | | // Stop a tablet server and create a new tablet server. This will trigger a remote bootstrap on |
489 | | // the new tablet server. |
490 | 1 | cluster_->tablet_server(2)->Shutdown(); |
491 | 1 | ASSERT_OK(cluster_->AddTabletServer()); |
492 | 1 | ASSERT_OK(cluster_->WaitForTabletServerCount(4, MonoDelta::FromSeconds(20))); |
493 | | |
494 | | // Remote bootstrap should create the correct tablet directory for the new tablet server. |
495 | 1 | rocksdb_dir = JoinPathSegments( |
496 | 1 | cluster_->data_root(), "ts-4", "yb-data", "tserver", "data", "rocksdb", |
497 | 1 | "table-" + parent_table_id, "tablet-" + tablet_id); |
498 | 1 | wal_dir = JoinPathSegments( |
499 | 1 | cluster_->data_root(), "ts-4", "yb-data", "tserver", "wals", "table-" + parent_table_id, |
500 | 1 | "tablet-" + tablet_id); |
501 | 1 | ASSERT_OK(WaitFor(dirs_exist, MonoDelta::FromSeconds(100), "Create data and wal directories")); |
502 | 1 | } |
503 | | |
504 | 1 | TEST_F(CreateTableITest, TestIsRaftLeaderMetric) { |
505 | 1 | const int kNumReplicas = 3; |
506 | 1 | const int kNumTablets = 1; |
507 | 1 | const int kExpectedRaftLeaders = 1; |
508 | 1 | vector<string> ts_flags; |
509 | 1 | vector<string> master_flags; |
510 | 1 | master_flags.push_back("--tablet_creation_timeout_ms=1000"); |
511 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas)); |
512 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), |
513 | 1 | kTableName.namespace_type())); |
514 | 1 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
515 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
516 | | |
517 | | // create a table |
518 | 1 | ASSERT_OK(table_creator->table_name(kTableName) |
519 | 1 | .schema(&client_schema) |
520 | 1 | .num_tablets(kNumTablets) |
521 | 1 | .Create()); |
522 | | |
523 | | // Count the total Number of Raft Leaders in the cluster. Go through each tablet of every |
524 | | // tablet-server and sum up the leaders. |
525 | 1 | int64_t kNumRaftLeaders = 0; |
526 | 4 | for (size_t i = 0 ; i < kNumReplicas; i++) { |
527 | 3 | auto tablet_ids = ASSERT_RESULT(cluster_->GetTabletIds(cluster_->tablet_server(i))); |
528 | 6 | for(size_t ti = 0; ti < inspect_->ListTabletsOnTS(i).size(); ti++) { |
529 | 3 | const char *tabletId = tablet_ids[ti].c_str(); |
530 | 3 | kNumRaftLeaders += ASSERT_RESULT(cluster_->tablet_server(i)->GetInt64Metric( |
531 | 3 | &METRIC_ENTITY_tablet, tabletId, &METRIC_is_raft_leader, "value")); |
532 | 3 | } |
533 | 3 | } |
534 | 1 | ASSERT_EQ(kNumRaftLeaders, kExpectedRaftLeaders); |
535 | 1 | } |
536 | | |
537 | | // In TSAN, currently, initdb isn't created during build but on first start. |
538 | | // As a result transaction table gets created without waiting for the requisite |
539 | | // number of TS. |
540 | 1 | TEST_F(CreateTableITest, YB_DISABLE_TEST_IN_TSAN(TestTransactionStatusTableCreation)) { |
541 | | // Set up an RF 1. |
542 | | // Tell the Master leader to wait for 3 TS to join before creating the |
543 | | // transaction status table. |
544 | 1 | vector<string> master_flags = { |
545 | 1 | "--txn_table_wait_min_ts_count=3" |
546 | 1 | }; |
547 | | // We also need to enable ysql. |
548 | 1 | ASSERT_NO_FATALS(StartCluster({}, master_flags, 1, 1, true)); |
549 | | |
550 | | // Check that the transaction table hasn't been created yet. |
551 | 1 | YQLDatabase db = YQL_DATABASE_CQL; |
552 | 1 | YBTableName transaction_status_table(db, master::kSystemNamespaceId, |
553 | 1 | master::kSystemNamespaceName, kGlobalTransactionsTableName); |
554 | 1 | bool exists = ASSERT_RESULT(client_->TableExists(transaction_status_table)); |
555 | 2 | ASSERT_FALSE(exists) << "Transaction table exists even though the " |
556 | 2 | "requirement for the minimum number of TS not met"; |
557 | | |
558 | | // Add two tservers. |
559 | 1 | ASSERT_OK(cluster_->AddTabletServer()); |
560 | 1 | ASSERT_OK(cluster_->AddTabletServer()); |
561 | | |
562 | 1 | auto tbl_exists = [&]() -> Result<bool> { |
563 | 1 | return client_->TableExists(transaction_status_table); |
564 | 1 | }; |
565 | | |
566 | 1 | ASSERT_OK(WaitFor(tbl_exists, 30s * kTimeMultiplier, |
567 | 1 | "Transaction table doesn't exist even though the " |
568 | 1 | "requirement for the minimum number of TS met")); |
569 | 1 | } |
570 | | |
571 | 1 | TEST_F(CreateTableITest, TestCreateTableWithDefinedPartition) { |
572 | 1 | const int kNumReplicas = 3; |
573 | 1 | const int kNumTablets = 2; |
574 | | |
575 | 1 | const int kNumPartitions = kNumTablets; |
576 | | |
577 | 1 | vector<string> ts_flags; |
578 | 1 | vector<string> master_flags; |
579 | 1 | ts_flags.push_back("--never_fsync"); // run faster on slow disks |
580 | 1 | master_flags.push_back("--enable_load_balancing=false"); // disable load balancing moves |
581 | 1 | ASSERT_NO_FATALS(StartCluster(ts_flags, master_flags, kNumReplicas)); |
582 | | |
583 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kTableName.namespace_name(), |
584 | 1 | kTableName.namespace_type())); |
585 | 1 | std::unique_ptr<client::YBTableCreator> table_creator(client_->NewTableCreator()); |
586 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
587 | | |
588 | | // Allocate the partitions. |
589 | 1 | Partition partitions[kNumPartitions]; |
590 | 1 | const uint16_t interval = PartitionSchema::kMaxPartitionKey / (kNumPartitions + 1); |
591 | | |
592 | 1 | partitions[0].set_partition_key_end(PartitionSchema::EncodeMultiColumnHashValue(interval)); |
593 | 1 | partitions[1].set_partition_key_start(PartitionSchema::EncodeMultiColumnHashValue(interval)); |
594 | | |
595 | | // create a table |
596 | 1 | ASSERT_OK(table_creator->table_name(kTableName) |
597 | 1 | .schema(&client_schema) |
598 | 1 | .num_tablets(kNumTablets) |
599 | 1 | .add_partition(partitions[0]) |
600 | 1 | .add_partition(partitions[1]) |
601 | 1 | .Create()); |
602 | | |
603 | 1 | google::protobuf::RepeatedPtrField<yb::master::TabletLocationsPB> tablets; |
604 | 1 | ASSERT_OK(client_->GetTablets( |
605 | 1 | kTableName, -1, &tablets, /* partition_list_version =*/ nullptr, |
606 | 1 | RequireTabletsRunning::kFalse)); |
607 | 3 | for (int i = 0 ; i < kNumPartitions; ++i) { |
608 | 2 | Partition p; |
609 | 2 | Partition::FromPB(tablets[i].partition(), &p); |
610 | 2 | ASSERT_TRUE(partitions[i].BoundsEqualToPartition(p)); |
611 | 2 | } |
612 | 1 | } |
613 | | |
614 | 1 | TEST_F(CreateTableITest, TestNumTabletsFlags) { |
615 | | // Checks the tablet count of tables created with an explicit num_tablets(), with FLAGS_ycql_num_tablets set, and with that flag set back to -1. |
616 | 1 | const int kNumReplicas = 3; |
617 | 1 | const int kNumTablets = 6; |
618 | 1 | const string kNamespaceName = "my_keyspace"; |
619 | 1 | const YQLDatabase kNamespaceType = YQL_DATABASE_CQL; |
620 | 1 | const string kTableName1 = "test-table1"; |
621 | 1 | const string kTableName2 = "test-table2"; |
622 | 1 | const string kTableName3 = "test-table3"; |
623 | | 
624 | | // Set the flags under test before the cluster is started. |
625 | 1 | FLAGS_ycql_num_tablets = 1; |
626 | 1 | FLAGS_yb_num_shards_per_tserver = 3; |
627 | | // Start an RF3 cluster (kNumReplicas is passed as the third StartCluster argument). |
628 | 1 | ASSERT_NO_FATALS(StartCluster({}, {}, kNumReplicas)); |
629 | | 
630 | | // Create a namespace for all the tables. |
631 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kNamespaceName, kNamespaceType)); |
632 | | // One common schema for all the tables. |
633 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
634 | | 
635 | | // Test 1: Create a table with explicit tablet count. |
636 | 1 | YBTableName table_name1(kNamespaceType, kNamespaceName, kTableName1); |
637 | 1 | std::unique_ptr<client::YBTableCreator> table_creator1(client_->NewTableCreator()); |
638 | 1 | ASSERT_OK(table_creator1->table_name(table_name1) |
639 | 1 | .schema(&client_schema) |
640 | 1 | .num_tablets(kNumTablets) |
641 | 1 | .wait(true) |
642 | 1 | .Create()); |
643 | | 
644 | | // Verify that the explicit count (kNumTablets = 6) is used instead of FLAGS_ycql_num_tablets (1). |
645 | 1 | google::protobuf::RepeatedPtrField<yb::master::TabletLocationsPB> tablets; |
646 | 1 | ASSERT_OK(client_->GetTablets( |
647 | 1 | table_name1, -1, &tablets, /* partition_list_version =*/ nullptr, |
648 | 1 | RequireTabletsRunning::kFalse)); |
649 | 1 | ASSERT_EQ(tablets.size(), 6); |
650 | | 
651 | | // Test 2: Create another table without explicit number of tablets. |
652 | 1 | YBTableName table_name2(kNamespaceType, kNamespaceName, kTableName2); |
653 | 1 | std::unique_ptr<client::YBTableCreator> table_creator2(client_->NewTableCreator()); |
654 | 1 | ASSERT_OK(table_creator2->table_name(table_name2) |
655 | 1 | .schema(&client_schema) |
656 | 1 | .wait(true) |
657 | 1 | .Create()); |
658 | | 
659 | | // Verify that number of tablets is 1, the value assigned to FLAGS_ycql_num_tablets above. |
660 | 1 | tablets.Clear(); |
661 | 1 | ASSERT_OK(client_->GetTablets( |
662 | 1 | table_name2, -1, &tablets, /* partition_list_version =*/ nullptr, |
663 | 1 | RequireTabletsRunning::kFalse)); |
664 | 1 | ASSERT_EQ(tablets.size(), 1); |
665 | | 
666 | | // Set FLAGS_ycql_num_tablets to -1 (presumably its "unset" value -- TODO confirm). |
667 | 1 | FLAGS_ycql_num_tablets = -1; |
668 | | 
669 | | // Test 3: Create a table without explicit tablet count, now that the flag is -1. |
670 | 1 | YBTableName table_name3(kNamespaceType, kNamespaceName, kTableName3); |
671 | 1 | std::unique_ptr<client::YBTableCreator> table_creator3(client_->NewTableCreator()); |
672 | 1 | ASSERT_OK(table_creator3->table_name(table_name3) |
673 | 1 | .schema(&client_schema) |
674 | 1 | .wait(true) |
675 | 1 | .Create()); |
676 | | 
677 | | // Verify that number of tablets is 9 -- presumably 3 tservers x FLAGS_yb_num_shards_per_tserver (3); TODO confirm. |
678 | 1 | tablets.Clear(); |
679 | 1 | ASSERT_OK(client_->GetTablets( |
680 | 1 | table_name3, -1, &tablets, /* partition_list_version =*/ nullptr, |
681 | 1 | RequireTabletsRunning::kFalse)); |
682 | 1 | ASSERT_EQ(tablets.size(), 9); |
683 | 1 | } |
684 | | |
685 | 1 | TEST_F(CreateTableITest, OnlyMajorityReplicasWithoutPlacement) { |
686 | 1 | const int kNumTablets = 6; |
687 | 1 | const string kNamespaceName = "my_keyspace"; |
688 | 1 | const YQLDatabase kNamespaceType = YQL_DATABASE_CQL; |
689 | 1 | const string kTableName = "test-table"; |
690 | 1 | const string kTableName2 = "test-table2"; |
691 | 1 | std::unordered_set<int> stopped_tservers; |
692 | 1 | int num_tservers = 3; |
693 | 1 | int num_alive_tservers = 0; |
694 | | // Scenario: create tables while tservers are paused -- creation is expected to succeed with 2 of 3 tservers alive and to fail with only 1 alive. |
695 | | // Start an RF3 with tserver_unresponsive_timeout_ms=5000 on the master (presumably so paused tservers are marked dead quickly -- confirm). |
696 | 1 | vector<std::string> master_flags = { |
697 | 1 | "--tserver_unresponsive_timeout_ms=5000" |
698 | 1 | }; |
699 | 1 | ASSERT_NO_FATALS(StartCluster({}, master_flags, num_tservers)); |
700 | 1 | num_alive_tservers = 3; |
701 | 1 | LOG(INFO) << "Started an RF3 cluster with 3 tservers and 1 master"; |
702 | | 
703 | | // Pause tserver index 2 to take one node out of service. |
704 | 1 | ASSERT_OK(cluster_->tablet_server(2)->Pause()); |
705 | 1 | LOG(INFO) << "Paused tserver index 2"; |
706 | | 
707 | | // Wait for the master leader to mark it dead, then record it as stopped. |
708 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSDead(2)); |
709 | 1 | stopped_tservers.emplace(2); |
710 | 1 | --num_alive_tservers; |
711 | 1 | LOG(INFO) << "TServer index 2 is now marked DEAD by the leader master"; |
712 | | 
713 | | // Now issue a create table with 2 of the 3 tservers alive; it is expected to succeed. |
714 | | // Create a namespace. |
715 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kNamespaceName, kNamespaceType)); |
716 | 1 | LOG(INFO) << "Created YQL Namespace " << kNamespaceName; |
717 | | 
718 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
719 | | 
720 | 1 | YBTableName table_name(kNamespaceType, kNamespaceName, kTableName); |
721 | 1 | std::unique_ptr<client::YBTableCreator> table_creator1(client_->NewTableCreator()); |
722 | 1 | ASSERT_OK(table_creator1->table_name(table_name) |
723 | 1 | .schema(&client_schema) |
724 | 1 | .num_tablets(kNumTablets) |
725 | 1 | .wait(true) |
726 | 1 | .Create()); |
727 | 1 | LOG(INFO) << "Created table " << kNamespaceName << "." << kTableName; |
728 | | 
729 | | // Verify that each tserver not in stopped_tservers contains kNumTablets with kNumTablets/2 leaders (num_alive_tservers is 2 here). |
730 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
731 | 1 | for (int i = 0; i < num_tservers; i++) { |
732 | 1 | if (stopped_tservers.count(i)) { |
733 | 1 | continue; |
734 | 1 | } |
735 | 1 | if (!VERIFY_RESULT(VerifyTServerTablets( |
736 | 1 | i, kNumTablets, kNumTablets / num_alive_tservers, kTableName, true))) { |
737 | 1 | return false; |
738 | 1 | } |
739 | 1 | } |
740 | 1 | return true; |
741 | 1 | }, 120s * kTimeMultiplier, "Are tablets running", 1s)); |
742 | | 
743 | | // Pause another node (index 1), leaving 1 of 3 alive. Create table should now fail. |
744 | 1 | ASSERT_OK(cluster_->tablet_server(1)->Pause()); |
745 | 1 | LOG(INFO) << "Paused tserver index 1"; |
746 | | 
747 | | // Wait for the master leader to mark it dead, then record it as stopped. |
748 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSDead(1)); |
749 | 1 | stopped_tservers.emplace(1); |
750 | 1 | --num_alive_tservers; |
751 | 1 | LOG(INFO) << "TServer index 1 is now marked DEAD by the leader master"; |
752 | | 
753 | 1 | YBTableName table_name2(kNamespaceType, kNamespaceName, kTableName2); |
754 | 1 | std::unique_ptr<client::YBTableCreator> table_creator2(client_->NewTableCreator()); |
755 | 1 | ASSERT_NOK(table_creator2->table_name(table_name2) |
756 | 1 | .schema(&client_schema) |
757 | 1 | .num_tablets(kNumTablets) |
758 | 1 | .wait(true) |
759 | 1 | .timeout(10s * kTimeMultiplier) |
760 | 1 | .Create()); |
761 | | 
762 | | // Now resume both paused tservers and restore the bookkeeping (stopped set emptied, alive count back to 3). |
763 | 1 | ASSERT_OK(cluster_->tablet_server(2)->Resume()); |
764 | 1 | ASSERT_OK(cluster_->tablet_server(1)->Resume()); |
765 | 1 | stopped_tservers.erase(2); |
766 | 1 | stopped_tservers.erase(1); |
767 | 1 | ++num_alive_tservers; |
768 | 1 | ++num_alive_tservers; |
769 | 1 | LOG(INFO) << "Tablet Server 2 and 1 resumed"; |
770 | | 
771 | | // Verify each tserver getting kNumTablets of the first table (kTableName) with leadership of kNumTablets/3. |
772 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
773 | 1 | for (int i = 0; i < num_tservers; i++) { |
774 | 1 | if (!VERIFY_RESULT(VerifyTServerTablets( |
775 | 1 | i, kNumTablets, kNumTablets / num_alive_tservers, kTableName, true))) { |
776 | 1 | return false; |
777 | 1 | } |
778 | 1 | } |
779 | 1 | return true; |
780 | 1 | }, 120s * kTimeMultiplier, "Are tablets running", 1s)); |
781 | 1 | } |
782 | | |
783 | 1 | TEST_F(CreateTableITest, OnlyMajorityReplicasWithPlacement) { |
784 | 1 | const int kNumTablets = 6; |
785 | 1 | const string kNamespaceName = "my_keyspace"; |
786 | 1 | const YQLDatabase kNamespaceType = YQL_DATABASE_CQL; |
787 | 1 | const string kTableName1 = "test-table1"; |
788 | 1 | const string kTableName2 = "test-table2"; |
789 | 1 | const string kTableName3 = "test-table3"; |
790 | 1 | const string kTableName4 = "test-table4"; |
791 | 1 | const string kTableName5 = "test-table5"; |
792 | 1 | std::unordered_set<int> stopped_tservers; |
793 | 1 | int num_tservers = 3; |
794 | 1 | int num_alive_tservers = 0; |
795 | | 
796 | 1 | vector<std::string> master_flags = { |
797 | 1 | "--tserver_unresponsive_timeout_ms=5000" |
798 | 1 | }; |
799 | 1 | vector<std::string> tserver_flags = { |
800 | 1 | "--placement_cloud=c", |
801 | 1 | "--placement_region=r", |
802 | 1 | "--placement_zone=z${index}" |
803 | 1 | }; |
804 | | // Scenario: table creation under a per-zone placement policy while some tservers are paused; five sub-tests follow. |
805 | | // Test - 1. |
806 | | // Placement Policy: c.r.z0:1, c.r.z1:1, c.r.z2:1 with num_replicas as 3. |
807 | | // Available tservers: 1 in c.r.z0 and 1 in c.r.z1. |
808 | | // Result: Create Table should succeed. |
809 | | 
810 | | // Start an RF3 with tservers placed in "c.r.z0,c.r.z1,c.r.z2". |
811 | 1 | ASSERT_NO_FATALS(StartCluster(tserver_flags, master_flags, 3)); |
812 | 1 | num_alive_tservers = 3; |
813 | 1 | LOG(INFO) << "Started an RF3 cluster with 3 tservers in c.r.z0,c.r.z1,c.r.z2 and 1 master"; |
814 | | 
815 | | // Modify placement info to contain at least one replica in each of the three zones. |
816 | 1 | master::ReplicationInfoPB replication_info; |
817 | 1 | auto* placement_info = replication_info.mutable_live_replicas(); |
818 | 1 | PreparePlacementInfo({ {"z0", 1}, {"z1", 1}, {"z2", 1} }, 3, placement_info); |
819 | | 
820 | 1 | ASSERT_OK(client_->SetReplicationInfo(replication_info)); |
821 | 1 | LOG(INFO) << "Set replication info to c.r.z0,c.r.z1,c.r.z2 with num_replicas as 3"; |
822 | | 
823 | | // Create a namespace. |
824 | 1 | ASSERT_OK(client_->CreateNamespaceIfNotExists(kNamespaceName, kNamespaceType)); |
825 | 1 | LOG(INFO) << "Created YQL Namespace " << kNamespaceName; |
826 | | 
827 | | // Create a schema. |
828 | 1 | client::YBSchema client_schema(client::YBSchemaFromSchema(GetSimpleTestSchema())); |
829 | 1 | LOG(INFO) << "Created schema for tables"; |
830 | | 
831 | | // Bring down one tserver in z2. |
832 | 1 | ASSERT_OK(cluster_->tablet_server(2)->Pause()); |
833 | 1 | LOG(INFO) << "Paused tserver with index 2"; |
834 | | 
835 | | // Wait for the master leader to mark it dead, then record it as stopped. |
836 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSDead(2)); |
837 | 1 | stopped_tservers.emplace(2); |
838 | 1 | --num_alive_tservers; |
839 | 1 | LOG(INFO) << "Tserver index 2 is now marked DEAD by the leader master"; |
840 | | 
841 | | // Issue a create table request, it should succeed. |
842 | 1 | YBTableName table_name1(kNamespaceType, kNamespaceName, kTableName1); |
843 | 1 | std::unique_ptr<client::YBTableCreator> table_creator1(client_->NewTableCreator()); |
844 | 1 | ASSERT_OK(table_creator1->table_name(table_name1) |
845 | 1 | .schema(&client_schema) |
846 | 1 | .num_tablets(kNumTablets) |
847 | 1 | .wait(true) |
848 | 1 | .Create()); |
849 | 1 | LOG(INFO) << "Created table " << kNamespaceName << "." << kTableName1; |
850 | | 
851 | | // Verify that each tserver not in stopped_tservers contains kNumTablets with kNumTablets/2 leaders. |
852 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
853 | 1 | for (int i = 0; i < num_tservers; i++) { |
854 | 1 | if (stopped_tservers.count(i)) { |
855 | 1 | continue; |
856 | 1 | } |
857 | 1 | if (!VERIFY_RESULT(VerifyTServerTablets( |
858 | 1 | i, kNumTablets, kNumTablets / num_alive_tservers, kTableName1, true))) { |
859 | 1 | return false; |
860 | 1 | } |
861 | 1 | } |
862 | 1 | return true; |
863 | 1 | }, 120s * kTimeMultiplier, "Are tablets running", 1s)); |
864 | | 
865 | | // Test - 2. |
866 | | // Placement Policy: c.r.z0:1, c.r.z1:1, c.r.z2:1 with num_replicas as 3. |
867 | | // Available tservers: 1 in c.r.z0. |
868 | | // Result: CreateTable will fail because we don't have a raft quorum underneath. |
869 | | 
870 | | // Bring down another tserver, create table should now fail. |
871 | 1 | ASSERT_OK(cluster_->tablet_server(1)->Pause()); |
872 | 1 | LOG(INFO) << "Paused tserver with index 1"; |
873 | | 
874 | | // Wait for the master leader to mark it dead, then record it as stopped. |
875 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSDead(1)); |
876 | 1 | stopped_tservers.emplace(1); |
877 | 1 | --num_alive_tservers; |
878 | 1 | LOG(INFO) << "Tserver index 1 is now marked DEAD by the leader master"; |
879 | | 
880 | 1 | YBTableName table_name2(kNamespaceType, kNamespaceName, kTableName2); |
881 | 1 | std::unique_ptr<client::YBTableCreator> table_creator2(client_->NewTableCreator()); |
882 | 1 | ASSERT_NOK(table_creator2->table_name(table_name2) |
883 | 1 | .schema(&client_schema) |
884 | 1 | .num_tablets(kNumTablets) |
885 | 1 | .wait(true) |
886 | 1 | .timeout(10s * kTimeMultiplier) |
887 | 1 | .Create()); |
888 | | 
889 | | // Test - 3. |
890 | | // Placement Policy: c.r.z0:1, c.r.z1:1, c.r.z2:1 with num_replicas as 3. |
891 | | // Available tservers: 2 in c.r.z0. |
892 | | // Result: Create Table will not succeed. |
893 | | 
894 | | // Add another tserver in c.r.z0. Create table should still fail after adding. |
895 | 1 | AddTServerInZone("z0"); |
896 | 1 | ASSERT_OK(cluster_->WaitForTabletServerCount(++num_tservers, MonoDelta::FromSeconds(20))); |
897 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSAlive(3)); |
898 | 1 | ++num_alive_tservers; |
899 | | 
900 | 1 | YBTableName table_name3(kNamespaceType, kNamespaceName, kTableName3); |
901 | 1 | std::unique_ptr<client::YBTableCreator> table_creator3(client_->NewTableCreator()); |
902 | 1 | ASSERT_NOK(table_creator3->table_name(table_name3) |
903 | 1 | .schema(&client_schema) |
904 | 1 | .num_tablets(kNumTablets) |
905 | 1 | .wait(true) |
906 | 1 | .timeout(10s * kTimeMultiplier) |
907 | 1 | .Create()); |
908 | | 
909 | | // Test - 4. |
910 | | // Placement Policy: c.r.z0:1, c.r.z1:1, c.r.z2:1 with num_replicas as 5. |
911 | | // Available tservers: 2 in c.r.z0 and 1 in c.r.z2. |
912 | | // Result: Create Table should succeed. |
913 | | 
914 | | // Increase the number of replicas to 5 with the same placement config. |
915 | 1 | master::ReplicationInfoPB replication_info2; |
916 | 1 | auto* placement_info2 = replication_info2.mutable_live_replicas(); |
917 | 1 | PreparePlacementInfo({ {"z0", 1}, {"z1", 1}, {"z2", 1} }, 5, placement_info2); |
918 | | 
919 | 1 | ASSERT_OK(client_->SetReplicationInfo(replication_info2)); |
920 | 1 | LOG(INFO) << "Set replication info to c.r.z0,c.r.z1,c.r.z2 with num_replicas as 5"; |
921 | | 
922 | | // Now resume tserver 2 and wait for master to mark it alive. NOTE(review): index 2 is not erased from stopped_tservers, so the checks below still skip it -- confirm this is intended. |
923 | 1 | ASSERT_OK(cluster_->tablet_server(2)->Resume()); |
924 | 1 | ASSERT_OK(cluster_->WaitForMasterToMarkTSAlive(2)); |
925 | 1 | ++num_alive_tservers; |
926 | 1 | LOG(INFO) << "Tablet Server index 2 resumed"; |
927 | | 
928 | | // Create table should now succeed. |
929 | 1 | YBTableName table_name4(kNamespaceType, kNamespaceName, kTableName4); |
930 | 1 | std::unique_ptr<client::YBTableCreator> table_creator4(client_->NewTableCreator()); |
931 | 1 | ASSERT_OK(table_creator4->table_name(table_name4) |
932 | 1 | .schema(&client_schema) |
933 | 1 | .num_tablets(kNumTablets) |
934 | 1 | .wait(true) |
935 | 1 | .Create()); |
936 | | // Verify tablet and leader counts for kTableName4 on every tserver not in stopped_tservers. |
937 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
938 | 1 | for (int i = 0; i < num_tservers; i++) { |
939 | 1 | if (stopped_tservers.count(i)) { |
940 | 1 | continue; |
941 | 1 | } |
942 | 1 | if (!VERIFY_RESULT(VerifyTServerTablets( |
943 | 1 | i, kNumTablets, kNumTablets / num_alive_tservers, kTableName4, true))) { |
944 | 1 | return false; |
945 | 1 | } |
946 | 1 | } |
947 | 1 | return true; |
948 | 1 | }, 120s * kTimeMultiplier, "Are tablets running", 1s)); |
949 | | 
950 | | // Test - 5. |
951 | | // Placement Policy: c.r.z0:1, c.r.z1:1, c.r.z2:1 with num_replicas as 5 as live replicas |
952 | | // and c.r.z4:1 with num_replicas as 1 as read_replica. |
953 | | // Available tservers: 2 in c.r.z0 and 1 in c.r.z2. |
954 | | // Result: Create Table should succeed despite having 0 read replica nodes. |
955 | | 
956 | | // Modify Placement info to contain a read replica also. |
957 | 1 | master::ReplicationInfoPB replication_info3; |
958 | 1 | auto* placement_info3 = replication_info3.mutable_live_replicas(); |
959 | 1 | PreparePlacementInfo({ {"z0", 1}, {"z1", 1}, {"z2", 1} }, 5, placement_info3); |
960 | 1 | auto* read_placement_info = replication_info3.add_read_replicas(); |
961 | 1 | read_placement_info->set_placement_uuid("read-replica"); |
962 | 1 | PreparePlacementInfo({ {"z4", 1} }, 1, read_placement_info); |
963 | 1 | ASSERT_OK(client_->SetReplicationInfo(replication_info3)); |
964 | 1 | LOG(INFO) << "Set replication info to " << replication_info3.ShortDebugString(); |
965 | | 
966 | | // Try creating a table with its own fresh creator (table_creator5). It should succeed. |
967 | 1 | YBTableName table_name5(kNamespaceType, kNamespaceName, kTableName5); |
968 | 1 | std::unique_ptr<client::YBTableCreator> table_creator5(client_->NewTableCreator()); |
969 | 1 | ASSERT_OK(table_creator5->table_name(table_name5) |
970 | 1 | .schema(&client_schema) |
971 | 1 | .num_tablets(kNumTablets) |
972 | 1 | .wait(true) |
973 | 1 | .Create()); |
974 | | 
975 | | // Resume tserver 1. |
976 | 1 | ASSERT_OK(cluster_->tablet_server(1)->Resume()); |
977 | 1 | stopped_tservers.erase(1); |
978 | 1 | ++num_alive_tservers; |
979 | 1 | LOG(INFO) << "Tablet server index 1 resumed"; |
980 | | 
981 | | // LB should move data to this fourth server also. NOTE(review): this check targets kTableName4 (not kTableName5) and passes false as the last argument -- confirm intended. |
982 | 1 | ASSERT_OK(WaitFor([&]() -> Result<bool> { |
983 | 1 | for (int i = 0; i < num_tservers; i++) { |
984 | 1 | if (stopped_tservers.count(i)) { |
985 | 1 | continue; |
986 | 1 | } |
987 | 1 | if (!VERIFY_RESULT(VerifyTServerTablets( |
988 | 1 | i, kNumTablets, kNumTablets / num_alive_tservers, kTableName4, false))) { |
989 | 1 | return false; |
990 | 1 | } |
991 | 1 | } |
992 | 1 | return true; |
993 | 1 | }, 120s * kTimeMultiplier, "Are tablets running", 1s)); |
994 | 1 | } |
995 | | |
996 | | } // namespace yb |