Skip to content

Commit cf4b3d9

Browse files
Add v2 FieldEntry #1.
PiperOrigin-RevId: 661433949
1 parent c5f6231 commit cf4b3d9

5 files changed

+572
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
2+
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
3+
4+
#include <cstdint>
5+
#include <limits>
6+
7+
#include "absl/log/absl_check.h"
8+
9+
namespace google {
10+
namespace protobuf {
11+
namespace internal {
12+
namespace v2 {
13+
14+
// Field layout enums.
15+
//
16+
// Structural information about fields is packed into an 8-bit value. The enum
17+
// types below represent bitwise fields, along with their respective widths,
18+
// shifts, and masks. To pack into one byte, some mutually exclusive types share
19+
// bits in [5, 7].
20+
//
21+
// <<Numeric Fields>>
22+
// Bit:
23+
// +---------------+---------------+
24+
// |7 ... 4|3 ... 0|
25+
// +---------------+---------------+
26+
// : . : . : . : . : 3|===========| [3] FieldKind
27+
// : . : . : 5|=======| . : . : . : [2] Cardinality
28+
// : . : 6|===| . : . : . : . : . : [1] NumericKind
29+
// +---------------+---------------+
30+
//
31+
// <<Message Fields>>
32+
// Bit:
33+
// +---------------+---------------+
34+
// |7 ... 4|3 ... 0|
35+
// +---------------+---------------+
36+
// : . : . : . : . : 3|===========| [3] FieldKind
37+
// : . : . : 5|=======| . : . : . : [2] Cardinality
38+
// : 7|=======| . : . : . : . : . : [2] MessageKind
39+
// +---------------+---------------+
40+
//
41+
// <<String Fields>>
42+
// Bit:
43+
// +---------------+---------------+
44+
// |7 ... 4|3 ... 0|
45+
// +---------------+---------------+
46+
// : . : . : . : . : 3|===========| [3] FieldKind
47+
// : . : . : 5|=======| . : . : . : [2] Cardinality
48+
// |===========| . : . : . : . : . : [3] StringKind
49+
// +---------------+---------------+
50+
//
51+
52+
// clang-format off
53+
54+
// FieldKind (3 bits):
55+
// These values broadly represent a wire type and an in-memory storage class.
56+
namespace FieldKind {
// Location of the FieldKind bit-field inside the 8-bit type card: the lowest
// kBits bits (kShift is 0, so enumerators are stored unshifted).
inline constexpr int kShift = 0;
inline constexpr int kBits  = 3;
inline constexpr int kMask  = ((1 << kBits) - 1) << kShift;

// One enumerator per storage class; the trailing comments list the proto
// field types that map onto each class.
enum Kinds : uint8_t {
  kFixed8  = 0,  // bool
  kFixed16 = 1,  // place holder
  kFixed32 = 2,  // (s|u)?int32, (s)?fixed32, float, enum
  kFixed64 = 3,  // (s|u)?int64, (s)?fixed64, double
  kBytes   = 4,  // bytes
  kString  = 5,  // string
  kMessage = 6,  // group, message
  kMap     = 7,  // map<...>
};

// The largest enumerator has to be representable in kBits bits.
static_assert(kMap < (1 << kBits), "too many types");
}  // namespace FieldKind
74+
75+
// Cardinality (2 bits):
76+
// These values determine how many values a field can have and its presence.
77+
namespace Cardinality {
78+
inline constexpr int kShift = FieldKind::kShift + FieldKind::kBits;
79+
inline constexpr int kBits = 2;
80+
inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
81+
82+
enum Kinds : uint8_t {
83+
kSingular = 0,
84+
kOptional = 1 << kShift,
85+
kRepeated = 2 << kShift,
86+
kOneof = 3 << kShift,
87+
};
88+
} // namespace Cardinality
89+
90+
// NumericKind, MessageKind, StringKind are mutually exclusive and share the
91+
// same bit-space (i.e. the same shift).
92+
93+
// NumericKind (1 bit):
94+
// Indicates whether a numeric is signed.
95+
namespace NumericKind {
96+
inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
97+
inline constexpr int kBits = 1;
98+
inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
99+
100+
enum Kinds : uint8_t {
101+
kUnsigned = 0,
102+
kSigned = 1 << kShift,
103+
};
104+
} // namespace NumericKind
105+
106+
// MessageKind (2 bits):
107+
// Indicates if it's LazyField or eager message / group.
108+
namespace MessageKind {
109+
inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
110+
inline constexpr int kBits = 2;
111+
inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
112+
113+
enum Kinds : uint8_t {
114+
kEager = 0,
115+
kLazy = 1 << kShift,
116+
kGroup = 2 << kShift,
117+
};
118+
} // namespace MessageKind
119+
120+
// StringKind (3 bits):
121+
// Indicates the string representation (arena pointer, inlined, view, cord,
// string piece or string pointer).
122+
namespace StringKind {
123+
inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
124+
inline constexpr int kBits = 3;
125+
inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
126+
127+
enum Kinds : uint8_t {
128+
kArenaPtr = 0,
129+
kInlined = 1 << kShift,
130+
kView = 2 << kShift,
131+
kCord = 3 << kShift,
132+
kStringPiece = 4 << kShift,
133+
kStringPtr = 5 << kShift,
134+
};
135+
} // namespace StringKind
136+
137+
// Convenience aliases except cardinality (8 bits, with format):
138+
enum FieldType : uint8_t {
139+
// Numeric types:
140+
kBool = 0 | FieldKind::kFixed8 | NumericKind::kUnsigned,
141+
142+
kInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
143+
kSInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
144+
kSFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
145+
kUInt32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
146+
kFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
147+
kFloat = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
148+
kEnum = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
149+
150+
kInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
151+
kSInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
152+
kSFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
153+
kUInt64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
154+
kFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
155+
kDouble = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
156+
157+
// String types:
158+
kBytes = FieldKind::kBytes,
159+
kString = FieldKind::kString,
160+
161+
// Message types:
162+
kMessage = 0 | FieldKind::kMessage | MessageKind::kEager,
163+
kLazyMessage = 0 | FieldKind::kMessage | MessageKind::kLazy,
164+
kGroup = 0 | FieldKind::kMessage | MessageKind::kGroup,
165+
166+
// Map types:
167+
kMap = FieldKind::kMap,
168+
};
169+
// clang-format on
170+
171+
struct FieldEntry {
172+
// Constructors without aux index. (Should be common cases.)
173+
constexpr FieldEntry(uint8_t type, uint8_t hasbit_index, uint16_t offset,
174+
uint16_t number)
175+
: field_type(type),
176+
hasbit_index(hasbit_index),
177+
offset(offset),
178+
field_number(number),
179+
aux_index(kNoAuxIdx) {}
180+
181+
// If any of hasbit_index, offset, field_number is too big to fit, fallback to
182+
// aux entry for all.
183+
constexpr FieldEntry(uint8_t type, uint16_t aux_index)
184+
: field_type(type),
185+
hasbit_index(kHasbitFallbackToAux),
186+
offset(kFallbackToAux),
187+
field_number(kFallbackToAux),
188+
aux_index(aux_index) {}
189+
190+
constexpr bool ShouldLookupAuxEntry() const { return aux_index != kNoAuxIdx; }
191+
192+
uint8_t GetFieldKind() const { return field_type & FieldKind::kMask; }
193+
uint8_t GetCardinality() const { return field_type & Cardinality::kMask; }
194+
uint8_t GetNumericKind() const {
195+
ABSL_DCHECK_LT(GetFieldKind(), FieldKind::kBytes);
196+
return field_type & NumericKind::kMask;
197+
}
198+
uint8_t GetMessageKind() const {
199+
ABSL_DCHECK_EQ(GetFieldKind(), FieldKind::kMessage);
200+
return field_type & MessageKind::kMask;
201+
}
202+
uint8_t GetStringKind() const {
203+
ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
204+
GetFieldKind() == FieldKind::kString);
205+
return field_type & StringKind::kMask;
206+
}
207+
208+
bool IsSigned() const { return GetNumericKind() == NumericKind::kSigned; }
209+
bool IsUTF8() const {
210+
ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
211+
GetFieldKind() == FieldKind::kString);
212+
return GetFieldKind() == FieldKind::kString;
213+
}
214+
215+
bool IsRepeated() const { return GetCardinality() == Cardinality::kRepeated; }
216+
217+
// Field type consists of FieldKind, Cardinality and type-specific Kind.
218+
uint8_t field_type;
219+
// Covers up to 256 fields. Fallback to aux if 0xFF.
220+
uint8_t hasbit_index;
221+
// Covers sizeof(Message) up to 64 KiB. Fallback to aux if 0xFFFF.
222+
uint16_t offset;
223+
// Most field numbers should fit 16 bits. Fallback to aux if 0xFFFF.
224+
uint16_t field_number;
225+
// Only up to 2^16 fallback cases are supported.
226+
uint16_t aux_index;
227+
228+
static constexpr uint16_t kHasbitFallbackToAux = 0xFF;
229+
static constexpr uint16_t kFallbackToAux = 0xFFFF;
230+
static constexpr uint16_t kNoAuxIdx = 0xFFFF;
231+
232+
// These constants are same as the above but compared against values from
233+
// reflection or protoc (hence different types) to determine whether to use
234+
// aux entries.
235+
static constexpr uint32_t kHasbitIdxLimit =
236+
std::numeric_limits<uint8_t>::max();
237+
static constexpr uint32_t kOffsetLimit = std::numeric_limits<uint16_t>::max();
238+
static constexpr int kFieldNumberLimit = std::numeric_limits<uint16_t>::max();
239+
};
240+
241+
static_assert(sizeof(FieldEntry) == sizeof(uint64_t), "");
242+
243+
} // namespace v2
244+
} // namespace internal
245+
} // namespace protobuf
246+
} // namespace google
247+
248+
#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
#include "google/protobuf/generated_message_table_gen.h"
2+
3+
#include <cstdint>
4+
5+
#include "absl/log/absl_check.h"
6+
#include "google/protobuf/descriptor.h"
7+
#include "google/protobuf/generated_message_table.h"
8+
#include "google/protobuf/port.h"
9+
10+
namespace google {
11+
namespace protobuf {
12+
namespace internal {
13+
namespace v2 {
14+
15+
using CppStringType = FieldDescriptor::CppStringType;
16+
17+
namespace {
18+
19+
uint8_t GenerateStringKind(const FieldDescriptor* field, bool is_inlined) {
20+
switch (field->cpp_string_type()) {
21+
// VIEW fields are treated as strings for now.
22+
case CppStringType::kView:
23+
case CppStringType::kString:
24+
return field->is_repeated() ? StringKind::kStringPtr
25+
: is_inlined ? StringKind::kInlined
26+
: StringKind::kArenaPtr;
27+
case CppStringType::kCord:
28+
ABSL_CHECK(!is_inlined);
29+
return StringKind::kCord;
30+
case CppStringType::kStringPiece:
31+
ABSL_CHECK(!is_inlined);
32+
return StringKind::kStringPiece;
33+
default:
34+
Unreachable();
35+
break;
36+
}
37+
}
38+
39+
} // namespace
40+
41+
uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info) {
42+
constexpr uint8_t field_type_to_type_card[] = {
43+
0, // placeholder as type starts from 1.
44+
FieldType::kDouble, // TYPE_DOUBLE
45+
FieldType::kFloat, // TYPE_FLOAT
46+
FieldType::kInt64, // TYPE_INT64
47+
FieldType::kUInt64, // TYPE_UINT64
48+
FieldType::kInt32, // TYPE_INT32
49+
FieldType::kFixed64, // TYPE_FIXED64
50+
FieldType::kFixed32, // TYPE_FIXED32
51+
FieldType::kBool, // TYPE_BOOL
52+
FieldType::kBytes, // TYPE_STRING
53+
FieldType::kGroup, // TYPE_GROUP
54+
FieldType::kMessage, // TYPE_MESSAGE
55+
FieldType::kBytes, // TYPE_BYTES
56+
FieldType::kUInt32, // TYPE_UINT32
57+
FieldType::kEnum, // TYPE_ENUM
58+
FieldType::kSFixed32, // TYPE_SFIXED32
59+
FieldType::kSFixed64, // TYPE_SFIXED64
60+
FieldType::kSInt32, // TYPE_SINT32
61+
FieldType::kSInt64, // TYPE_SINT64
62+
};
63+
static_assert(
64+
sizeof(field_type_to_type_card) == (FieldDescriptor::MAX_TYPE + 1), "");
65+
66+
if (field->is_map()) return FieldType::kMap;
67+
68+
auto field_type = field->type();
69+
uint8_t type_card = field_type_to_type_card[field_type];
70+
// Override previously set type for lazy message and UTF8 strings.
71+
switch (field_type) {
72+
case FieldDescriptor::TYPE_MESSAGE:
73+
if (info.is_lazy) type_card = FieldType::kLazyMessage;
74+
break;
75+
case FieldDescriptor::TYPE_STRING:
76+
if (field->requires_utf8_validation()) type_card = FieldType::kString;
77+
break;
78+
default:
79+
break;
80+
}
81+
82+
// Set cardinality.
83+
if (field->is_repeated()) {
84+
type_card |= Cardinality::kRepeated;
85+
} else if (field->real_containing_oneof()) {
86+
type_card |= Cardinality::kOneof;
87+
} else if (field->has_presence()) {
88+
type_card |= Cardinality::kOptional;
89+
} else {
90+
type_card |= Cardinality::kSingular;
91+
}
92+
93+
// Set StringKind for string fields. Note that numerics (signedness) and
94+
// messages (lazy) are already specified.
95+
return field->cpp_type() != FieldDescriptor::CPPTYPE_STRING
96+
? type_card
97+
: type_card | GenerateStringKind(field, info.is_inlined);
98+
}
99+
100+
} // namespace v2
101+
} // namespace internal
102+
} // namespace protobuf
103+
} // namespace google
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
2+
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
3+
4+
#include <cstdint>
5+
6+
#include "google/protobuf/descriptor.h"
7+
8+
// This file contains types and APIs to generate tables for v2 wireformat.
9+
10+
namespace google {
11+
namespace protobuf {
12+
namespace internal {
13+
namespace v2 {
14+
15+
// Extra per-field facts passed to MakeTypeCardForField that are not read from
// the FieldDescriptor itself. Both flags default to false so that a
// default-initialized instance never carries indeterminate values; aggregate
// initialization (`FieldTypeInfo{inlined, lazy}`) keeps working.
struct FieldTypeInfo {
  // Forwarded to the StringKind selection for string fields.
  bool is_inlined = false;
  // When set on a TYPE_MESSAGE field, the lazy message type card is used.
  bool is_lazy = false;
};
19+
20+
// Returns the 8-bit type card for a given field. A type card contains information
21+
// about field types and cardinality that are needed to iterate fields per
22+
// message.
23+
uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info);
24+
25+
} // namespace v2
26+
} // namespace internal
27+
} // namespace protobuf
28+
} // namespace google
29+
30+
#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__

0 commit comments

Comments
 (0)