Topic Text   Topic Comments (1)   Topic Properties   Topic Information xx@sof...
Topic title: Protoc buf Saturday January 31, 2009 08:28:30

Download topic text | View in variable-width font | Tab width set to 8 (change to 4)

Files in topic: (view all files)  
dynamic_message.cc   {+532,-0}

[Add General Comment] to topic.

File dynamic_message.cc (Revision 1.0) [Add File Comment] [Top]
 
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.
3 // http://code.google.com/p/protobuf/
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16
17 // Author: kenton@google.com (Kenton Varda)
18 // Based on original Protocol Buffers design by
19 // Sanjay Ghemawat, Jeff Dean, and others.
20 //
21 // DynamicMessage is implemented by constructing a data structure which
22 // has roughly the same memory layout as a generated message would have.
23 // Then, we use GeneratedMessageReflection to implement our reflection
24 // interface. All the other operations we need to implement (e.g.
25 // parsing, copying, etc.) are already implemented in terms of
26 // Reflection, so the rest is easy.
27 //
28 // The up side of this strategy is that it's very efficient. We don't
29 // need to use hash_maps or generic representations of fields. The
30 // down side is that this is a low-level memory management hack which
31 // can be tricky to get right.
32 //
33 // As mentioned in the header, we only expose a DynamicMessageFactory
34 // publicly, not the DynamicMessage class itself. This is because
35 // GenericMessageReflection wants to have a pointer to a "default"
36 // copy of the class, with all fields initialized to their default
37 // values. We only want to construct one of these per message type,
38 // so DynamicMessageFactory stores a cache of default messages for
39 // each type it sees (each unique Descriptor pointer). The code
40 // refers to the "default" copy of the class as the "prototype".
41 //
42 // Note on memory allocation: This module often calls "operator new()"
43 // to allocate untyped memory, rather than calling something like
44 // "new uint8[]". This is because "operator new()" means "Give me some
45 // space which I can use as I please." while "new uint8[]" means "Give
46 // me an array of 8-bit integers.". In practice, the later may return
47 // a pointer that is not aligned correctly for general use. I believe
48 // Item 8 of "More Effective C++" discusses this in more detail, though
49 // I don't have the book on me right now so I'm not sure.
50
51 #include <algorithm>
52 #include <google/protobuf/stubs/hash.h>
53
54 #include <google/protobuf/stubs/common.h>
55
56 #include <google/protobuf/dynamic_message.h>
57 #include <google/protobuf/descriptor.h>
58 #include <google/protobuf/descriptor.pb.h>
59 #include <google/protobuf/generated_message_reflection.h>
60 #include <google/protobuf/reflection_ops.h>
61 #include <google/protobuf/repeated_field.h>
62 #include <google/protobuf/extension_set.h>
63 #include <google/protobuf/wire_format.h>
64
65 namespace google {
66 namespace protobuf {
67
68 using internal::WireFormat;
69 using internal::ExtensionSet;
70 using internal::GeneratedMessageReflection;
71 using internal::GenericRepeatedField;
72
73
74 // ===================================================================
75 // Some helper tables and functions...
76
77 namespace {
78
79 // Compute the byte size of the in-memory representation of the field.
80 int FieldSpaceUsed(const FieldDescriptor* field) {
81 typedef FieldDescriptor FD; // avoid line wrapping
82 if (field->label() == FD::LABEL_REPEATED) {
83 switch (field->cpp_type()) {
84 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >);
85 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >);
86 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >);
87 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >);
88 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >);
89 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >);
90 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >);
91 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >);
92 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
93
94 case FD::CPPTYPE_STRING:
95 return sizeof(RepeatedPtrField<string>);
96 break;
97 }
98 } else {
99 switch (field->cpp_type()) {
100 case FD::CPPTYPE_INT32 : return sizeof(int32 );
101 case FD::CPPTYPE_INT64 : return sizeof(int64 );
102 case FD::CPPTYPE_UINT32 : return sizeof(uint32 );
103 case FD::CPPTYPE_UINT64 : return sizeof(uint64 );
104 case FD::CPPTYPE_DOUBLE : return sizeof(double );
105 case FD::CPPTYPE_FLOAT : return sizeof(float );
106 case FD::CPPTYPE_BOOL : return sizeof(bool );
107 case FD::CPPTYPE_ENUM : return sizeof(int );
108 case FD::CPPTYPE_MESSAGE: return sizeof(Message*);
109
110 case FD::CPPTYPE_STRING:
111 return sizeof(string*);
112 break;
113 }
114 }
115
116 GOOGLE_LOG(DFATAL) << "Can't get here.";
117 return 0;
118 }
119
120 struct DescendingFieldSizeOrder {
121 inline bool operator()(const FieldDescriptor* a,
122 const FieldDescriptor* b) {
123 // All repeated fields come first.
124 if (a->is_repeated()) {
125 if (b->is_repeated()) {
126 // Repeated fields and are not ordered with respect to each other.
127 return false;
128 } else {
129 return true;
130 }
131 } else if (b->is_repeated()) {
132 return false;
133 } else {
134 // Remaining fields in descending order by size.
135 return FieldSpaceUsed(a) > FieldSpaceUsed(b);
136 }
137 }
138 };
139
140 inline int DivideRoundingUp(int i, int j) {
141 return (i + (j - 1)) / j;
142 }
143
144 static const int kSafeAlignment = sizeof(uint64);
145
146 // Rounds the given byte offset up to the next offset aligned such that any
147 // type may be stored at it.
148 inline int AlignOffset(int offset) {
149 return DivideRoundingUp(offset, kSafeAlignment) * kSafeAlignment;
150 }
151
152 #define bitsizeof(T) (sizeof(T) * 8)
153
154 } // namespace
155
156 // ===================================================================
157
158 class DynamicMessage : public Message {
159 public:
160 struct TypeInfo {
161 int size;
162 int has_bits_offset;
163 int unknown_fields_offset;
164 int extensions_offset;
165
166 // Not owned by the TypeInfo.
167 DynamicMessageFactory* factory; // The factory that created this object.
168 const DescriptorPool* pool; // The factory's DescriptorPool.
169 const Descriptor* type; // Type of this DynamicMessage.
170
171 // Warning: The order in which the following pointers are defined is
172 // important (the prototype must be deleted *before* the offsets).
173 scoped_array<int> offsets;
174 scoped_ptr<const GeneratedMessageReflection> reflection;
175 scoped_ptr<const DynamicMessage> prototype;
176 };
177
178 DynamicMessage(const TypeInfo* type_info);
179 ~DynamicMessage();
180
181 // Called on the prototype after construction to initialize message fields.
182 void CrossLinkPrototypes();
183
184 // implements Message ----------------------------------------------
185
186 Message* New() const;
187
188 int GetCachedSize() const;
189 void SetCachedSize(int size) const;
190
191 const Descriptor* GetDescriptor() const;
192 const Reflection* GetReflection() const;
193
194 private:
195 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
196
197 inline bool is_prototype() const {
198 return type_info_->prototype == this ||
199 // If type_info_->prototype is NULL, then we must be constructing
200 // the prototype now, which means we must be the prototype.
201 type_info_->prototype == NULL;
202 }
203
204 inline void* OffsetToPointer(int offset) {
205 return reinterpret_cast<uint8*>(this) + offset;
206 }
207 inline const void* OffsetToPointer(int offset) const {
208 return reinterpret_cast<const uint8*>(this) + offset;
209 }
210
211 const TypeInfo* type_info_;
212
213 // TODO(kenton): Make this an atomic<int> when C++ supports it.
214 mutable int cached_byte_size_;
215 };
216
217 DynamicMessage::DynamicMessage(const TypeInfo* type_info)
218 : type_info_(type_info),
219 cached_byte_size_(0) {
220 // We need to call constructors for various fields manually and set
221 // default values where appropriate. We use placement new to call
222 // constructors. If you haven't heard of placement new, I suggest Googling
223 // it now. We use placement new even for primitive types that don't have
224 // constructors for consistency. (In theory, placement new should be used
225 // any time you are trying to convert untyped memory to typed memory, though
226 // in practice that's not strictly necessary for types that don't have a
227 // constructor.)
228
229 const Descriptor* descriptor = type_info_->type;
230
231 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet;
232
233 if (type_info_->extensions_offset != -1) {
234 new(OffsetToPointer(type_info_->extensions_offset))
235 ExtensionSet(&type_info_->type, type_info_->pool, type_info_->factory);
236 }
237
238 for (int i = 0; i < descriptor->field_count(); i++) {
239 const FieldDescriptor* field = descriptor->field(i);
240 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
241 switch (field->cpp_type()) {
242 #define HANDLE_TYPE(CPPTYPE, TYPE) \
243 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
244 if (!field->is_repeated()) { \
245 new(field_ptr) TYPE(field->default_value_##TYPE()); \
246 } else { \
247 new(field_ptr) RepeatedField<TYPE>(); \
248 } \
249 break;
250
251 HANDLE_TYPE(INT32 , int32 );
252 HANDLE_TYPE(INT64 , int64 );
253 HANDLE_TYPE(UINT32, uint32);
254 HANDLE_TYPE(UINT64, uint64);
255 HANDLE_TYPE(DOUBLE, double);
256 HANDLE_TYPE(FLOAT , float );
257 HANDLE_TYPE(BOOL , bool );
258 #undef HANDLE_TYPE
259
260 case FieldDescriptor::CPPTYPE_ENUM:
261 if (!field->is_repeated()) {
262 new(field_ptr) int(field->default_value_enum()->number());
263 } else {
264 new(field_ptr) RepeatedField<int>();
265 }
266 break;
267
268 case FieldDescriptor::CPPTYPE_STRING:
269 if (!field->is_repeated()) {
270 if (is_prototype()) {
271 new(field_ptr) const string*(&field->default_value_string());
272 } else {
273 string* default_value =
274 *reinterpret_cast<string* const*>(
275 type_info_->prototype->OffsetToPointer(
276 type_info_->offsets[i]));
277 new(field_ptr) string*(default_value);
278 }
279 } else {
280 new(field_ptr) RepeatedPtrField<string>();
281 }
282 break;
283
284 case FieldDescriptor::CPPTYPE_MESSAGE: {
285 // If this object is the prototype, its CPPTYPE_MESSAGE fields
286 // must be initialized later, in CrossLinkPrototypes(), so we don't
287 // initialize them here.
288 if (!is_prototype()) {
289 if (!field->is_repeated()) {
290 new(field_ptr) Message*(NULL);
291 } else {
292 const RepeatedPtrField<Message>* prototype_field =
293 reinterpret_cast<const RepeatedPtrField<Message>*>(
294 type_info_->prototype->OffsetToPointer(
295 type_info_->offsets[i]));
296 new(field_ptr) RepeatedPtrField<Message>(
297 prototype_field->prototype());
298 }
299 }
300 break;
301 }
302 }
303 }
304 }
305
306 DynamicMessage::~DynamicMessage() {
307 const Descriptor* descriptor = type_info_->type;
308
309 reinterpret_cast<UnknownFieldSet*>(
310 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet();
311
312 if (type_info_->extensions_offset != -1) {
313 reinterpret_cast<ExtensionSet*>(
314 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet();
315 }
316
317 // We need to manually run the destructors for repeated fields and strings,
318 // just as we ran their constructors in the the DynamicMessage constructor.
319 // Additionally, if any singular embedded messages have been allocated, we
320 // need to delete them, UNLESS we are the prototype message of this type,
321 // in which case any embedded messages are other prototypes and shouldn't
322 // be touched.
323 for (int i = 0; i < descriptor->field_count(); i++) {
324 const FieldDescriptor* field = descriptor->field(i);
325 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
326
327 if (field->is_repeated()) {
328 GenericRepeatedField* field =
329 reinterpret_cast<GenericRepeatedField*>(field_ptr);
330 field->~GenericRepeatedField();
331
332 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
333 string* ptr = *reinterpret_cast<string**>(field_ptr);
334 if (ptr != &field->default_value_string()) {
335 delete ptr;
336 }
337 } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
338 !is_prototype()) {
339 Message* message = *reinterpret_cast<Message**>(field_ptr);
340 if (message != NULL) {
341 delete message;
342 }
343 }
344 }
345 }
346
347 void DynamicMessage::CrossLinkPrototypes() {
348 // This should only be called on the prototype message.
349 GOOGLE_CHECK(is_prototype());
350
351 DynamicMessageFactory* factory = type_info_->factory;
352 const Descriptor* descriptor = type_info_->type;
353
354 // Cross-link default messages.
355 for (int i = 0; i < descriptor->field_count(); i++) {
356 const FieldDescriptor* field = descriptor->field(i);
357 void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
358
359 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
360 // For fields with message types, we need to cross-link with the
361 // prototype for the field's type.
362 const Message* field_prototype =
363 factory->GetPrototype(field->message_type());
364
365 if (field->is_repeated()) {
366 // For repeated fields, we actually construct the RepeatedPtrField
367 // here, but only for fields with message types. All other repeated
368 // fields are constructed in DynamicMessage's constructor.
369 new(field_ptr) RepeatedPtrField<Message>(field_prototype);
370 } else {
371 // For singular fields, the field is just a pointer which should
372 // point to the prototype. (OK to const_cast here because the
373 // prototype itself will only be available const to the outside
374 // world.)
375 new(field_ptr) Message*(const_cast<Message*>(field_prototype));
376 }
377 }
378 }
379 }
380
381 Message* DynamicMessage::New() const {
382 void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size));
383 memset(new_base, 0, type_info_->size);
384 return new(new_base) DynamicMessage(type_info_);
385 }
386
387 int DynamicMessage::GetCachedSize() const {
388 return cached_byte_size_;
389 }
390
391 void DynamicMessage::SetCachedSize(int size) const {
392 // This is theoretically not thread-compatible, but in practice it works
393 // because if multiple threads write this simultaneously, they will be
394 // writing the exact same value.
395 cached_byte_size_ = size;
396 }
397
398 const Descriptor* DynamicMessage::GetDescriptor() const {
399 return type_info_->type;
400 }
401
402 const Reflection* DynamicMessage::GetReflection() const {
403 return type_info_->reflection.get();
404 }
405
406 // ===================================================================
407
408 struct DynamicMessageFactory::PrototypeMap {
409 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map;
410 Map map_;
411 };
412
413 DynamicMessageFactory::DynamicMessageFactory()
414 : pool_(NULL), prototypes_(new PrototypeMap) {
415 }
416
417 DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
418 : pool_(pool), prototypes_(new PrototypeMap) {
419 }
420
421 DynamicMessageFactory::~DynamicMessageFactory() {
422 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
423 iter != prototypes_->map_.end(); ++iter) {
424 delete iter->second;
425 }
426 }
427
428
429 const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
430 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type];
431 if (*target != NULL) {
432 // Already exists.
433 return (*target)->prototype.get();
434 }
435
436 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo;
437 *target = type_info;
438
439 type_info->type = type;
440 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_;
441 type_info->factory = this;
442
443 // We need to construct all the structures passed to
444 // GeneratedMessageReflection's constructor. This includes:
445 // - A block of memory that contains space for all the message's fields.
446 // - An array of integers indicating the byte offset of each field within
447 // this block.
448 // - A big bitfield containing a bit for each field indicating whether
449 // or not that field is set.
450
451 // Compute size and offsets.
452 int* offsets = new int[type->field_count()];
453 type_info->offsets.reset(offsets);
454
455 // Sort the fields of this message in descending order by size. We
456 // assume that if we then pack the fields tightly in this order, all fields
457 // will end up properly-aligned, since all field sizes are powers of two or
458 // are multiples of the system word size.
459 scoped_array<const FieldDescriptor*> ordered_fields(
460 new const FieldDescriptor*[type->field_count()]);
461 for (int i = 0; i < type->field_count(); i++) {
462 ordered_fields[i] = type->field(i);
463 }
464 stable_sort(&ordered_fields[0], &ordered_fields[type->field_count()],
465 DescendingFieldSizeOrder());
466
467 // Decide all field offsets by packing in order.
468 // We place the DynamicMessage object itself at the beginning of the allocated
469 // space.
470 int size = sizeof(DynamicMessage);
471 size = AlignOffset(size);
472
473 // Next the has_bits, which is an array of uint32s.
474 type_info->has_bits_offset = size;
475 int has_bits_array_size =
476 DivideRoundingUp(type->field_count(), bitsizeof(uint32));
477 size += has_bits_array_size * sizeof(uint32);
478 size = AlignOffset(size);
479
480 // The ExtensionSet, if any.
481 if (type->extension_range_count() > 0) {
482 type_info->extensions_offset = size;
483 size += sizeof(ExtensionSet);
484 size = AlignOffset(size);
485 } else {
486 // No extensions.
487 type_info->extensions_offset = -1;
488 }
489
490 // All the fields. We don't need to align after each field because they are
491 // sorted in descending size order, and the size of a type is always a
492 // multiple of its alignment.
493 for (int i = 0; i < type->field_count(); i++) {
494 offsets[ordered_fields[i]->index()] = size;
495 size += FieldSpaceUsed(ordered_fields[i]);
496 }
497
498 // Add the UnknownFieldSet to the end.
499 size = AlignOffset(size);
500 type_info->unknown_fields_offset = size;
501 size += sizeof(UnknownFieldSet);
502
503 // Align the final size to make sure no clever allocators think that
504 // alignment is not necessary.
505 size = AlignOffset(size);
506 type_info->size = size;
507
508 // Allocate the prototype.
509 void* base = operator new(size);
510 memset(base, 0, size);
511 DynamicMessage* prototype = new(base) DynamicMessage(type_info);
512 type_info->prototype.reset(prototype);
513
514 // Construct the reflection object.
515 type_info->reflection.reset(
516 new GeneratedMessageReflection(
517 type_info->type,
518 type_info->prototype.get(),
519 type_info->offsets.get(),
520 type_info->has_bits_offset,
521 type_info->unknown_fields_offset,
522 type_info->extensions_offset,
523 type_info->pool));
524
525 // Cross link prototypes.
526 prototype->CrossLinkPrototypes();
527
528 return prototype;
529 }
530
531 } // namespace protobuf
532 } // namespace google
 
File dynamic_message.cc (Revision 1.0) [Add File Comment] [Top]
  
Legend:
Removed 
Changed
 Added

[Add General Comment] to topic.