1 /**
2     Defines a string based dictionary list with conserved insertion order.
3 
4     Copyright: © 2012-2014 RejectedSoftware e.K.
5     License: Subject to the terms of the MIT license, as written in the included LICENSE file.
6     Authors: Sönke Ludwig
7 */
8 module memutils.dictionarylist;
9 
10 import core.stdc.string : memset, memcpy;
11 import memutils.helpers;
12 import memutils.allocators;
13 import memutils.refcounted;
14 import memutils.utils;
15 import memutils.vector;
16 import std.conv : to;
17 import std.exception : enforce;
18 import std.algorithm: swap;
19 
20 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC);
21 
22 /**
23  * 
24     Behaves similar to $(D VALUE[string]) but the insertion order is not changed
25     and multiple values per key are supported.
26     
27     Note that despite case not being relevant for matching keys, iterating
28     over the list will yield the original case of the key that was put in.
29 
30     Insertion and lookup has O(n) complexity.
31 */
32 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) {
33 	@disable this(this);
34 
35 	import std.typecons : Tuple;
36 	
37 	private {
38 		static struct Field { uint keyCheckSum; KEY key; VALUE value; }
39 		Field[NUM_STATIC_FIELDS] m_fields;
40 		size_t m_fieldCount;
41 		Field[] m_extendedFields;
42 		size_t m_extendedFieldCount;
43 	}
44 
45 	~this() {
46 		if (m_extendedFields) {
47 			auto sz = m_extendedFields.length;
48 			freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz);
49 			m_extendedFields = null;
50 		}
51 	}
52 	
53 	alias KeyType = KEY;
54 	alias ValueType = VALUE;
55 	
56 	struct FieldTuple { KeyType key; ValueType value; }
57 
58 	@property DictionaryList clone() const {
59 		auto ret = DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS)();
60 		
61 		ret.m_fields = (cast()this).m_fields;
62 		ret.m_fieldCount = (cast()this).m_fieldCount;
63 		
64 		if (m_extendedFieldCount > 0) {
65 			ret.m_extendedFields = allocArray!(Field, ALLOC)(m_extendedFieldCount);
66 			memcpy(cast(void*)ret.m_extendedFields.ptr, cast(void*)m_extendedFields.ptr, (m_extendedFieldCount)*Field.sizeof);
67 			ret.m_extendedFieldCount = (cast()this).m_extendedFieldCount;
68 		}
69 		return ret;
70 	}
71 
72 	DictionaryList move() {
73 		import std.algorithm : swap;
74 		auto ret = DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS)();
75 		.swap(m_fields, ret.m_fields);
76 		.swap(m_fieldCount, ret.m_fieldCount);
77 		.swap(m_extendedFields, ret.m_extendedFields);
78 		.swap(m_extendedFieldCount, ret.m_extendedFieldCount);
79 		return ret;
80 	}
81 
82 	@property bool empty() const { return length == 0; }
83 
84 	/** The number of fields present in the map.
85     */
86 	@property size_t length() const { return m_fieldCount + m_extendedFields.length; }
87 	
88 	/** Removes the first field that matches the given key.
89     */
90 	void remove(KeyType key)
91 	{
92 		auto keysum = computeCheckSumI(key);
93 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
94 		if( idx >= 0 ){
95 			auto slice = m_fields[0 .. m_fieldCount];
96 			removeFromArrayIdx(slice, idx);
97 			m_fieldCount--;
98 		} else {
99 			idx = getIndex(m_extendedFields, key, keysum);
100 			enforce(idx >= 0);
101 			removeFromArrayIdx(m_extendedFields, idx);
102 		}
103 	}
104 	
105 	/** Removes all fields that matches the given key.
106     */
107 	void removeAll(KeyType key)
108 	{
109 		auto keysum = computeCheckSumI(key);
110 		for (size_t i = 0; i < m_fieldCount;) {
111 			if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) {
112 				auto slice = m_fields[0 .. m_fieldCount];
113 				removeFromArrayIdx(slice, i);
114 				m_fieldCount--;
115 			} else i++;
116 		}
117 		
118 		for (size_t i = 0; i < m_extendedFields.length;) {
119 			if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key))
120 				removeFromArrayIdx(m_extendedFields, i);
121 			else i++;
122 		}
123 	}
124 	
125 	/** Adds a new field to the map.
126 
127         The new field will be added regardless of any existing fields that
128         have the same key, possibly resulting in duplicates. Use opIndexAssign
129         if you want to avoid duplicates.
130     */
131 	void insert()(auto const ref KeyType key, ValueType value)
132 	{
133 		auto keysum = computeCheckSumI(key);
134 		if (m_fieldCount < m_fields.length) {
135 			m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value);
136 		}
137 		else {
138 			grow(1);
139 			m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value);
140 		}
141 	}
142 	
143 	/** Returns the first field that matches the given key.
144 
145         If no field is found, def_val is returned.
146     */
147 	ValueType get(KeyType key, lazy ValueType def_val = ValueType.init) {
148 		if (auto pv = key in this) return *pv;
149 		return def_val;
150 	}
151 	
152 	const(ValueType) get(in KeyType key, lazy const(ValueType) def_val = const(ValueType).init) const
153 	{
154 		if (auto pv = key in this) return *pv;
155 		return def_val;
156 	}
157 	
158 	/** Returns all values matching the given key.
159 
160         Note that the version returning an array will allocate using the same allocator for each call.
161     */
162 	Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key)
163 	const {
164 		import std.array;
165 		auto ret = Vector!(ValueType, ALLOC)(0);
166 		this.opApply( (const ref k, const ref v) {
167 				// static if (is(ValueType == string)) logTrace("Checking field: ", v);
168 				//logTrace("Looping ", k, " => ", v);
169 				if (matches(key, k)) {
170 					//logTrace("Appending: ", v);
171 					ret ~= v;
172 				}
173 				return 0;
174 			});
175 		//logTrace("Finished getValuesAt with: ", ret[]);
176 		return ret.move();
177 	}
178 	
179 	/// ditto
180 	void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) del)
181 	const {
182 		uint keysum = computeCheckSumI(key);
183 		foreach (ref f; m_fields[0 .. m_fieldCount]) {
184 			if (f == Field.init) continue;
185 			if (f.keyCheckSum != keysum) continue;
186 			if (matches(f.key, key)) del(f.value);
187 		}
188 		foreach (ref f; m_extendedFields) {
189 			if (f.keyCheckSum != keysum) continue;
190 			if (matches(f.key, key)) del(f.value);
191 		}
192 	}
193 	/** Returns the first value matching the given key.
194     */
195 	inout(ValueType) opIndex(KeyType key)
196 	inout {
197 		auto pitm = key in this;
198 		enforce(pitm !is null, "Accessing non-existent key '"~ key.to!string ~"'.");
199 		return *pitm;
200 	}
201 	
202 	/** Adds or replaces the given field with a new value.
203     */
204 	ValueType opIndexAssign(ValueType val, KeyType key)
205 	{
206 		auto pitm = key in this;
207 		if( pitm ) *pitm = val;
208 		else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val);
209 		else {
210 			grow(1);
211 			m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val);
212 		}
213 		return val;
214 	}
215 	
216 	/** Returns a pointer to the first field that matches the given key.
217     */
218 	inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") {
219 		return find(key);
220 	}
221 	
222 	inout(ValueType)* find(in KeyType key) inout 
223 	{
224 		uint keysum = computeCheckSumI(key);
225 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
226 		if( idx >= 0 ) return &m_fields[idx].value;
227 		idx = getIndex(m_extendedFields, key, keysum);
228 		if( idx >= 0 ) return &m_extendedFields[idx].value;
229 		return null;
230 	}
231 	
232 	/// ditto
233 	bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") {
234 		return !(key in this);
235 	}
236 	
237 	/** Iterates over all fields, including duplicates.
238     */
239 	int opApply(int delegate(KeyType key, ref ValueType val) del)
240 	{
241 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
242 			if (kv == Field.init) return 0;
243 			if (auto ret = del(kv.key, kv.value))
244 				return ret;
245 		}
246 		foreach (ref kv; m_extendedFields) {
247 			if (auto ret = del(kv.key, kv.value))
248 				return ret;
249 		}
250 		return 0;
251 	}
252 	
253 	int opApply(int delegate(const ref KeyType key, const ref ValueType val) del) const
254 	{
255 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
256 			if (kv == Field.init) return 0;
257 			if (auto ret = del(kv.key, kv.value))
258 				return ret;
259 		}
260 		foreach (ref kv; m_extendedFields) {
261 			if (auto ret = del(kv.key, kv.value))
262 				return ret;
263 		}
264 		return 0;
265 	}
266 	
267 	/// ditto
268 	int opApply(int delegate(ref ValueType val) del)
269 	{
270 		return this.opApply((KeyType key, ref ValueType val) { return del(val); });
271 	}
272 	
273 	
274 	/// ditto
275 	int opApply(int delegate(ref const(ValueType) val) del) const
276 	{
277 		return (cast() this).opApply(cast(int delegate(ref ValueType)) del);
278 	}
279 
280 	bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const
281 	{
282 		foreach (const ref KeyType key, const ref ValueType val; this)
283 		{
284 			bool found;
285 			other.getValuesAt(key, (const ValueType oval) {
286 					if (oval == val) {
287 						found = true;
288 						return;
289 					}
290 				});
291 			if (!found)
292 				return false;
293 		}
294 		
295 		return true;
296 	}
297 
298 	private void grow(size_t n) {
299 		if (m_extendedFields.length + n < m_extendedFieldCount) {
300 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n];
301 			return;
302 		}
303 		if (m_extendedFields.length > 0)
304 		{
305 			size_t oldsz = m_extendedFields.length;
306 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount];
307 			m_extendedFieldCount = (m_extendedFieldCount + n)*3/2;
308 			m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n];
309 			memset(cast(void*)(m_extendedFields.ptr + oldsz), 0, (m_extendedFieldCount-oldsz)*Field.sizeof);
310 		}
311 		else {
312 			m_extendedFieldCount = 16;
313 			m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n];
314 			memset(cast(void*)m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof);
315 		}
316 	}
317 
318 	private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum)
319 	const {
320 		foreach (i, ref const(Field) entry; map) {
321 			if (entry.keyCheckSum != keysum) continue;
322 			if (matches(entry.key, key)) return i;
323 		}
324 		return -1;
325 	}
326 	
327 	private static bool matches(in KeyType a, in KeyType b)
328 	{
329 		static if (case_sensitive) return a == b;
330 		else static if (is (KeyType == string)) return icmp2(a, b) == 0;
331 		else return a == b;
332 	}
333 	
334 	// very simple check sum function with a good chance to match
335 	// strings with different case equal
336 	private static uint computeCheckSumI(string s)
337 	@trusted {
338 		uint csum = 0;
339 		immutable(char)* pc = s.ptr, pe = s.ptr + s.length;
340 		for (; pc != pe; pc++) {
341 			static if (case_sensitive) csum ^= *pc;
342 			else csum ^= *pc & 0x1101_1111;
343 			csum = (csum << 1) | (csum >> 31);
344 		}
345 		return csum;
346 	}
347 	
348 	private static uint computeCheckSumI(T)(ref T obj)
349 	@trusted {
350 		return cast(uint)typeid(obj).getHash(&obj);
351 	}
352 }
353 
354 private:
355 
356 void removeFromArrayIdx(T)(ref T[] array, size_t idx)
357 {
358 	import std.algorithm : swap;
359 	foreach( j; idx+1 .. array.length) { 
360 		array[j-1] = array[j];
361 	}
362 	array[array.length-1].destroy();
363 	array = array.ptr[0 .. array.length-1];
364 }
365 
366 /// Special version of icmp() with optimization for ASCII characters
367 int icmp2(in string a, in string b)
368 @safe pure {
369 	import std.algorithm : min;
370 	import std.utf : decode;
371 	import std.uni : toLower;
372 	size_t i = 0, j = 0;
373 	
374 	// fast skip equal prefix
375 	size_t min_len = min(a.length, b.length);
376 	while ( i < min_len && a[i] == b[i] ) i++;
377 	if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence
378 	j = i;
379 	
380 	// compare the differing character and the rest of the string
381 	while (i < a.length && j < b.length){
382 		uint ac = cast(uint)a[i];
383 		uint bc = cast(uint)b[j];
384 		if( !((ac | bc) & 0x80) ){
385 			i++;
386 			j++;
387 			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
388 			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
389 			if( ac < bc ) return -1;
390 			else if( ac > bc ) return 1;
391 		} else {
392 			dchar acp = decode(a, i);
393 			dchar bcp = decode(b, j);
394 			if( acp != bcp ){
395 				acp = toLower(acp);
396 				bcp = toLower(bcp);
397 				if( acp < bcp ) return -1;
398 				else if( acp > bcp ) return 1;
399 			}
400 		}
401 	}
402 	
403 	if( i < a.length ) return 1;
404 	else if( j < b.length ) return -1;
405 	
406 	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
407 	return 0;
408 }