1 /**
2     Defines a string based dictionary list with conserved insertion order.
3 
4     Copyright: © 2012-2014 RejectedSoftware e.K.
5     License: Subject to the terms of the MIT license, as written in the included LICENSE file.
6     Authors: Sönke Ludwig
7 */
8 module memutils.dictionarylist;
9 
10 import core.stdc.string : memset, memcpy;
11 import memutils.helpers;
12 import memutils.allocators;
13 import memutils.refcounted;
14 import memutils.utils;
15 import memutils.vector;
16 import std.conv : to;
17 import std.exception : enforce;
18 
19 alias DictionaryListRef(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) = RefCounted!(DictionaryList!(KEY, VALUE, ALLOC, case_sensitive, NUM_STATIC_FIELDS), ALLOC);
20 
21 /**
22  * 
23     Behaves similar to $(D VALUE[string]) but the insertion order is not changed
24     and multiple values per key are supported.
25     
26     Note that despite case not being relevant for matching keys, iterating
27     over the list will yield the original case of the key that was put in.
28 
29     Insertion and lookup has O(n) complexity.
30 */
31 struct DictionaryList(KEY, VALUE, ALLOC = ThreadMem, bool case_sensitive = true, size_t NUM_STATIC_FIELDS = 8) {
32 	@disable this(this);
33 
34 	import std.typecons : Tuple;
35 	
36 	private {
37 		static struct Field { uint keyCheckSum; KEY key; VALUE value; }
38 		Field[NUM_STATIC_FIELDS] m_fields;
39 		size_t m_fieldCount;
40 		Field[] m_extendedFields;
41 		size_t m_extendedFieldCount;
42 	}
43 
44 	~this() {
45 		if (m_extendedFields) {
46 			auto sz = m_extendedFields.length;
47 			freeArray!(Field, ALLOC)(m_extendedFields.ptr[0 .. m_extendedFieldCount], sz);
48 		}
49 	}
50 	
51 	alias KeyType = KEY;
52 	alias ValueType = VALUE;
53 	
54 	struct FieldTuple { KeyType key; ValueType value; }
55 
56 	@property bool empty() const { return length == 0; }
57 
58 	/** The number of fields present in the map.
59     */
60 	@property size_t length() const { return m_fieldCount + m_extendedFields.length; }
61 	
62 	/** Removes the first field that matches the given key.
63     */
64 	void remove(KeyType key)
65 	{
66 		auto keysum = computeCheckSumI(key);
67 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
68 		if( idx >= 0 ){
69 			auto slice = m_fields[0 .. m_fieldCount];
70 			removeFromArrayIdx(slice, idx);
71 			m_fieldCount--;
72 		} else {
73 			idx = getIndex(m_extendedFields, key, keysum);
74 			enforce(idx >= 0);
75 			removeFromArrayIdx(m_extendedFields, idx);
76 		}
77 	}
78 	
79 	/** Removes all fields that matches the given key.
80     */
81 	void removeAll(KeyType key)
82 	{
83 		auto keysum = computeCheckSumI(key);
84 		for (size_t i = 0; i < m_fieldCount;) {
85 			if (m_fields[i].keyCheckSum == keysum && matches(m_fields[i].key, key)) {
86 				auto slice = m_fields[0 .. m_fieldCount];
87 				removeFromArrayIdx(slice, i);
88 				m_fieldCount--;
89 			} else i++;
90 		}
91 		
92 		for (size_t i = 0; i < m_extendedFields.length;) {
93 			if (m_extendedFields[i].keyCheckSum == keysum && matches(m_extendedFields[i].key, key))
94 				removeFromArrayIdx(m_extendedFields, i);
95 			else i++;
96 		}
97 	}
98 	
99 	/** Adds a new field to the map.
100 
101         The new field will be added regardless of any existing fields that
102         have the same key, possibly resulting in duplicates. Use opIndexAssign
103         if you want to avoid duplicates.
104     */
105 	void insert()(auto const ref KeyType key, ValueType value)
106 	{
107 		auto keysum = computeCheckSumI(key);
108 		if (m_fieldCount < m_fields.length) {
109 			logTrace("Appending: ", value);
110 			m_fields[m_fieldCount++] = Field(keysum, *cast(KeyType*) &key, value);
111 			logTrace("Now have: ", m_fields, " with ", m_fieldCount);
112 		}
113 		else {
114 			grow(1);
115 			m_extendedFields[$-1] = Field(keysum, *cast(KeyType*) &key, value);
116 		}
117 	}
118 	
119 	/** Returns the first field that matches the given key.
120 
121         If no field is found, def_val is returned.
122     */
123 	ValueType get(KeyType key, lazy ValueType def_val = ValueType.init) {
124 		if (auto pv = key in this) return *pv;
125 		return def_val;
126 	}
127 	
128 	const(ValueType) get(in KeyType key, lazy const(ValueType) def_val = const(ValueType).init) const
129 	{
130 		if (auto pv = key in this) return *pv;
131 		return def_val;
132 	}
133 	
134 	/** Returns all values matching the given key.
135 
136         Note that the version returning an array will allocate using the same allocator for each call.
137     */
138 	Vector!(ValueType, ALLOC) getValuesAt()(auto const ref KeyType key)
139 	const {
140 		import std.array;
141 		auto ret = Vector!(ValueType, ALLOC)(0);
142 		this.opApply( (k, const ref v) {
143 				// static if (is(ValueType == string)) logTrace("Checking field: ", v);
144 				//logTrace("Looping ", k, " => ", v);
145 				if (matches(key, k)) {
146 					//logTrace("Appending: ", v);
147 					ret ~= v;
148 				}
149 				return 0;
150 			});
151 		//logTrace("Finished getValuesAt with: ", ret[]);
152 		return ret.move();
153 	}
154 	
155 	/// ditto
156 	void getValuesAt(in KeyType key, scope void delegate(const(ValueType)) del)
157 	const {
158 		uint keysum = computeCheckSumI(key);
159 		foreach (ref f; m_fields[0 .. m_fieldCount]) {
160 			if (f == Field.init) continue;
161 			if (f.keyCheckSum != keysum) continue;
162 			if (matches(f.key, key)) del(f.value);
163 		}
164 		foreach (ref f; m_extendedFields) {
165 			if (f.keyCheckSum != keysum) continue;
166 			if (matches(f.key, key)) del(f.value);
167 		}
168 	}
169 	/** Returns the first value matching the given key.
170     */
171 	inout(ValueType) opIndex(KeyType key)
172 	inout {
173 		auto pitm = key in this;
174 		enforce(pitm !is null, "Accessing non-existent key '"~ key.to!string ~"'.");
175 		return *pitm;
176 	}
177 	
178 	/** Adds or replaces the given field with a new value.
179     */
180 	ValueType opIndexAssign(ValueType val, KeyType key)
181 	{
182 		auto pitm = key in this;
183 		if( pitm ) *pitm = val;
184 		else if( m_fieldCount < m_fields.length ) m_fields[m_fieldCount++] = Field(computeCheckSumI(key), key, val);
185 		else {
186 			grow(1);
187 			m_extendedFields[$-1] = Field(computeCheckSumI(key), key, val);
188 		}
189 		return val;
190 	}
191 	
192 	/** Returns a pointer to the first field that matches the given key.
193     */
194 	inout(ValueType)* opBinaryRight(string op)(in KeyType key) inout if(op == "in") {
195 		return find(key);
196 	}
197 	
198 	inout(ValueType)* find(in KeyType key) inout 
199 	{
200 		uint keysum = computeCheckSumI(key);
201 		auto idx = getIndex(m_fields[0 .. m_fieldCount], key, keysum);
202 		if( idx >= 0 ) return &m_fields[idx].value;
203 		idx = getIndex(m_extendedFields, key, keysum);
204 		if( idx >= 0 ) return &m_extendedFields[idx].value;
205 		return null;
206 	}
207 	
208 	/// ditto
209 	bool opBinaryRight(string op)(KeyType key) inout if(op == "!in") {
210 		return !(key in this);
211 	}
212 	
213 	/** Iterates over all fields, including duplicates.
214     */
215 	int opApply(int delegate(KeyType key, ref ValueType val) del)
216 	{
217 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
218 			if (kv == Field.init) return 0;
219 			if (auto ret = del(kv.key, kv.value))
220 				return ret;
221 		}
222 		foreach (ref kv; m_extendedFields) {
223 			if (auto ret = del(kv.key, kv.value))
224 				return ret;
225 		}
226 		return 0;
227 	}
228 	
229 	int opApply(int delegate(const ref KeyType key, const ref ValueType val) del) const
230 	{
231 		foreach (ref kv; m_fields[0 .. m_fieldCount]) {
232 			if (kv == Field.init) return 0;
233 			if (auto ret = del(kv.key, kv.value))
234 				return ret;
235 		}
236 		foreach (ref kv; m_extendedFields) {
237 			if (auto ret = del(kv.key, kv.value))
238 				return ret;
239 		}
240 		return 0;
241 	}
242 	
243 	/// ditto
244 	int opApply(int delegate(ref ValueType val) del)
245 	{
246 		return this.opApply((KeyType key, ref ValueType val) { return del(val); });
247 	}
248 	
249 	/// ditto
250 	int opApply(int delegate(KeyType key, ref const(ValueType) val) del) const
251 	{
252 		return (cast() this).opApply(cast(int delegate(KeyType, ref ValueType)) del);
253 	}
254 	
255 	/// ditto
256 	int opApply(int delegate(ref const(ValueType) val) del) const
257 	{
258 		return (cast() this).opApply(cast(int delegate(ref ValueType)) del);
259 	}
260 
261 	bool opEquals(const ref DictionaryList!(KEY, VALUE, ALLOC) other) const
262 	{
263 		foreach (const ref KeyType key, const ref ValueType val; this)
264 		{
265 			bool found;
266 			other.getValuesAt(key, (const ValueType oval) {
267 					if (oval == val) {
268 						found = true;
269 						return;
270 					}
271 				});
272 			if (!found)
273 				return false;
274 		}
275 		
276 		return true;
277 	}
278 
279 	private void grow(size_t n) {
280 		if (m_extendedFields.length + n < m_extendedFieldCount) {
281 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFields.length + n];
282 			return;
283 		}
284 		if (m_extendedFields.length > 0)
285 		{
286 			size_t oldsz = m_extendedFields.length;
287 			m_extendedFields = m_extendedFields.ptr[0 .. m_extendedFieldCount];
288 			m_extendedFieldCount = (m_extendedFieldCount + n)*3/2;
289 			logTrace("Extended field count: ", m_extendedFieldCount);
290 			m_extendedFields = reallocArray!(Field, ALLOC)(m_extendedFields, m_extendedFieldCount)[0 .. oldsz + n];
291 			memset(m_extendedFields.ptr + oldsz, 0, (m_extendedFieldCount-oldsz)*Field.sizeof);
292 		}
293 		else {
294 			m_extendedFieldCount = 16;
295 			m_extendedFields = allocArray!(Field, ALLOC)(16).ptr[0 .. n];
296 			memset(m_extendedFields.ptr, 0, m_extendedFieldCount*Field.sizeof);
297 		}
298 	}
299 
300 	private ptrdiff_t getIndex(in Field[] map, in KeyType key, uint keysum)
301 	const {
302 		foreach (i, ref const(Field) entry; map) {
303 			if (entry.keyCheckSum != keysum) continue;
304 			if (matches(entry.key, key)) return i;
305 		}
306 		return -1;
307 	}
308 	
309 	private static bool matches(in KeyType a, in KeyType b)
310 	{
311 		static if (case_sensitive) return a == b;
312 		else static if (is (KeyType == string)) return icmp2(a, b) == 0;
313 		else return a == b;
314 	}
315 	
316 	// very simple check sum function with a good chance to match
317 	// strings with different case equal
318 	private static uint computeCheckSumI(string s)
319 	@trusted {
320 		uint csum = 0;
321 		immutable(char)* pc = s.ptr, pe = s.ptr + s.length;
322 		for (; pc != pe; pc++) {
323 			static if (case_sensitive) csum ^= *pc;
324 			else csum ^= *pc & 0x1101_1111;
325 			csum = (csum << 1) | (csum >> 31);
326 		}
327 		return csum;
328 	}
329 	
330 	private static uint computeCheckSumI(T)(ref T obj)
331 	@trusted {
332 		return cast(uint)typeid(obj).getHash(&obj);
333 	}
334 }
335 
336 private:
337 
338 void removeFromArrayIdx(T)(ref T[] array, size_t idx)
339 {
340 	import std.algorithm : swap;
341 	foreach( j; idx+1 .. array.length) { 
342 		array[j-1] = array[j];
343 	}
344 	array[array.length-1].destroy();
345 	array = array.ptr[0 .. array.length-1];
346 }
347 
348 /// Special version of icmp() with optimization for ASCII characters
349 int icmp2(in string a, in string b)
350 @safe pure {
351 	import std.algorithm : min;
352 	import std.utf : decode;
353 	import std.uni : toLower;
354 	size_t i = 0, j = 0;
355 	
356 	// fast skip equal prefix
357 	size_t min_len = min(a.length, b.length);
358 	while ( i < min_len && a[i] == b[i] ) i++;
359 	if( i > 0 && (a[i-1] & 0x80) ) i--; // don't stop half-way in a UTF-8 sequence
360 	j = i;
361 	
362 	// compare the differing character and the rest of the string
363 	while (i < a.length && j < b.length){
364 		uint ac = cast(uint)a[i];
365 		uint bc = cast(uint)b[j];
366 		if( !((ac | bc) & 0x80) ){
367 			i++;
368 			j++;
369 			if( ac >= 'A' && ac <= 'Z' ) ac += 'a' - 'A';
370 			if( bc >= 'A' && bc <= 'Z' ) bc += 'a' - 'A';
371 			if( ac < bc ) return -1;
372 			else if( ac > bc ) return 1;
373 		} else {
374 			dchar acp = decode(a, i);
375 			dchar bcp = decode(b, j);
376 			if( acp != bcp ){
377 				acp = toLower(acp);
378 				bcp = toLower(bcp);
379 				if( acp < bcp ) return -1;
380 				else if( acp > bcp ) return 1;
381 			}
382 		}
383 	}
384 	
385 	if( i < a.length ) return 1;
386 	else if( j < b.length ) return -1;
387 	
388 	assert(i == a.length || j == b.length, "Strings equal but we didn't fully compare them!?");
389 	return 0;
390 }