forked from tableau/document-api-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfield.py
207 lines (161 loc) · 7 KB
/
field.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import functools
import xml.etree.ElementTree as ET
# Names of the attributes a Field is populated with from column XML.
# Field.__init__ creates a '_<name>' instance attribute (initially None) for
# every entry, and Field._initialize_from_column_xml fills each one in, in
# this order.
_ATTRIBUTES = [
    'id',  # Name of the field as specified in the file, usually surrounded by [ ]
    'caption',  # Name of the field as displayed in Tableau unless an alias is defined
    'datatype',  # Type of the field within Tableau (string, integer, etc)
    'role',  # Dimension or Measure
    'type',  # Three possible values: quantitative, ordinal, or nominal
    'alias',  # Name of the field as displayed in Tableau if the default name isn't wanted
    'calculation',  # If this field is a calculated field, this will be the formula
    'description',  # If this field has a description, this will be the description (including formatting tags)
]
# Names of the attributes that are only available from a metadata record.
# Field.__init__ creates a '_<name>' instance attribute (initially None) for
# every entry, and Field.apply_metadata fills each one in from the text of the
# descendant element with the same name.
_METADATA_ATTRIBUTES = [
    'aggregation',  # The type of aggregation on the field (e.g. Sum, Avg)
]
# Pairs of (element name inside a metadata record, Field attribute it feeds).
# Field._initialize_from_metadata_xml walks this list to translate a metadata
# record into the same attributes a column would provide.
_METADATA_TO_FIELD_MAP = [
    ('local-name', 'id'),
    ('local-type', 'datatype'),
    ('remote-alias', 'alias')
]
def _find_metadata_record(record, attrib):
    """ Return the text of the first descendant of `record` tagged `attrib`.

    Returns None when `record` has no such descendant. An element that exists
    but has no text also yields None, because that is its `.text` value.
    """
    match = record.find('.//{}'.format(attrib))
    return match.text if match is not None else None
class Field(object):
    """ Represents a field in a datasource.

    A field is built from one of two XML sources: column XML, whose values are
    read from the element's attributes (plus a few child elements), or a
    metadata record, whose values are read from the text of child elements.
    All values are exposed through read-only properties.
    """

    def __init__(self, column_xml=None, metadata_xml=None):
        """ Build a field from column XML or from a metadata record.

        column_xml takes precedence: when it is given, metadata_xml is ignored.
        Raises AttributeError when neither source is provided.
        """
        # Initialize all the possible attributes so that every property is
        # readable even when the XML source has no value for it.
        for attrib in _ATTRIBUTES:
            setattr(self, '_{}'.format(attrib), None)

        for attrib in _METADATA_ATTRIBUTES:
            setattr(self, '_{}'.format(attrib), None)

        # Names registered through add_used_in(); a set so duplicates collapse.
        self._worksheets = set()

        if column_xml is not None:
            self._initialize_from_column_xml(column_xml)
            # Metadata is currently never applied on this path because of the
            # way we get the data from the xml, but during the refactor we
            # might need it. This is commented out as a reminder:
            # if metadata_xml is not None:
            #     self.apply_metadata(metadata_xml)
        elif metadata_xml is not None:
            self._initialize_from_metadata_xml(metadata_xml)
        else:
            raise AttributeError('column_xml or metadata_xml needed to initialize field')

    def _initialize_from_column_xml(self, xmldata):
        """ Fill every attribute in _ATTRIBUTES from the column element.

        The default source is the XML attribute of the same name; attributes
        with a dedicated `_read_<name>` method use that method instead.
        """
        for attrib in _ATTRIBUTES:
            self._apply_attribute(xmldata, attrib, xmldata.attrib.get)

    def _initialize_from_metadata_xml(self, xmldata):
        """ Fill the attributes listed in _METADATA_TO_FIELD_MAP, then the
        metadata-only attributes, from a metadata record.

        An element that is absent from the record leaves the corresponding
        attribute as None.
        """
        for metadata_name, field_name in _METADATA_TO_FIELD_MAP:
            # Go through _find_metadata_record so a record that lacks this
            # element yields None instead of failing on `None.text`. The tag is
            # bound as a default argument so the callable does not depend on
            # the loop variable.
            self._apply_attribute(
                xmldata,
                field_name,
                lambda _, tag=metadata_name: _find_metadata_record(xmldata, tag),
                read_name=metadata_name)
        self.apply_metadata(xmldata)

    ########################################
    # Special Case methods for construction fields from various sources
    # not intended for client use
    ########################################
    def apply_metadata(self, metadata_record):
        """ Fill every attribute in _METADATA_ATTRIBUTES from a metadata record.

        Each value is the text of the descendant element with the attribute's
        name, or None when the record has no such element.
        """
        for attrib in _METADATA_ATTRIBUTES:
            self._apply_attribute(metadata_record, attrib,
                                  functools.partial(_find_metadata_record, metadata_record))

    def add_used_in(self, name):
        """ Record `name` in the set returned by the `worksheets` property. """
        self._worksheets.add(name)

    @classmethod
    def from_column_xml(cls, xmldata):
        """ Alternate constructor: build a field from column XML. """
        return cls(column_xml=xmldata)

    @classmethod
    def from_metadata_xml(cls, xmldata):
        """ Alternate constructor: build a field from a metadata record. """
        return cls(metadata_xml=xmldata)

    def _apply_attribute(self, xmldata, attrib, default_func, read_name=None):
        """ Compute one value and store it on the instance as `_<attrib>`.

        If the instance has a method named `_read_<read_name>` it is called
        with `xmldata`; otherwise `default_func(attrib)` supplies the value.
        `read_name` defaults to `attrib`.
        """
        if read_name is None:
            read_name = attrib

        reader_name = '_read_{}'.format(read_name)
        if hasattr(self, reader_name):
            value = getattr(self, reader_name)(xmldata)
        else:
            value = default_func(attrib)

        setattr(self, '_{}'.format(attrib), value)

    @property
    def name(self):
        """ Provides a nice name for the field which is derived from the alias, caption, or the id.

        The name resolves as either the alias if it's defined, or the caption if alias is not defined,
        and finally the id which is the underlying name if neither of the fields exist. """
        alias = getattr(self, 'alias', None)
        if alias:
            return alias

        caption = getattr(self, 'caption', None)
        if caption:
            return caption

        return self.id

    @property
    def id(self):
        """ Name of the field as specified in the file, usually surrounded by [ ] """
        return self._id

    @property
    def caption(self):
        """ Name of the field as displayed in Tableau unless an alias is defined """
        return self._caption

    @property
    def alias(self):
        """ Name of the field as displayed in Tableau if the default name isn't wanted """
        return self._alias

    @property
    def datatype(self):
        """ Type of the field within Tableau (string, integer, etc) """
        return self._datatype

    @property
    def role(self):
        """ Dimension or Measure """
        return self._role

    @property
    def is_quantitative(self):
        """ A dependent value, usually a measure of something

        e.g. Profit, Gross Sales """
        return self._type == 'quantitative'

    @property
    def is_ordinal(self):
        """ Is this field a categorical field that has a specific order

        e.g. How do you feel? 1 - awful, 2 - ok, 3 - fantastic """
        return self._type == 'ordinal'

    @property
    def is_nominal(self):
        """ Is this field a categorical field that does not have a specific order

        e.g. What color is your hair? """
        return self._type == 'nominal'

    @property
    def calculation(self):
        """ If this field is a calculated field, this will be the formula """
        return self._calculation

    @property
    def default_aggregation(self):
        """ The default type of aggregation on the field (e.g Sum, Avg)"""
        return self._aggregation

    @property
    def description(self):
        """ The contents of the <desc> tag on a field """
        return self._description

    @property
    def worksheets(self):
        """ The names registered through add_used_in(), as a new list.

        The names are kept in a set, so the order of the list is unspecified. """
        return list(self._worksheets)

    ######################################
    # Special Case handling methods for reading the values from the XML
    ######################################
    @staticmethod
    def _read_id(xmldata):
        """ Return the element's 'name' attribute, or None when it has none. """
        # ID is actually the name of the field, but to provide a nice name, we call this ID
        return xmldata.attrib.get('name', None)

    @staticmethod
    def _read_calculation(xmldata):
        """ Return the 'formula' attribute of the first <calculation>
        descendant, or None when there is no such element or attribute. """
        # The formula for a calculation is stored in a child element, so we need to pull it out separately.
        calc = xmldata.find('.//calculation')
        if calc is None:
            return None

        return calc.attrib.get('formula', None)

    @staticmethod
    def _read_description(xmldata):
        """ Return the first <desc> descendant serialized as XML text
        (formatting tags included), or None when there is no such element. """
        description = xmldata.find('.//desc')
        if description is None:
            return None

        description_string = ET.tostring(description, encoding='utf-8')
        # Format expects a unicode string so in Python 2 we have to do the explicit conversion
        if isinstance(description_string, bytes):
            description_string = description_string.decode('utf-8')

        return description_string