Skip to content

Commit fee36ba

Browse files
committed
Merge pull request #16 from mbauman/sortedvector
Add a SortedVector for keyed axis indexes and hierarchical indexing
2 parents 4805f3c + 5f895d4 commit fee36ba

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

src/AxisArrays.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export AxisArray, Axis, Interval, axisnames, axisvalues, axisdim, axes
77
include("core.jl")
88
include("intervals.jl")
99
include("indexing.jl")
10+
include("sortedvector.jl")
1011
include("utils.jl")
1112

1213
end

src/core.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ checkaxis(ax) = checkaxis(axistrait(ax), ax)
291291
checkaxis(::Type{Unsupported}, ax) = nothing # TODO: warn or error?
292292
# Dimensional axes must be monotonically increasing
293293
checkaxis{T}(::Type{Dimensional}, ax::Range{T}) = step(ax) > zero(T) || error("Dimensional axes must be monotonically increasing")
294-
checkaxis(::Type{Dimensional}, ax) = issorted(ax, lt=(<=)) || error("Dimensional axes must be monotonically increasing")
294+
# Generic Dimensional axes only have to pass `issorted`, so repeated values
# are accepted; anything out of order raises an error.
function checkaxis(::Type{Dimensional}, ax)
    if issorted(ax)
        return true
    end
    error("Dimensional axes must be monotonically increasing")
end
295295
# Categorical axes must simply be unique
296296
function checkaxis(::Type{Categorical}, ax)
297297
seen = Set{eltype(ax)}()

src/indexing.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ axisindexes(ax, idx) = axisindexes(axistrait(ax.val), ax.val, idx)
7878
axisindexes(::Type{Unsupported}, ax, idx) = error("elementwise indexing is not supported for axes of type $(typeof(ax))")
7979
# Dimensional axes may be indexed by intervals of their elements
8080
axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::Interval{T}) = searchsorted(ax, idx)
81+
# Dimensional axes may also be indexed directly by their elements
82+
# Indexing by a single element is delegated to the Interval search, using the
# degenerate interval whose two endpoints are both `idx`.
function axisindexes{T}(::Type{Dimensional}, ax::AbstractVector{T}, idx::T)
    point = Interval(idx, idx)
    return searchsorted(ax, point)
end
8183
# Categorical axes may be indexed by their elements
8284
function axisindexes{T}(::Type{Categorical}, ax::AbstractVector{T}, idx::T)
8385
i = findfirst(ax, idx)

src/sortedvector.jl

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
2+
export SortedVector
3+
4+
@doc """
5+
6+
A SortedVector is an AbstractVector where the underlying data is
7+
ordered (monotonically increasing).
8+
9+
Indexing that would unsort the data is prohibited. A SortedVector is a
10+
Dimensional axis, and no checking is done to ensure that the data is
11+
sorted. Duplicate values are allowed.
12+
13+
A SortedVector axis can be indexed with an Interval, with a value, or
14+
with a vector of values. Use of a SortedVector{Tuple} axis allows
15+
indexing similar to the hierarchical index of the Python Pandas
16+
package or the R data.table package.
17+
18+
### Constructors
19+
20+
```julia
21+
SortedVector(x::AbstractVector)
22+
```
23+
24+
### Arguments
25+
26+
* `x::AbstractVector` : the wrapped vector
27+
28+
### Examples
29+
30+
```julia
31+
v = SortedVector(collect([1.; 10.; 10:15.]))
32+
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b])
33+
A[Interval(8.,12.), :]
34+
A[1., :]
35+
A[10., :]
36+
37+
## Hierarchical index example with three key levels
38+
39+
data = reshape(1.:40., 20, 2)
40+
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)]))
41+
idx = sortperm(v)
42+
A = AxisArray(data[idx,:], SortedVector(v[idx]), [:a, :b])
43+
A[:b, :]
44+
A[[:a,:c], :]
45+
A[(:a,:x), :]
46+
A[(:a,:x,:x), :]
47+
A[Interval(:a,:b), :]
48+
A[Interval((:a,:x),(:b,:x)), :]
49+
```
50+
51+
""" ->
52+
immutable SortedVector{T} <: AbstractVector{T}
53+
data::AbstractVector{T}  # wrapped vector; assumed to be sorted already, the type itself never verifies it. NOTE(review): abstractly typed field; adding a type parameter for it would change the `SortedVector{T}` signature that the `Axis{S,SortedVector{T}}` methods dispatch on.
54+
end
55+
56+
# Scalar access: hand the integer position straight to the wrapped vector.
function Base.getindex(v::SortedVector, idx::Int)
    return v.data[idx]
end
57+
# Slicing with a unit-step range: the selected elements are rewrapped as a
# SortedVector.
function Base.getindex(v::SortedVector, idx::Range1)
    slice = v.data[idx]
    return SortedVector(slice)
end
58+
# Stepped ranges are accepted only when the step is positive; any other step
# raises an error instead of producing a SortedVector.
function Base.getindex(v::SortedVector, idx::StepRange)
    if !(step(idx) > 0)
        error("step must be positive to index a SortedVector")
    end
    return SortedVector(v.data[idx])
end
60+
# Arbitrary index vectors must themselves pass `issorted`; the selection is
# then rewrapped as a SortedVector. Unsorted index vectors raise an error.
function Base.getindex(v::SortedVector, idx::AbstractVector)
    issorted(idx) || error("index must be monotonically increasing to index a SortedVector")
    return SortedVector(v.data[idx])
end
62+
63+
# Number of elements, as reported by the wrapped vector.
function Base.length(v::SortedVector)
    return length(v.data)
end
64+
# Size tuple, as reported by the wrapped vector.
function Base.size(v::SortedVector)
    return size(v.data)
end
65+
# Extent along dimension `i`, as reported by the wrapped vector.
function Base.size(v::SortedVector, i)
    return size(v.data, i)
end
66+
67+
# Every SortedVector is classified as a Dimensional axis.
function axistrait(::SortedVector)
    return Dimensional
end
68+
# Deliberate no-op: the contents of a SortedVector are not validated here.
function checkaxis(::SortedVector)
    return nothing
end
69+
70+
71+
## Add some special indexing for SortedVector{Tuple}'s to achieve something like
72+
## Pandas' hierarchical indexing
73+
74+
# Look up `idx` on a tuple-keyed SortedVector axis. The search covers the whole
# axis and orders elements with `_isless` rather than `isless`.
function axisindexes{T<:Tuple,S}(ax::Axis{S,SortedVector{T}}, idx)
    vals = ax.val
    ordering = Base.ord(_isless, identity, false, Base.Forward)
    return searchsorted(vals, idx, 1, length(vals), ordering)
end
76+
77+
# Look up several keys at once on a tuple-keyed SortedVector axis.
#
# Each element of `idx` is resolved on its own with `axisindexes(ax, i)`, and
# the per-key results are concatenated in the order the keys were given.
function axisindexes{T<:Tuple,S}(ax::Axis{S,SortedVector{T}}, idx::AbstractArray)
    # With no keys the splat below would call `vcat()` with zero arguments,
    # which gives an untyped empty array; return a typed empty index instead.
    isempty(idx) && return Int[]
    return vcat([axisindexes(ax, i) for i in idx]...)
end
79+
80+
81+
## Use a modification of `isless`, so that (:a,) is not less than (:a, :b).
82+
## This allows for more natural indexing.
83+
84+
# Fallback for operand pairs that no more specific `_isless` method handles:
# defer to the ordinary `isless`.
function _isless(x, y)
    return isless(x, y)
end
85+
86+
# Tuple comparison over the shared leading positions only. The first position
# whose elements are not `isequal` decides the result; if all shared positions
# match, neither tuple is less, whatever their lengths.
function _isless(t1::Tuple, t2::Tuple)
    # `zip` stops at the shorter tuple, so only the common prefix is visited.
    for (a, b) in zip(t1, t2)
        isequal(a, b) || return _isless(a, b)
    end
    return false
end
96+
# Interval-of-tuples vs. plain tuple: wrap the tuple as a degenerate interval
# and compare interval to interval.
function _isless{T<:Tuple}(t1::Interval{T}, t2::Tuple)
    return _isless(t1, Interval(t2, t2))
end
97+
# Plain tuple vs. interval-of-tuples: wrap the tuple as a degenerate interval
# and compare interval to interval.
function _isless{T<:Tuple}(t1::Tuple, t2::Interval{T})
    return _isless(Interval(t1, t1), t2)
end
98+
# Tuple vs. non-tuple: wrap the right operand in a 1-tuple and compare as
# tuples.
function _isless(t1::Tuple, t2)
    return _isless(t1, (t2,))
end
99+
# Non-tuple vs. tuple: wrap the left operand in a 1-tuple and compare as
# tuples.
function _isless(t1, t2::Tuple)
    return _isless((t1,), t2)
end
100+
# Interval vs. interval: `a` is less than `b` when `a`'s upper endpoint is
# less than `b`'s lower endpoint.
function _isless(a::Interval, b::Interval)
    return _isless(a.hi, b.lo)
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ using Base.Test
33

44
include("core.jl")
55
include("indexing.jl")
6+
include("sortedvector.jl")

test/sortedvector.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
# Test SortedVector
3+
v = SortedVector(collect([1.; 10.; 10:15.]))  # 8 values; 10.0 appears twice (positions 2 and 3)
4+
A = AxisArray(reshape(1:16, 8, 2), v, [:a, :b])
5+
@test A[Interval(8.,12.), :] == A[2:5, :]
6+
@test A[1., :] == A[1, :]
7+
@test A[10., :] == A[2:3, :]  # a single value selects every row that carries it
8+
9+
# Test SortedVector with a hierarchical index (indexed using Tuples)
10+
srand(1234)  # fixed seed: the expected row ranges below are hard-coded for this particular random draw
11+
data = reshape(1.:40., 20, 2)
12+
v = collect(zip([:a, :b, :c][rand(1:3,20)], [:x,:y][rand(1:2,20)], [:x,:y][rand(1:2,20)]))
13+
idx = sortperm(v)  # permutation that sorts the tuple keys; applied to both data rows and keys below
14+
A = AxisArray(data[idx,:], SortedVector(v[idx]), [:a, :b])
15+
@test A[:b, :] == A[5:12, :]  # a bare symbol is matched against the first tuple level
16+
@test A[[:a,:c], :] == A[[1:4;13:end], :]
17+
@test A[(:a,:y), :] == A[2:4, :]  # partial tuple: matched against the leading tuple levels
18+
@test A[(:c,:y,:y), :] == A[16:end, :]
19+
@test A[Interval(:a,:b), :] == A[1:12, :]
20+
@test A[Interval((:a,:x),(:b,:x)), :] == A[1:9, :]
21+
@test A[[Interval((:a,:x),(:b,:x)),:c], :] == A[[1:9;13:end], :]  # vector index mixing an Interval and a bare symbol

0 commit comments

Comments
 (0)