Skip to content

Commit

Permalink
Rewrite the contains syntax to look a bit more like a map.
Browse files Browse the repository at this point in the history
This map allows repeated elements inside the same brackets
  • Loading branch information
AngledLuffa committed Mar 6, 2025
1 parent 29fa00a commit a350aaa
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 53 deletions.
6 changes: 4 additions & 2 deletions src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@ public NodePattern(GraphRelation r, boolean negDesc,

if (!descString.equals("{"))
descString += ";";
String separator = negated ? "!=" : "=";
descString += (annotation + "@" + key + separator + value);
String separator = negated ? "!:" : ":";
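// TODO: the descString might look nicer if multiple contains
// for the same attribute were collapsed into the same map
// NOTE(review): `separator` is computed just above, but the concatenation
// below uses a literal ":" between key and value, so a negated contains is
// rendered the same as a non-negated one -- confirm whether this is intended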
descString += (annotation + ":{" + key + ":" + value + "}");
}

if (attrs.root()) {
Expand Down
119 changes: 84 additions & 35 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
case 11:
case 15:
case 17:
case 24:{
case 26:{
node = SubNode(GraphRelation.ROOT);
children.add(node);
label_1:
Expand Down Expand Up @@ -135,7 +135,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
}
case 15:
case 17:
case 24:{
case 26:{
result = ModNode(r);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case RELATION:
Expand Down Expand Up @@ -397,7 +397,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 15:
case 17:
case 24:{
case 26:{
node = ModNode(reln);
break;
}
Expand Down Expand Up @@ -454,7 +454,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
case 14:
case 15:
case 17:
case 24:{
case 26:{
;
break;
}
Expand Down Expand Up @@ -485,7 +485,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
boolean startUnderNeg;
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 17:
case 24:{
case 26:{
child = Child(r);
break;
}
Expand All @@ -512,7 +512,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
child = NodeDisj(r);
break;
}
case 24:{
case 26:{
child = Description(r);
break;
}
Expand All @@ -529,6 +529,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
Token key = null;
Token value = null;
Token attrType = null;
boolean negated = false;
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
attr = jj_consume_token(IDENTIFIER);
Expand Down Expand Up @@ -564,29 +565,28 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
throw new ParseException();
}
if (attr != null && value != null) {
boolean negated = attrType.image.equals("!:");
negated = attrType.image.equals("!:");
attributes.setAttribute(attr.image, value.image, negated);
}
break;
}
case ALIGNRELN:
case 23:{
jj_consume_token(23);
key = jj_consume_token(IDENTIFIER);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case ALIGNRELN:{
attrType = jj_consume_token(ALIGNRELN);
case 10:{
attrType = jj_consume_token(10);
break;
}
case 23:{
attrType = jj_consume_token(23);
case 22:{
attrType = jj_consume_token(22);
break;
}
default:
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
key = jj_consume_token(IDENTIFIER);
jj_consume_token(21);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
value = jj_consume_token(IDENTIFIER);
Expand All @@ -605,12 +605,61 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);}
}
boolean negated = attrType.image.equals("!@");
negated = attrType.image.equals("!:");
attributes.addContains(attr.image, key.image, value.image, negated);
label_6:
while (true) {
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 24:{
;
break;
}
default:
jj_la1[27] = jj_gen;
break label_6;
}
jj_consume_token(24);
key = jj_consume_token(IDENTIFIER);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 10:{
attrType = jj_consume_token(10);
break;
}
case 22:{
attrType = jj_consume_token(22);
break;
}
default:
jj_la1[28] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:{
value = jj_consume_token(IDENTIFIER);
break;
}
case REGEX:{
value = jj_consume_token(REGEX);
break;
}
default:
jj_la1[29] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
if (attr == null || key == null || value == null) {
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);}
}
negated = attrType.image.equals("!:");
attributes.addContains(attr.image, key.image, value.image, negated);
}
jj_consume_token(25);
break;
}
default:
jj_la1[27] = jj_gen;
jj_la1[30] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
Expand All @@ -627,7 +676,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[28] = jj_gen;
jj_la1[31] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
Expand All @@ -637,33 +686,33 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
boolean link = false;
NodeAttributes attributes = new NodeAttributes();
NodePattern pat;
jj_consume_token(24);
jj_consume_token(26);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case IDENTIFIER:
case EMPTY:
case ROOT:{
AddAttribute(attributes);
label_6:
label_7:
while (true) {
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 25:{
case 24:{
;
break;
}
default:
jj_la1[29] = jj_gen;
break label_6;
jj_la1[32] = jj_gen;
break label_7;
}
jj_consume_token(25);
jj_consume_token(24);
AddAttribute(attributes);
}
break;
}
default:
jj_la1[30] = jj_gen;
jj_la1[33] = jj_gen;
;
}
jj_consume_token(26);
jj_consume_token(25);
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
case 21:{
jj_consume_token(21);
Expand All @@ -680,7 +729,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
break;
}
default:
jj_la1[31] = jj_gen;
jj_la1[34] = jj_gen;
;
}
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
Expand All @@ -697,13 +746,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
public Token jj_nt;
private int jj_ntk;
private int jj_gen;
final private int[] jj_la1 = new int[32];
final private int[] jj_la1 = new int[35];
static private int[] jj_la1_0;
static {
jj_la1_init_0();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x400,0x1028808,0x3801c,0x3801c,0x1028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x1028800,0x2000,0x102c000,0x4000,0x1028000,0x1020000,0x400400,0x110,0x800008,0x110,0xc00408,0xd0,0x2000000,0xd0,0x200000,};
jj_la1_0 = new int[] {0x400,0x4028808,0x3801c,0x3801c,0x4028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x4028800,0x2000,0x402c000,0x4000,0x4028000,0x4020000,0x400400,0x110,0x400400,0x110,0x1000000,0x400400,0x110,0xc00400,0xd0,0x1000000,0xd0,0x200000,};
}

/** Constructor with InputStream. */
Expand All @@ -717,7 +766,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -731,7 +780,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

/** Constructor. */
Expand All @@ -741,7 +790,7 @@ public SemgrexParser(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -759,7 +808,7 @@ public void ReInit(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

/** Constructor with generated Token Manager. */
Expand All @@ -768,7 +817,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

/** Reinitialise. */
Expand All @@ -777,7 +826,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
}

private Token jj_consume_token(int kind) throws ParseException {
Expand Down Expand Up @@ -833,7 +882,7 @@ public ParseException generateParseException() {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 32; i++) {
for (int i = 0; i < 35; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
Expand Down
18 changes: 15 additions & 3 deletions src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -274,24 +274,36 @@ void AddAttribute(NodeAttributes attributes) : {
Token key = null;
Token value = null;
Token attrType = null;
boolean negated = false;
} {
(attr = <IDENTIFIER>
(( (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) {
if (attr != null && value != null) {
boolean negated = attrType.image.equals("!:");
negated = attrType.image.equals("!:");
attributes.setAttribute(attr.image, value.image, negated);
}
})
|
(attrType = "@" | attrType = "!@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
( ":{"
((key = <IDENTIFIER>) (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>)
{
if (attr == null || key == null || value == null) {
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);
}
boolean negated = attrType.image.equals("!@");
negated = attrType.image.equals("!:");
attributes.addContains(attr.image, key.image, value.image, negated);
})
( ";" (key = <IDENTIFIER>) (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>)
{
if (attr == null || key == null || value == null) {
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
" key=" + key + " value=" + value);
}
negated = attrType.image.equals("!:");
attributes.addContains(attr.image, key.image, value.image, negated);
})*
"}" ))
)
|
( attr = <ROOT> { attributes.setRoot(true); } )
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@ interface SemgrexParserConstants {
"\"~\"",
"\"=\"",
"\"!:\"",
"\"!@\"",
"\"{\"",
"\":{\"",
"\";\"",
"\"}\"",
"\"{\"",
};

}
Loading

0 comments on commit a350aaa

Please sign in to comment.